我有以下代码，其中我进行了网格搜索，以寻找不同的 mtry 和 min_n 取值。我知道如何提取给出最高准确性的参数（请参见第二个代码框）。那么，如何从训练数据集中提取每个特征的重要性呢？我在网上找到的指南仅展示了如何使用“last_fit”在测试数据集中执行此操作。例如，指南链接：https://www.tidymodels.org/start/case-study/#data-split 。
# Reproducible 80/20 train/test split, stratified on the outcome `Group`.
set.seed(seed_number)
data_split <- initial_split(node_strength, prop = 0.8, strata = Group)
train <- training(data_split)
test <- testing(data_split)
# 10-fold cross-validation on the training set only. The folds are stratified
# on `Group` as well, so each fold keeps roughly the class balance that the
# stratified initial split preserved.
train_folds <- vfold_cv(train, v = 10, strata = Group)
# Random-forest classifier specification: 1500 trees, with `mtry` and `min_n`
# left as placeholders to be filled in by tuning.
rfc_spec <- rand_forest(
  mode = "classification",
  trees = 1500,
  mtry = tune(),
  min_n = tune()
)
# Fit with the ranger engine on 48 threads; `importance = "impurity"` asks
# ranger to record impurity-based variable importance during fitting.
rfc <- set_engine(
  rfc_spec,
  "ranger",
  num.threads = 48,
  importance = "impurity"
)
# Recipe with no preprocessing steps: `Group` is the outcome and every other
# column of the training data is a predictor.
rfc_recipe <- recipe(Group ~ ., data = train)

# Bundle the model specification and the recipe into a single workflow.
rfc_workflow <- workflow()
rfc_workflow <- add_model(rfc_workflow, rfc)
rfc_workflow <- add_recipe(rfc_workflow, rfc_recipe)
# Tune `mtry` and `min_n` over 40 automatically generated candidate
# combinations, scoring each one by accuracy on the cross-validation folds.
# Out-of-sample predictions are kept for later inspection.
rfc_result <- tune_grid(
  rfc_workflow,
  resamples = train_folds,
  grid = 40,
  metrics = metric_set(accuracy),
  control = control_grid(save_pred = TRUE)
)
.
# Hyperparameter combination with the highest cross-validated accuracy.
best <- select_best(rfc_result, metric = "accuracy")