Code Along
# Fit each workflow across the cross-validation folds, scoring with accuracy
log_res <- fit_resamples(
  log_wf,
  resamples = pokemon_folds,
  metrics = metric_set(accuracy)
)

boost_res <- fit_resamples(
  boost_wf,
  resamples = pokemon_folds,
  metrics = metric_set(accuracy)
)

# Stack the resampled metrics from both models into one comparison table
bind_rows(
  collect_metrics(log_res) %>% mutate(model = "Logistic Regression"),
  collect_metrics(boost_res) %>% mutate(model = "XGBoost")
) %>%
  select(model, mean, std_err)
In Python, we will build a pipeline for a logistic regression model and another for a gradient boosting classifier.
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier

# Impute missing values with the median, then fit a logistic regression
lr_pipeline = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('model', LogisticRegression())
    ])

# Same imputation step, followed by a gradient boosting classifier
boost_pipeline = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('model', GradientBoostingClassifier(n_estimators=1000, learning_rate=0.05))
    ])
from sklearn.model_selection import StratifiedKFold, cross_val_score

# Stratified folds preserve the class balance within each split
random_seed = 22790
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_seed)

# Score each pipeline with accuracy; we could also score with 'f1', the
# harmonic mean of sensitivity (recall) and precision
lr_scores = cross_val_score(lr_pipeline, X, y, cv=skf, scoring='accuracy')
boost_scores = cross_val_score(boost_pipeline, X, y, cv=skf, scoring='accuracy')
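cross_val_score returns one accuracy per fold, so to compare the two models the way collect_metrics() did in R, we can summarize each array's mean and standard error. Here is a minimal sketch, assuming the lr_scores and boost_scores arrays from the step above; the printed layout is just one way to present it.

import numpy as np

# Summarize the per-fold accuracies: mean and standard error of the mean,
# mirroring the mean and std_err columns from collect_metrics() in R
for name, scores in [("Logistic Regression", lr_scores),
                     ("Gradient Boosting", boost_scores)]:
    std_err = scores.std(ddof=1) / np.sqrt(len(scores))
    print(f"{name}: mean = {scores.mean():.3f}, std_err = {std_err:.3f}")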