# Code Along
# /module-2
# Loading, setting up
# Split data
# Engineer features
# Specify recipe, model, and workflow
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
# Load and preprocess the data.
starwars = pd.read_csv('path_to_starwars.csv')  # Load your data file

# Binary target: 'Human' vs. everything else.
# NOTE(review): a missing species compares unequal to 'Human' and is therefore
# labelled 'Not human' (same as the original row-wise lambda) -- confirm that
# unknown species should be counted as non-human rather than dropped.
starwars['species_human'] = (
    starwars['species'].eq('Human').map({True: 'Human', False: 'Not human'})
)

# LogisticRegression raises ValueError on NaN input, so keep only the rows
# where both predictors are present. This is a no-op on complete data.
feature_cols = ['height', 'mass']
target_col = 'species_human'
model_data = starwars.dropna(subset=feature_cols)

# Split data: 80% train / 20% test, with a fixed seed for reproducibility.
train, test = train_test_split(model_data, test_size=0.2, random_state=42)
X_train, y_train = train[feature_cols], train[target_col]
X_test, y_test = test[feature_cols], test[target_col]

# Specify model and fit
clf = LogisticRegression()
clf.fit(X_train, y_train)

# Evaluate on the held-out test set; the report prints per-class precision,
# recall and F1 alongside overall accuracy.
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))