1from sklearn.compose import make_column_transformer
2from sklearn.linear_model import LinearRegression
3from sklearn.pipeline import make_pipeline
4from sklearn.model_selection import cross_val_score
5
# Model inputs: this analysis deliberately uses only a small subset of the
# available columns.
feature_cols = [
    'fuel_type',
    'make',
    'aspiration',
    'highway_mpg',
    'city_mpg',
    'curb_weight',
    'drive_wheels',
]

# Keep only the rows that have a price; rows where it is missing are dropped.
# NOTE(review): only 'price' is checked here -- missing values in the feature
# columns, if any, are left in place.
df_ml = df.dropna(axis=0, subset=['price'])

# y is the price column; X is the selected feature columns of the same rows.
y = df_ml['price']
X = df_ml[feature_cols]
18