# Bucket the continuous Age column into four labelled age groups.
pd.cut(df.Age, bins=[0, 2, 17, 65, 99],
       labels=['Toddler/Baby', 'Child', 'Adult', 'Elderly'])
# `bins` holds the cut-off points (bin edges) for the continuous data.
# The key thing here is that the number of labels is always one less than
# the number of bin edges (5 edges -> 4 labels above).
# NOTE: with pd.cut's default right=True the intervals are (0, 2], (2, 17],
# (17, 65] and (65, 99], so an age of exactly 0, or above 99, becomes NaN.
# One-hot encode the body_style and drive_wheels columns; the generated
# indicator columns are prefixed with "body" and "drive" respectively.
# Only the first rows are shown as a preview.
pd.get_dummies(
    obj_df,
    columns=["body_style", "drive_wheels"],
    prefix=["body", "drive"],
).head()

from sklearn.preprocessing import LabelEncoder

# Encode each distinct value of the "make" column as an integer code and
# store the result in a new "make_code" column beside the original.
lb_make = LabelEncoder()
obj_df["make_code"] = lb_make.fit_transform(obj_df["make"])
# Preview the first 11 rows of the original values next to their codes.
obj_df[["make", "make_code"]].head(11)

# Convert body_style to the pandas categorical dtype, then list the column
# dtypes to confirm the conversion.
obj_df["body_style"] = obj_df["body_style"].astype('category')
obj_df.dtypes

import pandas as pd
import numpy as np

# Define the headers since the data does not have any
headers = ["symboling", "normalized_losses", "make", "fuel_type", "aspiration",
           "num_doors", "body_style", "drive_wheels", "engine_location",
           "wheel_base", "length", "width", "height", "curb_weight",
           "engine_type", "num_cylinders", "engine_size", "fuel_system",
           "bore", "stroke", "compression_ratio", "horsepower", "peak_rpm",
           "city_mpg", "highway_mpg", "price"]

# Read in the CSV file and convert "?" to NaN.
# header=None because the file has no header row; `headers` supplies the
# column names instead.
df = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data",
                 header=None, names=headers, na_values="?")
# Preview the first rows to check that the load worked.
df.head()
