# One-hot encoding a categorical column with pandas.
import pandas as pd

# Basic syntax (template only -- substitute your own dataframe and column):
#   df_onehot = pd.get_dummies(df, columns=['col_name'], prefix=['one_hot'])
# Where:
# - get_dummies creates one indicator column for each unique categorical
#   value in the column named col_name; the original column is replaced
#   by these indicator columns in the returned dataframe
# - The prefix is added at the beginning of each categorical value
#   (joined with '_') to create the names of the one-hot columns

# Example usage:
# Build example dataframe:
df = pd.DataFrame(['sunny', 'rainy', 'cloudy'], columns=['weather'])
print(df)
#   weather
# 0   sunny
# 1   rainy
# 2  cloudy

# Convert categorical weather variable to one-hot encoding.
# dtype=int is passed explicitly: pandas 2.0+ produces boolean
# (True/False) indicator columns by default, while the output shown
# below uses 0/1.
df_onehot = pd.get_dummies(
    df, columns=['weather'], prefix=['one_hot'], dtype=int
)
print(df_onehot)
#    one_hot_cloudy  one_hot_rainy  one_hot_sunny
# 0               0              0              1
# 1               0              1              0
# 2               1              0              0
# One-hot encoding a list of string labels with scikit-learn.
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# define example
data = ['cold', 'cold', 'warm', 'cold', 'hot',
        'hot', 'warm', 'cold', 'warm', 'hot']
values = np.array(data)

# first apply label encoding: every distinct string is mapped to an
# integer code (classes are sorted, so cold -> 0, hot -> 1, warm -> 2)
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(values)

# now we can apply one hot encoding
# OneHotEncoder expects a 2-D array of shape (n_samples, n_features), so
# the 1-D vector of integer codes is reshaped into a single column.
onehot_encoder = OneHotEncoder()
integer_encoded = integer_encoded.reshape(-1, 1)
# The encoder returns a SciPy sparse matrix by default; .toarray() turns
# it into a dense NumPy array for printing. This avoids the `sparse=False`
# keyword, which was renamed to `sparse_output` in newer scikit-learn
# releases and no longer works there.
onehot_encoded = onehot_encoder.fit_transform(integer_encoded).toarray()
print(onehot_encoded)