# Hyper Parameter 튜닝


참고: [하이퍼파라미터 튜닝, emseoyk.log](https://velog.io/@emseoyk/%ED%95%98%EC%9D%B4%ED%8D%BC%ED%8C%8C%EB%9D%BC%EB%AF%B8%ED%84%B0-%ED%8A%9C%EB%8B%9D)

## 환경설정

In [1]:
import pandas as pd
import numpy as np

from sklearn import preprocessing # 전처리

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error as MSE

from sklearn.tree import DecisionTreeRegressor

## 데이터셋

In [2]:
# 2. 데이터셋
mpg_df = pd.read_csv('data/auto-mpg.csv', index_col='car name')
mpg_df = mpg_df[mpg_df.horsepower != '?']

# 3. 훈련/시험 데이터셋
y = mpg_df[['mpg']]
X = mpg_df.loc[:, 'cylinders':'origin']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 777)
y_train = np.ravel(y_train,order='C') 

## 기계학습

### 1. Hyper Parameters for Decision Tree

In [3]:
reg_dt = DecisionTreeRegressor(random_state = 777)

dt_hp = reg_dt.get_params()

pd.DataFrame.from_dict(dt_hp, orient='index', columns = ['초기값'] )

Unnamed: 0,초기값
ccp_alpha,0.0
criterion,squared_error
max_depth,
max_features,
max_leaf_nodes,
min_impurity_decrease,0.0
min_samples_leaf,1
min_samples_split,2
min_weight_fraction_leaf,0.0
random_state,777


###  2. 탐색

In [4]:
from sklearn.model_selection import GridSearchCV

params_dt = {'max_depth': [2, 3, 4, 5, 6],
             'min_samples_leaf': [0.02, 0.04, 0.06, 0.08, 0.10],
             'max_features': [0.1, 0.2, 0.4, 0.6, 0.8]
            }

grid_dt = GridSearchCV(estimator  = reg_dt,
                       param_grid = params_dt,
                       scoring    = 'neg_mean_squared_error',
                       cv         = 10,
                       n_jobs     = -1)


grid_dt.fit(X_train, y_train)

GridSearchCV(cv=10, estimator=DecisionTreeRegressor(random_state=777),
             n_jobs=-1,
             param_grid={'max_depth': [2, 3, 4, 5, 6],
                         'max_features': [0.1, 0.2, 0.4, 0.6, 0.8],
                         'min_samples_leaf': [0.02, 0.04, 0.06, 0.08, 0.1]},
             scoring='neg_mean_squared_error')

### 3. Hyper Parameters

In [5]:
## 초모수
best_hyperparams = grid_dt.best_params_
print('Hyerparameters:\n', best_hyperparams)
## 성능
best_MSE_score = grid_dt.best_score_
print('HP Classifier: {:.3f}'.format(best_MSE_score))

Hyerparameters:
 {'max_depth': 5, 'max_features': 0.4, 'min_samples_leaf': 0.02}
HP Classifier: -11.904


###  4. 최적 모형

In [6]:
# 최적모형 추출
top_model = grid_dt.best_estimator_

y_pred = top_model.predict(X_test)

print('Decision Tree Regression - Tuning: {:.3f}'.format(MSE(y_test, y_pred)))

Decision Tree Regression - Tuning: 12.166
