# 앙상블 모형 (배깅, Bagging)

- Voting 과 Bagging 분류모형 비교
  - Voting: 동일한 훈련 데이터, 다른 알고리즘
  - Bagging: 다른 훈련 데이터, 한가지 알고리즘

![](https://upload.wikimedia.org/wikipedia/commons/c/c8/Ensemble_Bagging.svg)

## 환경설정

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error as MSE

from sklearn.ensemble import BaggingRegressor
from sklearn.tree import DecisionTreeRegressor

## 데이터셋

In [2]:
# 2. 데이터셋
mpg_df = pd.read_csv('data/auto-mpg.csv', index_col='car name')
mpg_df = mpg_df[mpg_df.horsepower != '?']

# 3. 훈련/시험 데이터셋
y = mpg_df[['mpg']]
X = mpg_df.loc[:, 'cylinders':'origin']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 777)
y_train = np.ravel(y_train,order='C') 

## 기계학습 - CV


In [3]:
reg_base = DecisionTreeRegressor(max_depth=4, min_samples_leaf = 0.1, random_state = 777)

reg_bagging = BaggingRegressor(base_estimator = reg_base, n_estimators = 300, oob_score=True, n_jobs=-1)

reg_bagging.fit(X_train, y_train)

BaggingRegressor(base_estimator=DecisionTreeRegressor(max_depth=4,
                                                      min_samples_leaf=0.1,
                                                      random_state=777),
                 n_estimators=300, n_jobs=-1, oob_score=True)

## 예측 성능

In [4]:
y_pred = reg_bagging.predict(X_test)
print('Bagging Regressor: {:.3f}'.format(MSE(y_test, y_pred)))

Bagging Regressor: 14.013
