
Machine Learning on August 24

by ju_nope 2021. 1. 25.

Back in TensorFlow 1 (why, though?), let's walk through deep learning from start to finish, using the Iris toy dataset.

First, write the error suppression and the import statements.

import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler  # scale to the 0.0 ~ 1.0 range
from sklearn.preprocessing import LabelEncoder  # strings to integers (the step before one hot encoding)

from keras.models import Sequential
from keras.layers import Dense

from keras.utils import np_utils

from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
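
These are standalone-Keras-era imports. On TensorFlow 2 the same pieces live under tensorflow.keras (np_utils in particular is gone); a sketch of the modern spelling, in case you are not on TF1:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical  # replaces keras.utils.np_utils
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping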

Specify the column names

names = ['sl', 'sw', 'pl', 'pw','class']

Load the data

df = pd.read_csv('datasets/iris.data', names=names)
df

To One Hot Encode the Y value, class, we first convert the strings to integers.

Y_encoded = LabelEncoder().fit_transform(df['class'])
Y_encoded

# Result
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
# 'Iris-setosa'=>0, 'Iris-versicolor'=>1, 'Iris-virginica'=>2
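
That index-to-label mapping can be read straight off the encoder instead of being guessed; a minimal check, fitting the encoder separately:

encoder = LabelEncoder().fit(df['class'])
print(encoder.classes_)  # ['Iris-setosa' 'Iris-versicolor' 'Iris-virginica'] -- position == encoded value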

Then apply One Hot Encoding.

Y = np_utils.to_categorical(Y_encoded)
# 0==>[1,0,0] 1==>[0,1,0] 2==>[0,0,1]
Y

Result
array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       ...
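
to_categorical is just an identity-matrix lookup, so the same result can be built with plain NumPy; a quick sketch using the already-imported np:

Y_manual = np.eye(3)[Y_encoded]  # row i of the 3x3 identity is the one-hot vector for class i
assert (Y_manual == Y).all()     # identical to the to_categorical output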

To build X from the DataFrame, just drop the Y column.

X = df.drop('class', axis=1)
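
MinMaxScaler was imported at the top but never used below; if you wanted the 0.0 ~ 1.0 scaling mentioned in that import comment, a minimal sketch would be:

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)  # each of the four feature columns rescaled to [0, 1]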

Now let's create the Train and Test sets.

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=42)
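
With 150 rows and test_size=0.1 this leaves 15 test rows, drawn purely at random. If you wanted each of the three classes equally represented in the test set, train_test_split also accepts a stratify argument (not used in the original):

X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.1, random_state=42, stratify=Y_encoded)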

Next, create the checkpointer and early stopper.

check_pointer = ModelCheckpoint(filepath='iris_model.h5', monitor='val_loss',
                                verbose=1, save_best_only=True)
early_stopper = EarlyStopping(monitor='val_loss', patience=3)
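
Because save_best_only=True, iris_model.h5 always holds the weights from the epoch with the lowest val_loss so far. After training, that best checkpoint can be reloaded (a sketch):

from keras.models import load_model
best_model = load_model('iris_model.h5')  # the model state at the lowest val_loss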

Model design

model = Sequential()
model.add(Dense(256, activation='relu', input_shape=(4,)))
model.add(Dense(128, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(3, activation='softmax'))
model.summary()

Output
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_1 (Dense)              (None, 256)               1280      
_________________________________________________________________
dense_2 (Dense)              (None, 128)               32896     
_________________________________________________________________
dense_3 (Dense)              (None, 32)                4128      
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 99        
=================================================================
Total params: 38,403
Trainable params: 38,403
Non-trainable params: 0
_________________________________________________________________
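
The Param # column is easy to verify by hand: a Dense layer with n_in inputs and n_out units has n_in * n_out weights plus n_out biases.

4*256 + 256   = 1,280
256*128 + 128 = 32,896
128*32 + 32   = 4,128
32*3 + 3      = 99
Total         = 38,403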

Compile. Since this is classification, the loss is categorical_crossentropy, the optimizer is Adam, and the metric is accuracy.

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

Training. The validation split is 0.1.

# hist = model.fit(X_train, y_train, epochs=50)

# %%time
hist = model.fit(X_train, y_train, epochs=50, validation_split=0.1,
                 callbacks=[check_pointer, early_stopper])

Let's visualize. Pull the acc, val_acc, and loss keys from the training history stored in hist and plot them.

print(hist.history.keys())  # the training history
#dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])

plt.figure(figsize=(10,10))

plt.subplot(1,2,1)
plt.plot(hist.history['acc'])      # training accuracy
plt.plot(hist.history['val_acc'])  # validation accuracy
plt.legend(['acc', 'val_acc'])

plt.subplot(1,2,2)
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.legend(['loss', 'val_loss'])

plt.show()
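
One version note: the 'acc'/'val_acc' key names above are from standalone Keras; under tf.keras 2.x the history keys are 'accuracy' and 'val_accuracy', so the plotting code would need those names instead.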

Now let's do the scoring.

loss_score = model.evaluate(X_test, y_test)

print(loss_score)
# [0.11835551261901855, 1.0] -> [loss, accuracy]

pred = model.predict(X_test)
print(pred)
print(y_test)
print(model.predict_classes(X_test))

[[2.38980143e-03 9.17836487e-01 7.97737017e-02]
 [9.98902321e-01 1.09326723e-03 4.37249309e-06]
 [1.48646291e-06 2.49583391e-03 9.97502744e-01]
 [2.64007389e-03 8.94738078e-01 1.02621846e-01]
 [1.59821252e-03 9.65849221e-01 3.25526111e-02]
 [9.97945249e-01 2.04350031e-03 1.13113438e-05]
 [1.38808796e-02 9.77709413e-01 8.40967800e-03]
 [4.86715609e-04 4.04083759e-01 5.95429480e-01]
 [1.83559582e-03 6.92577600e-01 3.05586785e-01]
 [6.00357633e-03 9.82607782e-01 1.13886315e-02]
 [7.02619436e-04 4.53046143e-01 5.46251178e-01]
 [9.97199178e-01 2.77405675e-03 2.67299711e-05]
 [9.99348700e-01 6.47995214e-04 3.29262571e-06]
 [9.97150958e-01 2.82460591e-03 2.45001447e-05]
 [9.98912334e-01 1.08100858e-03 6.71395264e-06]]
[[0. 1. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]]
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0]
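
predict_classes exists only on Sequential models in older Keras and was removed from tf.keras later on; the portable equivalent is an argmax over the softmax probabilities. A sketch that also recomputes the test accuracy:

pred_labels = np.argmax(model.predict(X_test), axis=1)  # index of the largest softmax output per row
true_labels = np.argmax(y_test, axis=1)                 # undo the one-hot encoding
print((pred_labels == true_labels).mean())              # 1.0 here, matching evaluate()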

RNN

A quick summary of deep learning algorithms by data type.

