Going back to TensorFlow 1 (why bother?), let's walk through deep learning from start to finish, using the Iris toy dataset.
First, write the warning suppression and the import statements.
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler  # scale features into the 0.0 ~ 1.0 range
from sklearn.preprocessing import LabelEncoder  # strings to integers (the step before one-hot encoding)
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
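These imports assume the standalone keras package on a TF1-era backend, matching this post. As a hedged aside, on TensorFlow 2.x the same names would come from tf.keras instead (shown commented out, since the post stays on the old API):
# Assumption: TensorFlow 2.x, where the standalone keras imports above may fail.
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense
# from tensorflow.keras.utils import to_categorical  # replaces np_utils.to_categorical
# from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping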
Specify the column names.
names = ['sl', 'sw', 'pl', 'pw','class']
Load the data.
df = pd.read_csv('datasets/iris.data', names=names)
df
To one-hot encode the Y value, class, first convert the strings to integers.
Y_encoded = LabelEncoder().fit_transform(df['class'])
Y_encoded
# Result
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
# 'Iris-setosa'=>0, 'Iris-versicolor'=>1, 'Iris-virginica'=>2
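To double-check which string maps to which integer, you can fit the encoder separately and inspect its classes_ attribute (le is a variable name introduced here just for illustration):
le = LabelEncoder().fit(df['class'])
print(le.classes_)                       # ['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']
print(le.transform(['Iris-virginica']))  # [2]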
Then one-hot encode.
Y = np_utils.to_categorical(Y_encoded)
# 0==>[1,0,0] 1==>[0,1,0] 2==>[0,0,1]
Y
Result
array([[1., 0., 0.],
[1., 0., 0.],
[1., 0., 0.],
[1., 0., 0.],
[1., 0., 0.],
[1., 0., 0.],
[1., 0., 0.],
[1., 0., 0.],
[1., 0., 0.],
[1., 0., 0.],
[1., 0., 0.],
...
To build X from the DataFrame, just drop the Y column.
X = df.drop('class', axis=1)
Let's split off the train and test sets.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=42)
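MinMaxScaler was imported above but never actually applied in this post. If you did want to scale the four features into [0, 1], a minimal sketch would look like this (X_train_s and X_test_s are names introduced for illustration; fit on the training set only so no test-set statistics leak in):
scaler = MinMaxScaler()
X_train_s = scaler.fit_transform(X_train)  # learn min/max from the training set only
X_test_s = scaler.transform(X_test)        # apply the same min/max to the test set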
Then let's create the checkpointer and the early stopper.
check_pointer = ModelCheckpoint(filepath='iris_model.h5', monitor='val_loss',
                                verbose=1, save_best_only=True)  # keep only the weights with the lowest val_loss
early_stopper = EarlyStopping(monitor='val_loss', patience=3)  # stop if val_loss fails to improve for 3 epochs
Design the model.
model = Sequential()
model.add(Dense(256, activation='relu', input_shape=(4,)))
model.add(Dense(128, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(3, activation='softmax'))
model.summary()
Output:
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense_1 (Dense)              (None, 256)               1280
_________________________________________________________________
dense_2 (Dense)              (None, 128)               32896
_________________________________________________________________
dense_3 (Dense)              (None, 32)                4128
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 99
=================================================================
Total params: 38,403
Trainable params: 38,403
Non-trainable params: 0
_________________________________________________________________
Compile. Since this is classification, the loss is categorical_crossentropy, the optimizer is Adam, and the metric is accuracy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
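As an aside (an alternative, not what this post does): if Y had been kept as the integer labels straight from LabelEncoder instead of one-hot vectors, the to_categorical step could be skipped by using the sparse variant of the loss:
# Hypothetical alternative: integer labels + sparse loss, no one-hot step.
# model.compile(loss='sparse_categorical_crossentropy',
#               optimizer='adam', metrics=['accuracy'])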
Train. The validation split is 0.1.
# hist = model.fit(X_train,y_train, epochs=50)
# %%time
hist = model.fit(X_train, y_train, epochs=50, validation_split=0.1,
callbacks=[check_pointer, early_stopper])
Let's visualize. Pull the acc, val_acc, and loss keys out of the training history stored in hist and plot them. (In newer Keras versions these keys are named 'accuracy' and 'val_accuracy' instead.)
print(hist.history.keys())  # the training history
#dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
plt.figure(figsize=(10,10))
plt.subplot(1,2,1)
plt.plot(hist.history['acc'])      # training accuracy
plt.plot(hist.history['val_acc'])  # validation accuracy
plt.legend(['acc', 'val_acc'])
plt.subplot(1,2,2)
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.legend(['loss', 'val_loss'])
plt.show()
Let's score it.
loss_score = model.evaluate(X_test, y_test)
print(loss_score)
# [0.11835551261901855, 1.0]  # [loss, accuracy]
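Because ModelCheckpoint saved the weights with the lowest val_loss to iris_model.h5, you could also reload that best checkpoint and score it instead of the final-epoch weights (an optional sketch, not in the original flow):
from keras.models import load_model
best_model = load_model('iris_model.h5')    # the checkpoint with the lowest val_loss
print(best_model.evaluate(X_test, y_test))  # [loss, accuracy] of the best checkpoint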
pred = model.predict(X_test)
print(pred)
print(y_test)
print(model.predict_classes(X_test))
[[2.38980143e-03 9.17836487e-01 7.97737017e-02]
[9.98902321e-01 1.09326723e-03 4.37249309e-06]
[1.48646291e-06 2.49583391e-03 9.97502744e-01]
[2.64007389e-03 8.94738078e-01 1.02621846e-01]
[1.59821252e-03 9.65849221e-01 3.25526111e-02]
[9.97945249e-01 2.04350031e-03 1.13113438e-05]
[1.38808796e-02 9.77709413e-01 8.40967800e-03]
[4.86715609e-04 4.04083759e-01 5.95429480e-01]
[1.83559582e-03 6.92577600e-01 3.05586785e-01]
[6.00357633e-03 9.82607782e-01 1.13886315e-02]
[7.02619436e-04 4.53046143e-01 5.46251178e-01]
[9.97199178e-01 2.77405675e-03 2.67299711e-05]
[9.99348700e-01 6.47995214e-04 3.29262571e-06]
[9.97150958e-01 2.82460591e-03 2.45001447e-05]
[9.98912334e-01 1.08100858e-03 6.71395264e-06]]
[[0. 1. 0.]
[1. 0. 0.]
[0. 0. 1.]
[0. 1. 0.]
[0. 1. 0.]
[1. 0. 0.]
[0. 1. 0.]
[0. 0. 1.]
[0. 1. 0.]
[0. 1. 0.]
[0. 0. 1.]
[1. 0. 0.]
[1. 0. 0.]
[1. 0. 0.]
[1. 0. 0.]]
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0]
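One caveat: predict_classes works on this Keras version but was removed in later ones; the equivalent is an argmax over the predicted probabilities:
# Same result as predict_classes above, and the only option in newer Keras versions:
print(np.argmax(model.predict(X_test), axis=1))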