~/aiffel
--- /rock_scissor_paper
------ /dataset
--------- /dataset_200
------------ /rock
------------ /scissor
------------ /paper
--------- /dataset_original
------------ /rock
------------ /scissor
------------ /paper
--------- /test1
------------ /rock
------------ /scissor
------------ /paper
------ [E-01]RockPaperScissor.ipynb
------ [E-01]DataSplit.ipynb
In [1]:
pwd
Out[1]:
'/home/aiffel/aiffel/rock_scissor_paper'
In [2]:
ls
dataset/ '[E-01]DataSplit.ipynb' '[E-01]RockPaperScissor.ipynb'
Cleaning the Data
- Resize and Normalization
In [3]:
# Import the libraries used below
from PIL import Image
import os, glob, shutil
import numpy as np
import matplotlib.pyplot as plt
import random
import tensorflow as tf
from tensorflow import keras
In [4]:
# Fix the NumPy random seed for reproducibility
np.random.seed(555)
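np.random.seed only fixes NumPy's generator. A minimal sketch (not part of the original run) that also seeds Python's random module and TensorFlow, in case full reproducibility is wanted:
In [ ]:
# Seed the other RNGs in play as well
random.seed(555)         # Python's random module (used later for sampling)
tf.random.set_seed(555)  # TensorFlow weight initializers and shuffling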
Resize Images
In [5]:
def resize_img(class_name):
    # Read every jpg file under the directory for the given class (rock/scissor/paper)
    image_dir_path = os.getenv("HOME") + "/aiffel/rock_scissor_paper/dataset/dataset_original/" + class_name
    print("Image directory path: ", image_dir_path)
    images = glob.glob(image_dir_path + "/*.jpg")
    # Resize every file to 28x28 and save it in place
    target_size = (28, 28)
    for img in images:
        old_img = Image.open(img)
        new_img = old_img.resize(target_size, Image.LANCZOS)  # LANCZOS replaces the deprecated ANTIALIAS
        new_img.save(img, "JPEG")
    print(class_name + " images resized!")
In [6]:
# Run once to resize the originals in place (left commented out after the first run):
# class_names = ['rock', 'scissor', 'paper']
# for class_name in class_names:
#     resize_img(class_name)
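A quick sanity check, sketched here as a suggestion rather than taken from the original run, that every image really ended up 28x28:
In [ ]:
# Confirm all resized images have the expected size
for class_name in ['rock', 'scissor', 'paper']:
    dir_path = os.getenv("HOME") + "/aiffel/rock_scissor_paper/dataset/dataset_original/" + class_name
    sizes = {Image.open(f).size for f in glob.glob(dir_path + "/*.jpg")}
    print(class_name, sizes)   # expect {(28, 28)} for every class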
Loading the Data
- Data Load from directory
In [36]:
def load_data(img_path, number_of_data):
    # Labels: scissors = 0, rock = 1, paper = 2
    # number_of_data=300  # mind the total image count across the three classes
    img_size = 28
    color = 3
    # Allocate arrays for the image data and the labels (scissors: 0, rock: 1, paper: 2)
    imgs = np.zeros(number_of_data*img_size*img_size*color, dtype=np.int32).reshape(number_of_data, img_size, img_size, color)
    labels = np.zeros(number_of_data, dtype=np.int32)
    idx = 0
    for file in glob.iglob(img_path+'/scissor/*.jpg'):
        img = np.array(Image.open(file), dtype=np.int32)
        imgs[idx,:,:,:] = img   # copy the image matrix into the data array
        labels[idx] = 0         # scissors: 0
        idx = idx + 1
    for file in glob.iglob(img_path+'/rock/*.jpg'):
        img = np.array(Image.open(file), dtype=np.int32)
        imgs[idx,:,:,:] = img   # copy the image matrix into the data array
        labels[idx] = 1         # rock: 1
        idx = idx + 1
    for file in glob.iglob(img_path+'/paper/*.jpg'):
        img = np.array(Image.open(file), dtype=np.int32)
        imgs[idx,:,:,:] = img   # copy the image matrix into the data array
        labels[idx] = 2         # paper: 2
        idx = idx + 1
    # print("Number of images loaded:", idx)
    return imgs, labels
In [37]:
number_of_train = 600
train_dir_path = os.getenv("HOME") + "/aiffel/rock_scissor_paper/dataset/dataset_200"
(x_train, y_train)=load_data(train_dir_path, number_of_train)
x_train_norm = x_train/255.0   # normalize inputs to the 0~1 range
number_of_test = 300
test_dir_path = os.getenv("HOME") + "/aiffel/rock_scissor_paper/dataset/test1"
(x_test, y_test)=load_data(test_dir_path, number_of_test)
x_test_norm = x_test/255.0     # normalize inputs to the 0~1 range
print("x_train shape: {}".format(x_train.shape))
print("y_train shape: {}".format(y_train.shape))
print("x_test shape: {}".format(x_test.shape))
print("y_test shape: {}".format(y_test.shape))
x_train shape: (600, 28, 28, 3)
y_train shape: (600,)
x_test shape: (300, 28, 28, 3)
y_test shape: (300,)
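Because load_data fills the arrays class by class, it is worth confirming the label counts (a quick sketch, not part of the original run; the expected counts assume balanced splits of 200 and 100 images per class):
In [ ]:
# Sanity check: per-class label counts
print("train label counts:", np.bincount(y_train))   # expected [200 200 200] if balanced
print("test  label counts:", np.bincount(y_test))    # expected [100 100 100] if balanced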
- Check the image dataset
Reference: https://www.tensorflow.org/tutorials/load_data/images?hl=ko
In [40]:
# Show one sample from each class block
plt.subplot(1,3,1)
plt.imshow(x_train[0])
plt.title(y_train[0])
plt.axis("off")
plt.subplot(1,3,2)
plt.imshow(x_train[200])
plt.title(y_train[200])
plt.axis("off")
plt.subplot(1,3,3)
plt.imshow(x_train[400])
plt.title(y_train[400])
plt.axis("off")
Out[40]:
(-0.5, 27.5, 27.5, -0.5)
In [41]:
# Let TensorFlow allocate GPU memory incrementally instead of grabbing it all
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
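The same behavior is also available through the TF 2.x API instead of the environment variable; a sketch:
In [ ]:
# In-code alternative to TF_FORCE_GPU_ALLOW_GROWTH
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)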
In [48]:
### Hyperparameter tuning
n_channel_1=32
n_channel_2=32
n_dense=128
n_train_epoch=10
In [43]:
model=keras.models.Sequential()
model.add(keras.layers.Conv2D(n_channel_1, (3,3), activation='relu', input_shape=(28,28,3)))
model.add(keras.layers.MaxPooling2D((2,2)))
model.add(keras.layers.Conv2D(n_channel_2, (3,3), activation='relu'))
model.add(keras.layers.MaxPooling2D((2,2)))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(n_dense, activation='relu'))
model.add(keras.layers.Dense(3, activation='softmax'))
print('Number of layers added to the model:', len(model.layers))
Number of layers added to the model: 7
In [44]:
model.summary()
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_4 (Conv2D) (None, 26, 26, 32) 896
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 13, 13, 32) 0
_________________________________________________________________
conv2d_5 (Conv2D) (None, 11, 11, 32) 9248
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 5, 5, 32) 0
_________________________________________________________________
flatten_2 (Flatten) (None, 800) 0
_________________________________________________________________
dense_4 (Dense) (None, 128) 102528
_________________________________________________________________
dense_5 (Dense) (None, 3) 387
=================================================================
Total params: 113,059
Trainable params: 113,059
Non-trainable params: 0
_________________________________________________________________
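The parameter counts in the summary follow directly from the layer shapes; a quick check of the arithmetic:
In [ ]:
# Conv2D params = (kernel_h * kernel_w * in_channels + 1) * out_channels
print(3*3*3*32 + 32)      # conv2d_4:  896
print(3*3*32*32 + 32)     # conv2d_5:  9248
# Dense params = (in_features + 1) * out_features, with 5*5*32 = 800 flattened features
print(800*128 + 128)      # dense_4:   102528
print(128*3 + 3)          # dense_5:   387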
Training the Model
In [45]:
# # Match the input shape (not needed here: the data is already (N, 28, 28, 3))
# print("Before Reshape - x_train_norm shape: {}".format(x_train_norm.shape))
# print("Before Reshape - x_test_norm shape: {}".format(x_test_norm.shape))
# x_train_reshaped=x_train_norm.reshape( -1, 28, 28, 3)  # -1 lets reshape infer the sample count
# x_test_reshaped=x_test_norm.reshape( -1, 28, 28, 3)    # -1 lets reshape infer the sample count
# print("After Reshape - x_train_reshaped shape: {}".format(x_train_reshaped.shape))
# print("After Reshape - x_test_reshaped shape: {}".format(x_test_reshaped.shape))
In [52]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# Train the model (on the raw 0~255 images; x_train_norm is computed above but not used here)
history = model.fit(x_train, y_train, epochs = n_train_epoch)
Epoch 1/10
19/19 [==============================] - 2s 127ms/step - loss: 0.5293 - accuracy: 0.7983
Epoch 2/10
19/19 [==============================] - 0s 1ms/step - loss: 0.2757 - accuracy: 0.9033
Epoch 3/10
19/19 [==============================] - 0s 1ms/step - loss: 0.1963 - accuracy: 0.9350
Epoch 4/10
19/19 [==============================] - 0s 1ms/step - loss: 0.1418 - accuracy: 0.9600
Epoch 5/10
19/19 [==============================] - 0s 1ms/step - loss: 0.0892 - accuracy: 0.9683
Epoch 6/10
19/19 [==============================] - 0s 1ms/step - loss: 0.0544 - accuracy: 0.9850
Epoch 7/10
19/19 [==============================] - 0s 1ms/step - loss: 0.0303 - accuracy: 0.9950
Epoch 8/10
19/19 [==============================] - 0s 1ms/step - loss: 0.0162 - accuracy: 0.9983
Epoch 9/10
19/19 [==============================] - 0s 1ms/step - loss: 0.0094 - accuracy: 1.0000
Epoch 10/10
19/19 [==============================] - 0s 1ms/step - loss: 0.0050 - accuracy: 1.0000
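fit returns a History object whose history dict holds the per-epoch metrics; a sketch for plotting the curves logged above:
In [ ]:
# Visualize the training curves recorded during fit
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['accuracy'], label='accuracy')
plt.xlabel('epoch')
plt.legend()
plt.show()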
In [53]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(x_test,y_test, verbose=2)
print("test_loss: {} ".format(test_loss))
print("test_accuracy: {}".format(test_accuracy))
10/10 - 0s - loss: 1.5119 - accuracy: 0.5333
test_loss: 1.5118647813796997
test_accuracy: 0.5333333611488342
Predict
In [54]:
predicted_result = model.predict(x_test)   # probability distribution predicted by the model
predicted_labels = np.argmax(predicted_result, axis=1)
idx=0   # inspect the first test sample
print('model.predict() result : ', predicted_result[idx])
print('Most likely class predicted by the model : ', predicted_labels[idx])
print('Actual label : ', y_test[idx])
model.predict() result :  [0.02189971 0.04297271 0.9351276 ]
Most likely class predicted by the model :  2
Actual label :  0
In [55]:
plt.imshow(x_test[idx],cmap=plt.cm.binary)
plt.show()
In [56]:
wrong_predict_list=[]
for i, _ in enumerate(predicted_labels):
    # collect the indices where predicted_labels and y_test disagree
    if predicted_labels[i] != y_test[i]:
        wrong_predict_list.append(i)
# Draw 10 samples at random (with replacement) from wrong_predict_list.
samples = random.choices(population=wrong_predict_list, k=10)
for n in samples:
    print("Predicted probability distribution: " + str(predicted_result[n]))
    print("Label: " + str(y_test[n]) + ", prediction: " + str(predicted_labels[n]))
    plt.imshow(x_test[n], cmap=plt.cm.binary)
    plt.show()
Predicted probability distribution: [0.02071045 0.0293347 0.94995487]
Label: 0, prediction: 2
Predicted probability distribution: [6.2125881e-05 7.2409026e-03 9.9269694e-01]
Label: 1, prediction: 2
Predicted probability distribution: [0.43750122 0.31025803 0.2522408 ]
Label: 2, prediction: 0
Predicted probability distribution: [0.00256331 0.012543 0.9848936 ]
Label: 1, prediction: 2
Predicted probability distribution: [0.00592441 0.0120336 0.982042 ]
Label: 1, prediction: 2
Predicted probability distribution: [8.8905574e-05 2.0427233e-02 9.7948384e-01]
Label: 1, prediction: 2
Predicted probability distribution: [0.01515631 0.09829168 0.886552 ]
Label: 0, prediction: 2
Predicted probability distribution: [0.45459017 0.38005242 0.16535743]
Label: 2, prediction: 0
Predicted probability distribution: [0.01920319 0.04508501 0.93571186]
Label: 0, prediction: 2
Predicted probability distribution: [0.22563997 0.00113557 0.7732244 ]
Label: 0, prediction: 2
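Rather than eyeballing random samples, a confusion matrix summarizes every error at once; a sketch using the predicted_labels computed above:
In [ ]:
# Rows are true labels (scissors/rock/paper), columns are predictions
print(tf.math.confusion_matrix(y_test, predicted_labels, num_classes=3))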