from tensorflow.keras.datasets import imdb

(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)


len(train_data)

25000


len(test_data)

25000


min([min(sequence) for sequence in train_data])

1


max([max(sequence) for sequence in train_data])

9999


len(train_data[0])

218


len(train_data[1])

189


train_data[0][:10]

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65]


train_labels[0]

1


test_labels[0]

0


word_index = imdb.get_word_index()


for item in list(word_index.items())[:10]:
    print(item)

('fawn', 34701)
('tsukino', 52006)
('nunnery', 52007)
('sonja', 16816)
('vani', 63951)
('woods', 1408)
('spiders', 16115)
('hanging', 2345)
('woody', 2289)
('trawling', 52008)


# Invert the word -> index mapping so integer indices can be turned back into words.
reverse_word_index = {index: word for word, index in word_index.items()}


for item in list(reverse_word_index.items())[:10]:
    print(item)

(34701, 'fawn')
(52006, 'tsukino')
(52007, 'nunnery')
(16816, 'sonja')
(63951, 'vani')
(1408, 'woods')
(16115, 'spiders')
(2345, 'hanging')
(2289, 'woody')
(52008, 'trawling')


# Turn the first training review back into readable text.
# Indices stored in the dataset are offset by 3 relative to `word_index`
# (Keras' `imdb.load_data` uses `index_from=3` by default), so the offset is
# removed before the lookup; indices with no matching word are shown as "?".
first_review = train_data[0]

decoded_review = " ".join(
    reverse_word_index.get(token - 3, "?") for token in first_review
)

decoded_review

"? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all"


import numpy as np

def vectorize_sequences(sequences, dimension=10000, dtype="float64"):
    """Multi-hot encode integer sequences into a 2-D array.

    Args:
        sequences: Sized iterable of integer-index sequences, one per sample.
        dimension: Width of each output row; every index in a sequence must
            be a valid column position for a row of this width.
        dtype: NumPy dtype of the returned array. The default, ``"float64"``,
            matches what ``np.zeros`` produces when no dtype is given.

    Returns:
        Array of shape ``(len(sequences), dimension)`` in which entry
        ``[i, j]`` is 1 if index ``j`` occurs in ``sequences[i]`` and 0
        otherwise.

    Raises:
        IndexError: If a sequence contains an index outside the row width.
    """
    results = np.zeros((len(sequences), dimension), dtype=dtype)

    for i, sequence in enumerate(sequences):
        # Set every listed column of row i with a single indexed assignment.
        # The explicit integer dtype keeps an empty sequence usable as an index.
        results[i, np.asarray(sequence, dtype=np.intp)] = 1.
    return results


x_train = vectorize_sequences(train_data).astype("float32")
x_test = vectorize_sequences(test_data).astype("float32")


x_train[0]

array([0., 1., 1., ..., 0., 0., 0.], dtype=float32)


y_train = np.asarray(train_labels).astype("float32")
y_test = np.asarray(test_labels).astype("float32")


from tensorflow import keras
from tensorflow.keras import layers

model = keras.Sequential([
    layers.Dense(16, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])


model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])


# Split off the first 10,000 samples as the validation set; the remaining
# samples form the (partial) training set used for fitting.
x_val, partial_x_train = x_train[:10000], x_train[10000:]
y_val, partial_y_train = y_train[:10000], y_train[10000:]


history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(x_val, y_val))

Epoch 1/20
30/30 [==============================] - 1s 23ms/step - loss: 0.5248 - accuracy: 0.7867 - val_loss: 0.4199 - val_accuracy: 0.8421
Epoch 2/20
30/30 [==============================] - 0s 14ms/step - loss: 0.3199 - accuracy: 0.8997 - val_loss: 0.3450 - val_accuracy: 0.8600
Epoch 3/20
30/30 [==============================] - 0s 14ms/step - loss: 0.2284 - accuracy: 0.9260 - val_loss: 0.2778 - val_accuracy: 0.8933
Epoch 4/20
30/30 [==============================] - 0s 14ms/step - loss: 0.1817 - accuracy: 0.9399 - val_loss: 0.2725 - val_accuracy: 0.8897
Epoch 5/20
30/30 [==============================] - 0s 14ms/step - loss: 0.1458 - accuracy: 0.9549 - val_loss: 0.2945 - val_accuracy: 0.8812
Epoch 6/20
30/30 [==============================] - 0s 14ms/step - loss: 0.1191 - accuracy: 0.9623 - val_loss: 0.3541 - val_accuracy: 0.8658
Epoch 7/20
30/30 [==============================] - 0s 14ms/step - loss: 0.0990 - accuracy: 0.9721 - val_loss: 0.3103 - val_accuracy: 0.8824
Epoch 8/20
30/30 [==============================] - 0s 14ms/step - loss: 0.0827 - accuracy: 0.9761 - val_loss: 0.3414 - val_accuracy: 0.8814
Epoch 9/20
30/30 [==============================] - 0s 15ms/step - loss: 0.0668 - accuracy: 0.9815 - val_loss: 0.3452 - val_accuracy: 0.8809
Epoch 10/20
30/30 [==============================] - 0s 14ms/step - loss: 0.0526 - accuracy: 0.9878 - val_loss: 0.3763 - val_accuracy: 0.8783
Epoch 11/20
30/30 [==============================] - 0s 14ms/step - loss: 0.0444 - accuracy: 0.9889 - val_loss: 0.4313 - val_accuracy: 0.8724
Epoch 12/20
30/30 [==============================] - 0s 14ms/step - loss: 0.0344 - accuracy: 0.9925 - val_loss: 0.4352 - val_accuracy: 0.8753
Epoch 13/20
30/30 [==============================] - 0s 14ms/step - loss: 0.0284 - accuracy: 0.9945 - val_loss: 0.4716 - val_accuracy: 0.8709
Epoch 14/20
30/30 [==============================] - 0s 14ms/step - loss: 0.0216 - accuracy: 0.9966 - val_loss: 0.5030 - val_accuracy: 0.8689
Epoch 15/20
30/30 [==============================] - 0s 14ms/step - loss: 0.0164 - accuracy: 0.9976 - val_loss: 0.5267 - val_accuracy: 0.8726
Epoch 16/20
30/30 [==============================] - 0s 14ms/step - loss: 0.0120 - accuracy: 0.9990 - val_loss: 0.5764 - val_accuracy: 0.8641
Epoch 17/20
30/30 [==============================] - 0s 14ms/step - loss: 0.0111 - accuracy: 0.9986 - val_loss: 0.6000 - val_accuracy: 0.8709
Epoch 18/20
30/30 [==============================] - 0s 15ms/step - loss: 0.0084 - accuracy: 0.9988 - val_loss: 0.6261 - val_accuracy: 0.8671
Epoch 19/20
30/30 [==============================] - 0s 14ms/step - loss: 0.0045 - accuracy: 0.9999 - val_loss: 0.6670 - val_accuracy: 0.8657
Epoch 20/20
30/30 [==============================] - 0s 15ms/step - loss: 0.0072 - accuracy: 0.9987 - val_loss: 0.7000 - val_accuracy: 0.8648


history.params

{'verbose': 1, 'epochs': 20, 'steps': 30}


history_dict = history.history

history_dict.keys()

dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])


import matplotlib.pyplot as plt

history_dict = history.history
loss_values = history_dict["loss"]
val_loss_values = history_dict["val_loss"]

epochs = range(1, len(loss_values) + 1)

plt.plot(epochs, loss_values, "bo", label="Training loss")
plt.plot(epochs, val_loss_values, "b", label="Validation loss")

plt.title("Training and validation loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()


plt.clf()    # 이전 이미지 삭제

acc = history_dict["accuracy"]
val_acc = history_dict["val_accuracy"]

plt.plot(epochs, acc, "bo", label="Training acc")
plt.plot(epochs, val_acc, "b", label="Validation acc")

plt.title("Training and validation accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.show()


model = keras.Sequential([
    layers.Dense(16, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(1, activation="sigmoid")
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
model.fit(x_train, y_train, epochs=4, batch_size=512)

Epoch 1/4
49/49 [==============================] - 1s 8ms/step - loss: 0.4426 - accuracy: 0.8233
Epoch 2/4
49/49 [==============================] - 0s 8ms/step - loss: 0.2526 - accuracy: 0.9104
Epoch 3/4
49/49 [==============================] - 0s 9ms/step - loss: 0.1966 - accuracy: 0.9282
Epoch 4/4
49/49 [==============================] - 0s 8ms/step - loss: 0.1641 - accuracy: 0.9397

<keras.callbacks.History at 0x2cf3cb63ac0>


results = model.evaluate(x_test, y_test)
results

782/782 [==============================] - 1s 2ms/step - loss: 0.3139 - accuracy: 0.8771

[0.3139097988605499, 0.8770800232887268]


model.predict(x_test)

array([[0.25440323],
       [0.9999424 ],
       [0.95840394],
       ...,
       [0.17153321],
       [0.10725482],
       [0.6672551 ]], dtype=float32)


model.predict(x_test, batch_size=512)

array([[0.25440323],
       [0.9999424 ],
       [0.95840394],
       ...,
       [0.17153329],
       [0.10725482],
       [0.6672551 ]], dtype=float32)


from tensorflow.keras.datasets import reuters

(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10000)


len(train_data)

8982


len(test_data)

2246


from collections import Counter

target_counter = Counter(train_labels)
target_counter

Counter({3: 3159,
         4: 1949,
         16: 444,
         19: 549,
         8: 139,
         21: 100,
         11: 390,
         1: 432,
         13: 172,
         20: 269,
         18: 66,
         25: 92,
         35: 10,
         9: 101,
         38: 19,
         10: 124,
         28: 48,
         2: 74,
         6: 48,
         12: 49,
         7: 16,
         30: 45,
         34: 50,
         15: 20,
         14: 26,
         32: 32,
         41: 30,
         40: 36,
         45: 18,
         23: 41,
         42: 13,
         26: 24,
         24: 62,
         37: 19,
         27: 15,
         31: 39,
         39: 24,
         0: 55,
         22: 15,
         33: 11,
         36: 49,
         17: 39,
         43: 21,
         29: 19,
         44: 12,
         5: 17})


print(f"최대 기사 수: {max(target_counter.values())}")
print(f"최소 기사 수: {min(target_counter.values())}")

최대 기사 수: 3159
최소 기사 수: 10


train_data[10]

[1,
 245,
 273,
 207,
 156,
 53,
 74,
 160,
 26,
 14,
 46,
 296,
 26,
 39,
 74,
 2979,
 3554,
 14,
 46,
 4689,
 4329,
 86,
 61,
 3499,
 4795,
 14,
 61,
 451,
 4329,
 17,
 12]


len(train_data[10])

31


len(train_data[11])

59


train_labels[10]

3


reuter_topics = {'cocoa': 0,
                 'grain': 1,
                 'veg-oil': 2,
                 'earn': 3,
                 'acq': 4,
                 'wheat': 5,
                 'copper': 6,
                 'housing': 7,
                 'money-supply': 8,
                 'coffee': 9,
                 'sugar': 10,
                 'trade': 11,
                 'reserves': 12,
                 'ship': 13,
                 'cotton': 14,
                 'carcass': 15,
                 'crude': 16,
                 'nat-gas': 17,
                 'cpi': 18,
                 'money-fx': 19,
                 'interest': 20,
                 'gnp': 21,
                 'meal-feed': 22,
                 'alum': 23,
                 'oilseed': 24,
                 'gold': 25,
                 'tin': 26,
                 'strategic-metal': 27,
                 'livestock': 28,
                 'retail': 29,
                 'ipi': 30,
                 'iron-steel': 31,
                 'rubber': 32,
                 'heat': 33,
                 'jobs': 34,
                 'lei': 35,
                 'bop': 36,
                 'zinc': 37,
                 'orange': 38,
                 'pet-chem': 39,
                 'dlr': 40,
                 'gas': 41,
                 'silver': 42,
                 'wpi': 43,
                 'hog': 44,
                 'lead': 45}


# Fetch the word -> index mapping and invert it so indices can be decoded to words.
word_index = reuters.get_word_index()
reverse_word_index = {index: word for word, index in word_index.items()}


decoded_newswire = " ".join([reverse_word_index.get(i - 3, "?") for i in train_data[10]])

decoded_newswire

'? period ended december 31 shr profit 11 cts vs loss 24 cts net profit 224 271 vs loss 511 349 revs 7 258 688 vs 7 200 349 reuter 3'


x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)


def to_one_hot(labels, dimension=46):
    """One-hot encode integer class labels.

    Returns a float array of shape ``(len(labels), dimension)`` in which row
    ``i`` is all zeros except for a 1 in column ``labels[i]``.
    """
    sample_count = len(labels)
    encoded = np.zeros((sample_count, dimension))
    # Pair each row number with its label and set all of those cells at once.
    rows = np.arange(sample_count)
    cols = np.asarray(labels, dtype=np.intp)
    encoded[rows, cols] = 1.
    return encoded


y_train = to_one_hot(train_labels)
y_test = to_one_hot(test_labels)


y_train[0]

array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])


from tensorflow.keras.utils import to_categorical

y_train = to_categorical(train_labels)
y_test = to_categorical(test_labels)


y_train[0]

array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)


model = keras.Sequential([
    layers.Dense(64, activation="relu"),
    layers.Dense(64, activation="relu"),
    layers.Dense(46, activation="softmax")
])


model.compile(optimizer="rmsprop",
              loss="categorical_crossentropy",
              metrics=["accuracy"])


# Split off the first 1,000 samples as the validation set; the remaining
# samples form the (partial) training set used for fitting.
x_val, partial_x_train = x_train[:1000], x_train[1000:]
y_val, partial_y_train = y_train[:1000], y_train[1000:]


history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(x_val, y_val))

Epoch 1/20
16/16 [==============================] - 1s 25ms/step - loss: 2.7101 - accuracy: 0.4583 - val_loss: 1.7988 - val_accuracy: 0.6410
Epoch 2/20
16/16 [==============================] - 0s 12ms/step - loss: 1.4512 - accuracy: 0.7033 - val_loss: 1.3068 - val_accuracy: 0.7140
Epoch 3/20
16/16 [==============================] - 0s 11ms/step - loss: 1.0563 - accuracy: 0.7727 - val_loss: 1.1291 - val_accuracy: 0.7660
Epoch 4/20
16/16 [==============================] - 0s 11ms/step - loss: 0.8374 - accuracy: 0.8246 - val_loss: 1.0463 - val_accuracy: 0.7930
Epoch 5/20
16/16 [==============================] - 0s 10ms/step - loss: 0.6677 - accuracy: 0.8663 - val_loss: 0.9715 - val_accuracy: 0.7960
Epoch 6/20
16/16 [==============================] - 0s 10ms/step - loss: 0.5328 - accuracy: 0.8934 - val_loss: 0.9264 - val_accuracy: 0.8140
Epoch 7/20
16/16 [==============================] - 0s 11ms/step - loss: 0.4300 - accuracy: 0.9142 - val_loss: 0.9193 - val_accuracy: 0.8210
Epoch 8/20
16/16 [==============================] - 0s 10ms/step - loss: 0.3504 - accuracy: 0.9295 - val_loss: 0.8917 - val_accuracy: 0.8200
Epoch 9/20
16/16 [==============================] - 0s 10ms/step - loss: 0.2853 - accuracy: 0.9377 - val_loss: 0.9481 - val_accuracy: 0.8060
Epoch 10/20
16/16 [==============================] - 0s 11ms/step - loss: 0.2434 - accuracy: 0.9451 - val_loss: 0.9156 - val_accuracy: 0.8150
Epoch 11/20
16/16 [==============================] - 0s 10ms/step - loss: 0.2103 - accuracy: 0.9494 - val_loss: 0.9088 - val_accuracy: 0.8200
Epoch 12/20
16/16 [==============================] - 0s 10ms/step - loss: 0.1840 - accuracy: 0.9509 - val_loss: 0.9644 - val_accuracy: 0.8060
Epoch 13/20
16/16 [==============================] - 0s 10ms/step - loss: 0.1670 - accuracy: 0.9531 - val_loss: 0.9432 - val_accuracy: 0.8180
Epoch 14/20
16/16 [==============================] - 0s 10ms/step - loss: 0.1492 - accuracy: 0.9558 - val_loss: 0.9799 - val_accuracy: 0.8070
Epoch 15/20
16/16 [==============================] - 0s 10ms/step - loss: 0.1446 - accuracy: 0.9563 - val_loss: 1.0445 - val_accuracy: 0.7920
Epoch 16/20
16/16 [==============================] - 0s 12ms/step - loss: 0.1335 - accuracy: 0.9565 - val_loss: 0.9940 - val_accuracy: 0.8170
Epoch 17/20
16/16 [==============================] - 0s 9ms/step - loss: 0.1255 - accuracy: 0.9564 - val_loss: 1.0369 - val_accuracy: 0.8080
Epoch 18/20
16/16 [==============================] - 0s 10ms/step - loss: 0.1203 - accuracy: 0.9565 - val_loss: 1.0909 - val_accuracy: 0.8020
Epoch 19/20
16/16 [==============================] - 0s 10ms/step - loss: 0.1176 - accuracy: 0.9574 - val_loss: 1.0424 - val_accuracy: 0.8140
Epoch 20/20
16/16 [==============================] - 0s 10ms/step - loss: 0.1140 - accuracy: 0.9582 - val_loss: 1.0658 - val_accuracy: 0.8130


loss = history.history["loss"]

val_loss = history.history["val_loss"]
epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, "bo", label="Training loss")
plt.plot(epochs, val_loss, "b", label="Validation loss")
plt.title("Training and validation loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()


plt.clf()
acc = history.history["accuracy"]
val_acc = history.history["val_accuracy"]
plt.plot(epochs, acc, "bo", label="Training accuracy")
plt.plot(epochs, val_acc, "b", label="Validation accuracy")
plt.title("Training and validation accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.show()


model = keras.Sequential([
  layers.Dense(64, activation="relu"),
  layers.Dense(64, activation="relu"),
  layers.Dense(46, activation="softmax")
])

model.compile(optimizer="rmsprop",
              loss="categorical_crossentropy",
              metrics=["accuracy"])

model.fit(x_train,
          y_train,
          epochs=9,
          batch_size=512)

Epoch 1/9
18/18 [==============================] - 0s 8ms/step - loss: 2.3815 - accuracy: 0.5605
Epoch 2/9
18/18 [==============================] - 0s 8ms/step - loss: 1.2837 - accuracy: 0.7190
Epoch 3/9
18/18 [==============================] - 0s 7ms/step - loss: 0.9710 - accuracy: 0.7927
Epoch 4/9
18/18 [==============================] - 0s 10ms/step - loss: 0.7652 - accuracy: 0.8370
Epoch 5/9
18/18 [==============================] - 0s 9ms/step - loss: 0.6081 - accuracy: 0.8726
Epoch 6/9
18/18 [==============================] - 0s 8ms/step - loss: 0.4839 - accuracy: 0.8952
Epoch 7/9
18/18 [==============================] - 0s 9ms/step - loss: 0.3859 - accuracy: 0.9188
Epoch 8/9
18/18 [==============================] - 0s 8ms/step - loss: 0.3175 - accuracy: 0.9323
Epoch 9/9
18/18 [==============================] - 0s 8ms/step - loss: 0.2638 - accuracy: 0.9404

<keras.callbacks.History at 0x2cf3b42d550>


results = model.evaluate(x_test, y_test)

results

71/71 [==============================] - 0s 2ms/step - loss: 0.9436 - accuracy: 0.7996

[0.9436188340187073, 0.7996438145637512]


import copy

# Work on a copy so the original labels are left untouched.
test_labels_copy = copy.copy(test_labels)

# Shuffle the copy in place, then flag the positions where the shuffled
# label still equals the label in the original order.
np.random.shuffle(test_labels_copy)
hits_array = np.equal(test_labels, test_labels_copy)

# The flags are booleans, so their mean is the accuracy obtained by
# guessing labels at random.
np.mean(hits_array)

0.1923419412288513


predictions = model.predict(x_test)


predictions[0].shape

(46,)


np.sum(predictions[0])

0.99999994


np.argmax(predictions[0])

3


model = keras.Sequential([
    layers.Dense(64, activation="relu"),
    layers.Dense(4, activation="relu"),
    layers.Dense(46, activation="softmax")
])
model.compile(optimizer="rmsprop",
              loss="categorical_crossentropy",
              metrics=["accuracy"])
model.fit(partial_x_train,
          partial_y_train,
          epochs=20,
          batch_size=128,
          validation_data=(x_val, y_val))

Epoch 1/20
63/63 [==============================] - 1s 6ms/step - loss: 2.7547 - accuracy: 0.3588 - val_loss: 2.0262 - val_accuracy: 0.5630
Epoch 2/20
63/63 [==============================] - 0s 5ms/step - loss: 1.7311 - accuracy: 0.5808 - val_loss: 1.5582 - val_accuracy: 0.5870
Epoch 3/20
63/63 [==============================] - 0s 4ms/step - loss: 1.3904 - accuracy: 0.6163 - val_loss: 1.4050 - val_accuracy: 0.6390
Epoch 4/20
63/63 [==============================] - 0s 5ms/step - loss: 1.1950 - accuracy: 0.6695 - val_loss: 1.3178 - val_accuracy: 0.6670
Epoch 5/20
63/63 [==============================] - 0s 5ms/step - loss: 1.0543 - accuracy: 0.7235 - val_loss: 1.2940 - val_accuracy: 0.6960
Epoch 6/20
63/63 [==============================] - 0s 5ms/step - loss: 0.9497 - accuracy: 0.7512 - val_loss: 1.2820 - val_accuracy: 0.6980
Epoch 7/20
63/63 [==============================] - 0s 5ms/step - loss: 0.8585 - accuracy: 0.7786 - val_loss: 1.2812 - val_accuracy: 0.7120
Epoch 8/20
63/63 [==============================] - 0s 5ms/step - loss: 0.7729 - accuracy: 0.8036 - val_loss: 1.3157 - val_accuracy: 0.7140
Epoch 9/20
63/63 [==============================] - 0s 5ms/step - loss: 0.6988 - accuracy: 0.8227 - val_loss: 1.3189 - val_accuracy: 0.7260
Epoch 10/20
63/63 [==============================] - 0s 5ms/step - loss: 0.6370 - accuracy: 0.8444 - val_loss: 1.3645 - val_accuracy: 0.7300
Epoch 11/20
63/63 [==============================] - 0s 5ms/step - loss: 0.5862 - accuracy: 0.8535 - val_loss: 1.3981 - val_accuracy: 0.7280
Epoch 12/20
63/63 [==============================] - 0s 5ms/step - loss: 0.5424 - accuracy: 0.8596 - val_loss: 1.4963 - val_accuracy: 0.7290
Epoch 13/20
63/63 [==============================] - 0s 5ms/step - loss: 0.5066 - accuracy: 0.8681 - val_loss: 1.5458 - val_accuracy: 0.7180
Epoch 14/20
63/63 [==============================] - 0s 5ms/step - loss: 0.4722 - accuracy: 0.8735 - val_loss: 1.5992 - val_accuracy: 0.7270
Epoch 15/20
63/63 [==============================] - 0s 5ms/step - loss: 0.4450 - accuracy: 0.8807 - val_loss: 1.6689 - val_accuracy: 0.7180
Epoch 16/20
63/63 [==============================] - 0s 5ms/step - loss: 0.4192 - accuracy: 0.8854 - val_loss: 1.7057 - val_accuracy: 0.7210
Epoch 17/20
63/63 [==============================] - 0s 5ms/step - loss: 0.3994 - accuracy: 0.8920 - val_loss: 1.7751 - val_accuracy: 0.7230
Epoch 18/20
63/63 [==============================] - 0s 5ms/step - loss: 0.3830 - accuracy: 0.8948 - val_loss: 1.8543 - val_accuracy: 0.7160
Epoch 19/20
63/63 [==============================] - 0s 5ms/step - loss: 0.3646 - accuracy: 0.8993 - val_loss: 1.8992 - val_accuracy: 0.7220
Epoch 20/20
63/63 [==============================] - 0s 5ms/step - loss: 0.3506 - accuracy: 0.9018 - val_loss: 2.0408 - val_accuracy: 0.7110

<keras.callbacks.History at 0x2cf31967370>


model.evaluate(x_test, y_test)

71/71 [==============================] - 0s 2ms/step - loss: 2.1817 - accuracy: 0.6923

[2.1817328929901123, 0.6923419237136841]


from tensorflow.keras.datasets import boston_housing
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()


train_data.shape

(404, 13)


test_data.shape

(102, 13)


train_targets[:10]

array([15.2, 42.3, 50. , 21.1, 17.7, 18.5, 11.3, 15.6, 15.6, 14.4])


import pandas as pd

# Per-column summary statistics (count, mean, std, min, quartiles, max)
# of the training features.
pd.DataFrame(train_data).describe()


# Per-feature mean of the training set
mean = train_data.mean(axis=0)

# Standardize the training set in place. The standard deviation is computed
# after centering, and both statistics are kept for reuse on the test set.
train_data -= mean
std = train_data.std(axis=0)
train_data /= std

# Standardize the test set in place using the training-set mean and standard
# deviation; no statistics are computed from the test data itself.
test_data -= mean
test_data /= std


def build_model():
    """Create and compile a fresh regression network.

    Two 64-unit ReLU hidden layers feed a single output unit. The model is
    compiled with the RMSprop optimizer, MSE loss, and MAE as the metric.
    """
    network = keras.Sequential()
    for units in (64, 64):
        network.add(layers.Dense(units, activation="relu"))
    # The output layer is given no activation argument.
    network.add(layers.Dense(1))
    network.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
    return network


k = 4
num_val_samples = len(train_data) // k

num_epochs = 500
all_mae_histories = []   # one list of per-epoch validation MAE values per fold

for i in range(k):       # one pass per cross-validation fold

    print(f"{i+1}번 째 폴드(fold) 훈련 시작")

    # Fold i validates on the i-th contiguous slice of the training data ...
    fold_start = i * num_val_samples
    fold_stop = fold_start + num_val_samples
    val_data = train_data[fold_start:fold_stop]
    val_targets = train_targets[fold_start:fold_stop]

    # ... and trains on everything before and after that slice.
    partial_train_data = np.concatenate(
        [train_data[:fold_start], train_data[fold_stop:]], axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:fold_start], train_targets[fold_stop:]], axis=0)

    model = build_model()    # fresh model for every fold
    history = model.fit(partial_train_data, partial_train_targets,
                        validation_data=(val_data, val_targets),
                        epochs=num_epochs, batch_size=16, verbose=0)

    # Keep this fold's validation-MAE curve (one value per epoch).
    mae_history = history.history["val_mae"]
    all_mae_histories.append(mae_history)

1번 째 폴드(fold) 훈련 시작
2번 째 폴드(fold) 훈련 시작
3번 째 폴드(fold) 훈련 시작
4번 째 폴드(fold) 훈련 시작


average_mae_history = [
    np.mean([x[i] for x in all_mae_histories]) for i in range(num_epochs)]


plt.plot(range(1, len(average_mae_history) + 1), average_mae_history)

plt.xlabel("Epochs")
plt.ylabel("Validation MAE")
plt.show()


# Skip the first 10 epochs of the averaged validation-MAE curve.
truncated_mae_history = average_mae_history[10:]

# The remaining values belong to epochs 11 onward, so the x-axis starts at 11
# instead of restarting at 1; this keeps the plotted epoch numbers accurate.
plt.plot(range(11, len(average_mae_history) + 1), truncated_mae_history)
plt.xlabel("Epochs")
plt.ylabel("Validation MAE")
plt.show()


model = build_model()
model.fit(train_data, train_targets,
          epochs=130, batch_size=16, verbose=0)

<keras.callbacks.History at 0x2cf386a5670>


test_mse_score, test_mae_score = model.evaluate(test_data, test_targets)
test_mae_score

4/4 [==============================] - 0s 0s/step - loss: 15.9659 - mae: 2.5950

2.5949742794036865


predictions = model.predict(test_data)
predictions[0]

array([8.091597], dtype=float32)


from sklearn.model_selection import KFold

k = 4
num_epochs = 500

kf = KFold(n_splits=k)
all_mae_histories = []

for train_index, val_index in kf.split(train_data, train_targets):
    
    val_data, val_targets = train_data[val_index], train_targets[val_index]
    partial_train_data, partial_train_targets = train_data[train_index], train_targets[train_index]
    
    model = build_model()
    history = model.fit(partial_train_data, partial_train_targets,
                        validation_data=(val_data, val_targets),
                        epochs=num_epochs, batch_size=16, verbose=0)

    mae_history = history.history["val_mae"]    
    all_mae_histories.append(mae_history)


# NOTE(review): `model` here is the one left over from the last iteration of
# the KFold loop above, i.e. fitted on the final fold's training split for
# `num_epochs` epochs, not a model retrained on the full training set.
# Confirm this is the intended model to evaluate.
# `evaluate` returns the compiled loss (MSE) followed by the MAE metric.
test_mse_score, test_mae_score = model.evaluate(test_data, test_targets)
test_mae_score

4/4 [==============================] - 0s 0s/step - loss: 15.7864 - mae: 2.7254

2.725350856781006

	0	1	2	3	4	5	6	7	8	9	10	11	12
count	404.000000	404.000000	404.000000	404.000000	404.000000	404.000000	404.000000	404.000000	404.000000	404.000000	404.000000	404.000000	404.000000
mean	3.745111	11.480198	11.104431	0.061881	0.557356	6.267082	69.010644	3.740271	9.440594	405.898515	18.475990	354.783168	12.740817
std	9.240734	23.767711	6.811308	0.241238	0.117293	0.709788	27.940665	2.030215	8.698360	166.374543	2.200382	94.111148	7.254545
min	0.006320	0.000000	0.460000	0.000000	0.385000	3.561000	2.900000	1.129600	1.000000	188.000000	12.600000	0.320000	1.730000
25%	0.081437	0.000000	5.130000	0.000000	0.453000	5.874750	45.475000	2.077100	4.000000	279.000000	17.225000	374.672500	6.890000
50%	0.268880	0.000000	9.690000	0.000000	0.538000	6.198500	78.500000	3.142300	5.000000	330.000000	19.100000	391.250000	11.395000
75%	3.674808	12.500000	18.100000	0.000000	0.631000	6.609000	94.100000	5.118000	24.000000	666.000000	20.200000	396.157500	17.092500
max	88.976200	100.000000	27.740000	1.000000	0.871000	8.725000	100.000000	10.710300	24.000000	711.000000	22.000000	396.900000	37.970000

4장 신경망 활용: 분류와 회귀¶

주요 내용¶

4.1 영화 리뷰 분류: 이항 분류¶

IMDB 데이터셋¶

데이터 전처리: 벡터화¶

멀티-핫-인코딩¶

모델 구성¶

모델 훈련 검증¶

`History` 객체 활용¶

과대적합¶

모델 활용¶

연습문제¶

4.2 뉴스 기사 분류: 다중 클래스 분류¶

로이터(Reuters) 데이터셋¶

데이터셋 적재¶

데이터 전처리¶

타깃의 원-핫-인코딩¶

`to_categorical()` 함수¶

모델 생성¶

모델 훈련 및 검증¶

예측하기¶

정수 레이블 사용법¶

은닉층에 사용되는 유닛 개수¶

연습문제¶

4.3 주택가격 예측: 회귀¶

보스턴 주택가격 데이터셋¶

데이터 전처리¶

모델 구현¶

K-겹 교차검증 활용¶

모델 활용¶

연습문제¶

4장 신경망 활용: 분류와 회귀¶

주요 내용¶

4.1 영화 리뷰 분류: 이항 분류¶

IMDB 데이터셋¶

데이터 전처리: 벡터화¶

멀티-핫-인코딩¶

모델 구성¶

모델 훈련 검증¶

History 객체 활용¶

과대적합¶

모델 활용¶

연습문제¶

4.2 뉴스 기사 분류: 다중 클래스 분류¶

로이터(Reuters) 데이터셋¶

데이터셋 적재¶

데이터 전처리¶

타깃의 원-핫-인코딩¶

to_categorical() 함수¶

모델 생성¶

모델 훈련 및 검증¶

예측하기¶

정수 레이블 사용법¶

은닉층에 사용되는 유닛 개수¶

연습문제¶

4.3 주택가격 예측: 회귀¶

보스턴 주택가격 데이터셋¶

데이터 전처리¶

모델 구현¶

K-겹 교차검증 활용¶

모델 활용¶

연습문제¶

`History` 객체 활용¶

`to_categorical()` 함수¶