# Mount Google Drive inside the Colab runtime; the TFRecord paths used in this
# notebook all live under the '/gdrive' mount point.
from google.colab import drive

drive.mount(mountpoint='/gdrive')
# Output: Mounted at /gdrive
# Token-level training records: each serialized Example is parsed into a
# variable-length int64 'input' sequence and a scalar int64 'label'.
filenames = ['/gdrive/My Drive/kogpt2/x_train_token.tfrecord']
raw_dataset = tf.data.TFRecordDataset(filenames)

feature_description = {
    # allow_missing=True lets 'input' have a different length per record.
    'input': tf.io.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
    'label': tf.io.FixedLenFeature([], tf.int64),
}


def _parse_token_example(serialized):
    """Decode one serialized record according to ``feature_description``."""
    return tf.io.parse_single_example(serialized, feature_description)


def _label_of(example):
    """Return only the 'label' entry of a parsed example dict."""
    return example['label']


parsed_dataset = raw_dataset.map(_parse_token_example)
# Only the labels are extracted here; 'input' stays available in parsed_dataset.
label_dataset = parsed_dataset.map(_label_of)
# Precomputed embeddings: every record is a serialized float32 tensor whose
# shape is asserted to be (768,).
# NOTE: this rebinds `filenames`, which previously pointed at the token file.
filenames = ['/gdrive/My Drive/kogpt2/x_train_embed.tfrecord']
emb_raw_dataset = tf.data.TFRecordDataset(filenames)


def _decode_embedding(serialized):
    """Deserialize one record to float32 and pin its static shape to (768,)."""
    vector = tf.io.parse_tensor(serialized, tf.float32)
    return tf.ensure_shape(vector, (768,))


emb_dataset = emb_raw_dataset.map(_decode_embedding)
# Small feed-forward classifier: a 32-unit ReLU hidden layer followed by a
# 3-way softmax output.
classifier = tf.keras.Sequential()
classifier.add(tf.keras.layers.Dense(32, activation='relu'))
classifier.add(tf.keras.layers.Dense(3, activation='softmax'))

# The sparse variant of the loss takes integer class ids rather than one-hot
# vectors as targets.
classifier.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Pair every embedding with its label. zip() matches elements purely by
# position, so this assumes both TFRecord files were written in the same
# example order -- TODO confirm against the code that produced them.
paired_dataset = tf.data.Dataset.zip((emb_dataset, label_dataset))

# File-ordered batches of 32, kept under the original name so anything that
# relies on `dataset` preserving record order still sees the same thing.
dataset = paired_dataset.batch(32)

# Keras ignores fit()'s `shuffle` argument for tf.data inputs, so without an
# explicit shuffle every epoch would replay identical batches in file order.
# shuffle() reshuffles on each epoch by default. The buffer is meant to exceed
# the dataset size (the logged run shows 296 batches of 32) so the shuffle is
# uniform; raise it if the dataset grows beyond this.
SHUFFLE_BUFFER_SIZE = 10000
train_dataset = paired_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(32)

classifier.fit(train_dataset, epochs=5)
# Output:
# Epoch 1/5
# 296/296 [==============================] - 1s 4ms/step - loss: 1.0233 - accuracy: 0.4810
# Epoch 2/5
# 296/296 [==============================] - 1s 3ms/step - loss: 0.8996 - accuracy: 0.6231
# Epoch 3/5
# 296/296 [==============================] - 1s 3ms/step - loss: 0.8081 - accuracy: 0.6720
# Epoch 4/5
# 296/296 [==============================] - 1s 4ms/step - loss: 0.7369 - accuracy: 0.7085
# Epoch 5/5
# 296/296 [==============================] - 1s 3ms/step - loss: 0.6761 - accuracy: 0.7349
# <tensorflow.python.keras.callbacks.History at 0x7fd461d4f898>