"""
Title: Text classification with Transformer
Author: [Apoorv Nandan](https://twitter.com/NandanApoorv)
Date created: 2020/05/10
Last modified: 2024/01/18
Description: Implement a Transformer block as a Keras layer and use it for text classification.
Accelerator: GPU
Converted to Keras 3 by: [Sitam Meur](https://github.com/sitamgithub-MSIT)
"""

"""
## Setup
"""

import keras
from keras import ops
from keras import layers


"""
## Implement a Transformer block as a layer
"""


class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        # Multi-head self-attention over the input sequence.
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        # Position-wise feed-forward network.
        self.ffn = keras.Sequential(
            [
                layers.Dense(ff_dim, activation="relu"),
                layers.Dense(embed_dim),
            ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs):
        # Self-attention sub-layer with residual connection and layer normalization.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output)
        out1 = self.layernorm1(inputs + attn_output)
        # Feed-forward sub-layer with residual connection and layer normalization.
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        return self.layernorm2(out1 + ffn_output)
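

"""
The block keeps the shape of its input, so it can be stacked like any other Keras
layer. Below is a minimal, optional shape check; the batch size, sequence length,
and random inputs are illustrative values only.
"""

# Optional sanity check with illustrative sizes: a random batch of 4 sequences,
# each with 10 time steps and 32 features, keeps its shape.
demo_block = TransformerBlock(embed_dim=32, num_heads=2, ff_dim=32)
demo_inputs = keras.random.normal((4, 10, 32))
print(demo_block(demo_inputs).shape)  # (4, 10, 32)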
"""
## Implement embedding layer

This layer uses two separate embedding layers: one for the tokens themselves and one
for the token positions (indices). The two embeddings are summed to form the final
representation of each token.
"""


class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        # One embedding table for token ids, one for positions 0..maxlen-1.
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = ops.shape(x)[-1]
        positions = ops.arange(start=0, stop=maxlen, step=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        # Sum token and position embeddings (positions broadcast over the batch).
        return x + positions
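

"""
As a quick, optional illustration (arbitrary example sizes, not reused later): token
ids of shape `(batch, sequence_length)` come out as embeddings of shape
`(batch, sequence_length, embed_dim)`.
"""

# Optional check with illustrative sizes: 2 sequences of 5 token ids, 32-dim embeddings.
demo_emb = TokenAndPositionEmbedding(maxlen=5, vocab_size=100, embed_dim=32)
demo_ids = ops.ones((2, 5), dtype="int32")
print(demo_emb(demo_ids).shape)  # (2, 5, 32)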


"""
## Download and prepare dataset
"""

vocab_size = 20000  # Only consider the top 20k words
maxlen = 200  # Only consider the first 200 words of each movie review
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=vocab_size)
print(len(x_train), "Training sequences")
print(len(x_val), "Validation sequences")
x_train = keras.utils.pad_sequences(x_train, maxlen=maxlen)
x_val = keras.utils.pad_sequences(x_val, maxlen=maxlen)
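
"""
The reviews are stored as lists of word indices. The snippet below is an optional
sketch for decoding one padded review back to text; it assumes the default
`start_char`, `oov_char`, and `index_from=3` settings of `imdb.load_data`.
"""

# Optional: map word indices back to words for the first training review.
word_index = keras.datasets.imdb.get_word_index()
index_to_word = {index + 3: word for word, index in word_index.items()}
index_to_word.update({0: "<pad>", 1: "<start>", 2: "<unk>"})
print(" ".join(index_to_word.get(int(i), "<unk>") for i in x_train[0]))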

"""
## Create classifier model using transformer layer

The transformer layer outputs one vector for each time step of the input sequence.
Here, we take the mean across all time steps and
use a feed-forward network on top of it to classify the text.
"""


embed_dim = 32  # Embedding size for each token
num_heads = 2  # Number of attention heads
ff_dim = 32  # Hidden layer size in feed forward network inside transformer

inputs = layers.Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(inputs)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(x)
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.1)(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.1)(x)
outputs = layers.Dense(2, activation="softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs)
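
"""
A quick, optional check of the layer output shapes and parameter counts:
"""

model.summary()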

"""
## Train and Evaluate
"""

model.compile(
    optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)
history = model.fit(
    x_train, y_train, batch_size=32, epochs=2, validation_data=(x_val, y_val)
)
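
"""
After training, we can measure accuracy on the held-out data and look at a couple of
predictions; each softmax row is `[P(negative), P(positive)]`. This is a minimal,
optional sketch.
"""

# Optional: evaluate on the validation split and inspect a few predictions.
val_loss, val_acc = model.evaluate(x_val, y_val, verbose=0)
print(f"Validation accuracy: {val_acc:.3f}")
print(model.predict(x_val[:2], verbose=0))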