Path: blob/master/examples/nlp/text_classification_with_transformer.py
"""1Title: Text classification with Transformer2Author: [Apoorv Nandan](https://twitter.com/NandanApoorv)3Date created: 2020/05/104Last modified: 2024/01/185Description: Implement a Transformer block as a Keras layer and use it for text classification.6Accelerator: GPU7Converted to Keras 3 by: [Sitam Meur](https://github.com/sitamgithub-MSIT)8"""910"""11## Setup12"""1314import keras15from keras import ops16from keras import layers171819"""20## Implement a Transformer block as a layer21"""222324class TransformerBlock(layers.Layer):25def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):26super().__init__()27self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)28self.ffn = keras.Sequential(29[30layers.Dense(ff_dim, activation="relu"),31layers.Dense(embed_dim),32]33)34self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)35self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)36self.dropout1 = layers.Dropout(rate)37self.dropout2 = layers.Dropout(rate)3839def call(self, inputs):40attn_output = self.att(inputs, inputs)41attn_output = self.dropout1(attn_output)42out1 = self.layernorm1(inputs + attn_output)43ffn_output = self.ffn(out1)44ffn_output = self.dropout2(ffn_output)45return self.layernorm2(out1 + ffn_output)464748"""49## Implement embedding layer5051Two separate embedding layers, one for tokens, one for token index (positions).52"""535455class TokenAndPositionEmbedding(layers.Layer):56def __init__(self, maxlen, vocab_size, embed_dim):57super().__init__()58self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)59self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)6061def call(self, x):62maxlen = ops.shape(x)[-1]63positions = ops.arange(start=0, stop=maxlen, step=1)64positions = self.pos_emb(positions)65x = self.token_emb(x)66return x + positions676869"""70## Download and prepare dataset71"""7273vocab_size = 20000 # Only consider the top 20k words74maxlen = 200 # Only consider the first 200 words of each movie review75(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=vocab_size)76print(len(x_train), "Training sequences")77print(len(x_val), "Validation sequences")78x_train = keras.utils.pad_sequences(x_train, maxlen=maxlen)79x_val = keras.utils.pad_sequences(x_val, maxlen=maxlen)8081"""82## Create classifier model using transformer layer8384Transformer layer outputs one vector for each time step of our input sequence.85Here, we take the mean across all time steps and86use a feed forward network on top of it to classify text.87"""888990embed_dim = 32 # Embedding size for each token91num_heads = 2 # Number of attention heads92ff_dim = 32 # Hidden layer size in feed forward network inside transformer9394inputs = layers.Input(shape=(maxlen,))95embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)96x = embedding_layer(inputs)97transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)98x = transformer_block(x)99x = layers.GlobalAveragePooling1D()(x)100x = layers.Dropout(0.1)(x)101x = layers.Dense(20, activation="relu")(x)102x = layers.Dropout(0.1)(x)103outputs = layers.Dense(2, activation="softmax")(x)104105model = keras.Model(inputs=inputs, outputs=outputs)106107108"""109## Train and Evaluate110"""111112model.compile(113optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]114)115history = model.fit(116x_train, y_train, batch_size=32, epochs=2, validation_data=(x_val, y_val)117)118119120