transformer_encoder.py
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import (
Input, Dense, Dropout, Add, LayerNormalization, MultiHeadAttention,
GlobalAveragePooling1D, TimeDistributed
)
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import register_keras_serializable
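

# Transformer encoder (model v23) for NFL player-trajectory regression:
# two per-token feature streams are embedded and fused, encoded by a stack
# of Transformer blocks, pooled, and regressed to 60 outputs (30 (x, y)
# coordinate pairs, as assumed by masked_rmse_loss_v2 below).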
@register_keras_serializable(package='transformer_encoder', name='TransformerBlock')
class TransformerBlock(layers.Layer):
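    """Post-LN Transformer encoder block.

    Multi-head self-attention followed by a position-wise two-layer
    feed-forward network; each sub-layer is wrapped with dropout, a
    residual connection, and LayerNormalization.
    """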
def __init__(self, embed_dim=128, num_heads=4, ff_dim=512, dropout_rate=0.1, **kwargs):
super().__init__(**kwargs)
self.embed_dim = embed_dim
self.num_heads = num_heads
self.ff_dim = ff_dim
self.dropout_rate = dropout_rate
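        # Sub-layers: multi-head self-attention and the feed-forward network.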
self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim, dropout=dropout_rate)
self.ffn = keras.Sequential([
Dense(ff_dim, activation="relu"),
Dense(embed_dim),
])
self.layernorm1 = LayerNormalization(epsilon=1e-6)
self.layernorm2 = LayerNormalization(epsilon=1e-6)
self.dropout1 = Dropout(dropout_rate)
self.dropout2 = Dropout(dropout_rate)
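
    # Forward pass: attention sub-layer, then feed-forward sub-layer, each
    # followed by dropout, a residual add, and layer normalization.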
def call(self, inputs, training=False):
attn_output = self.att(inputs, inputs, training=training)
attn_output = self.dropout1(attn_output, training=training)
out1 = self.layernorm1(inputs + attn_output)
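        # Feed-forward sub-layer with its own residual connection and norm.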
ffn_output = self.ffn(out1)
ffn_output = self.dropout2(ffn_output, training=training)
out2 = self.layernorm2(out1 + ffn_output)
return out2
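
    # Serialize constructor arguments so models containing this layer can be
    # saved and reloaded (the layer is registered as Keras-serializable).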
def get_config(self):
config = super().get_config()
config.update({
"embed_dim": self.embed_dim,
"num_heads": self.num_heads,
"ff_dim": self.ff_dim,
"dropout_rate": self.dropout_rate,
})
return config
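

# Functional builder: embed the player and static feature streams to a shared
# width, fuse them by addition, run num_transformer_layers encoder blocks,
# mean-pool over the 22 tokens (presumably one per player on the field), and
# regress to output_shape[0] flat coordinates.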
def build_model_v23_transformer(
input_shape_players=(22, 12),
input_shape_static=(22, 10),
output_shape=(60,),
embed_dim=128,
num_heads=4,
num_transformer_layers=4,
ff_dim=512,
dropout_rate=0.1
):
input_players = Input(shape=input_shape_players, name='input_players')
input_static = Input(shape=input_shape_static, name='input_static')
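    # Project both streams to embed_dim so they can be fused by addition.
    # (Dense already applies per token; TimeDistributed just makes it explicit.)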
x_players = TimeDistributed(Dense(embed_dim, activation='relu', name='embed_players'))(input_players)
x_static = TimeDistributed(Dense(embed_dim, activation='relu', name='embed_static'))(input_static)
x = Add(name='fuse_features')([x_players, x_static])
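    # Stack of Transformer encoder blocks over the fused token sequence.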
for i in range(num_transformer_layers):
x = TransformerBlock(
embed_dim=embed_dim,
num_heads=num_heads,
ff_dim=ff_dim,
dropout_rate=dropout_rate,
name=f'transformer_block_{i+1}'
)(x)
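    # Collapse the token axis to one vector per sample.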
x = GlobalAveragePooling1D(name='global_pool')(x)
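    # Regression head: two dense layers with dropout and light L2 penalties.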
x = Dense(512, activation='relu', name='head_dense1', kernel_regularizer=l2(1e-5))(x)
x = Dropout(0.3, name='head_dropout1')(x)
x = Dense(256, activation='relu', name='head_dense2', kernel_regularizer=l2(1e-5))(x)
x = Dropout(0.2, name='head_dropout2')(x)
output = Dense(output_shape[0], activation='linear', name='output_coords')(x)
model = Model(
inputs=[input_players, input_static],
outputs=output,
name="NFL_v23_Transformer_Model_exp14"
)
return model
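

# Masked RMSE loss: targets and predictions are reshaped to (batch, 30, 2)
# coordinate pairs; pairs whose target is all zeros are treated as padding
# and excluded from the average.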
def masked_rmse_loss_v2(y_true, y_pred):
y_true_reshaped = tf.reshape(y_true, [-1, 30, 2])
y_pred_reshaped = tf.reshape(y_pred, [-1, 30, 2])
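    # A position counts as valid only if its target coordinates are non-zero.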
mask = tf.cast(tf.reduce_sum(tf.abs(y_true_reshaped), axis=-1) > 1e-6, tf.float32)
squared_diff = tf.reduce_sum(tf.square(y_true_reshaped - y_pred_reshaped), axis=-1)
masked_squared_diff = squared_diff * mask
rmse = tf.sqrt(tf.reduce_sum(masked_squared_diff) / tf.maximum(tf.reduce_sum(mask), 1.0))
return rmse
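

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative; not part of the original module). The
# optimizer, learning rate, and random dummy data below are assumptions, not
# values taken from this file.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import numpy as np

    model = build_model_v23_transformer()
    model.compile(optimizer=keras.optimizers.Adam(1e-4), loss=masked_rmse_loss_v2)
    model.summary()

    # Dummy batch: 8 samples, 22 tokens, 12 player / 10 static features.
    players = np.random.rand(8, 22, 12).astype("float32")
    static = np.random.rand(8, 22, 10).astype("float32")
    targets = np.random.rand(8, 60).astype("float32")

    preds = model([players, static], training=False)
    print("predictions:", preds.shape)  # (8, 60)
    print("masked RMSE:", float(masked_rmse_loss_v2(targets, preds)))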