Update modeling_motif.py
modeling_motif.py (+3 -5)
@@ -811,9 +811,8 @@ class MotifDecoderLayer(nn.Module):
         self.self_attn = MOTIF_ATTENTION_CLASSES[config._attn_implementation](config, layer_idx)
         self.mlp = MotifMLP(config)
 
-
-        self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
-        self.post_attention_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.input_layernorm = MotifRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.post_attention_layernorm = MotifRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
 
 
     def forward(
@@ -1048,8 +1047,7 @@ class MotifModel(MotifPreTrainedModel):
         num_hidden_layers = config.num_hidden_layers
         self.layers = nn.ModuleList([MotifDecoderLayer(config = config, layer_idx=layer_idx) for layer_idx in range(num_hidden_layers)])
         self._attn_implementation = config._attn_implementation
-
-        self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+        self.norm = MotifRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
         self.hidden_size = config.hidden_size
         self.num_heads = config.num_attention_heads
         self.head_dim = self.hidden_size // self.num_heads