Motif-Technologies
/

Motif-2.6B

@@ -1,8 +1,9 @@
 from transformers.configuration_utils import PretrainedConfig
 from transformers.modeling_rope_utils import rope_config_validation
 from transformers.utils import logging
-from typing import Optional
-import math
 logger = logging.get_logger(__name__)
@@ -13,11 +14,8 @@ class MotifConfig(PretrainedConfig):
     Motif model according to the specified arguments, defining the model architecture. Instantiating a configuration
     with the defaults will yield a similar configuration to that of
     Motif-102B [moreh/Motif-102B](https://huggingface.co/moreh/Motif-102B).
     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
     documentation from [`PretrainedConfig`] for more information.
     Args:
         vocab_size (`int`, *optional*, defaults to 151936):
             Vocabulary size of the Motif model. Defines the number of different tokens that can be represented by the
@@ -97,16 +95,12 @@ class MotifConfig(PretrainedConfig):
             The number of layers that use SWA (Sliding Window Attention). The bottom layers use SWA while the top layers use full attention.
         attention_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for the attention probabilities.
     ```python
     >>> from transformers import MotifModel, MotifConfig
     >>> # Initializing a Motif style configuration
     >>> configuration = MotifConfig()
     >>> # Initializing a model from the Motif-102B style configuration
     >>> model = MotifModel(configuration)
     >>> # Accessing the model configuration
     >>> configuration = model.config
     ```"""
@@ -170,4 +164,4 @@ class MotifConfig(PretrainedConfig):
             tie_word_embeddings=tie_word_embeddings,
             **kwargs,
         )
-        logger.info(f' kwargs : {kwargs}')

+import math
+from typing import Optional
 from transformers.configuration_utils import PretrainedConfig
 from transformers.modeling_rope_utils import rope_config_validation
 from transformers.utils import logging
 logger = logging.get_logger(__name__)
     Motif model according to the specified arguments, defining the model architecture. Instantiating a configuration
     with the defaults will yield a similar configuration to that of
     Motif-102B [moreh/Motif-102B](https://huggingface.co/moreh/Motif-102B).
     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
     documentation from [`PretrainedConfig`] for more information.
     Args:
         vocab_size (`int`, *optional*, defaults to 151936):
             Vocabulary size of the Motif model. Defines the number of different tokens that can be represented by the
             The number of layers that use SWA (Sliding Window Attention). The bottom layers use SWA while the top layers use full attention.
         attention_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for the attention probabilities.
     ```python
     >>> from transformers import MotifModel, MotifConfig
     >>> # Initializing a Motif style configuration
     >>> configuration = MotifConfig()
     >>> # Initializing a model from the Motif-102B style configuration
     >>> model = MotifModel(configuration)
     >>> # Accessing the model configuration
     >>> configuration = model.config
     ```"""
             tie_word_embeddings=tie_word_embeddings,
             **kwargs,
         )
+        logger.info(f' kwargs : {kwargs}')