{
  "model_type": "helion-osc",
  "architectures": ["HelionOSCForCausalLM"],
  "vocab_size": 102400,
  "hidden_size": 5120,
  "num_hidden_layers": 48,
  "num_attention_heads": 40,
  "num_key_value_heads": 8,
  "intermediate_size": 18432,
  "hidden_act": "swiglu",
  "max_position_embeddings": 262144,
  "initializer_range": 0.02,
  "rms_norm_eps": 1e-6,
  "use_cache": true,
  "pad_token_id": 0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "tie_word_embeddings": false,
  "rope_theta": 10000000.0,
  "rope_scaling": {
    "type": "linear",
    "factor": 32.0
  },
  "attention_bias": false,
  "attention_dropout": 0.0,
  "mlp_bias": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.40.0",
  "model_version": "1.0",
  "use_flash_attention": true,
  "sliding_window": null,
  "gradient_checkpointing": false,
  "task_specific_params": {
    "code_generation": {
      "max_length": 4096,
      "temperature": 0.7,
      "top_p": 0.95,
      "top_k": 50,
      "do_sample": true,
      "repetition_penalty": 1.05,
      "length_penalty": 1.0
    },
    "mathematical_reasoning": {
      "max_length": 2048,
      "temperature": 0.3,
      "top_p": 0.9,
      "top_k": 40,
      "do_sample": false,
      "repetition_penalty": 1.0,
      "length_penalty": 1.2
    },
    "code_completion": {
      "max_length": 1024,
      "temperature": 0.6,
      "top_p": 0.92,
      "top_k": 45,
      "do_sample": true,
      "repetition_penalty": 1.03,
      "stop_sequences": ["\n\n", "```", "###"]
    },
    "algorithm_design": {
      "max_length": 3072,
      "temperature": 0.5,
      "top_p": 0.93,
      "top_k": 50,
      "do_sample": true,
      "repetition_penalty": 1.08
    },
    "debugging": {
      "max_length": 2048,
      "temperature": 0.4,
      "top_p": 0.88,
      "do_sample": false,
      "repetition_penalty": 1.0
    }
  },
  "specialization": {
    "domain": "coding_and_mathematics",
    "primary_focus": "code_generation_with_mathematical_reasoning",
    "verification_enabled": true,
    "step_by_step_reasoning": true,
    "languages_supported": [
      "python",
      "javascript",
      "typescript",
      "java",
      "c",
      "cpp",
      "csharp",
      "go",
      "rust",
      "ruby",
      "php",
      "swift",
      "kotlin",
      "scala",
      "r",
      "sql",
      "bash",
      "shell"
    ],
    "features": [
      "code_generation",
      "code_completion",
      "bug_detection",
      "bug_fixing",
      "mathematical_reasoning",
      "theorem_proving",
      "algorithm_design",
      "algorithm_optimization",
      "code_refactoring",
      "documentation_generation",
      "test_generation",
      "complexity_analysis"
    ],
    "mathematical_capabilities": [
      "arithmetic",
      "algebra",
      "calculus",
      "discrete_mathematics",
      "linear_algebra",
      "probability",
      "statistics",
      "number_theory",
      "graph_theory",
      "combinatorics"
    ]
  },
  "training_config": {
    "training_precision": "bf16",
    "optimizer": "adamw",
    "learning_rate": 2e-5,
    "warmup_steps": 2000,
    "weight_decay": 0.01,
    "max_grad_norm": 1.0
  },
  "quantization_config": {
    "quant_method": "bitsandbytes",
    "load_in_8bit": false,
    "load_in_4bit": false,
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_use_double_quant": true,
    "bnb_4bit_quant_type": "nf4"
  },
  "generation_config": {
    "temperature": 0.7,
    "top_p": 0.95,
    "top_k": 50,
    "do_sample": true,
    "max_new_tokens": 2048,
    "min_new_tokens": 1,
    "num_beams": 1,
    "early_stopping": false,
    "no_repeat_ngram_size": 3,
    "encoder_no_repeat_ngram_size": 0,
    "diversity_penalty": 0.0,
    "repetition_penalty": 1.05,
    "length_penalty": 1.0,
    "exponential_decay_length_penalty": null
  },
  "special_tokens": {
    "pad_token": "<|pad|>",
    "bos_token": "<|begin_of_text|>",
    "eos_token": "<|end_of_text|>",
    "unk_token": "<|unk|>",
    "code_start_token": "<|code_start|>",
    "code_end_token": "<|code_end|>",
    "math_start_token": "<|math_start|>",
    "math_end_token": "<|math_end|>",
    "reasoning_start_token": "<|reasoning_start|>",
    "reasoning_end_token": "<|reasoning_end|>",
    "explanation_start_token": "<|explanation_start|>",
    "explanation_end_token": "<|explanation_end|>"
  },
  "supported_frameworks": [
    "pytorch",
    "tensorflow",
    "onnx",
    "jax"
  ],
  "evaluation_metrics": {
    "humaneval_pass_at_1": 0.852,
    "humaneval_pass_at_10": 0.928,
    "mbpp_pass_at_1": 0.795,
    "mbpp_pass_at_10": 0.891,
    "gsm8k_accuracy": 0.785,
    "math_accuracy": 0.623,
    "apps_accuracy": 0.412
  },
  "hardware_requirements": {
    "minimum_vram_gb": 16,
    "recommended_vram_gb": 24,
    "minimum_ram_gb": 32,
    "recommended_ram_gb": 64,
    "cpu_cores": 8,
    "gpu_support": true,
    "multi_gpu_support": true,
    "cpu_only_support": true
  },
  "deployment_options": {
    "inference_frameworks": [
      "vllm",
      "text-generation-inference",
      "ollama",
      "llama.cpp"
    ],
    "optimization_support": [
      "quantization",
      "pruning",
      "distillation",
      "tensorrt",
      "onnx_runtime"
    ]
  }
}