{
  "model_type": "helion-osc",
  "architectures": [
    "HelionOSCForCausalLM"
  ],
  "vocab_size": 102400,
  "hidden_size": 5120,
  "num_hidden_layers": 48,
  "num_attention_heads": 40,
  "num_key_value_heads": 8,
  "intermediate_size": 18432,
  "hidden_act": "swiglu",
  "max_position_embeddings": 262144,
  "initializer_range": 0.02,
  "rms_norm_eps": 1e-6,
  "use_cache": true,
  "pad_token_id": 0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "tie_word_embeddings": false,
  "rope_theta": 10000000.0,
  "rope_scaling": {
    "type": "linear",
    "factor": 32.0
  },
  "attention_bias": false,
  "attention_dropout": 0.0,
  "mlp_bias": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.40.0",
  "model_version": "1.0",
  "use_flash_attention": true,
  "sliding_window": null,
  "gradient_checkpointing": false,
  "task_specific_params": {
    "code_generation": {
      "max_length": 4096,
      "temperature": 0.7,
      "top_p": 0.95,
      "top_k": 50,
      "do_sample": true,
      "repetition_penalty": 1.05,
      "length_penalty": 1.0
    },
    "mathematical_reasoning": {
      "max_length": 2048,
      "temperature": 0.3,
      "top_p": 0.9,
      "top_k": 40,
      "do_sample": false,
      "repetition_penalty": 1.0,
      "length_penalty": 1.2
    },
    "code_completion": {
      "max_length": 1024,
      "temperature": 0.6,
      "top_p": 0.92,
      "top_k": 45,
      "do_sample": true,
      "repetition_penalty": 1.03,
      "stop_sequences": [
        "\n\n",
        "```",
        "###"
      ]
    },
    "algorithm_design": {
      "max_length": 3072,
      "temperature": 0.5,
      "top_p": 0.93,
      "top_k": 50,
      "do_sample": true,
      "repetition_penalty": 1.08
    },
    "debugging": {
      "max_length": 2048,
      "temperature": 0.4,
      "top_p": 0.88,
      "do_sample": false,
      "repetition_penalty": 1.0
    }
  },
  "specialization": {
    "domain": "coding_and_mathematics",
    "primary_focus": "code_generation_with_mathematical_reasoning",
    "verification_enabled": true,
    "step_by_step_reasoning": true,
    "languages_supported": [
      "python",
      "javascript",
      "typescript",
      "java",
      "c",
      "cpp",
      "csharp",
      "go",
      "rust",
      "ruby",
      "php",
      "swift",
      "kotlin",
      "scala",
      "r",
      "sql",
      "bash",
      "shell"
    ],
    "features": [
      "code_generation",
      "code_completion",
      "bug_detection",
      "bug_fixing",
      "mathematical_reasoning",
      "theorem_proving",
      "algorithm_design",
      "algorithm_optimization",
      "code_refactoring",
      "documentation_generation",
      "test_generation",
      "complexity_analysis"
    ],
    "mathematical_capabilities": [
      "arithmetic",
      "algebra",
      "calculus",
      "discrete_mathematics",
      "linear_algebra",
      "probability",
      "statistics",
      "number_theory",
      "graph_theory",
      "combinatorics"
    ]
  },
  "training_config": {
    "training_precision": "bf16",
    "optimizer": "adamw",
    "learning_rate": 2e-5,
    "warmup_steps": 2000,
    "weight_decay": 0.01,
    "max_grad_norm": 1.0
  },
  "quantization_config": {
    "quant_method": "bitsandbytes",
    "load_in_8bit": false,
    "load_in_4bit": false,
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_use_double_quant": true,
    "bnb_4bit_quant_type": "nf4"
  },
  "generation_config": {
    "temperature": 0.7,
    "top_p": 0.95,
    "top_k": 50,
    "do_sample": true,
    "max_new_tokens": 2048,
    "min_new_tokens": 1,
    "num_beams": 1,
    "early_stopping": false,
    "no_repeat_ngram_size": 3,
    "encoder_no_repeat_ngram_size": 0,
    "diversity_penalty": 0.0,
    "repetition_penalty": 1.05,
    "length_penalty": 1.0,
    "exponential_decay_length_penalty": null
  },
  "special_tokens": {
    "pad_token": "<|pad|>",
    "bos_token": "<|begin_of_text|>",
    "eos_token": "<|end_of_text|>",
    "unk_token": "<|unk|>",
    "code_start_token": "<|code_start|>",
    "code_end_token": "<|code_end|>",
    "math_start_token": "<|math_start|>",
    "math_end_token": "<|math_end|>",
    "reasoning_start_token": "<|reasoning_start|>",
    "reasoning_end_token": "<|reasoning_end|>",
    "explanation_start_token": "<|explanation_start|>",
    "explanation_end_token": "<|explanation_end|>"
  },
  "supported_frameworks": [
    "pytorch",
    "tensorflow",
    "onnx",
    "jax"
  ],
  "evaluation_metrics": {
    "humaneval_pass_at_1": 0.852,
    "humaneval_pass_at_10": 0.928,
    "mbpp_pass_at_1": 0.795,
    "mbpp_pass_at_10": 0.891,
    "gsm8k_accuracy": 0.785,
    "math_accuracy": 0.623,
    "apps_accuracy": 0.412
  },
  "hardware_requirements": {
    "minimum_vram_gb": 16,
    "recommended_vram_gb": 24,
    "minimum_ram_gb": 32,
    "recommended_ram_gb": 64,
    "cpu_cores": 8,
    "gpu_support": true,
    "multi_gpu_support": true,
    "cpu_only_support": true
  },
  "deployment_options": {
    "inference_frameworks": [
      "vllm",
      "text-generation-inference",
      "ollama",
      "llama.cpp"
    ],
    "optimization_support": [
      "quantization",
      "pruning",
      "distillation",
      "tensorrt",
      "onnx_runtime"
    ]
  }
}