{ "architectures": [ "XCodec2Model" ], "attention_bias": false, "attention_dropout": 0.0, "audio_channels": 1, "codebook_dim": 16, "codebook_size": 16384, "codec_decoder_hidden_size": 1024, "codec_encoder_hidden_size": 1024, "depth": 12, "dilations": [ 1, 3, 9 ], "head_dim": 64, "hidden_size": 1024, "hop_length": 320, "initializer_range": 0.02, "intermediate_size": 4096, "max_position_embeddings": 4096, "model_type": "xcodec2", "num_attention_heads": 16, "num_hidden_layers": 12, "num_key_value_heads": 16, "rms_norm_eps": 1e-06, "rope_theta": 10000.0, "sampling_rate": 16000, "semantic_hidden_size": 1024, "torch_dtype": "float32", "transformers_version": "4.52.0.dev0", "up_ratios": [ 2, 2, 4, 4, 5 ], "use_vocos": true, "vq_commit_weight": 0.25, "vq_dim": 2048, "vq_full_commit_loss": false, "vq_num_quantizers": 1, "vq_weight_init": false }