diff --git a/.gitattributes b/.gitattributes index 47b94e2c1adf3411e3cde74c07c444f9e6568481..7d2ed67cfca9e1bf04fff801f2e35b43474a8d86 100644 --- a/.gitattributes +++ b/.gitattributes @@ -13312,3 +13312,21 @@ neuronxcc-2.21.18209.0+043b1bf7/MODULE_e6150818c39bca2677d1+8eeafb64/model.neff neuronxcc-2.21.18209.0+043b1bf7/MODULE_e6150818c39bca2677d1+8eeafb64/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_f2f5d7eea0d7ac74facb+26bb721a/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.21.18209.0+043b1bf7/MODULE_f2f5d7eea0d7ac74facb+26bb721a/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_05be74483bc93c8757df+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_05be74483bc93c8757df+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_30b7f3226f615e011f33+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3b867487803e851b46f0+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_3b867487803e851b46f0+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_416e0fa403c0921df6c3+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4200960674e1921e9aec+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_4ec3b67fd9a78f7cbdb4+a32116a7/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_65e34d6afbea63ef1564+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_65e34d6afbea63ef1564+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a42f0ea98535622da192+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_a42f0ea98535622da192+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b50fd34438465c143456+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_b50fd34438465c143456+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec5bfc4ef4ea5c5e3798+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec5bfc4ef4ea5c5e3798+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_f850227f2a268ed6719b+ac10809c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.18209.0+043b1bf7/MODULE_f850227f2a268ed6719b+ac10809c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Maverick-17B-128E-Instruct/0782cbf4b3ed6eea7218.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Maverick-17B-128E-Instruct/0782cbf4b3ed6eea7218.json new file mode 100644 index 0000000000000000000000000000000000000000..61e22f3e6b59c8772e8278c7d71f651a37bc2c06 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Maverick-17B-128E-Instruct/0782cbf4b3ed6eea7218.json @@ -0,0 +1,190 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3, + 5, + 7, + 9, + 11, + 13, + 15, + 17, + 19, + 21, + 23, + 25, + 27, + 29, + 31, + 33, + 35, + 37, + 39, + 41, + 43, + 45, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "checkpoint_revision": "73d14711bcc77c16df3470856949c3764056b617", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 64, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "n_active_tokens": 4096, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 4096, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 64 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 128, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": false, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Maverick-17B-128E-Instruct/9fc13314975962cba5a6.json b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Maverick-17B-128E-Instruct/9fc13314975962cba5a6.json new file mode 100644 index 0000000000000000000000000000000000000000..0a3946d8b647d2ca3a61676e7fd3e2e53e12cc17 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.3.dev1/llama4_text/meta-llama/Llama-4-Maverick-17B-128E-Instruct/9fc13314975962cba5a6.json @@ -0,0 +1,190 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "_task": "text-generation", + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": true, + "dtype": "bfloat16", + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 2, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "layer_types": [ + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention", + "chunked_attention", + "chunked_attention", + "chunked_attention", + "full_attention" + ], + "max_position_embeddings": 1048576, + "model_type": "llama4_text", + "moe_layers": [ + 1, + 3, + 5, + 7, + 9, + 11, + 13, + 15, + 17, + 19, + 21, + 23, + 25, + 27, + 29, + 31, + 33, + 35, + 37, + 39, + 41, + 43, + 45, + 47 + ], + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "checkpoint_revision": "73d14711bcc77c16df3470856949c3764056b617", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": false, + "glu_mlp": true, + "local_ranks_size": 64, + "max_batch_size": 1, + "max_context_length": 8192, + "max_topk": 256, + "n_active_tokens": 8192, + "neuronxcc_version": "2.21.18209.0+043b1bf7", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.3.dev1", + "output_logits": false, + "pp_degree": 1, + "sequence_length": 8192, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn2", + "torch_dtype": "bfloat16", + "tp_degree": 64 + }, + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 128, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "use_cache": true, + "use_qk_norm": false, + "vocab_size": 202048 +} \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_05be74483bc93c8757df+ac10809c/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_05be74483bc93c8757df+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_05be74483bc93c8757df+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_05be74483bc93c8757df+ac10809c/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_05be74483bc93c8757df+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_05be74483bc93c8757df+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_05be74483bc93c8757df+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..78e4188baf7de97269080a1d6acd81593fc41e73 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_05be74483bc93c8757df+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:024adf15e89a8497d981f4e938a3324f26464df0317c371a54b04a9af78bbbab +size 203660169 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_05be74483bc93c8757df+ac10809c/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_05be74483bc93c8757df+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3378a7826058386d92fe0d85ed3a814b51fd1425 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_05be74483bc93c8757df+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:308e47f230df190897ed9e5282bf56ec98ed13a3d7a92ec4b1b666d845d1d289 +size 11254784 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_05be74483bc93c8757df+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_05be74483bc93c8757df+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a873f7917ddafd0f444486348d94214029aa101c --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_05be74483bc93c8757df+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a81d483014f7a46d5e5ef1038c202899a516f7925b20a4b24698c050fe1e0ff +size 11564149 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_30b7f3226f615e011f33+a32116a7/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_30b7f3226f615e011f33+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_30b7f3226f615e011f33+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_30b7f3226f615e011f33+a32116a7/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_30b7f3226f615e011f33+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_30b7f3226f615e011f33+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_30b7f3226f615e011f33+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9b0afb242567f783c84f7cc9dd177bb81f866601 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_30b7f3226f615e011f33+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcaea021293d4d4472d7afda78070bec3b2760e9f748cd882859201d1a26917b +size 107351189 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_30b7f3226f615e011f33+a32116a7/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_30b7f3226f615e011f33+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..70674d049f8e8c5b109bdd32305604ef5a49d198 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_30b7f3226f615e011f33+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c059eda8abaeb544f662db22082ce4b294a6af28bc592db36e86c4a8ca6ebad5 +size 39937024 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3b867487803e851b46f0+ac10809c/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3b867487803e851b46f0+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3b867487803e851b46f0+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3b867487803e851b46f0+ac10809c/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3b867487803e851b46f0+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3b867487803e851b46f0+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3b867487803e851b46f0+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dfc24e326d3334c99fc6c12b3ecf9ac6e2f1f1d5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3b867487803e851b46f0+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d996d1b6b51ba153eff193f4ee2995efe72f50f85f41dc33347c7a3f3b57fe4 +size 204269521 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3b867487803e851b46f0+ac10809c/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3b867487803e851b46f0+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cbfc088c53b35d9f0441a45d97ff63fbdd1dbb20 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3b867487803e851b46f0+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba38c5f2f4aa3906bfab6e329ad29bc4eef48551c1b39f710c70cb42d9709c58 +size 11316224 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3b867487803e851b46f0+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3b867487803e851b46f0+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9ed8600678f6e23b1eb770afdf4749a78115832a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_3b867487803e851b46f0+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34ce2961eab1a7f057e1d2a01fa3066a66c28ed8b5478a78b7c0f867f1ac69af +size 11625573 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_416e0fa403c0921df6c3+a32116a7/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_416e0fa403c0921df6c3+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_416e0fa403c0921df6c3+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_416e0fa403c0921df6c3+a32116a7/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_416e0fa403c0921df6c3+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_416e0fa403c0921df6c3+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_416e0fa403c0921df6c3+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e77f74edddf6fca6a678a765d14b598ad5a96eaf --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_416e0fa403c0921df6c3+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e540c4f1e31e36c9b251c7099e31307fc0d1e6008b967542cd4239534e1c9feb +size 208063973 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_416e0fa403c0921df6c3+a32116a7/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_416e0fa403c0921df6c3+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c6b9d2384c5e3db1f78d4561fe80433f051661c5 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_416e0fa403c0921df6c3+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4082b934fd93cfc3969a5b641b273afaa8002499e848790bad7912a07a4ad01 +size 77569024 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4200960674e1921e9aec+a32116a7/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4200960674e1921e9aec+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4200960674e1921e9aec+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4200960674e1921e9aec+a32116a7/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4200960674e1921e9aec+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4200960674e1921e9aec+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4200960674e1921e9aec+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b70939c2154e362486988045ac2b61a983b4f04b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4200960674e1921e9aec+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6f942527d5e73f3ee38027454e1c6f85c9b3073d9acb209127e2b68f7e7ead +size 208356581 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4200960674e1921e9aec+a32116a7/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4200960674e1921e9aec+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..af3e9296d09992ecba8993dfc9db66a129d31f83 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4200960674e1921e9aec+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7bd52b12ed63676b9008ab140fee0705f1afefa7b2be860962fcf9902f84e5d +size 77579264 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4ec3b67fd9a78f7cbdb4+a32116a7/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4ec3b67fd9a78f7cbdb4+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4ec3b67fd9a78f7cbdb4+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4ec3b67fd9a78f7cbdb4+a32116a7/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4ec3b67fd9a78f7cbdb4+a32116a7/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4ec3b67fd9a78f7cbdb4+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4ec3b67fd9a78f7cbdb4+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..72f25f83dfa55163eb6033698153b8b9c983f448 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4ec3b67fd9a78f7cbdb4+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90f287de9e03fb9e81b450ba63b209b2cedc3906bbdb3de4ab291532276186ec +size 207847637 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4ec3b67fd9a78f7cbdb4+a32116a7/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4ec3b67fd9a78f7cbdb4+a32116a7/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..786d58325ba01d74d5433bcc214b1f9d4a9aa74b --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_4ec3b67fd9a78f7cbdb4+a32116a7/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e43e6fe7e06e1f1bc0a66df6b299ded92c4a335a23d3c984a6a8544da3b080e +size 77569024 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_65e34d6afbea63ef1564+ac10809c/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_65e34d6afbea63ef1564+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_65e34d6afbea63ef1564+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_65e34d6afbea63ef1564+ac10809c/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_65e34d6afbea63ef1564+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_65e34d6afbea63ef1564+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_65e34d6afbea63ef1564+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3635eba8f6f5931012b24b2e6d5a849f597b2115 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_65e34d6afbea63ef1564+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:833c44de5240e3c4e4dc6986a3d3429c0b1e6ed7a75c4b505c28216f9d85a76c +size 204263377 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_65e34d6afbea63ef1564+ac10809c/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_65e34d6afbea63ef1564+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4d20c162b43d0ee263c63c67f1afa2704742f999 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_65e34d6afbea63ef1564+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0bc0ef0a08a71504b94db3e2733a14e9307a354b66e5b499ee2118db26807de +size 11070464 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_65e34d6afbea63ef1564+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_65e34d6afbea63ef1564+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ce100496ebd821b761588f8204520cbed7af66ae --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_65e34d6afbea63ef1564+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac9a303d035b0d4a4a595ee9dfc6211e558bf2943c767ee3af6a4d46f2b55292 +size 11379813 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674332a679782ee04898+a32116a7/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674332a679782ee04898+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674332a679782ee04898+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674332a679782ee04898+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674332a679782ee04898+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..37fef796d9b039f2a69bacd119e8342564289224 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674332a679782ee04898+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dce21659a8523d02dcd06c067d00863c54f39f5e180742c35e54fedb64d5680 +size 233570789 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674332a679782ee04898+a32116a7/model.log b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674332a679782ee04898+a32116a7/model.log new file mode 100644 index 0000000000000000000000000000000000000000..4dc354b48b820ecc715e73d60b4fd3b8e441b172 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_674332a679782ee04898+a32116a7/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_674332a679782ee04898+a32116a7.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_674332a679782ee04898+a32116a7.neff', '--target=trn2', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=2', '--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (26.537GB) exceeds 24GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2025-11-27T15:03:49Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (26.537GB) exceeds 24GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8325de44f605a823c436+a32116a7/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8325de44f605a823c436+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8325de44f605a823c436+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8325de44f605a823c436+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8325de44f605a823c436+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..276093eaf6fda4e9e11d96019bc7f2def7ed07bf --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8325de44f605a823c436+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a3e441a854ee41a2276c1518609fd4d76b0243a9324305bc07d8cd8d64d6fa1 +size 309218022 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8325de44f605a823c436+a32116a7/model.log b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8325de44f605a823c436+a32116a7/model.log new file mode 100644 index 0000000000000000000000000000000000000000..30bb98abebedcb03c9740353084f75dd4c4af35a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_8325de44f605a823c436+a32116a7/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_8325de44f605a823c436+a32116a7.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_8325de44f605a823c436+a32116a7.neff', '--target=trn2', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=2', '--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (29.349GB) exceeds 24GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2025-11-27T12:18:44Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (29.349GB) exceeds 24GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a42f0ea98535622da192+ac10809c/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a42f0ea98535622da192+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a42f0ea98535622da192+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a42f0ea98535622da192+ac10809c/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a42f0ea98535622da192+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a42f0ea98535622da192+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a42f0ea98535622da192+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dcb09733d681ea535621ee94a922c6eb6dc3b195 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a42f0ea98535622da192+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:608a242d7b2e8f63eb7fc01a96237daf8141e54c737fa3029ceaab5aaf2b66ec +size 304936914 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a42f0ea98535622da192+ac10809c/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a42f0ea98535622da192+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..30be2119281bae636992dd3bec770109c79af344 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a42f0ea98535622da192+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84fba4d0937a4e0d98003e8309115635db8e7521dbde620a3095507f85d1edc1 +size 15391744 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a42f0ea98535622da192+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a42f0ea98535622da192+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..16d6485311ff6bd1c9577ca5a792655aa0c11e46 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_a42f0ea98535622da192+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b13c9403176a2f84027601bf46e9f78dd3528b386784e9e054515646492279 +size 15701093 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b50fd34438465c143456+ac10809c/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b50fd34438465c143456+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b50fd34438465c143456+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b50fd34438465c143456+ac10809c/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b50fd34438465c143456+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b50fd34438465c143456+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b50fd34438465c143456+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e3758470c9345fd9e570fe2b565756204902a351 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b50fd34438465c143456+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09751a30b4f4a733f9b2e3ea959d6e13a475daa18e9b00c87827261fa81d928c +size 103598913 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b50fd34438465c143456+ac10809c/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b50fd34438465c143456+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3863468d29707f3f12cdfee17f9c333268e75e55 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b50fd34438465c143456+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d59380a9125d65a7774945e905d0ad89cff759e65ac2e5f479fff373ad9e0edc +size 7169024 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b50fd34438465c143456+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b50fd34438465c143456+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1c25f4d28dec0bedb3bc3b0e71dfc2f9a5268c5d --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_b50fd34438465c143456+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:292d6ce7151d25e424cb3eb49a31e99b915adacc4129a08d870c6744a6012c77 +size 7478373 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_baeb2b430ce4dbeb0eaf+a32116a7/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_baeb2b430ce4dbeb0eaf+a32116a7/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..3c18214665ab208ee16402ace66a8dd081416d1a --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_baeb2b430ce4dbeb0eaf+a32116a7/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_baeb2b430ce4dbeb0eaf+a32116a7/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_baeb2b430ce4dbeb0eaf+a32116a7/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..360a9ca9474561055c03384238b8c584e3dd2a25 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_baeb2b430ce4dbeb0eaf+a32116a7/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9187db080745e95aea071bf5ab3a52c0e4407c092f20780e6762eb149337fe17 +size 410146693 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_baeb2b430ce4dbeb0eaf+a32116a7/model.log b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_baeb2b430ce4dbeb0eaf+a32116a7/model.log new file mode 100644 index 0000000000000000000000000000000000000000..3dd6ef0b052a5a1924f14d2f4adb531f17bf74b6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_baeb2b430ce4dbeb0eaf+a32116a7/model.log @@ -0,0 +1,3 @@ +Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_baeb2b430ce4dbeb0eaf+a32116a7.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding/_tp0_bk0/model.MODULE_baeb2b430ce4dbeb0eaf+a32116a7.neff', '--target=trn2', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=2', '--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (34.863GB) exceeds 24GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. +2025-11-27T11:14:47Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (34.863GB) exceeds 24GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables. + diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec5bfc4ef4ea5c5e3798+ac10809c/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec5bfc4ef4ea5c5e3798+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec5bfc4ef4ea5c5e3798+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec5bfc4ef4ea5c5e3798+ac10809c/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec5bfc4ef4ea5c5e3798+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec5bfc4ef4ea5c5e3798+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec5bfc4ef4ea5c5e3798+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1bb7fe24e316af15c3fbb9ec00c0219a3750d1e4 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec5bfc4ef4ea5c5e3798+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fe8ae7fbf3c9f0ca59c3b92e44fa70c37aab10fa2327edfa98a366b9830f18d +size 229436369 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec5bfc4ef4ea5c5e3798+ac10809c/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec5bfc4ef4ea5c5e3798+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e78775a7edca41f2e005e013d4b50f6a178e7cd2 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec5bfc4ef4ea5c5e3798+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3898cc949046ddc1288d0165085f33c1e324eb3fc12eea4cd20b09e9f9c0fc4d +size 12370944 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec5bfc4ef4ea5c5e3798+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec5bfc4ef4ea5c5e3798+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0ad02b25d55da76f79be4c75566740a719bccad6 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_ec5bfc4ef4ea5c5e3798+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9c5a0cf85497a3246cd73f61a57f910263152e04ca3fc5fcf326ba93f2f5303 +size 12680293 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f850227f2a268ed6719b+ac10809c/compile_flags.json b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f850227f2a268ed6719b+ac10809c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..7d1678bf25666438bbebf65702a2233ae75c43c9 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f850227f2a268ed6719b+ac10809c/compile_flags.json @@ -0,0 +1 @@ +["--target=trn2", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=2", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f850227f2a268ed6719b+ac10809c/model.done b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f850227f2a268ed6719b+ac10809c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f850227f2a268ed6719b+ac10809c/model.hlo_module.pb b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f850227f2a268ed6719b+ac10809c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..41b281250e2a2f15f647eaeef591267fe3213016 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f850227f2a268ed6719b+ac10809c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db98291bd6650c5c0ce403fc9b0ad2a2609d08e528819cf59f3147652e3ef632 +size 405604309 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f850227f2a268ed6719b+ac10809c/model.neff b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f850227f2a268ed6719b+ac10809c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fa05c65cee3dd74f499807cbf4f9f014af424a93 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f850227f2a268ed6719b+ac10809c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0637d7e875f3bf9dc6dceb2f1ab8f52cf5e9f4cff52427d395b66bff280401ca +size 19405824 diff --git a/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f850227f2a268ed6719b+ac10809c/wrapped_neff.hlo b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f850227f2a268ed6719b+ac10809c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5807e8cbba57540c594a056c29b2d042771776e7 --- /dev/null +++ b/neuronxcc-2.21.18209.0+043b1bf7/MODULE_f850227f2a268ed6719b+ac10809c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac403254811e8e4fce7fd8ca15db8352576e1501c828a14cacad879267000e0a +size 19715173