aws-neuron
/

optimum-neuron-cache

Model card Files Files and versions

xet

Community

615

dacorvo HF Staff committed on Oct 9

Commit

711bb96

verified ·

1 Parent(s): ce1c46e

Update inference-cache-config/llama4.json

Browse files

Files changed (1) hide show

inference-cache-config/llama4.json +108 -0

inference-cache-config/llama4.json CHANGED Viewed

@@ -2,6 +2,7 @@
   "meta-llama/Llama-4-Scout-17B-16E-Instruct": [
     {
       "task": "text-generation",
       "batch_size": 1,
       "sequence_length": 4096,
       "num_cores": 16,
@@ -9,10 +10,117 @@
     },
     {
       "task": "text-generation",
       "batch_size": 4,
       "sequence_length": 4096,
       "num_cores": 16,
       "auto_cast_type": "bf16"
     }
   ]
 }

   "meta-llama/Llama-4-Scout-17B-16E-Instruct": [
     {
       "task": "text-generation",
+      "instance_type": "trn1",
       "batch_size": 1,
       "sequence_length": 4096,
       "num_cores": 16,
     },
     {
       "task": "text-generation",
+      "instance_type": "trn1",
       "batch_size": 4,
       "sequence_length": 4096,
       "num_cores": 16,
       "auto_cast_type": "bf16"
+    },
+    {
+      "task": "text-generation",
+      "instance_type": "trn1",
+      "batch_size": 1,
+      "sequence_length": 4096,
+      "num_cores": 32,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "task": "text-generation",
+      "instance_type": "trn1",
+      "batch_size": 4,
+      "sequence_length": 4096,
+      "num_cores": 32,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "task": "text-generation",
+      "instance_type": "trn1",
+      "batch_size": 8,
+      "sequence_length": 4096,
+      "num_cores": 32,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "task": "text-generation",
+      "instance_type": "trn2",
+      "batch_size": 1,
+      "sequence_length": 4096,
+      "num_cores": 32,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "task": "text-generation",
+      "instance_type": "trn2",
+      "batch_size": 4,
+      "sequence_length": 4096,
+      "num_cores": 32,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "task": "text-generation",
+      "instance_type": "trn2",
+      "batch_size": 8,
+      "sequence_length": 4096,
+      "num_cores": 32,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "task": "text-generation",
+      "instance_type": "trn2",
+      "batch_size": 16,
+      "sequence_length": 4096,
+      "num_cores": 32,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "task": "text-generation",
+      "instance_type": "trn2",
+      "batch_size": 32,
+      "sequence_length": 4096,
+      "num_cores": 32,
+      "auto_cast_type": "bf16"
+    }
+  ],
+  "meta-llama/Llama-4-Maverick-17B-128E-Instruct": [
+    {
+      "task": "text-generation",
+      "instance_type": "trn2",
+      "batch_size": 1,
+      "sequence_length": 4096,
+      "num_cores": 64,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "task": "text-generation",
+      "instance_type": "trn2",
+      "batch_size": 4,
+      "sequence_length": 4096,
+      "num_cores": 64,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "task": "text-generation",
+      "instance_type": "trn1",
+      "batch_size": 8,
+      "sequence_length": 4096,
+      "num_cores": 64,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "task": "text-generation",
+      "instance_type": "trn1",
+      "batch_size": 16,
+      "sequence_length": 4096,
+      "num_cores": 64,
+      "auto_cast_type": "bf16"
+    },
+    {
+      "task": "text-generation",
+      "instance_type": "trn1",
+      "batch_size": 32,
+      "sequence_length": 4096,
+      "num_cores": 64,
+      "auto_cast_type": "bf16"
     }
   ]
 }