dacorvo HF Staff committed on
Commit
711bb96
·
verified ·
1 Parent(s): ce1c46e

Update inference-cache-config/llama4.json

Browse files
Files changed (1) hide show
  1. inference-cache-config/llama4.json +108 -0
inference-cache-config/llama4.json CHANGED
@@ -2,6 +2,7 @@
2
  "meta-llama/Llama-4-Scout-17B-16E-Instruct": [
3
  {
4
  "task": "text-generation",
 
5
  "batch_size": 1,
6
  "sequence_length": 4096,
7
  "num_cores": 16,
@@ -9,10 +10,117 @@
9
  },
10
  {
11
  "task": "text-generation",
 
12
  "batch_size": 4,
13
  "sequence_length": 4096,
14
  "num_cores": 16,
15
  "auto_cast_type": "bf16"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  }
17
  ]
18
  }
 
2
  "meta-llama/Llama-4-Scout-17B-16E-Instruct": [
3
  {
4
  "task": "text-generation",
5
+ "instance_type": "trn1",
6
  "batch_size": 1,
7
  "sequence_length": 4096,
8
  "num_cores": 16,
 
10
  },
11
  {
12
  "task": "text-generation",
13
+ "instance_type": "trn1",
14
  "batch_size": 4,
15
  "sequence_length": 4096,
16
  "num_cores": 16,
17
  "auto_cast_type": "bf16"
18
+ },
19
+ {
20
+ "task": "text-generation",
21
+ "instance_type": "trn1",
22
+ "batch_size": 1,
23
+ "sequence_length": 4096,
24
+ "num_cores": 32,
25
+ "auto_cast_type": "bf16"
26
+ },
27
+ {
28
+ "task": "text-generation",
29
+ "instance_type": "trn1",
30
+ "batch_size": 4,
31
+ "sequence_length": 4096,
32
+ "num_cores": 32,
33
+ "auto_cast_type": "bf16"
34
+ },
35
+ {
36
+ "task": "text-generation",
37
+ "instance_type": "trn1",
38
+ "batch_size": 8,
39
+ "sequence_length": 4096,
40
+ "num_cores": 32,
41
+ "auto_cast_type": "bf16"
42
+ },
43
+ {
44
+ "task": "text-generation",
45
+ "instance_type": "trn2",
46
+ "batch_size": 1,
47
+ "sequence_length": 4096,
48
+ "num_cores": 32,
49
+ "auto_cast_type": "bf16"
50
+ },
51
+ {
52
+ "task": "text-generation",
53
+ "instance_type": "trn2",
54
+ "batch_size": 4,
55
+ "sequence_length": 4096,
56
+ "num_cores": 32,
57
+ "auto_cast_type": "bf16"
58
+ },
59
+ {
60
+ "task": "text-generation",
61
+ "instance_type": "trn2",
62
+ "batch_size": 8,
63
+ "sequence_length": 4096,
64
+ "num_cores": 32,
65
+ "auto_cast_type": "bf16"
66
+ },
67
+ {
68
+ "task": "text-generation",
69
+ "instance_type": "trn2",
70
+ "batch_size": 16,
71
+ "sequence_length": 4096,
72
+ "num_cores": 32,
73
+ "auto_cast_type": "bf16"
74
+ },
75
+ {
76
+ "task": "text-generation",
77
+ "instance_type": "trn2",
78
+ "batch_size": 32,
79
+ "sequence_length": 4096,
80
+ "num_cores": 32,
81
+ "auto_cast_type": "bf16"
82
+ }
83
+ ],
84
+ "meta-llama/Llama-4-Maverick-17B-128E-Instruct": [
85
+ {
86
+ "task": "text-generation",
87
+ "instance_type": "trn2",
88
+ "batch_size": 1,
89
+ "sequence_length": 4096,
90
+ "num_cores": 64,
91
+ "auto_cast_type": "bf16"
92
+ },
93
+ {
94
+ "task": "text-generation",
95
+ "instance_type": "trn2",
96
+ "batch_size": 4,
97
+ "sequence_length": 4096,
98
+ "num_cores": 64,
99
+ "auto_cast_type": "bf16"
100
+ },
101
+ {
102
+ "task": "text-generation",
103
+ "instance_type": "trn2",
104
+ "batch_size": 8,
105
+ "sequence_length": 4096,
106
+ "num_cores": 64,
107
+ "auto_cast_type": "bf16"
108
+ },
109
+ {
110
+ "task": "text-generation",
111
+ "instance_type": "trn2",
112
+ "batch_size": 16,
113
+ "sequence_length": 4096,
114
+ "num_cores": 64,
115
+ "auto_cast_type": "bf16"
116
+ },
117
+ {
118
+ "task": "text-generation",
119
+ "instance_type": "trn2",
120
+ "batch_size": 32,
121
+ "sequence_length": 4096,
122
+ "num_cores": 64,
123
+ "auto_cast_type": "bf16"
124
  }
125
  ]
126
  }