{ "meta-llama/Meta-Llama-3.1-8B": [ { "batch_size": 1, "sequence_length": 4096, "tensor_parallel_size": 2, "instance_type" : "trn1" }, { "batch_size": 4, "sequence_length": 4096, "tensor_parallel_size": 2, "instance_type" : "trn1" }, { "batch_size": 8, "sequence_length": 4096, "tensor_parallel_size": 2, "instance_type" : "trn1" }, { "batch_size": 4, "sequence_length": 4096, "tensor_parallel_size": 8, "instance_type" : "trn1" }, { "batch_size": 8, "sequence_length": 4096, "tensor_parallel_size": 8, "instance_type" : "trn1" }, { "batch_size": 16, "sequence_length": 4096, "tensor_parallel_size": 8, "instance_type" : "trn1" }, { "batch_size": 32, "sequence_length": 4096, "tensor_parallel_size": 8, "instance_type" : "trn1" } ], "meta-llama/Llama-2-7b-hf": [ { "batch_size": 1, "sequence_length": 2048, "tensor_parallel_size": 2, "instance_type" : "trn1" } ], "meta-llama/Llama-2-13b-hf": [ { "batch_size": 1, "sequence_length": 2048, "tensor_parallel_size": 8, "instance_type" : "trn1" } ], "meta-llama/Meta-Llama-3-8B": [ { "batch_size": 1, "sequence_length": 4096, "tensor_parallel_size": 2, "instance_type" : "trn1" } ], "meta-llama/Llama-3.2-1B": [ { "batch_size": 1, "sequence_length": 4096, "tensor_parallel_size": 2, "instance_type" : "trn1" }, { "batch_size": 4, "sequence_length": 4096, "tensor_parallel_size": 2, "instance_type" : "trn1" } ], "meta-llama/Llama-3.2-3B": [ { "batch_size": 1, "sequence_length": 4096, "tensor_parallel_size": 2, "instance_type" : "trn1" }, { "batch_size": 4, "sequence_length": 4096, "tensor_parallel_size": 2, "instance_type" : "trn1" } ], "TinyLlama/TinyLlama-1.1B-Chat-v1.0": [ { "batch_size": 1, "sequence_length": 2048, "tensor_parallel_size": 2, "instance_type" : "trn1" } ], "meta-llama/Llama-3.3-70B-Instruct": [ { "batch_size": 1, "sequence_length": 4096, "tensor_parallel_size": 24, "instance_type" : "trn1" }, { "batch_size": 8, "sequence_length": 4096, "tensor_parallel_size": 24, "instance_type" : "trn1" }, { "batch_size": 1, "sequence_length": 4096, "tensor_parallel_size": 32, "instance_type" : "trn1" }, { "batch_size": 8, "sequence_length": 4096, "tensor_parallel_size": 32, "instance_type" : "trn1" } ] }