{ "name": "Rose-Beeper", "context": 512, "vocab_size": 8192, "dim": 512, "n_layers": 6, "n_heads": 8, "mlp_ratio": 4.0, "dropout": 0.0, "resid_dropout": 0.1, "grad_checkpoint": false, "compile_model": false, "tokenizer_path": "beeper.tokenizer.json", "add_bos_eos": true, "span_corrupt_frac": 0.0, "val_ratio": 0.01, "test_ratio": 0.01, "max_rows_per_dataset": null, "dataset_cache_verbose": true, "batch_size": 32, "grad_accum_steps": 1, "epochs": 3, "lr": 0.0003, "betas": [ 0.9, 0.95 ], "weight_decay": 0.1, "warmup_steps": 500, "max_steps": null, "clip_grad": 1.0, "min_lr": 1e-06, "label_smoothing": 0.0, "mixed_precision": "bf16", "log_dir": "./runs/rose_beeper", "log_interval": 50, "ckpt_dir": "./beeper_checkpoints", "export_dir": "./beeper_export", "temperature": 0.9, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1, "presence_penalty": 0.6, "frequency_penalty": 0.0, "hf_repo": "AbstractPhil/beeper-rose-v2", "upload_to_hub": true, "pent_level": "medium", "lambda_contrast": 0.25, "pent_min_edge": 0.5, "pent_temp": 0.1, "contrast_warmup": 800, "lambda_rose": 0.1, "rose_scale": 1.8, "lambda_geom_sep": 0.5, "geom_sep_margin": 0.9, "lambda_geom": 0.3, "lambda_geom_angle": 0.8, "lambda_geom_var": 0.3, "lambda_geom_edge": 0.3, "lambda_geom_vol": 0.6, "lambda_geom_minrel": 1.0, "geom_min_edge_rel": 0.6, "geom_vol_lower_frac": 0.85, "geom_sample_classes": 64, "geom_sample_k": 64, "seed": 1337, "corpus": [ { "name": "TinyStories", "path": "roneneldan/TinyStories", "split": "train[10%:20%]", "weight": 0.1, "dialect": [ 0.6, 0.1, 0.05, 0.05, 0.2 ] }, { "name": "WikipediaEN", "path": "wikimedia/wikipedia", "config": "20231101.en", "split": "train[1%:2%]", "weight": 0.6, "dialect": [ 0.12, 0.58, 0.1, 0.1, 0.1 ] }, { "name": "AGNews", "path": "ag_news", "split": "train[:50%]", "weight": 0.4, "dialect": [ 0.2, 0.5, 0.1, 0.1, 0.1 ] }, { "name": "GSM8K", "path": "openai/gsm8k", "config": "main", "split": "train[10%:20%]", "weight": 0.6, "dialect": [ 0.1, 0.15, 0.5, 0.15, 0.1 ] }, { "name": "AI2-ARC-Easy", "path": "allenai/ai2_arc", "config": "ARC-Easy", "split": "train[10%:20%]", "weight": 0.5, "dialect": [ 0.05, 0.15, 0.4, 0.25, 0.15 ] }, { "name": "HH-RLHF", "path": "Anthropic/hh-rlhf", "split": "train[1%:2%]", "weight": 0.6, "dialect": [ 0.1, 0.25, 0.2, 0.25, 0.2 ] }, { "name": "SVAMP", "path": "ChilleD/SVAMP", "split": "train", "weight": 0.45, "dialect": [ 0.1, 0.15, 0.55, 0.15, 0.05 ] }, { "name": "MATH-500", "path": "HuggingFaceH4/MATH-500", "split": "test", "weight": 0.15, "dialect": [ 0.05, 0.15, 0.6, 0.15, 0.05 ] }, { "name": "SEP", "path": "AiresPucrs/stanford-encyclopedia-philosophy", "split": "train", "weight": 0.3, "dialect": [ 0.05, 0.45, 0.18, 0.22, 0.1 ] }, { "name": "ETHICS-commonsense", "path": "hendrycks/ethics", "config": "commonsense", "split": "train", "weight": 0.45, "dialect": [ 0.1, 0.3, 0.18, 0.24, 0.18 ] }, { "name": "ETHICS-deontology", "path": "hendrycks/ethics", "config": "deontology", "split": "train", "weight": 0.35, "dialect": [ 0.1, 0.3, 0.18, 0.24, 0.18 ] }, { "name": "ETHICS-justice", "path": "hendrycks/ethics", "config": "justice", "split": "train", "weight": 0.35, "dialect": [ 0.1, 0.3, 0.18, 0.24, 0.18 ] }, { "name": "ETHICS-utilitarianism", "path": "hendrycks/ethics", "config": "utilitarianism", "split": "train", "weight": 0.35, "dialect": [ 0.1, 0.3, 0.18, 0.24, 0.18 ] }, { "name": "ETHICS-virtue", "path": "hendrycks/ethics", "config": "virtue", "split": "train", "weight": 0.35, "dialect": [ 0.1, 0.3, 0.18, 0.24, 0.18 ] }, { "name": "SocialChem101", "path": "allenai/social-chemistry-101", "split": "train", "weight": 0.65, "dialect": [ 0.15, 0.25, 0.2, 0.2, 0.2 ] }, { "name": "MoralStories", "path": "demelin/moral_stories", "split": "train", "weight": 0.35, "dialect": [ 0.2, 0.2, 0.2, 0.2, 0.2 ] }, { "name": "ART-AbductiveNLI", "path": "allenai/art", "split": "train", "weight": 0.3, "dialect": [ 0.05, 0.2, 0.45, 0.2, 0.1 ] }, { "name": "EntailmentBankV3", "path": "ariesutiono/entailment-bank-v3", "split": "train", "weight": 0.35, "dialect": [ 0.05, 0.25, 0.45, 0.15, 0.1 ] }, { "name": "LogiQA2.0NLI", "path": "tasksource/logiqa-2.0-nli", "split": "train", "weight": 0.45, "dialect": [ 0.05, 0.25, 0.45, 0.15, 0.1 ] }, { "name": "TruthfulQA-MC", "path": "EleutherAI/truthful_qa_mc", "split": "validation", "weight": 0.25, "dialect": [ 0.05, 0.35, 0.25, 0.25, 0.1 ] }, { "name": "VUA20-Metaphor", "path": "CreativeLang/vua20_metaphor", "split": "train[:5%]", "weight": 0.3, "dialect": [ 0.3, 0.1, 0.1, 0.15, 0.35 ] } ], "capoera": { "enable": true, "topic_bins": 512, "mood_bins": 7 }, "_ok_entries": [ { "name": "TinyStories", "path": "roneneldan/TinyStories", "split": "train[10%:20%]", "weight": 0.1, "dialect": [ 0.6000000238418579, 0.10000000149011612, 0.05000000074505806, 0.05000000074505806, 0.20000000298023224 ], "class_id": 0, "p": 0.012195121951219514 }, { "name": "WikipediaEN", "path": "wikimedia/wikipedia", "config": "20231101.en", "split": "train[1%:2%]", "weight": 0.6, "dialect": [ 0.11999999731779099, 0.5799999833106995, 0.10000000149011612, 0.10000000149011612, 0.10000000149011612 ], "class_id": 1, "p": 0.07317073170731708 }, { "name": "AGNews", "path": "ag_news", "split": "train[:50%]", "weight": 0.4, "dialect": [ 0.20000000298023224, 0.5, 0.10000000149011612, 0.10000000149011612, 0.10000000149011612 ], "class_id": 2, "p": 0.04878048780487806 }, { "name": "GSM8K", "path": "openai/gsm8k", "config": "main", "split": "train[10%:20%]", "weight": 0.6, "dialect": [ 0.10000000149011612, 0.15000000596046448, 0.5, 0.15000000596046448, 0.10000000149011612 ], "class_id": 3, "p": 0.07317073170731708 }, { "name": "AI2-ARC-Easy", "path": "allenai/ai2_arc", "config": "ARC-Easy", "split": "train[10%:20%]", "weight": 0.5, "dialect": [ 0.05000000074505806, 0.15000000596046448, 0.4000000059604645, 0.25, 0.15000000596046448 ], "class_id": 4, "p": 0.06097560975609757 }, { "name": "HH-RLHF", "path": "Anthropic/hh-rlhf", "split": "train[1%:2%]", "weight": 0.6, "dialect": [ 0.10000000149011612, 0.25, 0.20000000298023224, 0.25, 0.20000000298023224 ], "class_id": 5, "p": 0.07317073170731708 }, { "name": "SVAMP", "path": "ChilleD/SVAMP", "split": "train", "weight": 0.45, "dialect": [ 0.10000000149011612, 0.15000000596046448, 0.550000011920929, 0.15000000596046448, 0.05000000074505806 ], "class_id": 6, "p": 0.05487804878048781 }, { "name": "MATH-500", "path": "HuggingFaceH4/MATH-500", "split": "test", "weight": 0.15, "dialect": [ 0.05000000074505806, 0.15000000596046448, 0.6000000238418579, 0.15000000596046448, 0.05000000074505806 ], "class_id": 7, "p": 0.01829268292682927 }, { "name": "SEP", "path": "AiresPucrs/stanford-encyclopedia-philosophy", "split": "train", "weight": 0.3, "dialect": [ 0.05000000074505806, 0.44999998807907104, 0.18000000715255737, 0.2199999988079071, 0.10000000149011612 ], "class_id": 8, "p": 0.03658536585365854 }, { "name": "ETHICS-commonsense", "path": "hendrycks/ethics", "config": "commonsense", "split": "train", "weight": 0.45, "dialect": [ 0.10000000149011612, 0.30000001192092896, 0.18000000715255737, 0.23999999463558197, 0.18000000715255737 ], "class_id": 9, "p": 0.05487804878048781 }, { "name": "ETHICS-deontology", "path": "hendrycks/ethics", "config": "deontology", "split": "train", "weight": 0.35, "dialect": [ 0.10000000149011612, 0.30000001192092896, 0.18000000715255737, 0.23999999463558197, 0.18000000715255737 ], "class_id": 10, "p": 0.042682926829268296 }, { "name": "ETHICS-justice", "path": "hendrycks/ethics", "config": "justice", "split": "train", "weight": 0.35, "dialect": [ 0.10000000149011612, 0.30000001192092896, 0.18000000715255737, 0.23999999463558197, 0.18000000715255737 ], "class_id": 11, "p": 0.042682926829268296 }, { "name": "ETHICS-utilitarianism", "path": "hendrycks/ethics", "config": "utilitarianism", "split": "train", "weight": 0.35, "dialect": [ 0.10000000149011612, 0.30000001192092896, 0.18000000715255737, 0.23999999463558197, 0.18000000715255737 ], "class_id": 12, "p": 0.042682926829268296 }, { "name": "ETHICS-virtue", "path": "hendrycks/ethics", "config": "virtue", "split": "train", "weight": 0.35, "dialect": [ 0.10000000149011612, 0.30000001192092896, 0.18000000715255737, 0.23999999463558197, 0.18000000715255737 ], "class_id": 13, "p": 0.042682926829268296 }, { "name": "SocialChem101", "path": "allenai/social-chemistry-101", "split": "train", "weight": 0.65, "dialect": [ 0.15000000596046448, 0.25, 0.20000000298023224, 0.20000000298023224, 0.20000000298023224 ], "class_id": 14, "p": 0.07926829268292684 }, { "name": "MoralStories", "path": "demelin/moral_stories", "split": "train", "weight": 0.35, "dialect": [ 0.20000000298023224, 0.20000000298023224, 0.20000000298023224, 0.20000000298023224, 0.20000000298023224 ], "class_id": 15, "p": 0.042682926829268296 }, { "name": "ART-AbductiveNLI", "path": "allenai/art", "split": "train", "weight": 0.3, "dialect": [ 0.05000000074505806, 0.20000000298023224, 0.44999998807907104, 0.20000000298023224, 0.10000000149011612 ], "class_id": 16, "p": 0.03658536585365854 }, { "name": "EntailmentBankV3", "path": "ariesutiono/entailment-bank-v3", "split": "train", "weight": 0.35, "dialect": [ 0.05000000074505806, 0.25, 0.44999998807907104, 0.15000000596046448, 0.10000000149011612 ], "class_id": 17, "p": 0.042682926829268296 }, { "name": "LogiQA2.0NLI", "path": "tasksource/logiqa-2.0-nli", "split": "train", "weight": 0.45, "dialect": [ 0.05000000074505806, 0.25, 0.44999998807907104, 0.15000000596046448, 0.10000000149011612 ], "class_id": 18, "p": 0.05487804878048781 }, { "name": "TruthfulQA-MC", "path": "EleutherAI/truthful_qa_mc", "split": "validation", "weight": 0.25, "dialect": [ 0.05000000074505806, 0.3499999940395355, 0.25, 0.25, 0.10000000149011612 ], "class_id": 19, "p": 0.030487804878048783 }, { "name": "VUA20-Metaphor", "path": "CreativeLang/vua20_metaphor", "split": "train[:5%]", "weight": 0.3, "dialect": [ 0.30000001192092896, 0.10000000149011612, 0.10000000149011612, 0.15000000596046448, 0.3499999940395355 ], "class_id": 20, "p": 0.03658536585365854 } ], "_alive_entries": [ { "name": "TinyStories", "path": "roneneldan/TinyStories", "split": "train[10%:20%]", "weight": 0.1, "dialect": [ 0.6000000238418579, 0.10000000149011612, 0.05000000074505806, 0.05000000074505806, 0.20000000298023224 ], "class_id": 0, "p": 0.012195121951219514 }, { "name": "WikipediaEN", "path": "wikimedia/wikipedia", "config": "20231101.en", "split": "train[1%:2%]", "weight": 0.6, "dialect": [ 0.11999999731779099, 0.5799999833106995, 0.10000000149011612, 0.10000000149011612, 0.10000000149011612 ], "class_id": 1, "p": 0.07317073170731708 }, { "name": "AGNews", "path": "ag_news", "split": "train[:50%]", "weight": 0.4, "dialect": [ 0.20000000298023224, 0.5, 0.10000000149011612, 0.10000000149011612, 0.10000000149011612 ], "class_id": 2, "p": 0.04878048780487806 }, { "name": "GSM8K", "path": "openai/gsm8k", "config": "main", "split": "train[10%:20%]", "weight": 0.6, "dialect": [ 0.10000000149011612, 0.15000000596046448, 0.5, 0.15000000596046448, 0.10000000149011612 ], "class_id": 3, "p": 0.07317073170731708 }, { "name": "AI2-ARC-Easy", "path": "allenai/ai2_arc", "config": "ARC-Easy", "split": "train[10%:20%]", "weight": 0.5, "dialect": [ 0.05000000074505806, 0.15000000596046448, 0.4000000059604645, 0.25, 0.15000000596046448 ], "class_id": 4, "p": 0.06097560975609757 }, { "name": "HH-RLHF", "path": "Anthropic/hh-rlhf", "split": "train[1%:2%]", "weight": 0.6, "dialect": [ 0.10000000149011612, 0.25, 0.20000000298023224, 0.25, 0.20000000298023224 ], "class_id": 5, "p": 0.07317073170731708 }, { "name": "SVAMP", "path": "ChilleD/SVAMP", "split": "train", "weight": 0.45, "dialect": [ 0.10000000149011612, 0.15000000596046448, 0.550000011920929, 0.15000000596046448, 0.05000000074505806 ], "class_id": 6, "p": 0.05487804878048781 }, { "name": "MATH-500", "path": "HuggingFaceH4/MATH-500", "split": "test", "weight": 0.15, "dialect": [ 0.05000000074505806, 0.15000000596046448, 0.6000000238418579, 0.15000000596046448, 0.05000000074505806 ], "class_id": 7, "p": 0.01829268292682927 }, { "name": "SEP", "path": "AiresPucrs/stanford-encyclopedia-philosophy", "split": "train", "weight": 0.3, "dialect": [ 0.05000000074505806, 0.44999998807907104, 0.18000000715255737, 0.2199999988079071, 0.10000000149011612 ], "class_id": 8, "p": 0.03658536585365854 }, { "name": "ETHICS-commonsense", "path": "hendrycks/ethics", "config": "commonsense", "split": "train", "weight": 0.45, "dialect": [ 0.10000000149011612, 0.30000001192092896, 0.18000000715255737, 0.23999999463558197, 0.18000000715255737 ], "class_id": 9, "p": 0.05487804878048781 }, { "name": "ETHICS-deontology", "path": "hendrycks/ethics", "config": "deontology", "split": "train", "weight": 0.35, "dialect": [ 0.10000000149011612, 0.30000001192092896, 0.18000000715255737, 0.23999999463558197, 0.18000000715255737 ], "class_id": 10, "p": 0.042682926829268296 }, { "name": "ETHICS-justice", "path": "hendrycks/ethics", "config": "justice", "split": "train", "weight": 0.35, "dialect": [ 0.10000000149011612, 0.30000001192092896, 0.18000000715255737, 0.23999999463558197, 0.18000000715255737 ], "class_id": 11, "p": 0.042682926829268296 }, { "name": "ETHICS-utilitarianism", "path": "hendrycks/ethics", "config": "utilitarianism", "split": "train", "weight": 0.35, "dialect": [ 0.10000000149011612, 0.30000001192092896, 0.18000000715255737, 0.23999999463558197, 0.18000000715255737 ], "class_id": 12, "p": 0.042682926829268296 }, { "name": "ETHICS-virtue", "path": "hendrycks/ethics", "config": "virtue", "split": "train", "weight": 0.35, "dialect": [ 0.10000000149011612, 0.30000001192092896, 0.18000000715255737, 0.23999999463558197, 0.18000000715255737 ], "class_id": 13, "p": 0.042682926829268296 }, { "name": "SocialChem101", "path": "allenai/social-chemistry-101", "split": "train", "weight": 0.65, "dialect": [ 0.15000000596046448, 0.25, 0.20000000298023224, 0.20000000298023224, 0.20000000298023224 ], "class_id": 14, "p": 0.07926829268292684 }, { "name": "MoralStories", "path": "demelin/moral_stories", "split": "train", "weight": 0.35, "dialect": [ 0.20000000298023224, 0.20000000298023224, 0.20000000298023224, 0.20000000298023224, 0.20000000298023224 ], "class_id": 15, "p": 0.042682926829268296 }, { "name": "ART-AbductiveNLI", "path": "allenai/art", "split": "train", "weight": 0.3, "dialect": [ 0.05000000074505806, 0.20000000298023224, 0.44999998807907104, 0.20000000298023224, 0.10000000149011612 ], "class_id": 16, "p": 0.03658536585365854 }, { "name": "LogiQA2.0NLI", "path": "tasksource/logiqa-2.0-nli", "split": "train", "weight": 0.45, "dialect": [ 0.05000000074505806, 0.25, 0.44999998807907104, 0.15000000596046448, 0.10000000149011612 ], "class_id": 18, "p": 0.05487804878048781 }, { "name": "TruthfulQA-MC", "path": "EleutherAI/truthful_qa_mc", "split": "validation", "weight": 0.25, "dialect": [ 0.05000000074505806, 0.3499999940395355, 0.25, 0.25, 0.10000000149011612 ], "class_id": 19, "p": 0.030487804878048783 }, { "name": "VUA20-Metaphor", "path": "CreativeLang/vua20_metaphor", "split": "train[:5%]", "weight": 0.3, "dialect": [ 0.30000001192092896, 0.10000000149011612, 0.10000000149011612, 0.15000000596046448, 0.3499999940395355 ], "class_id": 20, "p": 0.03658536585365854 } ] }