---
# Expert-clustering settings.
# NOTE(review): semantics below are inferred from key names — confirm
# against the consuming tool before relying on them.
cluster_args:
  cluster_description: null
  # Agglomerative (hierarchical) clustering; pairs with linkage_method below.
  cluster_method: agglomerative
  # Presumably the fraction of experts to merge/remove — TODO confirm.
  compression_ratio: 0.2
  expert_sim: ttm
  frequency_penalty: true
  linkage_method: average
  # null values: consumer default applies (likely derived from
  # compression_ratio in the case of num_clusters) — verify.
  max_cluster_size: null
  multi_layer: null
  num_clusters: null
  singleton_outlier_experts: false
  singleton_super_experts: false
  softmax_temperature: null
# Calibration/observation dataset selection.
ds_args:
  dataset_config_name: null
  # Comma-separated dataset specs. Observed pattern per entry:
  #   <hub_id>[<subset>](<split>):<sample_count>
  # e.g. "open-r1/Mixture-of-Thoughts[code]:3480",
  # "HuggingFaceH4/MATH-500(test):500".
  # NOTE(review): theblackcat102/evol-codealpaca-v1 appears twice
  # (1000 and 1636 samples) — confirm the consumer merges rather than
  # overwrites duplicate entries.
  dataset_name: theblackcat102/evol-codealpaca-v1:1000,open-r1/Mixture-of-Thoughts[code]:3480,open-r1/Mixture-of-Thoughts[math]:3578,open-r1/Mixture-of-Thoughts[science]:3576,Salesforce/xlam-function-calling-60k:1000,SWE-bench/SWE-smith-trajectories(tool):1000,qiaojin/PubMedQA[pqa_labeled]:800,derek-thomas/ScienceQA:800,openai/gsm8k[main]:4466,HuggingFaceH4/MATH-500(test):500,evalplus/humanevalplus(test):164,theblackcat102/evol-codealpaca-v1:1636
  dataset_test_split: test
  shuffle: true
  split: train
# Downstream evaluation configuration.
eval_args:
  # EvalPlus code benchmarks to run (run_evalplus toggles the suite).
  evalplus_tasks:
  - mbpp
  - humaneval
  greedy: true
  # lm-evaluation-harness task list (run_lm_eval toggles the suite).
  lm_eval_tasks:
  - winogrande
  - arc_challenge
  - arc_easy
  - boolq
  - hellaswag
  - mmlu
  - openbookqa
  - rte
  min_p: 0.0
  parallel_tasks: 32
  results_dir: null
  # Per-suite on/off switches.
  run_evalplus: true
  run_livecodebench: true
  run_lm_eval: true
  run_math: false
  run_wildbench: false
  server_log_file_name: server.log
  # Sampling parameters — NOTE(review): greedy is also true above; which
  # takes precedence is decided by the consumer, not this file.
  temperature: 0.7
  top_k: 20
  top_p: 0.8
  # Serve the model over a local vLLM server on this port.
  use_server: true
  vllm_port: 8000
# Target model under evaluation/pruning.
model_args:
  # Local path to the checkpoint (a MoE model, given the expert knobs here).
  model_name: /mnt/llm_models/gemma-4-26B-A4B-it
  # null — keep the checkpoint's own top-k experts-per-token setting.
  num_experts_per_tok_override: null
# Router/expert observation (profiling) pass.
obs_args:
  batch_size: 1
  distance_measure: angular
  model_max_length: 16384
  # Serialized observations output (.pt — presumably a torch save; verify).
  output_file_name: observations_22k_reap_gemma4.pt
  # Reuse an existing observations file instead of recomputing.
  overwrite_observations: false
  record_pruning_metrics_only: true
  renormalize_router_weights: true
  return_vllm_tokens_prompt: false
  samples_per_category: 1024
  select_only_categories: null
  split_by_category: false
  truncate: false
# Expert pruning pass.
prune_args:
  # null — presumably derived from cluster_args.compression_ratio; confirm.
  n_experts_to_prune: null
  overwrite_pruned_model: false
  # NOTE(review): "perserve" is a misspelling of "preserve", but the key
  # names must match the consuming code exactly, so they are left unchanged.
  perserve_outliers: false
  perserve_super_experts: false
  # REAP pruning criterion (matches reap_args section below).
  prune_method: reap
# Top-level run controls for the REAP pipeline.
reap_args:
  debug: false
  # Skip the eval_args evaluation stage when false.
  do_eval: false
  plot_clusters: true
  profile: false
  # When true, only the observation pass runs (no pruning/eval).
  run_observer_only: false
  # RNG seed for reproducibility.
  seed: 42
  smoke_test: false