NishithP2004 committed on
Commit
f81260f
·
verified ·
1 Parent(s): 0f3fcaa

Upload folder using huggingface_hub

Browse files
openenv_aegis_env.egg-info/PKG-INFO CHANGED
@@ -6,8 +6,8 @@ Requires-Python: >=3.10
6
  Requires-Dist: openenv-core[core]>=0.2.2
7
  Requires-Dist: openai>=1.0.0
8
  Requires-Dist: python-dotenv>=1.0.0
9
- Requires-Dist: datasets>=2.19.0
10
  Requires-Dist: huggingface-hub>=0.23.0
 
11
  Provides-Extra: dev
12
  Requires-Dist: pytest>=8.0.0; extra == "dev"
13
  Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
 
6
  Requires-Dist: openenv-core[core]>=0.2.2
7
  Requires-Dist: openai>=1.0.0
8
  Requires-Dist: python-dotenv>=1.0.0
 
9
  Requires-Dist: huggingface-hub>=0.23.0
10
+ Requires-Dist: datasets>=2.19.0
11
  Provides-Extra: dev
12
  Requires-Dist: pytest>=8.0.0; extra == "dev"
13
  Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
openenv_aegis_env.egg-info/requires.txt CHANGED
@@ -1,8 +1,8 @@
1
  openenv-core[core]>=0.2.2
2
  openai>=1.0.0
3
  python-dotenv>=1.0.0
4
- datasets>=2.19.0
5
  huggingface-hub>=0.23.0
 
6
 
7
  [dev]
8
  pytest>=8.0.0
 
1
  openenv-core[core]>=0.2.2
2
  openai>=1.0.0
3
  python-dotenv>=1.0.0
 
4
  huggingface-hub>=0.23.0
5
+ datasets>=2.19.0
6
 
7
  [dev]
8
  pytest>=8.0.0
pyproject.toml CHANGED
@@ -18,8 +18,10 @@ dependencies = [
18
  # install from github
19
  # "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
20
  "openenv-core[core]>=0.2.2",
21
- "pymongo>=4.6.0",
22
  "openai>=1.0.0",
 
 
 
23
  # Environment-specific dependencies
24
  # Add all dependencies needed for your environment here
25
  # Examples:
 
18
  # install from github
19
  # "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
20
  "openenv-core[core]>=0.2.2",
 
21
  "openai>=1.0.0",
22
+ "python-dotenv>=1.0.0",
23
+ "huggingface-hub>=0.23.0",
24
+ "datasets>=2.19.0",
25
  # Environment-specific dependencies
26
  # Add all dependencies needed for your environment here
27
  # Examples:
server/aegis_env_environment.py CHANGED
@@ -7,13 +7,16 @@
7
  """
8
  AEGIS-Env: automated grading simulation with deterministic rewards.
9
 
10
- MongoDB is queried only in ``__init__``; ``reset`` and ``step`` are CPU-only.
 
11
  """
12
 
13
  from __future__ import annotations
14
 
 
15
  import os
16
  import random
 
17
  from typing import Any, Dict, List, Optional
18
  from uuid import uuid4
19
 
@@ -46,6 +49,145 @@ def _jaccard(a_text: str, b_text: str) -> float:
46
  return float(inter) / float(union) if union else 0.0
47
 
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  class AegisEnvironment(Environment[AegisAction, AegisObservation, State]):
50
  """
51
  Single-step grading episode: reset samples a row; step scores the agent output.
@@ -68,33 +210,10 @@ class AegisEnvironment(Environment[AegisAction, AegisObservation, State]):
68
  self.current_reference_feedback: str = ""
69
  self.current_max_score: float = 1.0
70
 
71
- uri = os.environ.get("MONGO_URI")
72
- if not uri:
73
- self._load_error = "MONGO_URI is not set; dataset is empty."
74
- return
75
-
76
  try:
77
- from pymongo import MongoClient # type: ignore[import-untyped]
78
-
79
- client = MongoClient(uri, serverSelectionTimeoutMS=10_000)
80
- try:
81
- coll = client["AEGIS"]["AEGIS-Eval-v2"]
82
- projection = {
83
- "dataset": 1,
84
- "question": 1,
85
- "rubrics": 1,
86
- "student_response": 1,
87
- "max_score": 1,
88
- "obtained_score": 1,
89
- "reference_feedback": 1,
90
- }
91
- cursor = coll.find({}, projection)
92
- for doc in cursor:
93
- self.dataset.append(doc)
94
- finally:
95
- client.close()
96
  except Exception as e:
97
- self._load_error = f"MongoDB load failed: {e!s}"
98
  self.dataset = []
99
 
100
  def reset(
@@ -140,7 +259,7 @@ class AegisEnvironment(Environment[AegisAction, AegisObservation, State]):
140
 
141
  # Store the ground truth for the deterministic reward calculation in step()
142
  self.current_ground_truth = float(selected_record.get("obtained_score", 0.0))
143
- self.current_reference_feedback = str(selected_record.get("reference_feedback", "") or "")
144
  self.current_max_score = float(selected_record.get("max_score", 1.0) or 1.0)
145
 
146
  # Ensure rubrics are handled even if missing (like in ASAP-SAS)
 
7
  """
8
  AEGIS-Env: automated grading simulation with deterministic rewards.
9
 
10
+ The dataset is downloaded from Hugging Face and cached on disk; ``reset`` and
11
+ ``step`` are CPU-only.
12
  """
13
 
14
  from __future__ import annotations
15
 
16
+ import json
17
  import os
18
  import random
19
+ from pathlib import Path
20
  from typing import Any, Dict, List, Optional
21
  from uuid import uuid4
22
 
 
49
  return float(inter) / float(union) if union else 0.0
50
 
51
 
52
def _load_dotenv_if_available() -> None:
    """Best-effort load of a ``.env`` file through ``python-dotenv``.

    Values found in the ``.env`` file replace variables already present in
    the process environment (``override=True``). Any failure -- the package
    not being installed, or ``load_dotenv`` itself raising -- is ignored on
    purpose so the environment still works without dotenv support.
    """
    try:
        import dotenv

        dotenv.load_dotenv(override=True)
    except Exception:
        # Optional convenience only: never let .env handling break startup.
        return
59
+
60
+
61
def _cache_dir() -> Path:
    """Return the directory under which AEGIS dataset files are cached.

    The first non-empty value among the environment variables
    ``AEGIS_CACHE_DIR``, ``HF_HOME`` and ``XDG_CACHE_HOME`` (checked in that
    order) is used as the root, with ``aegis_env`` appended. When none is
    set, the cache lives at ``<repo root>/.cache/aegis_env`` where the repo
    root is taken to be the parent of this file's directory; a repo-local
    default keeps things working in sandboxed runners.

    The directory is not created here.
    """
    for var_name in ("AEGIS_CACHE_DIR", "HF_HOME", "XDG_CACHE_HOME"):
        base = os.environ.get(var_name)
        if base:
            return Path(base) / "aegis_env"
    return Path(__file__).resolve().parents[1] / ".cache" / "aegis_env"
73
+
74
+
75
def _unwrap_object_id(v: Any) -> str:
    """Return a record id as a plain string.

    Accepts either the wrapped form ``{"$oid": "..."}`` or an id that is
    already a bare value. Falsy inputs (``None``, ``""``, ``0``, an empty
    dict, or a wrapped falsy value) yield the empty string.
    """
    is_wrapped = isinstance(v, dict) and "$oid" in v
    raw = v["$oid"] if is_wrapped else v
    return str(raw or "")
80
+
81
+
82
def _unwrap_number(v: Any) -> Optional[float]:
    """Convert a JSON-decoded numeric field to ``float``.

    Accepted inputs:

    * ``None`` -> ``None``
    * ``int`` / ``float`` (and ``bool``, an ``int`` subclass) -> ``float(v)``
    * an Extended-JSON style wrapper dict such as
      ``{"$numberDouble": "2.5"}``, ``{"$numberDouble": "Infinity"}``,
      ``{"$numberInt": "3"}``, ``{"$numberLong": "7"}`` or
      ``{"$numberDecimal": "1.25"}`` -> the wrapped value parsed as a float
    * anything else ``float()`` can parse (e.g. the string ``"4"``)

    Returns:
        The parsed float (possibly ``inf``, ``-inf`` or ``nan``), or ``None``
        when the value is missing or cannot be interpreted as a number.
    """
    if v is None:
        return None
    if isinstance(v, (int, float)):
        return float(v)
    if isinstance(v, dict):
        # Unwrap the first recognised numeric wrapper. Previously only the
        # three non-finite "$numberDouble" spellings were handled and every
        # finite wrapped value fell through to float(<dict>) -> None.
        for wrapper_key in ("$numberDouble", "$numberInt", "$numberLong", "$numberDecimal"):
            if wrapper_key in v:
                v = v[wrapper_key]
                break
    try:
        # float() natively understands "Infinity", "-Infinity" and "NaN".
        return float(v)
    except (TypeError, ValueError, OverflowError):
        return None
100
+
101
+
102
def _reference_feedback_from_record(rec: Dict[str, Any]) -> str:
    """Extract the reference feedback text from a dataset record.

    New-schema records keep feedback under ``evaluation.agent_feedback``;
    its ``score_justification`` and ``improvement_advice`` values (when not
    ``None``) are stringified, stripped and joined with a single space.
    If that yields nothing -- or the nested structure is absent or not a
    dict -- the legacy top-level ``reference_feedback`` field is used,
    defaulting to the empty string.
    """
    evaluation = rec.get("evaluation") or {}
    feedback: Any = {}
    if isinstance(evaluation, dict):
        feedback = evaluation.get("agent_feedback") or {}

    if isinstance(feedback, dict):
        pieces: List[str] = []
        for field in ("score_justification", "improvement_advice"):
            value = feedback.get(field)
            if value is not None:
                pieces.append(str(value).strip())
        text = " ".join(pieces).strip()
        if text:
            return text

    # Backward-compat: old field name.
    return str(rec.get("reference_feedback") or "")
114
+
115
+
116
def _download_dataset_json(repo_id: str, filename: str, revision: Optional[str]) -> Path:
    """Fetch ``filename`` from a Hugging Face dataset repo and mirror it locally.

    The file is downloaded with ``hf_hub_download`` into ``<cache>/hf``. If
    that fails for any reason (for example, network access is blocked in a
    sandbox), a second lookup is made against the global Hugging Face cache
    with ``local_files_only=True``.

    On success the bytes are copied to a stable, predictable path
    ``<cache>/<repo_id with '/' -> '__'>__<filename>``. The copy goes
    through a temporary file followed by an atomic rename so that an
    interrupted write can never leave a truncated file at the stable path.

    Args:
        repo_id: Dataset repository id, e.g. ``"user/name"``.
        filename: Path of the file inside the repository.
        revision: Branch, tag or commit to fetch; ``None`` for the default.

    Returns:
        The stable cache path, or the path returned by ``hf_hub_download``
        if the stable copy could not be written.

    Raises:
        Exception: the error from the initial download attempt, when the
            offline fallback cannot find the file either.
    """
    from huggingface_hub import hf_hub_download  # type: ignore[import-not-found]

    cache_dir = _cache_dir()
    cache_dir.mkdir(parents=True, exist_ok=True)
    try:
        downloaded = hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            repo_type="dataset",
            revision=revision,
            cache_dir=str(cache_dir / "hf"),
        )
    except Exception as download_err:
        # In some sandboxed environments, network access to Hugging Face may be blocked.
        # If the file is already present in the global HF cache, fall back to it.
        try:
            downloaded = hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                repo_type="dataset",
                revision=revision,
                cache_dir=None,
                local_files_only=True,
            )
        except Exception:
            # Surface the real download failure (auth, 404, network, ...)
            # rather than the less informative "not in local cache" error;
            # the fallback error stays attached as __context__.
            raise download_err

    stable_path = cache_dir / f"{repo_id.replace('/', '__')}__{filename}"
    tmp_path = stable_path.with_name(f"{stable_path.name}.{os.getpid()}.tmp")
    try:
        # ``filename`` may contain sub-directories (e.g. "data/x.json").
        stable_path.parent.mkdir(parents=True, exist_ok=True)
        tmp_path.write_bytes(Path(downloaded).read_bytes())
        tmp_path.replace(stable_path)
    except OSError:
        # Best effort: the stable copy is only a convenience. Clean up the
        # partial temp file and hand back the hub cache path instead.
        try:
            tmp_path.unlink(missing_ok=True)
        except OSError:
            pass
        return Path(downloaded)
    return stable_path
146
+
147
+
148
def _load_dataset_records() -> List[Dict[str, Any]]:
    """Load the evaluation dataset from the local cache or Hugging Face.

    Configuration is read from the environment (after an optional ``.env``
    load):

    * ``AEGIS_HF_DATASET_REPO`` -- dataset repo id
      (default ``"NishithP2004/AEGIS-Eval-v2"``)
    * ``AEGIS_HF_DATASET_FILE`` -- file inside the repo
      (default ``"dataset.json"``)
    * ``AEGIS_HF_DATASET_REVISION`` -- optional revision
    * ``AEGIS_HF_OFFLINE`` -- ``1``/``true``/``yes`` disables downloading

    An existing stable cache file always wins; a download is attempted only
    when it is missing and offline mode is not enabled.

    The JSON payload may be either a list of records or an object with a
    ``"data"`` list. Non-dict entries are skipped. Each record is shallow
    copied, ``_id`` is unwrapped to a string, and ``max_score``,
    ``min_score`` and ``obtained_score`` are converted to ``float``.
    Score fields that are absent or not numeric are left out of the
    normalised record (instead of being stored as ``None``) so that
    consumers using ``record.get(key, default)`` receive their default
    rather than a ``None`` that breaks ``float(...)``.

    Returns:
        The list of normalised record dicts.

    Raises:
        RuntimeError: no cached file is available, or the JSON has an
            unexpected top-level shape.
    """
    _load_dotenv_if_available()

    repo_id = os.environ.get("AEGIS_HF_DATASET_REPO") or "NishithP2004/AEGIS-Eval-v2"
    filename = os.environ.get("AEGIS_HF_DATASET_FILE") or "dataset.json"
    revision = os.environ.get("AEGIS_HF_DATASET_REVISION") or None
    offline = str(os.environ.get("AEGIS_HF_OFFLINE") or "").lower() in {"1", "true", "yes"}

    cache_dir = _cache_dir()
    stable_path = cache_dir / f"{repo_id.replace('/', '__')}__{filename}"

    path: Optional[Path] = None
    if stable_path.exists():
        path = stable_path
    elif not offline:
        path = _download_dataset_json(repo_id, filename, revision)

    if path is None or not path.exists():
        raise RuntimeError(
            f"Dataset cache not found. Expected {stable_path}. "
            f"Set AEGIS_HF_OFFLINE=0 to allow download or provide the cached file."
        )

    data = json.loads(path.read_text(encoding="utf-8"))
    if isinstance(data, dict) and "data" in data and isinstance(data["data"], list):
        records = data["data"]
    elif isinstance(data, list):
        records = data
    else:
        raise RuntimeError(f"Unexpected dataset.json shape in {path}")

    out: List[Dict[str, Any]] = []
    for rec in records:
        if not isinstance(rec, dict):
            continue
        norm: Dict[str, Any] = dict(rec)
        norm["_id"] = _unwrap_object_id(rec.get("_id"))
        for k in ("max_score", "min_score", "obtained_score"):
            number = _unwrap_number(rec.get(k))
            if number is None:
                # Drop missing/unparseable scores so ``.get(k, default)``
                # downstream yields the default instead of ``None``.
                norm.pop(k, None)
            else:
                norm[k] = number
        out.append(norm)
    return out
189
+
190
+
191
  class AegisEnvironment(Environment[AegisAction, AegisObservation, State]):
192
  """
193
  Single-step grading episode: reset samples a row; step scores the agent output.
 
210
  self.current_reference_feedback: str = ""
211
  self.current_max_score: float = 1.0
212
 
 
 
 
 
 
213
  try:
214
+ self.dataset = _load_dataset_records()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  except Exception as e:
216
+ self._load_error = f"Dataset load failed: {e!s}"
217
  self.dataset = []
218
 
219
  def reset(
 
259
 
260
  # Store the ground truth for the deterministic reward calculation in step()
261
  self.current_ground_truth = float(selected_record.get("obtained_score", 0.0))
262
+ self.current_reference_feedback = _reference_feedback_from_record(selected_record)
263
  self.current_max_score = float(selected_record.get("max_score", 1.0) or 1.0)
264
 
265
  # Ensure rubrics are handled even if missing (like in ASAP-SAS)