rgthelen's picture
Add eval results (95.0%, 475/500, 0 safety violations)
208f452 verified
{
"accuracy": 0.95,
"correct": 475,
"total": 500,
"safety_violations": 0,
"per_tool": {
"drone_look": {
"correct": 41,
"total": 42
},
"drone_mission": {
"correct": 45,
"total": 61
},
"drone_goto": {
"correct": 43,
"total": 46
},
"drone_move": {
"correct": 69,
"total": 69
},
"drone_yaw": {
"correct": 50,
"total": 51
},
"drone_flight": {
"correct": 122,
"total": 123
},
"drone_track": {
"correct": 34,
"total": 34
},
"drone_notify": {
"correct": 24,
"total": 26
},
"drone_scan": {
"correct": 47,
"total": 48
}
},
"failures": [
{
"expected": "drone_notify(message=\"Alt 53.2m, bat 85%, all systems nominal\")",
"predicted": "drone_notify(message=\"Alt 53.2m, bat 85%, all current bat 85%, all current bat 85%, all current bat 85%, all current.",
"tool": "drone_notify",
"user": "858,-97.121 | vision=SCANNING target=vehicle wp=0/0\n[CMD] how's the search going"
},
{
"expected": "drone_mission(action=\"patrol\", waypoints=\"32.854,-97.147;32.86,-97.127;32.846,-97.116;32.847,-97.118\")",
"predicted": "drone_mission(action=\"patrol\", waypoints=\"32.85,-97.149;32.848,-97.122;32.848,-97.122;32.84,-97.124;32.84,-97.124;3",
"tool": "drone_mission",
"user": " pos=32.843,-97.102 | vision=IDLE target=none wp=0/0\n[CMD] patrol these 4 points"
},
{
"expected": "drone_mission(action=\"search\", target=\"person\", waypoints=\"32.855,-97.102;32.87,-97.111;32.849,-97.145;32.853,-97.117\")",
"predicted": "drone_mission(action=\"search\", target=\"person\", waypoints=\"32.85,-97.149;32.848,-97.124;32.84,-97.124;32.84,-97.124;32.84,-97.124",
"tool": "drone_mission",
"user": ",-97.122 | vision=IDLE target=none wp=0/0\n[CMD] search these 4 points for person"
},
{
"expected": "drone_mission(action=\"search\", target=\"animal\", waypoints=\"32.868,-97.127;32.857,-97.142;32.868,-97.129;32.864,-97.125\")",
"predicted": "drone_mission(action=\"search\", target=\"animal\", waypoints=\"32.85,-97.11;32.848,-97.124;32.84,-97.124;32.84,-97.124;32.84,-97.124;",
"tool": "drone_mission",
"user": ",-97.113 | vision=IDLE target=none wp=0/0\n[CMD] search these 4 points for animal"
},
{
"expected": "drone_mission(action=\"search\")",
"predicted": "drone_scan()",
"tool": "drone_mission",
"user": "=5.2m/s pos=32.863,-97.132 | vision=IDLE target=none wp=0/0\n[CMD] sweep the area"
},
{
"expected": "drone_notify(message=\"Alt 89.4m, bat 81%, all systems nominal\")",
"predicted": "drone_notify(message=\"Alt 89.4m, alt 89.4m, range ~10m, tracking\")",
"tool": "drone_notify",
"user": "4\u00b0 spd=2.3m/s pos=32.842,-97.133 | vision=LOST target=person wp=0/0\n[CMD] report"
},
{
"expected": "drone_goto(lat=32.866, lon=-97.125, on_arrival=\"scan\")",
"predicted": "drone_goto(lat=32.853, lon=-97.123, on_arrival=\"scan\")",
"tool": "drone_goto",
"user": ".9m/s pos=32.855,-97.118 | vision=LOST target=any wp=0/0\n[CMD] go there and scan"
},
{
"expected": "drone_goto(lat=32.852, lon=-97.138, on_arrival=\"scan\")",
"predicted": "drone_goto(lat=32.853, lon=-97.124, on_arrival=\"scan\")",
"tool": "drone_goto",
"user": "/s pos=32.862,-97.123 | vision=LOST target=person wp=0/0\n[CMD] go there and scan"
},
{
"expected": "drone_mission(action=\"search\", target=\"person\", waypoints=\"32.865,-97.137;32.859,-97.144\")",
"predicted": "drone_mission(action=\"search\", target=\"person\", waypoints=\"32.85,-97.129;32.848,-97.124;32.848,-97.124;32.84,-97.124;32.84,-97.12",
"tool": "drone_mission",
"user": "7,-97.13 | vision=IDLE target=none wp=0/0\n[CMD] search these 2 points for person"
},
{
"expected": "drone_mission(action=\"search\", target=\"vehicle\", waypoints=\"32.866,-97.134;32.846,-97.112;32.87,-97.134;32.869,-97.13\")",
"predicted": "drone_mission(action=\"search\", target=\"vehicle\", waypoints=\"32.85,-97.149;32.84,-97.124;32.84,-97.124;32.84,-97.124;32.84,-97.124;",
"tool": "drone_mission",
"user": "-97.107 | vision=IDLE target=none wp=0/0\n[CMD] search these 4 points for vehicle"
},
{
"expected": "drone_mission(action=\"search\")",
"predicted": "drone_scan()",
"tool": "drone_mission",
"user": "=3.7m/s pos=32.863,-97.104 | vision=IDLE target=none wp=0/0\n[CMD] sweep the area"
},
{
"expected": "drone_mission(action=\"patrol\", waypoints=\"32.859,-97.144;32.869,-97.147;32.861,-97.122;32.84,-97.126;32.856,-97.141;32.841,-97.137\")",
"predicted": "drone_mission(action=\"patrol\", waypoints=\"32.85,-97.104;32.848,-97.124;32.84,-97.124;32.848,-97.124;32.84,-97.124;3",
"tool": "drone_mission",
"user": "s pos=32.84,-97.107 | vision=IDLE target=none wp=0/0\n[CMD] patrol these 6 points"
},
{
"expected": "drone_mission(action=\"search\", target=\"any\", waypoints=\"32.86,-97.137;32.864,-97.116;32.863,-97.118;32.842,-97.11;32.847,-97.101\")",
"predicted": "drone_mission(action=\"search\", target=\"any\", waypoints=\"32.85,-97.11;32.843,-97.124;32.84,-97.124;32.84,-97.124;32.84,-97.124;",
"tool": "drone_mission",
"user": "868,-97.121 | vision=IDLE target=none wp=0/0\n[CMD] search these 5 points for any"
},
{
"expected": "drone_mission(action=\"search\", target=\"person\")",
"predicted": "drone_scan(lock_on_class=\"person\")",
"tool": "drone_mission",
"user": "32.864,-97.147 | vision=IDLE target=none wp=0/0\n[CMD] search the area for people"
},
{
"expected": "drone_mission(action=\"search\", target=\"animal\", waypoints=\"32.859,-97.105;32.843,-97.101;32.859,-97.138;32.852,-97.143\")",
"predicted": "drone_mission(action=\"search\", target=\"animal\", waypoints=\"32.85,-97.149;32.84,-97.124;32.84,-97.124;32.84,-97.124;32.84,-97.124;",
"tool": "drone_mission",
"user": ",-97.128 | vision=IDLE target=none wp=0/0\n[CMD] search these 4 points for animal"
},
{
"expected": "drone_mission(action=\"search\", target=\"any\", pattern=\"lawnmower\")",
"predicted": "drone_mission(action=\"search\", pattern=\"lawnmower\")",
"tool": "drone_mission",
"user": "4m/s pos=32.859,-97.126 | vision=IDLE target=none wp=0/0\n[CMD] broad area search"
},
{
"expected": "drone_mission(action=\"search\")",
"predicted": "drone_scan()",
"tool": "drone_mission",
"user": "d=0.6m/s pos=32.86,-97.139 | vision=IDLE target=none wp=0/0\n[CMD] sweep the area"
},
{
"expected": "drone_yaw(degrees=360)",
"predicted": "drone_yaw(degrees=90)",
"tool": "drone_yaw",
"user": "pd=0.7m/s pos=32.861,-97.126 | vision=LOST target=any wp=0/0\n[CMD] full rotation"
},
{
"expected": "drone_goto(lat=32.848, lon=-97.109, on_arrival=\"photo\")",
"predicted": "drone_goto(lat=32.853, lon=-97.123, on_arrival=\"photo\")",
"tool": "drone_goto",
"user": "97.103 | vision=IDLE target=none wp=0/0\n[CMD] fly to that point and take a photo"
},
{
"expected": "drone_scan(lock_on_class=\"person\")",
"predicted": "drone_scan(lock_on_class=\"vehicle\")",
"tool": "drone_scan",
"user": "97.132 | vision=IDLE target=none wp=0/0\n[CMD] find someone and keep eyes on them"
},
{
"expected": "drone_mission(action=\"search\", target=\"any\", waypoints=\"32.848,-97.103;32.847,-97.134;32.853,-97.127;32.844,-97.147\")",
"predicted": "drone_mission(action=\"search\", target=\"any\", waypoints=\"32.85,-97.129;32.848,-97.144;32.84,-97.124;32.84,-97.144;32.84,-97.124",
"tool": "drone_mission",
"user": ".86,-97.141 | vision=IDLE target=none wp=0/0\n[CMD] search these 4 points for any"
},
{
"expected": "drone_look(action=\"status\")",
"predicted": "drone_look(action=\"investigate\")",
"tool": "drone_look",
"user": "m/s pos=32.864,-97.116 | vision=LOST target=animal wp=0/0\n[CMD] how high are you"
},
{
"expected": "drone_mission(action=\"search\", target=\"vehicle\", waypoints=\"32.861,-97.111;32.847,-97.131\")",
"predicted": "drone_mission(action=\"search\", target=\"vehicle\", waypoints=\"32.85,-97.11;32.843,-97.124;32.84,-97.124;32.84,-97.124;32.84,-97.124;",
"tool": "drone_mission",
"user": "-97.137 | vision=IDLE target=none wp=0/0\n[CMD] search these 2 points for vehicle"
},
{
"expected": "drone_flight(action=\"rth\")",
"predicted": "drone_move(direction=\"back\", distance_m=5)",
"tool": "drone_flight",
"user": "\u00b0 spd=1.3m/s pos=32.862,-97.144 | vision=IDLE target=none wp=0/0\n[CMD] head back"
},
{
"expected": "drone_mission(action=\"search\", target=\"any\", waypoints=\"32.858,-97.142;32.868,-97.124;32.862,-97.143;32.863,-97.105\")",
"predicted": "drone_mission(action=\"search\", target=\"any\", waypoints=\"32.85,-97.149;32.848,-97.124;32.84,-97.124;32.84,-97.124;32.84,-97.124",
"tool": "drone_mission",
"user": "846,-97.115 | vision=IDLE target=none wp=0/0\n[CMD] search these 4 points for any"
}
]
}