Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -228,10 +228,10 @@ def battle_arena(prompt):
|
|
| 228 |
|
| 229 |
# Check for API errors in responses
|
| 230 |
if any("Error: Unable to get response from the model" in msg["content"]
|
| 231 |
-
for msg in response_a + response_b
|
| 232 |
if msg["role"] == "assistant"):
|
| 233 |
return (
|
| 234 |
-
[], [], None, None,
|
| 235 |
gr.update(value=[]),
|
| 236 |
gr.update(value=[]),
|
| 237 |
gr.update(interactive=False, value="Voting Disabled - API Error"),
|
|
@@ -247,10 +247,11 @@ def battle_arena(prompt):
|
|
| 247 |
nickname_b = random.choice(config.model_nicknames)
|
| 248 |
|
| 249 |
# The responses are already in the correct format, no need to reformat
|
|
|
|
| 250 |
if random.choice([True, False]):
|
| 251 |
-
logger.warning(f"NOT SWAPPED: left={model_a}, right={model_b}")
|
| 252 |
return (
|
| 253 |
-
response_a, response_b, model_a, model_b,
|
| 254 |
gr.update(label=nickname_a, value=response_a),
|
| 255 |
gr.update(label=nickname_b, value=response_b),
|
| 256 |
gr.update(interactive=True, value=f"Vote for {nickname_a}"),
|
|
@@ -262,9 +263,9 @@ def battle_arena(prompt):
|
|
| 262 |
gr.update(value="Ready for your vote! 🗳️", visible=True)
|
| 263 |
)
|
| 264 |
else:
|
| 265 |
-
logger.warning(f"SWAPPED: left={model_b}, right={model_a}")
|
| 266 |
return (
|
| 267 |
-
response_b, response_a, model_b, model_a,
|
| 268 |
gr.update(label=nickname_a, value=response_b),
|
| 269 |
gr.update(label=nickname_b, value=response_a),
|
| 270 |
gr.update(interactive=True, value=f"Vote for {nickname_a}"),
|
|
@@ -276,7 +277,7 @@ def battle_arena(prompt):
|
|
| 276 |
gr.update(value="Ready for your vote! 🗳️", visible=True)
|
| 277 |
)
|
| 278 |
|
| 279 |
-
def record_vote(prompt, left_response, right_response, left_model, right_model,
|
| 280 |
# Check if outputs are generated
|
| 281 |
if not left_response or not right_response or not left_model or not right_model:
|
| 282 |
return (
|
|
@@ -291,8 +292,20 @@ def record_vote(prompt, left_response, right_response, left_model, right_model,
|
|
| 291 |
gr.update()
|
| 292 |
)
|
| 293 |
|
| 294 |
-
|
| 295 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
|
| 297 |
# Update the leaderboard
|
| 298 |
battle_results = update_leaderboard(winner, loser)
|
|
@@ -302,9 +315,9 @@ def record_vote(prompt, left_response, right_response, left_model, right_model,
|
|
| 302 |
|
| 303 |
result_message = f"""
|
| 304 |
π Vote recorded! You're awesome! π
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
π And the champion you picked is... {get_human_readable_name(winner)}! π₯
|
| 308 |
"""
|
| 309 |
|
| 310 |
return (
|
|
@@ -327,8 +340,10 @@ def new_battle():
|
|
| 327 |
"", # Reset prompt_input
|
| 328 |
gr.update(value=[], label=nickname_a), # Reset left Chatbot
|
| 329 |
gr.update(value=[], label=nickname_b), # Reset right Chatbot
|
| 330 |
-
None,
|
| 331 |
-
None,
|
|
|
|
|
|
|
| 332 |
gr.update(interactive=False, value=f"Vote for {nickname_a}"),
|
| 333 |
gr.update(interactive=False, value=f"Vote for {nickname_b}"),
|
| 334 |
gr.update(interactive=False, visible=False), # Reset Tie button
|
|
@@ -461,8 +476,11 @@ with gr.Blocks() as demo:
|
|
| 461 |
left_model = gr.Textbox(label="🔵 Left Model", interactive=False)
|
| 462 |
right_model = gr.Textbox(label="🔴 Right Model", interactive=False)
|
| 463 |
|
| 464 |
-
|
| 465 |
-
|
|
|
|
|
|
|
|
|
|
| 466 |
|
| 467 |
new_battle_btn = gr.Button("New Battle")
|
| 468 |
|
|
@@ -497,22 +515,26 @@ with gr.Blocks() as demo:
|
|
| 497 |
battle_arena,
|
| 498 |
inputs=prompt_input,
|
| 499 |
outputs=[
|
| 500 |
-
left_output, right_output, left_model, right_model,
|
| 501 |
left_output, right_output, left_vote_btn, right_vote_btn,
|
| 502 |
tie_btn, previous_prompt, tie_count, model_names_row, result
|
| 503 |
]
|
| 504 |
)
|
| 505 |
|
| 506 |
left_vote_btn.click(
|
| 507 |
-
lambda
|
| 508 |
-
|
|
|
|
|
|
|
| 509 |
outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
|
| 510 |
right_vote_btn, tie_btn, model_names_row, battles_counter, submit_btn]
|
| 511 |
)
|
| 512 |
|
| 513 |
right_vote_btn.click(
|
| 514 |
-
lambda
|
| 515 |
-
|
|
|
|
|
|
|
| 516 |
outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
|
| 517 |
right_vote_btn, tie_btn, model_names_row, battles_counter, submit_btn]
|
| 518 |
)
|
|
@@ -526,7 +548,7 @@ with gr.Blocks() as demo:
|
|
| 526 |
new_battle_btn.click(
|
| 527 |
new_battle,
|
| 528 |
outputs=[prompt_input, left_output, right_output, left_model,
|
| 529 |
-
right_model, left_vote_btn, right_vote_btn, tie_btn,
|
| 530 |
result, leaderboard, model_names_row, elo_leaderboard, tie_count, submit_btn]
|
| 531 |
)
|
| 532 |
|
|
|
|
| 228 |
|
| 229 |
# Check for API errors in responses
|
| 230 |
if any("Error: Unable to get response from the model" in msg["content"]
|
| 231 |
+
for msg in response_a + response_b
|
| 232 |
if msg["role"] == "assistant"):
|
| 233 |
return (
|
| 234 |
+
[], [], None, None, None, None,
|
| 235 |
gr.update(value=[]),
|
| 236 |
gr.update(value=[]),
|
| 237 |
gr.update(interactive=False, value="Voting Disabled - API Error"),
|
|
|
|
| 247 |
nickname_b = random.choice(config.model_nicknames)
|
| 248 |
|
| 249 |
# The responses are already in the correct format, no need to reformat
|
| 250 |
+
# Randomly swap positions for blind testing
|
| 251 |
if random.choice([True, False]):
|
| 252 |
+
logger.warning(f"NOT SWAPPED: left={model_a}({nickname_a}), right={model_b}({nickname_b})")
|
| 253 |
return (
|
| 254 |
+
response_a, response_b, model_a, model_b, nickname_a, nickname_b,
|
| 255 |
gr.update(label=nickname_a, value=response_a),
|
| 256 |
gr.update(label=nickname_b, value=response_b),
|
| 257 |
gr.update(interactive=True, value=f"Vote for {nickname_a}"),
|
|
|
|
| 263 |
gr.update(value="Ready for your vote! 🗳️", visible=True)
|
| 264 |
)
|
| 265 |
else:
|
| 266 |
+
logger.warning(f"SWAPPED: left={model_b}({nickname_a}), right={model_a}({nickname_b})")
|
| 267 |
return (
|
| 268 |
+
response_b, response_a, model_b, model_a, nickname_a, nickname_b,
|
| 269 |
gr.update(label=nickname_a, value=response_b),
|
| 270 |
gr.update(label=nickname_b, value=response_a),
|
| 271 |
gr.update(interactive=True, value=f"Vote for {nickname_a}"),
|
|
|
|
| 277 |
gr.update(value="Ready for your vote! 🗳️", visible=True)
|
| 278 |
)
|
| 279 |
|
| 280 |
+
def record_vote(prompt, left_response, right_response, left_model, right_model, left_nickname, right_nickname, voted_nickname, battles_display, submit_btn_state):
|
| 281 |
# Check if outputs are generated
|
| 282 |
if not left_response or not right_response or not left_model or not right_model:
|
| 283 |
return (
|
|
|
|
| 292 |
gr.update()
|
| 293 |
)
|
| 294 |
|
| 295 |
+
# Determine winner based on the voted nickname
|
| 296 |
+
if voted_nickname == left_nickname:
|
| 297 |
+
winner = left_model
|
| 298 |
+
loser = right_model
|
| 299 |
+
winner_nickname = left_nickname
|
| 300 |
+
loser_nickname = right_nickname
|
| 301 |
+
else:
|
| 302 |
+
winner = right_model
|
| 303 |
+
loser = left_model
|
| 304 |
+
winner_nickname = right_nickname
|
| 305 |
+
loser_nickname = left_nickname
|
| 306 |
+
|
| 307 |
+
logger.warning(f"VOTE: voted_nickname={voted_nickname}, left={left_nickname}({left_model}), right={right_nickname}({right_model})")
|
| 308 |
+
logger.warning(f"VOTE RESULT: winner={winner_nickname}({winner}), loser={loser_nickname}({loser})")
|
| 309 |
|
| 310 |
# Update the leaderboard
|
| 311 |
battle_results = update_leaderboard(winner, loser)
|
|
|
|
| 315 |
|
| 316 |
result_message = f"""
|
| 317 |
π Vote recorded! You're awesome! π
|
| 318 |
+
π€ **{left_nickname}** was actually: {get_human_readable_name(left_model)}
|
| 319 |
+
π€ **{right_nickname}** was actually: {get_human_readable_name(right_model)}
|
| 320 |
+
π And the champion you picked is... **{winner_nickname}** ({get_human_readable_name(winner)})! π₯
|
| 321 |
"""
|
| 322 |
|
| 323 |
return (
|
|
|
|
| 340 |
"", # Reset prompt_input
|
| 341 |
gr.update(value=[], label=nickname_a), # Reset left Chatbot
|
| 342 |
gr.update(value=[], label=nickname_b), # Reset right Chatbot
|
| 343 |
+
None, # left_model
|
| 344 |
+
None, # right_model
|
| 345 |
+
None, # left_nickname_state
|
| 346 |
+
None, # right_nickname_state
|
| 347 |
gr.update(interactive=False, value=f"Vote for {nickname_a}"),
|
| 348 |
gr.update(interactive=False, value=f"Vote for {nickname_b}"),
|
| 349 |
gr.update(interactive=False, visible=False), # Reset Tie button
|
|
|
|
| 476 |
left_model = gr.Textbox(label="🔵 Left Model", interactive=False)
|
| 477 |
right_model = gr.Textbox(label="🔴 Right Model", interactive=False)
|
| 478 |
|
| 479 |
+
# State variables
|
| 480 |
+
previous_prompt = gr.State("")
|
| 481 |
+
tie_count = gr.State(0)
|
| 482 |
+
left_nickname_state = gr.State("")
|
| 483 |
+
right_nickname_state = gr.State("")
|
| 484 |
|
| 485 |
new_battle_btn = gr.Button("New Battle")
|
| 486 |
|
|
|
|
| 515 |
battle_arena,
|
| 516 |
inputs=prompt_input,
|
| 517 |
outputs=[
|
| 518 |
+
left_output, right_output, left_model, right_model, left_nickname_state, right_nickname_state,
|
| 519 |
left_output, right_output, left_vote_btn, right_vote_btn,
|
| 520 |
tie_btn, previous_prompt, tie_count, model_names_row, result
|
| 521 |
]
|
| 522 |
)
|
| 523 |
|
| 524 |
left_vote_btn.click(
|
| 525 |
+
lambda prompt, left_resp, right_resp, left_mod, right_mod, left_nick, right_nick, battles, submit: record_vote(
|
| 526 |
+
prompt, left_resp, right_resp, left_mod, right_mod, left_nick, right_nick, left_nick, battles, submit
|
| 527 |
+
),
|
| 528 |
+
inputs=[prompt_input, left_output, right_output, left_model, right_model, left_nickname_state, right_nickname_state, battles_counter, submit_btn],
|
| 529 |
outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
|
| 530 |
right_vote_btn, tie_btn, model_names_row, battles_counter, submit_btn]
|
| 531 |
)
|
| 532 |
|
| 533 |
right_vote_btn.click(
|
| 534 |
+
lambda prompt, left_resp, right_resp, left_mod, right_mod, left_nick, right_nick, battles, submit: record_vote(
|
| 535 |
+
prompt, left_resp, right_resp, left_mod, right_mod, left_nick, right_nick, right_nick, battles, submit
|
| 536 |
+
),
|
| 537 |
+
inputs=[prompt_input, left_output, right_output, left_model, right_model, left_nickname_state, right_nickname_state, battles_counter, submit_btn],
|
| 538 |
outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
|
| 539 |
right_vote_btn, tie_btn, model_names_row, battles_counter, submit_btn]
|
| 540 |
)
|
|
|
|
| 548 |
new_battle_btn.click(
|
| 549 |
new_battle,
|
| 550 |
outputs=[prompt_input, left_output, right_output, left_model,
|
| 551 |
+
right_model, left_nickname_state, right_nickname_state, left_vote_btn, right_vote_btn, tie_btn,
|
| 552 |
result, leaderboard, model_names_row, elo_leaderboard, tie_count, submit_btn]
|
| 553 |
)
|
| 554 |
|