k-mktr committed on
Commit
635b518
·
verified ·
1 Parent(s): 2bc6aeb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -22
app.py CHANGED
@@ -228,10 +228,10 @@ def battle_arena(prompt):
228
 
229
  # Check for API errors in responses
230
  if any("Error: Unable to get response from the model" in msg["content"]
231
- for msg in response_a + response_b
232
  if msg["role"] == "assistant"):
233
  return (
234
- [], [], None, None,
235
  gr.update(value=[]),
236
  gr.update(value=[]),
237
  gr.update(interactive=False, value="Voting Disabled - API Error"),
@@ -247,10 +247,11 @@ def battle_arena(prompt):
247
  nickname_b = random.choice(config.model_nicknames)
248
 
249
  # The responses are already in the correct format, no need to reformat
 
250
  if random.choice([True, False]):
251
- logger.warning(f"NOT SWAPPED: left={model_a}, right={model_b}")
252
  return (
253
- response_a, response_b, model_a, model_b,
254
  gr.update(label=nickname_a, value=response_a),
255
  gr.update(label=nickname_b, value=response_b),
256
  gr.update(interactive=True, value=f"Vote for {nickname_a}"),
@@ -262,9 +263,9 @@ def battle_arena(prompt):
262
  gr.update(value="Ready for your vote! πŸ—³οΈ", visible=True)
263
  )
264
  else:
265
- logger.warning(f"SWAPPED: left={model_b}, right={model_a}")
266
  return (
267
- response_b, response_a, model_b, model_a,
268
  gr.update(label=nickname_a, value=response_b),
269
  gr.update(label=nickname_b, value=response_a),
270
  gr.update(interactive=True, value=f"Vote for {nickname_a}"),
@@ -276,7 +277,7 @@ def battle_arena(prompt):
276
  gr.update(value="Ready for your vote! πŸ—³οΈ", visible=True)
277
  )
278
 
279
- def record_vote(prompt, left_response, right_response, left_model, right_model, choice, battles_display, submit_btn_state):
280
  # Check if outputs are generated
281
  if not left_response or not right_response or not left_model or not right_model:
282
  return (
@@ -291,8 +292,20 @@ def record_vote(prompt, left_response, right_response, left_model, right_model,
291
  gr.update()
292
  )
293
 
294
- winner = left_model if choice == "Left is better" else right_model
295
- loser = right_model if choice == "Left is better" else left_model
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
  # Update the leaderboard
298
  battle_results = update_leaderboard(winner, loser)
@@ -302,9 +315,9 @@ def record_vote(prompt, left_response, right_response, left_model, right_model,
302
 
303
  result_message = f"""
304
  πŸŽ‰ Vote recorded! You're awesome! 🌟
305
- πŸ”΅ In the left corner: {get_human_readable_name(left_model)}
306
- πŸ”΄ In the right corner: {get_human_readable_name(right_model)}
307
- πŸ† And the champion you picked is... {get_human_readable_name(winner)}! πŸ₯‡
308
  """
309
 
310
  return (
@@ -327,8 +340,10 @@ def new_battle():
327
  "", # Reset prompt_input
328
  gr.update(value=[], label=nickname_a), # Reset left Chatbot
329
  gr.update(value=[], label=nickname_b), # Reset right Chatbot
330
- None,
331
- None,
 
 
332
  gr.update(interactive=False, value=f"Vote for {nickname_a}"),
333
  gr.update(interactive=False, value=f"Vote for {nickname_b}"),
334
  gr.update(interactive=False, visible=False), # Reset Tie button
@@ -461,8 +476,11 @@ with gr.Blocks() as demo:
461
  left_model = gr.Textbox(label="πŸ”΅ Left Model", interactive=False)
462
  right_model = gr.Textbox(label="πŸ”΄ Right Model", interactive=False)
463
 
464
- previous_prompt = gr.State("") # Add this line to store the previous prompt
465
- tie_count = gr.State(0) # Add this line to keep track of tie count
 
 
 
466
 
467
  new_battle_btn = gr.Button("New Battle")
468
 
@@ -497,22 +515,26 @@ with gr.Blocks() as demo:
497
  battle_arena,
498
  inputs=prompt_input,
499
  outputs=[
500
- left_output, right_output, left_model, right_model,
501
  left_output, right_output, left_vote_btn, right_vote_btn,
502
  tie_btn, previous_prompt, tie_count, model_names_row, result
503
  ]
504
  )
505
 
506
  left_vote_btn.click(
507
- lambda *args: record_vote(*args, "Left is better"),
508
- inputs=[prompt_input, left_output, right_output, left_model, right_model, battles_counter, submit_btn],
 
 
509
  outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
510
  right_vote_btn, tie_btn, model_names_row, battles_counter, submit_btn]
511
  )
512
 
513
  right_vote_btn.click(
514
- lambda *args: record_vote(*args, "Right is better"),
515
- inputs=[prompt_input, left_output, right_output, left_model, right_model, battles_counter, submit_btn],
 
 
516
  outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
517
  right_vote_btn, tie_btn, model_names_row, battles_counter, submit_btn]
518
  )
@@ -526,7 +548,7 @@ with gr.Blocks() as demo:
526
  new_battle_btn.click(
527
  new_battle,
528
  outputs=[prompt_input, left_output, right_output, left_model,
529
- right_model, left_vote_btn, right_vote_btn, tie_btn,
530
  result, leaderboard, model_names_row, elo_leaderboard, tie_count, submit_btn]
531
  )
532
 
 
228
 
229
  # Check for API errors in responses
230
  if any("Error: Unable to get response from the model" in msg["content"]
231
+ for msg in response_a + response_b
232
  if msg["role"] == "assistant"):
233
  return (
234
+ [], [], None, None, None, None,
235
  gr.update(value=[]),
236
  gr.update(value=[]),
237
  gr.update(interactive=False, value="Voting Disabled - API Error"),
 
247
  nickname_b = random.choice(config.model_nicknames)
248
 
249
  # The responses are already in the correct format, no need to reformat
250
+ # Randomly swap positions for blind testing
251
  if random.choice([True, False]):
252
+ logger.warning(f"NOT SWAPPED: left={model_a}({nickname_a}), right={model_b}({nickname_b})")
253
  return (
254
+ response_a, response_b, model_a, model_b, nickname_a, nickname_b,
255
  gr.update(label=nickname_a, value=response_a),
256
  gr.update(label=nickname_b, value=response_b),
257
  gr.update(interactive=True, value=f"Vote for {nickname_a}"),
 
263
  gr.update(value="Ready for your vote! πŸ—³οΈ", visible=True)
264
  )
265
  else:
266
+ logger.warning(f"SWAPPED: left={model_b}({nickname_a}), right={model_a}({nickname_b})")
267
  return (
268
+ response_b, response_a, model_b, model_a, nickname_a, nickname_b,
269
  gr.update(label=nickname_a, value=response_b),
270
  gr.update(label=nickname_b, value=response_a),
271
  gr.update(interactive=True, value=f"Vote for {nickname_a}"),
 
277
  gr.update(value="Ready for your vote! πŸ—³οΈ", visible=True)
278
  )
279
 
280
+ def record_vote(prompt, left_response, right_response, left_model, right_model, left_nickname, right_nickname, voted_nickname, battles_display, submit_btn_state):
281
  # Check if outputs are generated
282
  if not left_response or not right_response or not left_model or not right_model:
283
  return (
 
292
  gr.update()
293
  )
294
 
295
+ # Determine winner based on the voted nickname
296
+ if voted_nickname == left_nickname:
297
+ winner = left_model
298
+ loser = right_model
299
+ winner_nickname = left_nickname
300
+ loser_nickname = right_nickname
301
+ else:
302
+ winner = right_model
303
+ loser = left_model
304
+ winner_nickname = right_nickname
305
+ loser_nickname = left_nickname
306
+
307
+ logger.warning(f"VOTE: voted_nickname={voted_nickname}, left={left_nickname}({left_model}), right={right_nickname}({right_model})")
308
+ logger.warning(f"VOTE RESULT: winner={winner_nickname}({winner}), loser={loser_nickname}({loser})")
309
 
310
  # Update the leaderboard
311
  battle_results = update_leaderboard(winner, loser)
 
315
 
316
  result_message = f"""
317
  πŸŽ‰ Vote recorded! You're awesome! 🌟
318
+ πŸ‘€ **{left_nickname}** was actually: {get_human_readable_name(left_model)}
319
+ πŸ‘€ **{right_nickname}** was actually: {get_human_readable_name(right_model)}
320
+ πŸ† And the champion you picked is... **{winner_nickname}** ({get_human_readable_name(winner)})! πŸ₯‡
321
  """
322
 
323
  return (
 
340
  "", # Reset prompt_input
341
  gr.update(value=[], label=nickname_a), # Reset left Chatbot
342
  gr.update(value=[], label=nickname_b), # Reset right Chatbot
343
+ None, # left_model
344
+ None, # right_model
345
+ None, # left_nickname_state
346
+ None, # right_nickname_state
347
  gr.update(interactive=False, value=f"Vote for {nickname_a}"),
348
  gr.update(interactive=False, value=f"Vote for {nickname_b}"),
349
  gr.update(interactive=False, visible=False), # Reset Tie button
 
476
  left_model = gr.Textbox(label="πŸ”΅ Left Model", interactive=False)
477
  right_model = gr.Textbox(label="πŸ”΄ Right Model", interactive=False)
478
 
479
+ # State variables
480
+ previous_prompt = gr.State("")
481
+ tie_count = gr.State(0)
482
+ left_nickname_state = gr.State("")
483
+ right_nickname_state = gr.State("")
484
 
485
  new_battle_btn = gr.Button("New Battle")
486
 
 
515
  battle_arena,
516
  inputs=prompt_input,
517
  outputs=[
518
+ left_output, right_output, left_model, right_model, left_nickname_state, right_nickname_state,
519
  left_output, right_output, left_vote_btn, right_vote_btn,
520
  tie_btn, previous_prompt, tie_count, model_names_row, result
521
  ]
522
  )
523
 
524
  left_vote_btn.click(
525
+ lambda prompt, left_resp, right_resp, left_mod, right_mod, left_nick, right_nick, battles, submit: record_vote(
526
+ prompt, left_resp, right_resp, left_mod, right_mod, left_nick, right_nick, left_nick, battles, submit
527
+ ),
528
+ inputs=[prompt_input, left_output, right_output, left_model, right_model, left_nickname_state, right_nickname_state, battles_counter, submit_btn],
529
  outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
530
  right_vote_btn, tie_btn, model_names_row, battles_counter, submit_btn]
531
  )
532
 
533
  right_vote_btn.click(
534
+ lambda prompt, left_resp, right_resp, left_mod, right_mod, left_nick, right_nick, battles, submit: record_vote(
535
+ prompt, left_resp, right_resp, left_mod, right_mod, left_nick, right_nick, right_nick, battles, submit
536
+ ),
537
+ inputs=[prompt_input, left_output, right_output, left_model, right_model, left_nickname_state, right_nickname_state, battles_counter, submit_btn],
538
  outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
539
  right_vote_btn, tie_btn, model_names_row, battles_counter, submit_btn]
540
  )
 
548
  new_battle_btn.click(
549
  new_battle,
550
  outputs=[prompt_input, left_output, right_output, left_model,
551
+ right_model, left_nickname_state, right_nickname_state, left_vote_btn, right_vote_btn, tie_btn,
552
  result, leaderboard, model_names_row, elo_leaderboard, tie_count, submit_btn]
553
  )
554