liuyang committed on
Commit
6c3a671
·
1 Parent(s): 64397b6

Refactor audio processing: Simplified the handling of audio chunks in prepare_and_save_audio_for_model and updated preprocess_from_task_json to support both single and multiple chunk tasks, enhancing flexibility in audio preparation.

Browse files
Files changed (1) hide show
  1. app.py +17 -26
app.py CHANGED
@@ -269,24 +269,8 @@ def prepare_and_save_audio_for_model(task: dict, out_dir: str) -> dict:
269
  "options": task.get("options", None),
270
  "filekey": task.get("filekey", None),
271
  }
272
-
273
- # Handle both single chunk and multiple chunks
274
- if task.get("segments", None):
275
- # Process multiple chunks
276
- chunks = task["segments"]
277
- results = []
278
- for chunk in chunks:
279
- # Create a task for each chunk
280
- single_chunk_task = task.copy()
281
- single_chunk_task["chunk"] = chunk
282
- chunk_result = _process_single_chunk(single_chunk_task, out_dir)
283
- results.append(chunk_result)
284
- # Compose wrapper dict with general fields applicable to all chunks
285
- result["segments"] = results
286
- else:
287
- # Process single chunk and wrap in the standard response structure
288
- chunk_result = _process_single_chunk(task, out_dir)
289
- result["chunk"] = chunk_result
290
  return result
291
 
292
 
@@ -450,7 +434,7 @@ class WhisperTranscriber:
450
  # do **not** create the models here!
451
  pass
452
 
453
- def preprocess_from_task_json(self, task_json: str) -> dict:
454
  """Parse task JSON and run prepare_and_save_audio_for_model, returning metadata."""
455
  try:
456
  task = json.loads(task_json)
@@ -459,7 +443,14 @@ class WhisperTranscriber:
459
 
460
  out_dir = os.path.join(CACHE_ROOT, "preprocessed")
461
  os.makedirs(out_dir, exist_ok=True)
462
- meta = prepare_and_save_audio_for_model(task, out_dir)
 
 
 
 
 
 
 
463
  return meta
464
 
465
  @spaces.GPU # each call gets a GPU slice
@@ -958,10 +949,10 @@ class WhisperTranscriber:
958
  print("Preprocessing chunk JSON...")
959
  pre_meta = self.preprocess_from_task_json(task_json)
960
  transcribe_options = pre_meta.get("options", None)
961
- if "chunk" in pre_meta:
962
- return self.transcribe_chunk(pre_meta, language, translate, prompt, batch_size, model_name, transcribe_options)
963
- elif "segments" in pre_meta:
964
  return self.transcribe_segments(pre_meta, language, translate, prompt, batch_size, model_name, transcribe_options)
 
 
965
  except Exception as e:
966
  import traceback
967
  traceback.print_exc()
@@ -1016,7 +1007,7 @@ class WhisperTranscriber:
1016
  pass
1017
 
1018
  @spaces.GPU
1019
- def transcribe_segments(self, pre_meta, language=None,
1020
  translate=False, prompt=None, batch_size=8, model_name: str = DEFAULT_MODEL, transcribe_options: dict = None):
1021
  """Main processing function with diarization using task JSON for a single chunk.
1022
 
@@ -1026,8 +1017,8 @@ class WhisperTranscriber:
1026
  print("Transcribing segments...")
1027
  transcription_results = []
1028
  # Step 1: Preprocess per chunk JSON
1029
- chunks = pre_meta["segments"]
1030
- for chunk in chunks:
1031
  if chunk.get("skip"):
1032
  return {"segments": [], "language": "unknown", "num_speakers": 0, "transcription_method": "diarized_segments_batched", "batch_size": batch_size}
1033
  wav_path = chunk["out_wav_path"]
 
269
  "options": task.get("options", None),
270
  "filekey": task.get("filekey", None),
271
  }
272
+ chunk_result = _process_single_chunk(task, out_dir)
273
+ result["chunk"] = chunk_result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  return result
275
 
276
 
 
434
  # do **not** create the models here!
435
  pass
436
 
437
+ def preprocess_from_task_json(self, task_json: str) -> any:
438
  """Parse task JSON and run prepare_and_save_audio_for_model, returning metadata."""
439
  try:
440
  task = json.loads(task_json)
 
443
 
444
  out_dir = os.path.join(CACHE_ROOT, "preprocessed")
445
  os.makedirs(out_dir, exist_ok=True)
446
+ meta = None
447
+ #task could be a single chunk or a list of chunks
448
+ if isinstance(task, list):
449
+ meta = []
450
+ for chunk in task:
451
+ meta.append(prepare_and_save_audio_for_model(chunk, out_dir))
452
+ else:
453
+ meta = prepare_and_save_audio_for_model(task, out_dir)
454
  return meta
455
 
456
  @spaces.GPU # each call gets a GPU slice
 
949
  print("Preprocessing chunk JSON...")
950
  pre_meta = self.preprocess_from_task_json(task_json)
951
  transcribe_options = pre_meta.get("options", None)
952
+ if isinstance(pre_meta, list):
 
 
953
  return self.transcribe_segments(pre_meta, language, translate, prompt, batch_size, model_name, transcribe_options)
954
+ elif isinstance(pre_meta, dict) and "chunk" in pre_meta:
955
+ return self.transcribe_chunk(pre_meta, language, translate, prompt, batch_size, model_name, transcribe_options)
956
  except Exception as e:
957
  import traceback
958
  traceback.print_exc()
 
1007
  pass
1008
 
1009
  @spaces.GPU
1010
+ def transcribe_segments(self, pre_metas, language=None,
1011
  translate=False, prompt=None, batch_size=8, model_name: str = DEFAULT_MODEL, transcribe_options: dict = None):
1012
  """Main processing function with diarization using task JSON for a single chunk.
1013
 
 
1017
  print("Transcribing segments...")
1018
  transcription_results = []
1019
  # Step 1: Preprocess per chunk JSON
1020
+ for pre_meta in pre_metas:
1021
+ chunk = pre_meta["chunk"]
1022
  if chunk.get("skip"):
1023
  return {"segments": [], "language": "unknown", "num_speakers": 0, "transcription_method": "diarized_segments_batched", "batch_size": batch_size}
1024
  wav_path = chunk["out_wav_path"]