Spaces:
Runtime error
Runtime error
liuyang
committed on
Commit
·
6c3a671
1
Parent(s):
64397b6
Refactor audio processing: Simplified the handling of audio chunks in prepare_and_save_audio_for_model and updated preprocess_from_task_json to support both single and multiple chunk tasks, enhancing flexibility in audio preparation.
Browse files
app.py
CHANGED
|
@@ -269,24 +269,8 @@ def prepare_and_save_audio_for_model(task: dict, out_dir: str) -> dict:
|
|
| 269 |
"options": task.get("options", None),
|
| 270 |
"filekey": task.get("filekey", None),
|
| 271 |
}
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
if task.get("segments", None):
|
| 275 |
-
# Process multiple chunks
|
| 276 |
-
chunks = task["segments"]
|
| 277 |
-
results = []
|
| 278 |
-
for chunk in chunks:
|
| 279 |
-
# Create a task for each chunk
|
| 280 |
-
single_chunk_task = task.copy()
|
| 281 |
-
single_chunk_task["chunk"] = chunk
|
| 282 |
-
chunk_result = _process_single_chunk(single_chunk_task, out_dir)
|
| 283 |
-
results.append(chunk_result)
|
| 284 |
-
# Compose wrapper dict with general fields applicable to all chunks
|
| 285 |
-
result["segments"] = results
|
| 286 |
-
else:
|
| 287 |
-
# Process single chunk and wrap in the standard response structure
|
| 288 |
-
chunk_result = _process_single_chunk(task, out_dir)
|
| 289 |
-
result["chunk"] = chunk_result
|
| 290 |
return result
|
| 291 |
|
| 292 |
|
|
@@ -450,7 +434,7 @@ class WhisperTranscriber:
|
|
| 450 |
# do **not** create the models here!
|
| 451 |
pass
|
| 452 |
|
| 453 |
-
def preprocess_from_task_json(self, task_json: str) ->
|
| 454 |
"""Parse task JSON and run prepare_and_save_audio_for_model, returning metadata."""
|
| 455 |
try:
|
| 456 |
task = json.loads(task_json)
|
|
@@ -459,7 +443,14 @@ class WhisperTranscriber:
|
|
| 459 |
|
| 460 |
out_dir = os.path.join(CACHE_ROOT, "preprocessed")
|
| 461 |
os.makedirs(out_dir, exist_ok=True)
|
| 462 |
-
meta =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 463 |
return meta
|
| 464 |
|
| 465 |
@spaces.GPU # each call gets a GPU slice
|
|
@@ -958,10 +949,10 @@ class WhisperTranscriber:
|
|
| 958 |
print("Preprocessing chunk JSON...")
|
| 959 |
pre_meta = self.preprocess_from_task_json(task_json)
|
| 960 |
transcribe_options = pre_meta.get("options", None)
|
| 961 |
-
if
|
| 962 |
-
return self.transcribe_chunk(pre_meta, language, translate, prompt, batch_size, model_name, transcribe_options)
|
| 963 |
-
elif "segments" in pre_meta:
|
| 964 |
return self.transcribe_segments(pre_meta, language, translate, prompt, batch_size, model_name, transcribe_options)
|
|
|
|
|
|
|
| 965 |
except Exception as e:
|
| 966 |
import traceback
|
| 967 |
traceback.print_exc()
|
|
@@ -1016,7 +1007,7 @@ class WhisperTranscriber:
|
|
| 1016 |
pass
|
| 1017 |
|
| 1018 |
@spaces.GPU
|
| 1019 |
-
def transcribe_segments(self,
|
| 1020 |
translate=False, prompt=None, batch_size=8, model_name: str = DEFAULT_MODEL, transcribe_options: dict = None):
|
| 1021 |
"""Main processing function with diarization using task JSON for a single chunk.
|
| 1022 |
|
|
@@ -1026,8 +1017,8 @@ class WhisperTranscriber:
|
|
| 1026 |
print("Transcribing segments...")
|
| 1027 |
transcription_results = []
|
| 1028 |
# Step 1: Preprocess per chunk JSON
|
| 1029 |
-
|
| 1030 |
-
|
| 1031 |
if chunk.get("skip"):
|
| 1032 |
return {"segments": [], "language": "unknown", "num_speakers": 0, "transcription_method": "diarized_segments_batched", "batch_size": batch_size}
|
| 1033 |
wav_path = chunk["out_wav_path"]
|
|
|
|
| 269 |
"options": task.get("options", None),
|
| 270 |
"filekey": task.get("filekey", None),
|
| 271 |
}
|
| 272 |
+
chunk_result = _process_single_chunk(task, out_dir)
|
| 273 |
+
result["chunk"] = chunk_result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
return result
|
| 275 |
|
| 276 |
|
|
|
|
| 434 |
# do **not** create the models here!
|
| 435 |
pass
|
| 436 |
|
| 437 |
+
def preprocess_from_task_json(self, task_json: str) -> any:
|
| 438 |
"""Parse task JSON and run prepare_and_save_audio_for_model, returning metadata."""
|
| 439 |
try:
|
| 440 |
task = json.loads(task_json)
|
|
|
|
| 443 |
|
| 444 |
out_dir = os.path.join(CACHE_ROOT, "preprocessed")
|
| 445 |
os.makedirs(out_dir, exist_ok=True)
|
| 446 |
+
meta = None
|
| 447 |
+
#task could be a single chunk or a list of chunks
|
| 448 |
+
if isinstance(task, list):
|
| 449 |
+
meta = []
|
| 450 |
+
for chunk in task:
|
| 451 |
+
meta.append(prepare_and_save_audio_for_model(chunk, out_dir))
|
| 452 |
+
else:
|
| 453 |
+
meta = prepare_and_save_audio_for_model(task, out_dir)
|
| 454 |
return meta
|
| 455 |
|
| 456 |
@spaces.GPU # each call gets a GPU slice
|
|
|
|
| 949 |
print("Preprocessing chunk JSON...")
|
| 950 |
pre_meta = self.preprocess_from_task_json(task_json)
|
| 951 |
transcribe_options = pre_meta.get("options", None)
|
| 952 |
+
if isinstance(pre_meta, list):
|
|
|
|
|
|
|
| 953 |
return self.transcribe_segments(pre_meta, language, translate, prompt, batch_size, model_name, transcribe_options)
|
| 954 |
+
elif isinstance(pre_meta, dict) and "chunk" in pre_meta:
|
| 955 |
+
return self.transcribe_chunk(pre_meta, language, translate, prompt, batch_size, model_name, transcribe_options)
|
| 956 |
except Exception as e:
|
| 957 |
import traceback
|
| 958 |
traceback.print_exc()
|
|
|
|
| 1007 |
pass
|
| 1008 |
|
| 1009 |
@spaces.GPU
|
| 1010 |
+
def transcribe_segments(self, pre_metas, language=None,
|
| 1011 |
translate=False, prompt=None, batch_size=8, model_name: str = DEFAULT_MODEL, transcribe_options: dict = None):
|
| 1012 |
"""Main processing function with diarization using task JSON for a single chunk.
|
| 1013 |
|
|
|
|
| 1017 |
print("Transcribing segments...")
|
| 1018 |
transcription_results = []
|
| 1019 |
# Step 1: Preprocess per chunk JSON
|
| 1020 |
+
for pre_meta in pre_metas:
|
| 1021 |
+
chunk = pre_meta["chunk"]
|
| 1022 |
if chunk.get("skip"):
|
| 1023 |
return {"segments": [], "language": "unknown", "num_speakers": 0, "transcription_method": "diarized_segments_batched", "batch_size": batch_size}
|
| 1024 |
wav_path = chunk["out_wav_path"]
|