Spaces:
Runtime error
Runtime error
liuyang
committed on
Commit
·
a3f71ba
1
Parent(s):
75ff28d
apply audio conversion
Browse files
app.py
CHANGED
|
@@ -234,23 +234,28 @@ class WhisperTranscriber:
|
|
| 234 |
if audio_file is None:
|
| 235 |
return {"error": "No audio file provided"}
|
| 236 |
|
|
|
|
| 237 |
try:
|
| 238 |
print("Starting new processing pipeline...")
|
| 239 |
|
| 240 |
-
# Step 1:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
diarization_segments, detected_num_speakers = self.perform_diarization(
|
| 242 |
-
|
| 243 |
)
|
| 244 |
|
| 245 |
-
# Step
|
| 246 |
-
audio_segments = self.cut_audio_segments(
|
| 247 |
|
| 248 |
-
# Step
|
| 249 |
transcription_results = self.transcribe_audio_segments(
|
| 250 |
audio_segments, language, translate, prompt
|
| 251 |
)
|
| 252 |
|
| 253 |
-
# Step
|
| 254 |
return {
|
| 255 |
"speaker_count": detected_num_speakers,
|
| 256 |
"transcription": transcription_results
|
|
@@ -260,6 +265,11 @@ class WhisperTranscriber:
|
|
| 260 |
import traceback
|
| 261 |
traceback.print_exc()
|
| 262 |
return {"error": f"Processing failed: {str(e)}"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
|
| 264 |
# Initialize transcriber
|
| 265 |
transcriber = WhisperTranscriber()
|
|
|
|
| 234 |
if audio_file is None:
|
| 235 |
return {"error": "No audio file provided"}
|
| 236 |
|
| 237 |
+
converted_audio_path = None
|
| 238 |
try:
|
| 239 |
print("Starting new processing pipeline...")
|
| 240 |
|
| 241 |
+
# Step 1: Convert audio format first
|
| 242 |
+
print("Converting audio format...")
|
| 243 |
+
converted_audio_path = self.convert_audio_format(audio_file)
|
| 244 |
+
|
| 245 |
+
# Step 2: Perform diarization on converted audio
|
| 246 |
diarization_segments, detected_num_speakers = self.perform_diarization(
|
| 247 |
+
converted_audio_path, num_speakers
|
| 248 |
)
|
| 249 |
|
| 250 |
+
# Step 3: Cut audio into segments based on diarization
|
| 251 |
+
audio_segments = self.cut_audio_segments(converted_audio_path, diarization_segments)
|
| 252 |
|
| 253 |
+
# Step 4: Transcribe each segment
|
| 254 |
transcription_results = self.transcribe_audio_segments(
|
| 255 |
audio_segments, language, translate, prompt
|
| 256 |
)
|
| 257 |
|
| 258 |
+
# Step 5: Return in requested format
|
| 259 |
return {
|
| 260 |
"speaker_count": detected_num_speakers,
|
| 261 |
"transcription": transcription_results
|
|
|
|
| 265 |
import traceback
|
| 266 |
traceback.print_exc()
|
| 267 |
return {"error": f"Processing failed: {str(e)}"}
|
| 268 |
+
finally:
|
| 269 |
+
# Clean up converted audio file
|
| 270 |
+
if converted_audio_path and os.path.exists(converted_audio_path):
|
| 271 |
+
os.unlink(converted_audio_path)
|
| 272 |
+
print("Cleaned up converted audio file")
|
| 273 |
|
| 274 |
# Initialize transcriber
|
| 275 |
transcriber = WhisperTranscriber()
|