liuyang committed on
Commit
a3f71ba
·
1 Parent(s): 75ff28d

apply audio conversion

Browse files
Files changed (1) hide show
  1. app.py +16 -6
app.py CHANGED
@@ -234,23 +234,28 @@ class WhisperTranscriber:
234
  if audio_file is None:
235
  return {"error": "No audio file provided"}
236
 
 
237
  try:
238
  print("Starting new processing pipeline...")
239
 
240
- # Step 1: Perform diarization first
 
 
 
 
241
  diarization_segments, detected_num_speakers = self.perform_diarization(
242
- audio_file, num_speakers
243
  )
244
 
245
- # Step 2: Cut audio into segments based on diarization
246
- audio_segments = self.cut_audio_segments(audio_file, diarization_segments)
247
 
248
- # Step 3: Transcribe each segment
249
  transcription_results = self.transcribe_audio_segments(
250
  audio_segments, language, translate, prompt
251
  )
252
 
253
- # Step 4: Return in requested format
254
  return {
255
  "speaker_count": detected_num_speakers,
256
  "transcription": transcription_results
@@ -260,6 +265,11 @@ class WhisperTranscriber:
260
  import traceback
261
  traceback.print_exc()
262
  return {"error": f"Processing failed: {str(e)}"}
 
 
 
 
 
263
 
264
  # Initialize transcriber
265
  transcriber = WhisperTranscriber()
 
234
  if audio_file is None:
235
  return {"error": "No audio file provided"}
236
 
237
+ converted_audio_path = None
238
  try:
239
  print("Starting new processing pipeline...")
240
 
241
+ # Step 1: Convert audio format first
242
+ print("Converting audio format...")
243
+ converted_audio_path = self.convert_audio_format(audio_file)
244
+
245
+ # Step 2: Perform diarization on converted audio
246
  diarization_segments, detected_num_speakers = self.perform_diarization(
247
+ converted_audio_path, num_speakers
248
  )
249
 
250
+ # Step 3: Cut audio into segments based on diarization
251
+ audio_segments = self.cut_audio_segments(converted_audio_path, diarization_segments)
252
 
253
+ # Step 4: Transcribe each segment
254
  transcription_results = self.transcribe_audio_segments(
255
  audio_segments, language, translate, prompt
256
  )
257
 
258
+ # Step 5: Return in requested format
259
  return {
260
  "speaker_count": detected_num_speakers,
261
  "transcription": transcription_results
 
265
  import traceback
266
  traceback.print_exc()
267
  return {"error": f"Processing failed: {str(e)}"}
268
+ finally:
269
+ # Clean up converted audio file
270
+ if converted_audio_path and os.path.exists(converted_audio_path):
271
+ os.unlink(converted_audio_path)
272
+ print("Cleaned up converted audio file")
273
 
274
  # Initialize transcriber
275
  transcriber = WhisperTranscriber()