prans-cs55 committed on
Commit
673f9e2
·
verified ·
1 Parent(s): 2625f4f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -0
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+ import pytesseract
3
+ import gradio as gr
4
+ from transformers import pipeline
5
+
6
# Summarization pipeline, constructed once at module import so that every
# call to the request handler reuses the same loaded model.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
7
+
8
def extract_and_summarize(image):
    """Run OCR on an image and summarize the recognized text.

    Args:
        image: The image to read, passed straight to
            ``pytesseract.image_to_string``; ``None`` when nothing was
            uploaded.

    Returns:
        A ``(extracted_text, summary)`` tuple of strings:

        * ``image`` is ``None``: a prompt to upload an image and a
          placeholder summary.
        * OCR finds no text: a "no text detected" message and an empty
          summary.
        * 30 words or fewer: the OCR text and a "too short" notice.
        * otherwise: the OCR text and the model-generated summary.
    """
    if image is None:
        return "Please upload an image.", "No summary yet."

    extracted_text = pytesseract.image_to_string(image).strip()
    if not extracted_text:
        return "No text detected in the image.", ""

    # Summaries are requested with min_length=30, so anything of 30 words or
    # fewer is returned as-is with a notice instead of being summarized.
    if len(extracted_text.split()) <= 30:
        return extracted_text, "Text too short for summarization."

    # truncation=True clips the input to the model's maximum input length.
    # Without it, a dense page of OCR text that exceeds the model's input
    # window makes the pipeline call fail instead of returning a summary.
    result = summarizer(
        extracted_text,
        max_length=100,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return extracted_text, result[0]["summary_text"]
27
+
28
+
29
# Web UI: a single image input wired to extract_and_summarize, whose two
# returned strings fill the two text boxes in order.
interface = gr.Interface(
    title="🧠 OCR Text Extractor + Summarizer",
    description="Upload an image with text — it extracts the text using Tesseract OCR and summarizes it using a transformer model.",
    fn=extract_and_summarize,
    # type="pil" asks Gradio to hand the upload to the callback as a PIL image.
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[gr.Textbox(label="Extracted Text"), gr.Textbox(label="Summary")],
)

# Start the Gradio server when this module is executed.
interface.launch()