# text_extractor / app.py
# prans-cs55's picture
# Create app.py
# 673f9e2 verified
# raw
# history blame
# 1.16 kB
from PIL import Image
import pytesseract
import gradio as gr
from transformers import pipeline
# Build the summarization pipeline once at module level so every call to
# extract_and_summarize reuses the same object instead of recreating it.
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
)
def extract_and_summarize(image):
    """Run OCR on an uploaded image and summarize the recognized text.

    Args:
        image: The uploaded picture (the interface is configured to pass a
            PIL image), or ``None`` when nothing was uploaded.

    Returns:
        A ``(extracted_text, summary)`` pair of strings:

        * no image: a prompt to upload one and ``"No summary yet."``;
        * no text recognized: a "no text detected" message and ``""``;
        * 30 words or fewer: the text and a "too short" notice;
        * otherwise: the text and the model-generated summary.
    """
    if image is None:
        return "Please upload an image.", "No summary yet."

    extracted_text = pytesseract.image_to_string(image).strip()
    if not extracted_text:
        return "No text detected in the image.", ""

    # Only texts with more than 30 whitespace-separated words are summarized.
    if len(extracted_text.split()) <= 30:
        return extracted_text, "Text too short for summarization."

    result = summarizer(
        extracted_text,
        max_length=100,
        min_length=30,
        do_sample=False,
        # OCR of a dense page can exceed the model's maximum input length;
        # truncate the input rather than letting the pipeline raise.
        truncation=True,
    )
    return extracted_text, result[0]["summary_text"]
# Gradio UI: a single image upload wired to extract_and_summarize, whose two
# returned strings are shown in separate text boxes.
interface = gr.Interface(
    fn=extract_and_summarize,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Textbox(label="Summary"),
    ],
    title="🧠 OCR Text Extractor + Summarizer",
    # The dash below was previously a mis-decoded byte sequence ("β€”").
    description=(
        "Upload an image with text — it extracts the text using Tesseract OCR "
        "and summarizes it using a transformer model."
    ),
)

# Start the web app.
interface.launch()