# requirements.txt for Hugging Face Spaces with NVIDIA T4 GPU
#
# Format: one requirement per line. pip treats a line that starts with `#`
# as a comment in its entirety, and everything after ` #` on a requirement
# line as an inline comment, so requirements must never share a line with
# a preceding comment or with each other.

# Core Framework Dependencies
# Note: gradio, fastapi, uvicorn, datasets, huggingface-hub,
# pydantic==2.10.6, and protobuf<4 are installed by HF Spaces SDK

# PyTorch with CUDA support (for GPU inference)
# Note: HF Spaces provides torch, but we ensure GPU support
# Pin to avoid breaking changes with newer versions
torch>=2.0.0,<2.2.0

# Web Framework & Interface
aiohttp>=3.9.0
httpx>=0.25.0

# Hugging Face Ecosystem
# PINNED for Phi-3 and DynamicCache compatibility
# transformers is held to >=4.36.0 and <4.41.0 to avoid breaking changes.
# NOTE(review): native Phi-3 model support appears to have landed in
# transformers 4.41.0; with this upper bound Phi-3 checkpoints likely need
# trust_remote_code=True. Confirm against the model loading code before
# changing the pin.
transformers>=4.36.0,<4.41.0
accelerate>=0.24.0,<0.28.0
tokenizers>=0.15.0
sentence-transformers>=2.2.0

# Vector Database & Search
faiss-cpu>=1.7.4
# Pin numpy to avoid compatibility issues with numpy 2.0
numpy>=1.24.0,<2.0.0
scipy>=1.11.0

# Data Processing & Utilities
pandas>=2.1.0
scikit-learn>=1.3.0

# Database & Persistence
sqlalchemy>=2.0.0
alembic>=1.12.0

# Caching & Performance
cachetools>=5.3.0
redis>=5.0.0
python-multipart>=0.0.6

# Security & Validation
pydantic-settings>=2.1.0
python-dotenv>=1.0.0  # For secure .env file loading
python-jose[cryptography]>=3.3.0
bcrypt>=4.0.0

# Mobile Optimization & UI
cssutils>=2.7.0
pillow>=10.1.0
requests>=2.31.0

# Async & Concurrency
aiofiles>=23.2.0
concurrent-log-handler>=0.9.0

# Logging & Monitoring
structlog>=23.2.0
prometheus-client>=0.19.0
psutil>=5.9.0

# Development & Testing
pytest>=7.4.0
pytest-asyncio>=0.21.0
pytest-cov>=4.1.0
black>=23.11.0
flake8>=6.1.0
mypy>=1.7.0

# Utility Libraries
python-dateutil>=2.8.0
pytz>=2023.3
tzdata>=2023.3
ujson>=5.8.0
orjson>=3.9.0

# Flask API for external integrations
flask>=3.0.0
flask-cors>=4.0.0
flask-limiter>=3.5.0  # Rate limiting for API protection

# Production WSGI Server
gunicorn>=21.2.0  # Production WSGI server (replaces Flask dev server)

# HF Spaces Specific Dependencies
# Note: huggingface-cli is part of huggingface-hub (installed by SDK)
gradio-client>=0.8.0
gradio-pdf>=0.0.6

# Model-specific dependencies
safetensors>=0.4.0
# Pin bitsandbytes to avoid breaking changes with quantization
bitsandbytes>=0.41.0,<0.43.0  # Required for 4-bit and 8-bit quantization on GPU

# Optional: Flash Attention (for better performance with transformer models)
# Uncomment if you want flash attention (requires CUDA 11.8+ and compatible GPU)
# Note: Improves performance but adds build complexity
# NOTE(review): flash-attn 2.x targets Ampere-or-newer GPUs; the T4 named in
# the header is Turing, so this is likely unusable on the target hardware.
# Confirm before enabling.
# flash-attn>=2.3.0  # Optional - improves performance but requires CUDA 11.8+

# Development/debugging
ipython>=8.17.0
ipdb>=0.13.0
debugpy>=1.7.0

# Security Tools (for security audits)
bandit>=1.7.5  # Security linter for Python code
safety>=2.3.5  # Dependency vulnerability scanner

# HTTP Client for ZeroGPU Chat API (aiohttp already included above)
# Note: No OpenAI client needed - using direct HTTP requests