# =============================================================================
# Research AI Assistant API - Environment Configuration
# =============================================================================
# Copy this content to a file named .env and fill in your actual values
# Never commit .env to version control!

# =============================================================================
# ZeroGPU Chat API Configuration (REQUIRED)
# =============================================================================

# Base URL for your ZeroGPU Chat API endpoint (RunPod)
# Format: http://your-pod-ip:8000 or https://your-domain.com
# Example: http://bm9njt1ypzvuqw-8000.proxy.runpod.net
ZEROGPU_BASE_URL=http://your-pod-ip:8000

# Email for authentication (register first via /register endpoint)
ZEROGPU_EMAIL=your-email@example.com

# Password for authentication
ZEROGPU_PASSWORD=your_secure_password_here

# =============================================================================
# Token Allocation Configuration
# =============================================================================

# Maximum tokens dedicated for user input (prioritized over context)
# Recommended: 32000 tokens for DeepSeek R1 (128K context window)
USER_INPUT_MAX_TOKENS=32000

# Maximum tokens for context preparation (includes user input + context)
# Recommended: 115000 tokens for DeepSeek R1 (leaves ~13K for output)
CONTEXT_PREPARATION_BUDGET=115000

# Context pruning threshold (should match context_preparation_budget)
CONTEXT_PRUNING_THRESHOLD=115000

# Always prioritize user input over historical context
PRIORITIZE_USER_INPUT=True

# Model context window (actual limit for your deployed model)
# Default: 8192 tokens (adjust based on your model)
# This is the maximum total tokens (input + output) the model can handle
# Common values: 4096, 8192, 16384, 32768, etc.
ZEROGPU_MODEL_CONTEXT_WINDOW=8192

# =============================================================================
# Database Configuration
# =============================================================================

# SQLite database path (default: sessions.db)
# Use /tmp/ for Docker/containerized environments
DB_PATH=sessions.db

# FAISS index path for embeddings (default: embeddings.faiss)
FAISS_INDEX_PATH=embeddings.faiss

# =============================================================================
# Cache Configuration
# =============================================================================

# HuggingFace cache directory (for any remaining model downloads)
HF_HOME=~/.cache/huggingface
TRANSFORMERS_CACHE=~/.cache/huggingface

# HuggingFace token (optional - only needed if using gated models)
HF_TOKEN=

# Cache TTL in seconds (default: 3600 = 1 hour)
CACHE_TTL=3600

# =============================================================================
# Session Configuration
# =============================================================================

# Session timeout in seconds (default: 3600 = 1 hour)
SESSION_TIMEOUT=3600

# Maximum session size in megabytes (default: 10 MB)
MAX_SESSION_SIZE_MB=10

# =============================================================================
# Performance Configuration
# =============================================================================

# Maximum worker threads for parallel processing (default: 4)
MAX_WORKERS=4

# =============================================================================
# Mobile Optimization
# =============================================================================

# Maximum tokens for mobile responses (default: 1200)
# Increased from 800 to allow better responses on mobile
MOBILE_MAX_TOKENS=1200

# Mobile request timeout in milliseconds (default: 15000)
MOBILE_TIMEOUT=15000

# =============================================================================
# API Configuration
# =============================================================================

# Flask/Gradio server port (default: 7860)
GRADIO_PORT=7860

# Server host (default: 0.0.0.0 for all interfaces)
GRADIO_HOST=0.0.0.0

# =============================================================================
# Logging Configuration
# =============================================================================

# Logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL (default: INFO)
LOG_LEVEL=INFO

# Log format: json or text (default: json)
LOG_FORMAT=json

# Log directory (default: /tmp/logs)
LOG_DIR=/tmp/logs

# =============================================================================
# Context Configuration
# =============================================================================

# Maximum context tokens (default: 4000)
# Note: This is overridden by CONTEXT_PREPARATION_BUDGET if set
MAX_CONTEXT_TOKENS=4000

# Cache TTL for context in seconds (default: 300 = 5 minutes)
CACHE_TTL_SECONDS=300

# Maximum cache size (default: 100)
MAX_CACHE_SIZE=100

# Enable parallel processing (default: True)
PARALLEL_PROCESSING=True

# Context decay factor (default: 0.8)
CONTEXT_DECAY_FACTOR=0.8

# Maximum interactions to keep in context (default: 10)
MAX_INTERACTIONS_TO_KEEP=10

# Enable metrics collection (default: True)
ENABLE_METRICS=True

# Enable context compression (default: True)
COMPRESSION_ENABLED=True

# Summarization threshold in tokens (default: 2000)
SUMMARIZATION_THRESHOLD=2000

# =============================================================================
# Model Selection (for context operations - if still using local models)
# =============================================================================

# These are optional and only used if local models are still needed
# for context summarization or other operations
CONTEXT_SUMMARIZATION_MODEL=Qwen/Qwen2.5-7B-Instruct
CONTEXT_INTENT_MODEL=Qwen/Qwen2.5-7B-Instruct
CONTEXT_SYNTHESIS_MODEL=Qwen/Qwen2.5-7B-Instruct
# =============================================================================
# Security Notes
# =============================================================================

# - Never commit .env file to version control
# - Keep API keys secret and rotate them regularly
# - Use environment variables in production (not .env files)
# - Set proper file permissions: chmod 600 .env