# =============================================================================
# Research AI Assistant API - Environment Configuration
# =============================================================================
# Copy this content to a file named .env and fill in your actual values
# Never commit .env to version control!

# =============================================================================
# Novita AI Configuration (REQUIRED)
# =============================================================================
# Get your API key from: https://novita.ai
# (A usage sketch showing how these values are consumed appears after the
# Session Configuration section below.)
NOVITA_API_KEY=your_novita_api_key_here

# Dedicated endpoint base URL (default for dedicated endpoints)
NOVITA_BASE_URL=https://api.novita.ai/dedicated/v1/openai

# Your dedicated endpoint model ID
# Format: model-name:endpoint-id
NOVITA_MODEL=deepseek-ai/DeepSeek-R1-Distill-Qwen-7B:de-1a706eeafbf3ebc2

# =============================================================================
# DeepSeek-R1 Optimized Settings
# =============================================================================
# Temperature: 0.5-0.7 range (0.6 recommended for DeepSeek-R1)
DEEPSEEK_R1_TEMPERATURE=0.6

# Force reasoning trigger: enable to ensure DeepSeek-R1 uses its reasoning pattern
# Set to True to add a `<think>` prefix for reasoning tasks
DEEPSEEK_R1_FORCE_REASONING=True

# =============================================================================
# Token Allocation Configuration
# =============================================================================
# Maximum tokens dedicated to user input (prioritized over context)
# Recommended: 32000 tokens for DeepSeek R1 (128K context window)
USER_INPUT_MAX_TOKENS=32000

# Maximum tokens for context preparation (includes user input + context)
# Recommended: 115000 tokens for DeepSeek R1 (leaves ~13K for output)
CONTEXT_PREPARATION_BUDGET=115000

# Context pruning threshold (should match CONTEXT_PREPARATION_BUDGET)
CONTEXT_PRUNING_THRESHOLD=115000

# Always prioritize user input over historical context
PRIORITIZE_USER_INPUT=True

# Model context window (actual limit for your deployed model)
# Default: 128000 tokens for DeepSeek R1 (128K context window)
# This is the maximum total tokens (input + output) the model can handle
# Take full advantage of DeepSeek R1's 128K capability
NOVITA_MODEL_CONTEXT_WINDOW=128000

# =============================================================================
# Database Configuration
# =============================================================================
# SQLite database path (default: sessions.db)
# Use /tmp/ for Docker/containerized environments
DB_PATH=sessions.db

# FAISS index path for embeddings (default: embeddings.faiss)
FAISS_INDEX_PATH=embeddings.faiss

# =============================================================================
# Cache Configuration
# =============================================================================
# HuggingFace cache directory (for any remaining model downloads)
HF_HOME=~/.cache/huggingface
TRANSFORMERS_CACHE=~/.cache/huggingface

# HuggingFace token (optional - only needed if using gated models)
HF_TOKEN=

# Cache TTL in seconds (default: 3600 = 1 hour)
CACHE_TTL=3600

# =============================================================================
# Session Configuration
# =============================================================================
# Session timeout in seconds (default: 3600 = 1 hour)
SESSION_TIMEOUT=3600

# Maximum session size in megabytes (default: 10 MB)
MAX_SESSION_SIZE_MB=10
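
# =============================================================================
# Usage Sketch (illustrative only - not read by the application)
# =============================================================================
# A minimal sketch of how the Novita and DeepSeek-R1 values above are
# typically consumed. It assumes python-dotenv and the OpenAI-compatible
# `openai` Python client (both assumptions here - the base URL ending in
# /v1/openai suggests OpenAI compatibility, but your application code may
# load this configuration differently):
#
#   import os
#   from dotenv import load_dotenv
#   from openai import OpenAI
#
#   load_dotenv()  # reads this .env file into the process environment
#   client = OpenAI(
#       api_key=os.environ["NOVITA_API_KEY"],
#       base_url=os.environ["NOVITA_BASE_URL"],
#   )
#   response = client.chat.completions.create(
#       model=os.environ["NOVITA_MODEL"],
#       temperature=float(os.getenv("DEEPSEEK_R1_TEMPERATURE", "0.6")),
#       messages=[{"role": "user", "content": "Summarize this paper."}],
#   )
#   print(response.choices[0].message.content)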
# =============================================================================
# Performance Configuration
# =============================================================================
# Maximum worker threads for parallel processing (default: 4)
MAX_WORKERS=4

# =============================================================================
# Mobile Optimization
# =============================================================================
# Maximum tokens for mobile responses (default: 1200)
# Increased from 800 to allow better responses on mobile
MOBILE_MAX_TOKENS=1200

# Mobile request timeout in milliseconds (default: 15000)
MOBILE_TIMEOUT=15000

# =============================================================================
# API Configuration
# =============================================================================
# Flask/Gradio server port (default: 7860)
GRADIO_PORT=7860

# Server host (default: 0.0.0.0 for all interfaces)
GRADIO_HOST=0.0.0.0

# =============================================================================
# Logging Configuration
# =============================================================================
# Logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL (default: INFO)
LOG_LEVEL=INFO

# Log format: json or text (default: json)
LOG_FORMAT=json

# Log directory (default: /tmp/logs)
LOG_DIR=/tmp/logs

# =============================================================================
# Context Configuration
# =============================================================================
# Maximum context tokens (default: 4000)
# Note: this is overridden by CONTEXT_PREPARATION_BUDGET if set
MAX_CONTEXT_TOKENS=4000

# Cache TTL for context in seconds (default: 300 = 5 minutes)
CACHE_TTL_SECONDS=300

# Maximum cache size in entries (default: 100)
MAX_CACHE_SIZE=100

# Enable parallel processing (default: True)
PARALLEL_PROCESSING=True

# Context decay factor (default: 0.8)
CONTEXT_DECAY_FACTOR=0.8

# Maximum interactions to keep in context (default: 10)
MAX_INTERACTIONS_TO_KEEP=10

# Enable metrics collection (default: True)
ENABLE_METRICS=True

# Enable context compression (default: True)
COMPRESSION_ENABLED=True

# Summarization threshold in tokens (default: 2000)
SUMMARIZATION_THRESHOLD=2000

# =============================================================================
# Model Selection (for context operations - if still using local models)
# =============================================================================
# These are optional and only used if local models are still needed
# for context summarization or other operations
CONTEXT_SUMMARIZATION_MODEL=Qwen/Qwen2.5-7B-Instruct
CONTEXT_INTENT_MODEL=Qwen/Qwen2.5-7B-Instruct
CONTEXT_SYNTHESIS_MODEL=Qwen/Qwen2.5-7B-Instruct

# =============================================================================
# Security Notes
# =============================================================================
# - Never commit the .env file to version control
# - Keep API keys secret and rotate them regularly
# - Use environment variables in production (not .env files)
# - Set proper file permissions: chmod 600 .env
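
# =============================================================================
# Token Budget Sanity Check (illustrative only - not read by the application)
# =============================================================================
# The token settings above should satisfy:
#   USER_INPUT_MAX_TOKENS <= CONTEXT_PREPARATION_BUDGET <= NOVITA_MODEL_CONTEXT_WINDOW
# With the defaults: 32000 <= 115000 <= 128000, leaving 128000 - 115000 =
# 13000 tokens of headroom for model output (the "~13K" noted earlier).
# A hypothetical startup check in Python (not part of this project's code):
#
#   import os
#   user_max = int(os.getenv("USER_INPUT_MAX_TOKENS", "32000"))
#   budget = int(os.getenv("CONTEXT_PREPARATION_BUDGET", "115000"))
#   window = int(os.getenv("NOVITA_MODEL_CONTEXT_WINDOW", "128000"))
#   assert user_max <= budget <= window, "token budgets are inconsistent"
#   print(f"Output headroom: {window - budget} tokens")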