""" Feather DB — Interactive Demo HuggingFace Space · feather-db v0.6.0 Demonstrates: - Semantic search over a pre-loaded knowledge graph - Context chain (vector search + graph BFS expansion) - Graph health report - feather_why — retrieval score breakdown - Add new intel nodes live """ import hashlib import json import time import gradio as gr import numpy as np try: import feather_db _FEATHER_OK = True except ImportError: _FEATHER_OK = False # ── Offline embedder (no API key needed) ────────────────────────────────────── def _embed(text: str, dim: int = 768) -> np.ndarray: vec = np.zeros(dim, dtype=np.float32) tokens = text.lower().replace(",", " ").replace(".", " ").split() for tok in tokens: h = int(hashlib.md5(tok.encode()).hexdigest(), 16) for j in range(8): vec[(h >> (j * 5)) % dim] += 1.0 / (j + 1) norm = np.linalg.norm(vec) return (vec / norm) if norm > 0 else vec # ── Seed knowledge graph — AI developer tools / product intelligence ────────── # # Domain: a team building an AI-powered developer tool (editor, CLI, SDK, cloud) # tracks feature performance, competitor moves, community signals, and strategy. # All data points are realistic and meaningful for this domain. # SEED_NODES = [ ( 1, "AI autocomplete in the editor: 68% daily active usage, avg 12 completions accepted per session. " "Highest adoption of any feature shipped this quarter. Strongest signal in power-user cohort.", "feature_performance", "Editor", 0.92, ), ( 2, "Competitor launched inline AI debugging with natural-language error explanations. " "3,400 GitHub stars in 48 hours. Announcement dominated dev Twitter for two days. " "Directly targets our core editor user base.", "competitor_intel", "Editor", 0.95, ), ( 3, "StackOverflow Developer Survey 2026: 71% of developers now use AI coding assistants daily, " "up from 44% last year. Willingness to pay for productivity tools at an all-time high. " "Enterprise segment growing fastest.", "market_signal", "SDK", 0.90, ), ( 4, "CLI onboarding funnel: 34% of new users drop off at step 3 (API key setup). " "Median time-to-first-output is 4.2 minutes — well above our 90-second target. " "Friction is authentication, not comprehension.", "user_feedback", "CLI", 0.87, ), ( 5, "SDK v2 launched with streaming and tool-use support. Download velocity 2.1x faster than SDK v1 " "in the first week. Community PRs opened within 6 hours of release. " "Streaming is the most-requested missing feature now resolved.", "feature_performance", "SDK", 0.89, ), ( 6, "Strategy brief: reduce time-to-first-value under 90 seconds for all entry points. " "Frictionless auth (OAuth + token auto-detect) identified as the highest-leverage lever. " "Target: onboarding completion rate from 66% to 85% in Q2.", "strategy_brief", "CLI", 0.93, ), ( 7, "Community Discord: offline / air-gapped mode has 47 upvotes and is the top feature request. " "Users cite enterprise security policy and data-residency requirements. " "Three Fortune 500 pilots blocked specifically by this gap.", "community_signal", "Cloud", 0.88, ), ( 8, "VS Code extension outperforms JetBrains plugin 3.1x in weekly active users and 4.8x in session length. " "Recommend 70/30 investment split. JetBrains users skew toward Java/Kotlin — " "worth a targeted language-server improvement sprint.", "channel_insight", "Editor", 0.86, ), ( 9, "Retention analysis: power users (5+ sessions/week) show 8.4x 90-day retention vs casual users. " "Habit formation — not feature breadth — is the primary retention driver. " "Users who complete 3 sessions in week 1 have 72% chance of being active at day 90.", "user_feedback", "SDK", 0.91, ), ( 10, "Open-source alternative launched under MIT license: 12k GitHub stars in first month. " "No cloud sync, no team features, local-only. Actively targeting our free-tier users " "with 'no vendor lock-in' messaging. Poses risk to top-of-funnel acquisition.", "competitor_intel", "Cloud", 0.93, ), ] SEED_EDGES = [ (2, 1, "contradicts", 0.90), # competitor launch threatens editor feature lead (3, 5, "supports", 0.85), # market survey supports SDK investment (4, 6, "references", 0.92), # onboarding drop-off directly informs strategy brief (6, 4, "derived_from", 0.88), # strategy brief derived from CLI feedback (8, 1, "supports", 0.78), # VS Code dominance supports editor focus (9, 6, "supports", 0.87), # retention data supports onboarding strategy (10, 7, "supports", 0.80), # OSS competitor validates offline mode demand (3, 1, "supports", 0.75), # rising AI adoption supports editor feature investment ] DIM = 768 _DB_PATH = "/tmp/feather_demo.feather" _db = None def _get_db(): global _db if _db is not None: return _db if not _FEATHER_OK: return None db = feather_db.DB.open(_DB_PATH, dim=DIM) t0 = int(time.time()) - 7 * 86400 # seed nodes spread across last 7 days for nid, content, etype, product, imp in SEED_NODES: vec = _embed(content, DIM) meta = feather_db.Metadata() meta.timestamp = t0 + nid * 14400 # 4-hour intervals meta.importance = imp meta.confidence = 0.9 meta.type = feather_db.ContextType.FACT meta.source = "demo_seed" meta.content = content meta.namespace_id = "devtools" meta.entity_id = etype meta.set_attribute("entity_type", etype) meta.set_attribute("product", product) db.add(id=nid, vec=vec, meta=meta) for src, tgt, rel, w in SEED_EDGES: db.link(src, tgt, rel, w) db.save() _db = db return db # ── Tool implementations ─────────────────────────────────────────────────────── def do_search(query: str, k: int, product_filter: str) -> str: db = _get_db() if db is None: return "⚠️ feather_db not installed. Run: pip install feather-db" if not query.strip(): return "Enter a query above." vec = _embed(query, DIM) results = db.search(vec, k=k * 3) rows = [] for r in results: m = r.metadata p = m.get_attribute("product") if product_filter and product_filter != "All" and p != product_filter: continue rows.append({ "id": r.id, "score": round(r.score, 4), "entity_type": m.get_attribute("entity_type"), "product": p, "content": m.content, "recall_count": m.recall_count, "importance": round(m.importance, 3), }) if len(rows) >= k: break if not rows: return "No results found." return json.dumps(rows, indent=2) def do_context_chain(query: str, k: int, hops: int) -> str: db = _get_db() if db is None: return "⚠️ feather_db not installed." if not query.strip(): return "Enter a query above." vec = _embed(query, DIM) chain = db.context_chain(vec, k=k, hops=hops, modality="text") nodes = [] for node in sorted(chain.nodes, key=lambda n: (n.hop, -n.score)): m = node.metadata nodes.append({ "id": node.id, "hop": node.hop, "score": round(node.score, 4), "entity_type": m.get_attribute("entity_type"), "product": m.get_attribute("product"), "content": m.content[:140] + ("…" if len(m.content) > 140 else ""), }) edges = [ {"source": e.source, "target": e.target, "rel_type": e.rel_type, "weight": round(e.weight, 3)} for e in chain.edges ] return json.dumps({ "summary": f"{len(nodes)} nodes reached across {hops} graph hop(s)", "nodes": nodes, "edges": edges, }, indent=2) def do_why(node_id: int, query: str) -> str: db = _get_db() if db is None: return "⚠️ feather_db not installed." if not query.strip(): return "Enter a query above." from feather_db.memory import MemoryManager vec = _embed(query, DIM) result = MemoryManager.why_retrieved(db, node_id=int(node_id), query_vec=vec) return json.dumps(result, indent=2) def do_health() -> str: db = _get_db() if db is None: return "⚠️ feather_db not installed." from feather_db.memory import MemoryManager report = MemoryManager.health_report(db, modality="text") return json.dumps(report, indent=2) def do_add(content: str, entity_type: str, product: str, importance: float) -> str: db = _get_db() if db is None: return "⚠️ feather_db not installed." if not content.strip(): return "Content cannot be empty." nid = int(time.time() * 1000) % (2 ** 32) vec = _embed(content, DIM) meta = feather_db.Metadata() meta.timestamp = int(time.time()) meta.importance = float(importance) meta.confidence = 0.85 meta.type = feather_db.ContextType.EVENT meta.source = "gradio_user" meta.content = content meta.namespace_id = "devtools" meta.entity_id = entity_type meta.set_attribute("entity_type", entity_type) meta.set_attribute("product", product) db.add(id=nid, vec=vec, meta=meta) db.save() return json.dumps({ "status": "added", "id": nid, "entity_type": entity_type, "product": product, "tip": "Node is now live — try searching for it in the Search tab.", }) # ── Preload on startup ──────────────────────────────────────────────────────── _get_db() # ── Gradio UI ───────────────────────────────────────────────────────────────── with gr.Blocks( title="Feather DB — Living Context Engine", theme=gr.themes.Soft(), css=".tool-output { font-family: monospace; font-size: 0.84rem; }", ) as demo: gr.HTML("""
Embedded vector DB · HNSW search · typed context graph · adaptive decay · MCP server
getfeather.store ·
Hawky.ai ·
PyPI ·
GitHub ·
pip install feather-db