""" Feather DB — Interactive Demo HuggingFace Space · feather-db v0.6.0 Demonstrates: - Semantic search over a pre-loaded knowledge graph - Context chain (vector search + graph BFS expansion) - Graph health report - feather_why — retrieval score breakdown - Add new intel nodes live """ import hashlib import json import time import gradio as gr import numpy as np try: import feather_db _FEATHER_OK = True except ImportError: _FEATHER_OK = False # ── Offline embedder (no API key needed) ────────────────────────────────────── def _embed(text: str, dim: int = 768) -> np.ndarray: vec = np.zeros(dim, dtype=np.float32) tokens = text.lower().replace(",", " ").replace(".", " ").split() for tok in tokens: h = int(hashlib.md5(tok.encode()).hexdigest(), 16) for j in range(8): vec[(h >> (j * 5)) % dim] += 1.0 / (j + 1) norm = np.linalg.norm(vec) return (vec / norm) if norm > 0 else vec # ── Seed knowledge graph — AI developer tools / product intelligence ────────── # # Domain: a team building an AI-powered developer tool (editor, CLI, SDK, cloud) # tracks feature performance, competitor moves, community signals, and strategy. # All data points are realistic and meaningful for this domain. # SEED_NODES = [ ( 1, "AI autocomplete in the editor: 68% daily active usage, avg 12 completions accepted per session. " "Highest adoption of any feature shipped this quarter. Strongest signal in power-user cohort.", "feature_performance", "Editor", 0.92, ), ( 2, "Competitor launched inline AI debugging with natural-language error explanations. " "3,400 GitHub stars in 48 hours. Announcement dominated dev Twitter for two days. " "Directly targets our core editor user base.", "competitor_intel", "Editor", 0.95, ), ( 3, "StackOverflow Developer Survey 2026: 71% of developers now use AI coding assistants daily, " "up from 44% last year. Willingness to pay for productivity tools at an all-time high. " "Enterprise segment growing fastest.", "market_signal", "SDK", 0.90, ), ( 4, "CLI onboarding funnel: 34% of new users drop off at step 3 (API key setup). " "Median time-to-first-output is 4.2 minutes — well above our 90-second target. " "Friction is authentication, not comprehension.", "user_feedback", "CLI", 0.87, ), ( 5, "SDK v2 launched with streaming and tool-use support. Download velocity 2.1x faster than SDK v1 " "in the first week. Community PRs opened within 6 hours of release. " "Streaming is the most-requested missing feature now resolved.", "feature_performance", "SDK", 0.89, ), ( 6, "Strategy brief: reduce time-to-first-value under 90 seconds for all entry points. " "Frictionless auth (OAuth + token auto-detect) identified as the highest-leverage lever. " "Target: onboarding completion rate from 66% to 85% in Q2.", "strategy_brief", "CLI", 0.93, ), ( 7, "Community Discord: offline / air-gapped mode has 47 upvotes and is the top feature request. " "Users cite enterprise security policy and data-residency requirements. " "Three Fortune 500 pilots blocked specifically by this gap.", "community_signal", "Cloud", 0.88, ), ( 8, "VS Code extension outperforms JetBrains plugin 3.1x in weekly active users and 4.8x in session length. " "Recommend 70/30 investment split. JetBrains users skew toward Java/Kotlin — " "worth a targeted language-server improvement sprint.", "channel_insight", "Editor", 0.86, ), ( 9, "Retention analysis: power users (5+ sessions/week) show 8.4x 90-day retention vs casual users. " "Habit formation — not feature breadth — is the primary retention driver. " "Users who complete 3 sessions in week 1 have 72% chance of being active at day 90.", "user_feedback", "SDK", 0.91, ), ( 10, "Open-source alternative launched under MIT license: 12k GitHub stars in first month. " "No cloud sync, no team features, local-only. Actively targeting our free-tier users " "with 'no vendor lock-in' messaging. Poses risk to top-of-funnel acquisition.", "competitor_intel", "Cloud", 0.93, ), ] SEED_EDGES = [ (2, 1, "contradicts", 0.90), # competitor launch threatens editor feature lead (3, 5, "supports", 0.85), # market survey supports SDK investment (4, 6, "references", 0.92), # onboarding drop-off directly informs strategy brief (6, 4, "derived_from", 0.88), # strategy brief derived from CLI feedback (8, 1, "supports", 0.78), # VS Code dominance supports editor focus (9, 6, "supports", 0.87), # retention data supports onboarding strategy (10, 7, "supports", 0.80), # OSS competitor validates offline mode demand (3, 1, "supports", 0.75), # rising AI adoption supports editor feature investment ] DIM = 768 _DB_PATH = "/tmp/feather_demo.feather" _db = None def _get_db(): global _db if _db is not None: return _db if not _FEATHER_OK: return None db = feather_db.DB.open(_DB_PATH, dim=DIM) t0 = int(time.time()) - 7 * 86400 # seed nodes spread across last 7 days for nid, content, etype, product, imp in SEED_NODES: vec = _embed(content, DIM) meta = feather_db.Metadata() meta.timestamp = t0 + nid * 14400 # 4-hour intervals meta.importance = imp meta.confidence = 0.9 meta.type = feather_db.ContextType.FACT meta.source = "demo_seed" meta.content = content meta.namespace_id = "devtools" meta.entity_id = etype meta.set_attribute("entity_type", etype) meta.set_attribute("product", product) db.add(id=nid, vec=vec, meta=meta) for src, tgt, rel, w in SEED_EDGES: db.link(src, tgt, rel, w) db.save() _db = db return db # ── Tool implementations ─────────────────────────────────────────────────────── def do_search(query: str, k: int, product_filter: str) -> str: db = _get_db() if db is None: return "⚠️ feather_db not installed. Run: pip install feather-db" if not query.strip(): return "Enter a query above." vec = _embed(query, DIM) results = db.search(vec, k=k * 3) rows = [] for r in results: m = r.metadata p = m.get_attribute("product") if product_filter and product_filter != "All" and p != product_filter: continue rows.append({ "id": r.id, "score": round(r.score, 4), "entity_type": m.get_attribute("entity_type"), "product": p, "content": m.content, "recall_count": m.recall_count, "importance": round(m.importance, 3), }) if len(rows) >= k: break if not rows: return "No results found." return json.dumps(rows, indent=2) def do_context_chain(query: str, k: int, hops: int) -> str: db = _get_db() if db is None: return "⚠️ feather_db not installed." if not query.strip(): return "Enter a query above." vec = _embed(query, DIM) chain = db.context_chain(vec, k=k, hops=hops, modality="text") nodes = [] for node in sorted(chain.nodes, key=lambda n: (n.hop, -n.score)): m = node.metadata nodes.append({ "id": node.id, "hop": node.hop, "score": round(node.score, 4), "entity_type": m.get_attribute("entity_type"), "product": m.get_attribute("product"), "content": m.content[:140] + ("…" if len(m.content) > 140 else ""), }) edges = [ {"source": e.source, "target": e.target, "rel_type": e.rel_type, "weight": round(e.weight, 3)} for e in chain.edges ] return json.dumps({ "summary": f"{len(nodes)} nodes reached across {hops} graph hop(s)", "nodes": nodes, "edges": edges, }, indent=2) def do_why(node_id: int, query: str) -> str: db = _get_db() if db is None: return "⚠️ feather_db not installed." if not query.strip(): return "Enter a query above." from feather_db.memory import MemoryManager vec = _embed(query, DIM) result = MemoryManager.why_retrieved(db, node_id=int(node_id), query_vec=vec) return json.dumps(result, indent=2) def do_health() -> str: db = _get_db() if db is None: return "⚠️ feather_db not installed." from feather_db.memory import MemoryManager report = MemoryManager.health_report(db, modality="text") return json.dumps(report, indent=2) def do_add(content: str, entity_type: str, product: str, importance: float) -> str: db = _get_db() if db is None: return "⚠️ feather_db not installed." if not content.strip(): return "Content cannot be empty." nid = int(time.time() * 1000) % (2 ** 32) vec = _embed(content, DIM) meta = feather_db.Metadata() meta.timestamp = int(time.time()) meta.importance = float(importance) meta.confidence = 0.85 meta.type = feather_db.ContextType.EVENT meta.source = "gradio_user" meta.content = content meta.namespace_id = "devtools" meta.entity_id = entity_type meta.set_attribute("entity_type", entity_type) meta.set_attribute("product", product) db.add(id=nid, vec=vec, meta=meta) db.save() return json.dumps({ "status": "added", "id": nid, "entity_type": entity_type, "product": product, "tip": "Node is now live — try searching for it in the Search tab.", }) # ── Preload on startup ──────────────────────────────────────────────────────── _get_db() # ── Gradio UI ───────────────────────────────────────────────────────────────── with gr.Blocks( title="Feather DB — Living Context Engine", theme=gr.themes.Soft(), css=".tool-output { font-family: monospace; font-size: 0.84rem; }", ) as demo: gr.HTML("""

🪶 Feather DB — Living Context Engine

Embedded vector DB · HNSW search · typed context graph · adaptive decay · MCP server
getfeather.store · Hawky.ai · PyPI · GitHub · pip install feather-db

""") gr.Markdown(""" **Demo graph:** 10 nodes representing product intelligence for an AI developer tools team — feature performance, competitor moves, community signals, strategy briefs, and user research. 8 typed causal edges connect them (`contradicts`, `supports`, `derived_from`, `references`). """) with gr.Tabs(): # ── Search ──────────────────────────────────────────────────────────── with gr.TabItem("🔍 Semantic Search"): gr.Markdown("Find nodes by **meaning**, not keywords. Filtered by product or entity type.") with gr.Row(): with gr.Column(scale=2): s_query = gr.Textbox(label="Query", placeholder="Why is user onboarding failing?") s_k = gr.Slider(1, 10, value=5, step=1, label="Top-k") s_product = gr.Dropdown(["All","Editor","CLI","SDK","Cloud"], value="All", label="Product filter") s_btn = gr.Button("Search", variant="primary") with gr.Column(scale=3): s_out = gr.Code(label="Results", language="json", elem_classes=["tool-output"]) gr.Examples( examples=[ ["Why is user onboarding failing?", 5, "All"], ["What competitor moves should we watch?", 5, "All"], ["Which features drive retention?", 5, "SDK"], ["What does the community want most?", 5, "Cloud"], ["Where should we invest in the editor?", 5, "Editor"], ], inputs=[s_query, s_k, s_product], ) s_btn.click(do_search, [s_query, s_k, s_product], s_out) # ── Context Chain ───────────────────────────────────────────────────── with gr.TabItem("🕸️ Context Chain"): gr.Markdown( "**Two-phase retrieval** — vector search finds seed nodes (hop 0), " "then BFS expands outward over typed graph edges.\n\n" "Use this to trace root causes: *start from a symptom, surface the events that explain it.*" ) with gr.Row(): with gr.Column(scale=2): c_query = gr.Textbox(label="Seed query", placeholder="CLI adoption is slow") c_k = gr.Slider(1, 5, value=3, step=1, label="Seed nodes (k)") c_hops = gr.Slider(1, 3, value=2, step=1, label="Graph hops") c_btn = gr.Button("Run Context Chain", variant="primary") with gr.Column(scale=3): c_out = gr.Code(label="Chain result", language="json", elem_classes=["tool-output"]) gr.Examples( examples=[ ["CLI adoption is slow", 3, 2], ["Why is the competitor threat serious?", 3, 2], ["What drives long-term user retention?", 3, 2], ["Why do enterprise deals stall?", 3, 1], ], inputs=[c_query, c_k, c_hops], ) c_btn.click(do_context_chain, [c_query, c_k, c_hops], c_out) # ── Why Retrieved ───────────────────────────────────────────────────── with gr.TabItem("🔬 Why Retrieved?"): gr.Markdown( "Score breakdown for any node — **similarity**, **stickiness** (recall bonus), " "**recency** (adaptive decay), **importance**, **confidence**, and the full formula.\n\n" "Use to understand and debug retrieval decisions." ) with gr.Row(): with gr.Column(scale=2): w_id = gr.Number(label="Node ID (1–10)", value=4, precision=0) w_query = gr.Textbox(label="Query", placeholder="onboarding drop-off") w_btn = gr.Button("Explain", variant="primary") with gr.Column(scale=3): w_out = gr.Code(label="Score breakdown", language="json", elem_classes=["tool-output"]) gr.Examples( examples=[ [4, "onboarding drop-off time to value"], [2, "competitor launch editor feature"], [9, "retention power users habit"], [7, "offline mode enterprise security"], [6, "strategy brief Q2 auth friction"], ], inputs=[w_id, w_query], ) w_btn.click(do_why, [w_id, w_query], w_out) # ── Health ──────────────────────────────────────────────────────────── with gr.TabItem("🩺 Graph Health"): gr.Markdown( "Snapshot of the knowledge graph: **hot / warm / cold** tier distribution, " "orphan nodes, expired TTL count, recall histogram, avg importance and confidence." ) h_btn = gr.Button("Run Health Check", variant="primary") h_out = gr.Code(label="Health report", language="json", elem_classes=["tool-output"]) h_btn.click(do_health, [], h_out) # ── Add Intel ───────────────────────────────────────────────────────── with gr.TabItem("➕ Add Intel"): gr.Markdown( "Ingest a new intelligence node into the live graph. " "It becomes **immediately searchable** — try adding something then switching to Search." ) with gr.Row(): with gr.Column(): a_content = gr.Textbox( label="Content", lines=3, placeholder="Competitor Y just open-sourced their SDK. " "10k stars overnight. Targets our developer acquisition funnel.", ) a_etype = gr.Dropdown( ["competitor_intel", "feature_performance", "user_feedback", "strategy_brief", "market_signal", "community_signal", "channel_insight"], value="competitor_intel", label="Entity Type", ) a_product = gr.Dropdown(["Editor","CLI","SDK","Cloud"], value="SDK", label="Product") a_importance = gr.Slider(0.0, 1.0, value=0.85, step=0.05, label="Importance") a_btn = gr.Button("Add to Graph", variant="primary") with gr.Column(): a_out = gr.Code(label="Result", language="json", elem_classes=["tool-output"]) a_btn.click(do_add, [a_content, a_etype, a_product, a_importance], a_out) gr.Markdown(""" --- **Connect Feather DB to any LLM in 5 lines:** ```python pip install feather-db from feather_db.integrations import ClaudeConnector conn = ClaudeConnector(db_path="my.feather", dim=3072, embedder=embed_fn) result = conn.run_loop(client, messages=[{"role": "user", "content": "Why is onboarding drop-off so high?"}], model="claude-opus-4-6") ``` Works with **Claude · OpenAI · Gemini · Groq · Mistral · Ollama · MCP (Claude Desktop, Cursor)** [getfeather.store](https://www.getfeather.store/) · [Hawky.ai](https://hawky.ai) · [PyPI](https://pypi.org/project/feather-db/) · [GitHub](https://github.com/feather-store/feather) · [Integrations Guide](https://github.com/feather-store/feather/blob/main/docs/integrations.md) """) if __name__ == "__main__": demo.launch()