import json, sys, sqlite3
from pathlib import Path
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
matplotlib.rcParams['figure.dpi'] = 110
matplotlib.rcParams['figure.facecolor'] = 'white'

REPO = Path('.').resolve()
sys.path.insert(0, str(REPO))

CACHE_SUB = 'seaad'
CACHE = REPO / 'data' / 'forge_cache' / CACHE_SUB

def load(name):
    """Return the parsed JSON cached under ``CACHE / f'{name}.json'``.

    Args:
        name: Cache entry name without the ``.json`` suffix.

    Returns:
        Whatever ``json.loads`` produces for the file's contents, or an
        empty dict when no such file exists.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    p = CACHE / f'{name}.json'
    if not p.exists():
        return {}
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    return json.loads(p.read_text(encoding='utf-8'))

# Forge tool-call provenance: per-(skill_id, status) call counts and mean
# duration over the last 30 days, read from the SQLite file at db_path.
# This section is best-effort: any failure is reported and execution continues.
db_path = Path('/home/ubuntu/scidex/scidex.db')
try:
    db = sqlite3.connect(str(db_path))
    try:
        prov = pd.read_sql_query('''
        SELECT skill_id, status, COUNT(*) AS n_calls,
               ROUND(AVG(duration_ms),0) AS mean_ms
        FROM tool_calls
        WHERE created_at >= date('now','-30 days')
        GROUP BY skill_id, status
        ORDER BY n_calls DESC
    ''', db)
    finally:
        # Release the connection even when the query itself raises.
        db.close()
    # Drop the 'tool_' prefix so the table shows the bare tool name.
    prov['tool'] = prov['skill_id'].str.replace('tool_', '', regex=False)
    print(f'{len(prov)} tool-call aggregates (last 30 days):')
    # The value of a bare expression inside ``try`` is discarded, so the
    # table has to be printed explicitly to appear at all.
    print(prov[['tool','status','n_calls','mean_ms']].head(20).to_string(index=False))
except Exception as e:
    print(f'Provenance unavailable: {e}')

181 tool-call aggregates (last 30 days):

# Target gene annotations: one row per gene, combining the cached MyGene
# record (gene name) with the cached HPA record (protein class, disease
# involvement). Genes with neither record cached get dash placeholders.
def _hpa_field(record, key, fallback):
    """Format one HPA field as display text of at most 55 characters.

    A list value is rendered as its first two items joined by ', '; any
    other value is stringified, with ``fallback`` standing in for a
    missing or falsy value.
    """
    value = record.get(key)
    if isinstance(value, list):
        return ', '.join(value[:2])[:55]
    return str(value or fallback)[:55]


ann_rows = []
for g in ['AXIS', 'PATHWAY', 'TYROBP']:
    mg = load(f'mygene_{g}')
    hpa = load(f'hpa_{g}')
    if mg or hpa:
        row = {
            'gene': g,
            'name': (mg.get('name') or '')[:55],
            'protein_class': _hpa_field(hpa, 'protein_class', '—'),
            'disease_involvement': _hpa_field(hpa, 'disease_involvement', ''),
        }
    else:
        # Nothing cached for this gene at all.
        row = {'gene': g, 'name': '—', 'protein_class': '—',
               'disease_involvement': '—'}
    ann_rows.append(row)
pd.DataFrame(ann_rows)

# GO Biological Process enrichment table built from the first ten entries
# of the cached Enrichr result.
go_bp = load('enrichr_GO_Biological_Process')
if isinstance(go_bp, list) and go_bp:
    # .copy() makes the column subset an independent frame before it is
    # modified in place below.
    go_df = pd.DataFrame(go_bp[:10])[['term','p_value','odds_ratio','genes']].copy()
    go_df['p_value'] = go_df['p_value'].apply(lambda p: f'{p:.2e}')
    go_df['odds_ratio'] = go_df['odds_ratio'].round(1)
    go_df['term'] = go_df['term'].str[:60]
    # Count hits before the gene list is flattened to a string.
    go_df['n_hits'] = go_df['genes'].apply(len)
    go_df['genes'] = go_df['genes'].apply(lambda g: ', '.join(g))
    # The value of a bare expression inside ``if`` is discarded, so the
    # table has to be printed explicitly to appear at all.
    print(go_df[['term','n_hits','p_value','odds_ratio','genes']].to_string(index=False))
else:
    print('No GO:BP enrichment data')

# Visualize top GO BP enrichment
# Horizontal bar chart of -log10(p) for the first eight cached GO:BP terms.
go_bp = load('enrichr_GO_Biological_Process')
if not (isinstance(go_bp, list) and go_bp):
    print('No GO:BP data to plot')
else:
    top = go_bp[:8]
    # Both lists are built in reverse order of the cached entries.
    terms = [entry['term'][:45] for entry in reversed(top)]
    # Floor p-values at 1e-300 so a reported p of 0 does not yield infinity.
    neglogp = [-np.log10(max(entry['p_value'], 1e-300)) for entry in reversed(top)]
    fig, ax = plt.subplots(figsize=(9, 4.5))
    ax.barh(terms, neglogp, color='#4fc3f7')
    ax.set_xlabel('-log10(p-value)')
    ax.set_title('Top GO:BP enrichment (Enrichr)')
    ax.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plt.show()

# KEGG pathway enrichment table built from the first ten entries of the
# cached Enrichr result.
kegg = load('enrichr_KEGG_Pathways')
if isinstance(kegg, list) and kegg:
    # .copy() makes the column subset an independent frame before it is
    # modified in place below.
    kegg_df = pd.DataFrame(kegg[:10])[['term','p_value','odds_ratio','genes']].copy()
    kegg_df['genes'] = kegg_df['genes'].apply(lambda g: ', '.join(g))
    kegg_df['p_value'] = kegg_df['p_value'].apply(lambda p: f'{p:.2e}')
    kegg_df['odds_ratio'] = kegg_df['odds_ratio'].round(1)
    # The value of a bare expression inside ``if`` is discarded, so the
    # table has to be printed explicitly to appear at all.
    print(kegg_df.to_string(index=False))
else:
    print('No KEGG enrichment data')

No KEGG enrichment data

# STRING protein interaction edges, sorted by descending score; the first
# twenty rows are shown.
ppi = load('string_network')
if isinstance(ppi, list) and ppi:
    ppi_df = pd.DataFrame(ppi).sort_values('score', ascending=False)
    # Show only those score columns that the cached payload actually has.
    display_cols = [c for c in ['protein1','protein2','score','escore','tscore'] if c in ppi_df.columns]
    print(f'{len(ppi_df)} STRING edges')
    # The value of a bare expression inside ``if`` is discarded, so the
    # table has to be printed explicitly to appear at all.
    print(ppi_df[display_cols].head(20).to_string(index=False))
else:
    print('No STRING edges returned')

11 STRING edges

# Network figure
# Draws the cached STRING edges on a circular layout: proteins are placed
# evenly on the unit circle in sorted-name order, and each edge's opacity
# and width grow with its 'score'.
ppi = load('string_network')
if not (isinstance(ppi, list) and ppi):
    print('No STRING data to visualize')
else:
    import math

    proteins = set()
    for edge in ppi:
        proteins.update((edge['protein1'], edge['protein2']))
    nodes = sorted(proteins)
    n = len(nodes)

    pos = {}
    for i, node in enumerate(nodes):
        pos[node] = (math.cos(2*math.pi*i/n), math.sin(2*math.pi*i/n))

    fig, ax = plt.subplots(figsize=(7, 7))
    for edge in ppi:
        x1, y1 = pos[edge['protein1']]
        x2, y2 = pos[edge['protein2']]
        score = edge['score']
        ax.plot([x1, x2], [y1, y2], color='#888',
                alpha=0.3+0.5*score, linewidth=0.5+2*score)
    # Nodes and their labels use higher zorder values than the default so
    # they are drawn over the edge lines.
    for name, (x, y) in pos.items():
        ax.scatter([x], [y], s=450, color='#ffd54f', edgecolors='#333', zorder=3)
        ax.annotate(name, (x, y), ha='center', va='center',
                    fontsize=8, fontweight='bold', zorder=4)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(f'STRING PPI network ({len(ppi)} edges)')
    plt.tight_layout()
    plt.show()

# Reactome pathway footprint: number of cached pathways per gene plus the
# name of the first one, listed with the largest pathway count first.
pw_rows = []
for g in ['AXIS', 'PATHWAY', 'TYROBP']:
    pws = load(f'reactome_{g}')
    if not isinstance(pws, list):
        # A non-list payload (e.g. the {} returned for a cache miss) counts
        # as no pathways.
        pw_rows.append({'gene': g, 'n_pathways': 0, 'top_pathway': '—'})
        continue
    first_name = pws[0]['name'] if pws else '—'
    pw_rows.append({
        'gene': g,
        'n_pathways': len(pws),
        'top_pathway': first_name[:70],
    })
pd.DataFrame(pw_rows).sort_values('n_pathways', ascending=False)

# Allen Brain Atlas ISH summary: per gene, how many regions are cached and
# the structure name / expression energy of the first listed region.
ish_rows = []
for g in ['AXIS', 'PATHWAY', 'TYROBP']:
    ish = load(f'allen_ish_{g}')
    if isinstance(ish, dict):
        regions = ish.get('regions') or []
    else:
        regions = []

    if regions:
        first = regions[0]
        top_region = first.get('structure', '')[:45]
        top_energy = round(first.get('expression_energy', 0), 2)
    else:
        top_region = '—'
        top_energy = None

    ish_rows.append({
        'gene': g,
        'n_ish_regions': len(regions),
        'top_region': top_region,
        'top_energy': top_energy,
    })
pd.DataFrame(ish_rows)

# Hypothesis ranking: horizontal bar chart of composite score per
# hypothesis. Each entry is (title, composite score).
hyp_data = [
    ('PLCG2 Allosteric Modulation as a Precision Therapeutic ', 0.936),
    ('TREM2-APOE Axis Dissociation for Selective DAM Activati', 0.89),
    ('TYROBP (DAP12) Conditional Antagonism for Early-Stage N', 0.844),
    ('CSF1R-TREM2 Co-Agonism for Sustained Microglial Expansi', 0.828),
    ('TREM2-mTOR Co-Agonism for Metabolic Reprogramming', 0.811),
    ('CX3CR1-TREM2 Integration for Synapse Pruning Normalizat', 0.776),
    ('INPP5D (SHIP1) Inhibition to Shift Microglial Polarizat', 0.758),
]


def _score_color(score):
    """Map a composite score to its bar colour by threshold (0.6 / 0.5)."""
    if score >= 0.6:
        return '#ef5350'
    if score >= 0.5:
        return '#ffa726'
    return '#66bb6a'


# Bars are plotted in reverse list order.
titles = [title for title, _ in reversed(hyp_data)]
scores = [score for _, score in reversed(hyp_data)]
colors = [_score_color(s) for s in scores]

fig, ax = plt.subplots(figsize=(10, max(8, len(titles)*0.4)))
bar_positions = range(len(titles))
ax.barh(bar_positions, scores, color=colors)
ax.set_yticks(bar_positions)
ax.set_yticklabels(titles, fontsize=7)
ax.set_xlabel('Composite Score')
ax.set_title('TREM2 agonism vs antagonism in DAM microglia')
ax.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()

# Score dimension heatmap: one row per hypothesis (labels), one column per
# scoring dimension (dims), colour-mapped over the fixed range 0..1.
labels = [
    'PLCG2 Allosteric Modulation as a Precisi',
    'TREM2-APOE Axis Dissociation for Selecti',
    'TYROBP (DAP12) Conditional Antagonism fo',
    'CSF1R-TREM2 Co-Agonism for Sustained Mic',
    'TREM2-mTOR Co-Agonism for Metabolic Repr',
    'CX3CR1-TREM2 Integration for Synapse Pru',
    'INPP5D (SHIP1) Inhibition to Shift Micro',
]
matrix = np.array([
    [0.85, 0.42, 0.72, 0.75, 0, 0.58, 0.62, 0.35, 0.48],
    [0.78, 0.35, 0.68, 0.7, 0, 0.6, 0.58, 0.28, 0.42],
    [0.82, 0.28, 0.58, 0.55, 0, 0.55, 0.52, 0.25, 0.38],
    [0.72, 0.25, 0.62, 0.58, 0, 0.48, 0.52, 0.22, 0.45],
    [0.7, 0.22, 0.65, 0.52, 0, 0.52, 0.48, 0.18, 0.35],
    [0.65, 0.25, 0.55, 0.48, 0, 0.45, 0.42, 0.22, 0.4],
    [0.72, 0.22, 0.48, 0.42, 0, 0.35, 0.38, 0.32, 0.32],
])
dims = [
    'novelty_score',
    'feasibility_score',
    'impact_score',
    'mechanistic_plausibility_score',
    'clinical_relevance_score',
    'data_availability_score',
    'reproducibility_score',
    'druggability_score',
    'safety_profile_score',
]
if not matrix.size:
    print('No score data available')
else:
    fig, ax = plt.subplots(figsize=(10, 5))
    im = ax.imshow(matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
    # Turn e.g. 'safety_profile_score' into the tick label 'Safety Profile'.
    dim_labels = []
    for d in dims:
        dim_labels.append(d.replace('_score','').replace('_',' ').title())
    ax.set_xticks(range(len(dims)))
    ax.set_xticklabels(dim_labels, rotation=45, ha='right', fontsize=8)
    ax.set_yticks(range(len(labels)))
    ax.set_yticklabels(labels, fontsize=7)
    ax.set_title('Score dimensions — top hypotheses')
    plt.colorbar(im, ax=ax, shrink=0.8)
    plt.tight_layout()
    plt.show()

# PubMed evidence per hypothesis. Every section below loads the cached
# PubMed payload for one hypothesis id and turns it into a compact table
# via the shared helper, instead of repeating the same formatting code.
def _pubmed_table(papers):
    """Build the literature table for one hypothesis' cached PubMed payload.

    Args:
        papers: Value returned by ``load('pubmed_<hypothesis id>')``. Only a
            non-empty list is treated as results.

    Returns:
        A DataFrame holding whichever of the columns ``year``, ``journal``,
        ``title`` and ``pmid`` the records provide (title cut to 80
        characters, journal to 30, sorted by descending year); the first
        five raw records when none of those columns exist; or a one-row
        frame with a ``note`` column when there are no results.
    """
    if not (isinstance(papers, list) and papers):
        return pd.DataFrame([{'note':'no PubMed results'}])
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if not cols:
        return pd.DataFrame(papers[:5])
    # Work on an independent copy of the column subset.
    lit = lit[cols].copy()
    # Each optional column is checked before use so that a payload lacking
    # 'title' or 'year' no longer raises KeyError.
    if 'title' in lit.columns:
        lit['title'] = lit['title'].str[:80]
    if 'journal' in lit.columns:
        lit['journal'] = lit['journal'].str[:30]
    if 'year' in lit.columns:
        lit = lit.sort_values('year', ascending=False)
    return lit


hid = 'h-0f025d94'
papers = load(f'pubmed_{hid}')
display_df = _pubmed_table(papers)
display_df

hid = 'h-5b378bd3'
papers = load(f'pubmed_{hid}')
display_df = _pubmed_table(papers)
display_df

hid = 'h-f503b337'
papers = load(f'pubmed_{hid}')
display_df = _pubmed_table(papers)
display_df

hid = 'h-0cbe9bac'
papers = load(f'pubmed_{hid}')
display_df = _pubmed_table(papers)
display_df

hid = 'h-2e03f316'
papers = load(f'pubmed_{hid}')
display_df = _pubmed_table(papers)
display_df

hid = 'h-7597968b'
papers = load(f'pubmed_{hid}')
display_df = _pubmed_table(papers)
display_df

hid = 'h-39148342'
papers = load(f'pubmed_{hid}')
display_df = _pubmed_table(papers)
display_df

# Knowledge-graph edges shown for this analysis, one dict per edge with
# the keys source / relation / target / strength.
edge_data = [
    {'source': 'h-0f025d94', 'relation': 'targets', 'target': 'PLCG2', 'strength': 0.5},
    {'source': 'h-5b378bd3', 'relation': 'targets', 'target': 'TREM2-APOE axis', 'strength': 0.5},
    {'source': 'h-0cbe9bac', 'relation': 'targets', 'target': 'CSF1R-TREM2', 'strength': 0.5},
    {'source': 'h-f503b337', 'relation': 'targets', 'target': 'TYROBP', 'strength': 0.5},
    {'source': 'h-2e03f316', 'relation': 'targets', 'target': 'TREM2-mTOR pathway', 'strength': 0.5},
    {'source': 'h-7597968b', 'relation': 'targets', 'target': 'CX3CR1-TREM2', 'strength': 0.5},
    {'source': 'h-39148342', 'relation': 'targets', 'target': 'INPP5D', 'strength': 0.5},
    {'source': 'PLCG2', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.4},
    {'source': 'PLCG2', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.4},
    {'source': 'TREM2-APOE axis', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.4},
    {'source': 'TREM2-APOE axis', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.4},
    {'source': 'CSF1R-TREM2', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.4},
    {'source': 'CSF1R-TREM2', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.4},
    {'source': 'TYROBP', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.4},
    {'source': 'TREM2-mTOR pathway', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.4},
    {'source': 'TREM2-mTOR pathway', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.4},
    {'source': 'CX3CR1-TREM2', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.4},
    {'source': 'CX3CR1-TREM2', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.4},
    {'source': 'INPP5D', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.4},
    {'source': 'INPP5D', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.4},
    {'source': 'PLCG2', 'relation': 'co_associated_with', 'target': 'TREM2', 'strength': 0.3},
    {'source': 'TREM2-APOE axis', 'relation': 'co_associated_with', 'target': 'TREM2', 'strength': 0.3},
    {'source': 'TREM2-APOE axis', 'relation': 'co_associated_with', 'target': 'APOE', 'strength': 0.3},
    {'source': 'TREM2-APOE axis', 'relation': 'co_associated_with', 'target': 'DAM', 'strength': 0.3},
    {'source': 'CSF1R-TREM2', 'relation': 'co_associated_with', 'target': 'CSF1R', 'strength': 0.3},
]
if edge_data:
    # The value of a bare expression inside ``if`` is discarded, so the
    # table has to be printed explicitly to appear at all.
    print(pd.DataFrame(edge_data).head(25).to_string(index=False))
else:
    print('No KG edge data available')

SciDEX Analysis: 2026 04 01 Gap 001

TREM2 agonism vs antagonism in DAM microglia

Research question

Approach

Debate Summary

1. Forge tool provenance

2. Target gene annotations

3. GO Biological Process enrichment (Enrichr)

4. KEGG pathway enrichment

5. STRING protein interaction network

6. Reactome pathway footprint

7. Allen Brain Atlas ISH regional expression

8. Hypothesis ranking (7 hypotheses)

9. Score dimension heatmap (top 10)

10. PubMed evidence per hypothesis

Hypothesis 1: PLCG2 Allosteric Modulation as a Precision Therapeutic for TREM2-Depen

Hypothesis 2: TREM2-APOE Axis Dissociation for Selective DAM Activation

Hypothesis 3: TYROBP (DAP12) Conditional Antagonism for Early-Stage Neuroprotection

Hypothesis 4: CSF1R-TREM2 Co-Agonism for Sustained Microglial Expansion

Hypothesis 5: TREM2-mTOR Co-Agonism for Metabolic Reprogramming

Hypothesis 6: CX3CR1-TREM2 Integration for Synapse Pruning Normalization

Hypothesis 7: INPP5D (SHIP1) Inhibition to Shift Microglial Polarization

11. Knowledge graph edges (31 total)

12. Caveats

	gene	name	protein_class	disease_involvement
0	AXIS	—	—	—
1	PATHWAY	—	—	—
2	TYROBP	transmembrane immune signaling adaptor TYROBP	Disease related genes, Human disease related g...

	gene	n_pathways	top_pathway
2	TYROBP	6	Immunoregulatory interactions between a Lympho...
0	AXIS	0	—
1	PATHWAY	0	—