import json, sys, sqlite3
from pathlib import Path
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Figure defaults applied to every plot created below.
matplotlib.rcParams.update({
    'figure.dpi': 110,
    'figure.facecolor': 'white',
})

# The working directory is taken as the repository root and is put first on
# sys.path.
REPO = Path.cwd().resolve()
sys.path.insert(0, str(REPO))

# Cached payloads are read from <REPO>/data/forge_cache/<CACHE_SUB>.
CACHE_SUB = 'seaad'
CACHE = REPO.joinpath('data', 'forge_cache', CACHE_SUB)

def load(name):
    """Return the parsed JSON stored at ``CACHE / '<name>.json'``.

    Args:
        name: Cache entry name, without the ``.json`` suffix.

    Returns:
        Whatever ``json.loads`` yields for the file's content, or an empty
        dict when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    p = CACHE / f'{name}.json'
    if not p.exists():
        return {}
    # Decode explicitly as UTF-8 rather than the platform's locale encoding,
    # so non-ASCII text in a cache file reads the same on every machine.
    return json.loads(p.read_text(encoding='utf-8'))

# Summarise the last 30 days of rows in the `tool_calls` table: one row per
# (skill_id, status) with the call count and the rounded mean duration.
db_path = Path('/home/ubuntu/scidex/scidex.db')
prov_query = '''
        SELECT skill_id, status, COUNT(*) AS n_calls,
               ROUND(AVG(duration_ms),0) AS mean_ms
        FROM tool_calls
        WHERE created_at >= date('now','-30 days')
        GROUP BY skill_id, status
        ORDER BY n_calls DESC
    '''
# Stays None when provenance cannot be loaded, so the trailing expression
# then has nothing to show.
prov_table = None
try:
    db = sqlite3.connect(str(db_path))
    try:
        prov = pd.read_sql_query(prov_query, db)
    finally:
        # Close the connection even when the query itself fails.
        db.close()
    prov['tool'] = prov['skill_id'].str.replace('tool_', '', regex=False)
    print(f'{len(prov)} tool-call aggregates (last 30 days):')
    prov_table = prov[['tool','status','n_calls','mean_ms']].head(20)
except Exception as e:
    # Best effort: the rest of the notebook does not depend on this table.
    print(f'Provenance unavailable: {e}')
# A bare expression inside the try block is silently discarded; ending the
# cell with the table follows the display convention of the other cells.
prov_table

77 tool-call aggregates (last 30 days):

def _clip_annotation(value, fallback, width=55):
    """Render one annotation field as text of at most ``width`` characters.

    A list is shown as its first two items joined by ', '; anything else is
    stringified, with ``fallback`` substituted when the value is falsy.
    """
    if isinstance(value, list):
        return ', '.join(value[:2])[:width]
    return str(value or fallback)[:width]


# One row per target gene, combining the cached `mygene_<gene>` and
# `hpa_<gene>` entries.
ann_rows = []
for symbol in ('MCU', 'RELN'):
    mygene_rec = load(f'mygene_{symbol}')
    hpa_rec = load(f'hpa_{symbol}')
    if mygene_rec or hpa_rec:
        row = {
            'gene': symbol,
            'name': (mygene_rec.get('name') or '')[:55],
            'protein_class': _clip_annotation(hpa_rec.get('protein_class'), '—'),
            'disease_involvement': _clip_annotation(hpa_rec.get('disease_involvement'), ''),
        }
    else:
        # Neither source has anything cached for this gene: placeholder row.
        row = {
            'gene': symbol,
            'name': '—',
            'protein_class': '—',
            'disease_involvement': '—',
        }
    ann_rows.append(row)
pd.DataFrame(ann_rows)

# Table of the first ten cached GO Biological Process enrichment records.
go_bp = load('enrichr_GO_Biological_Process')
# Stays None when there is nothing to tabulate.
go_table = None
if isinstance(go_bp, list) and go_bp:
    go_df = pd.DataFrame(go_bp[:10])[['term','p_value','odds_ratio','genes']].copy()
    go_df['p_value'] = go_df['p_value'].apply(lambda p: f'{p:.2e}')
    go_df['odds_ratio'] = go_df['odds_ratio'].round(1)
    go_df['term'] = go_df['term'].str[:60]
    # Count the hits before the gene list is flattened into a string.
    go_df['n_hits'] = go_df['genes'].apply(len)
    go_df['genes'] = go_df['genes'].apply(lambda g: ', '.join(g))
    go_table = go_df[['term','n_hits','p_value','odds_ratio','genes']]
else:
    print('No GO:BP enrichment data')
# A bare expression inside the if-branch is silently discarded; ending the
# cell with the table follows the display convention of the other cells.
go_table

# Horizontal bar chart of the first eight cached GO:BP enrichment records.
go_bp = load('enrichr_GO_Biological_Process')
if not (isinstance(go_bp, list) and go_bp):
    print('No GO:BP data to plot')
else:
    # Plot in reverse order of the cached list.
    ranked = list(reversed(go_bp[:8]))
    terms = [hit['term'][:45] for hit in ranked]
    # Floor the p-value so a reported 0 does not reach log10.
    neglogp = [-np.log10(max(hit['p_value'], 1e-300)) for hit in ranked]

    fig, ax = plt.subplots(figsize=(9, 4.5))
    ax.barh(terms, neglogp, color='#4fc3f7')
    ax.set_xlabel('-log10(p-value)')
    ax.set_title('Top GO:BP enrichment (Enrichr)')
    ax.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plt.show()

# Table of the first ten cached KEGG pathway enrichment records.
kegg = load('enrichr_KEGG_Pathways')
# Stays None when there is nothing to tabulate.
kegg_table = None
if isinstance(kegg, list) and kegg:
    kegg_df = pd.DataFrame(kegg[:10])[['term','p_value','odds_ratio','genes']].copy()
    kegg_df['genes'] = kegg_df['genes'].apply(lambda g: ', '.join(g))
    kegg_df['p_value'] = kegg_df['p_value'].apply(lambda p: f'{p:.2e}')
    kegg_df['odds_ratio'] = kegg_df['odds_ratio'].round(1)
    kegg_table = kegg_df
else:
    print('No KEGG enrichment data')
# A bare expression inside the if-branch is silently discarded; ending the
# cell with the table follows the display convention of the other cells.
kegg_table

No KEGG enrichment data

# Table of the cached STRING edges, highest score first (top 20 shown).
ppi = load('string_network')
# Stays None when there is nothing to tabulate.
ppi_table = None
if isinstance(ppi, list) and ppi:
    ppi_df = pd.DataFrame(ppi)
    # Guard the sort the same way the display columns are guarded, so a
    # payload without a `score` column is still listed instead of raising.
    if 'score' in ppi_df.columns:
        ppi_df = ppi_df.sort_values('score', ascending=False)
    display_cols = [c for c in ['protein1','protein2','score','escore','tscore'] if c in ppi_df.columns]
    print(f'{len(ppi_df)} STRING edges')
    ppi_table = ppi_df[display_cols].head(20)
else:
    print('No STRING edges returned')
# A bare expression inside the if-branch is silently discarded; ending the
# cell with the table follows the display convention of the other cells.
ppi_table

11 STRING edges

# Network figure: proteins placed on a circle, one line per cached edge.
ppi = load('string_network')
if not (isinstance(ppi, list) and ppi):
    print('No STRING data to visualize')
else:
    import math

    # Unique protein names in sorted order, spaced evenly on the unit circle.
    nodes = sorted({edge[end] for edge in ppi for end in ('protein1', 'protein2')})
    n = len(nodes)
    pos = {}
    for i, label in enumerate(nodes):
        pos[label] = (math.cos(2*math.pi*i/n), math.sin(2*math.pi*i/n))

    fig, ax = plt.subplots(figsize=(7, 7))
    # Opacity and line width both grow with the edge's `score`.
    for edge in ppi:
        (x1, y1), (x2, y2) = pos[edge['protein1']], pos[edge['protein2']]
        ax.plot([x1, x2], [y1, y2], color='#888',
                alpha=0.3+0.5*edge['score'],
                linewidth=0.5+2*edge['score'])
    # zorder 3 and 4 are set explicitly on the node markers and their labels.
    for label, (x, y) in pos.items():
        ax.scatter([x], [y], s=450, color='#ffd54f', edgecolors='#333', zorder=3)
        ax.annotate(label, (x, y), ha='center', va='center',
                    fontsize=8, fontweight='bold', zorder=4)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(f'STRING PPI network ({len(ppi)} edges)')
    plt.tight_layout()
    plt.show()

# Per-gene count of cached Reactome pathways plus the name of the first one.
pw_rows = []
for gene in ('MCU', 'RELN'):
    pathways = load(f'reactome_{gene}')
    if not isinstance(pathways, list):
        # Missing cache entry (or unexpected payload): report zero pathways.
        pw_rows.append({'gene': gene, 'n_pathways': 0, 'top_pathway': '—'})
        continue
    first_name = pathways[0]['name'] if pathways else '—'
    pw_rows.append({
        'gene': gene,
        'n_pathways': len(pathways),
        'top_pathway': first_name[:70],
    })
pd.DataFrame(pw_rows).sort_values('n_pathways', ascending=False)

# Per-gene summary of the cached `allen_ish_<gene>` entry: number of regions
# and the first listed region with its expression energy.
ish_rows = []
for gene in ('MCU', 'RELN'):
    ish = load(f'allen_ish_{gene}')
    # Anything that is not a dict, or has no usable 'regions', counts as empty.
    regions = (ish.get('regions') or []) if isinstance(ish, dict) else []
    if regions:
        first = regions[0]
        top_region = first.get('structure', '')[:45]
        top_energy = round(first.get('expression_energy', 0), 2)
    else:
        top_region = '—'
        top_energy = None
    ish_rows.append({
        'gene': gene,
        'n_ish_regions': len(regions),
        'top_region': top_region,
        'top_energy': top_energy,
    })
pd.DataFrame(ish_rows)

# (title, composite score) pairs for the hypotheses shown in the ranking chart.
hyp_data = [
    ('HCN1-Mediated Resonance Frequency Stabilization Therapy', 0.594),
    ('Perforant Path Presynaptic Terminal Protection Strategy', 0.585),
    ('Grid Cell-Specific Metabolic Reprogramming via IDH2 Enh', 0.583),
    ('Tau-Independent Microtubule Stabilization via MAP6 Enha', 0.582),
    ('Astrocytic Lactate Shuttle Enhancement for Grid Cell Bi', 0.582),
    ('Mitochondrial Calcium Buffering Enhancement via MCU Mod', 0.573),
    ('Reelin-Mediated Cytoskeletal Stabilization Protocol', 0.558),
]


def _bar_color(score):
    """Pick the bar colour for a composite score (thresholds at 0.6 and 0.5)."""
    if score >= 0.6:
        return '#ef5350'
    if score >= 0.5:
        return '#ffa726'
    return '#66bb6a'


# Plot in reverse order of `hyp_data`.
titles = [title for title, _ in reversed(hyp_data)]
scores = [score for _, score in reversed(hyp_data)]
colors = [_bar_color(s) for s in scores]

fig, ax = plt.subplots(figsize=(10, max(8, len(titles)*0.4)))
bar_slots = range(len(titles))
ax.barh(bar_slots, scores, color=colors)
ax.set_yticks(bar_slots)
ax.set_yticklabels(titles, fontsize=7)
ax.set_xlabel('Composite Score')
ax.set_title('Selective vulnerability of entorhinal cortex layer II neurons in AD')
ax.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()

# Row labels for the heatmap (one per hypothesis).
labels = [
    'HCN1-Mediated Resonance Frequency Stabil',
    'Perforant Path Presynaptic Terminal Prot',
    'Grid Cell-Specific Metabolic Reprogrammi',
    'Tau-Independent Microtubule Stabilizatio',
    'Astrocytic Lactate Shuttle Enhancement f',
    'Mitochondrial Calcium Buffering Enhancem',
    'Reelin-Mediated Cytoskeletal Stabilizati',
]
# One row per label, one column per entry of `dims`.
matrix = np.array([
    [0.8, 0.7, 0.4, 0.5, 0.436, 0.6, 0.5, 0.9, 0.2],
    [0.7, 0.8, 0.7, 0.6, 0.523, 0.7, 0.8, 0.8, 0.8],
    [0.8, 0.5, 0.3, 0.3, 0.509, 0.4, 0.4, 0.7, 0.2],
    [0.8, 0.7, 0.6, 0.7, 0.05, 0.6, 0.7, 0.7, 0.5],
    [0.7, 0.6, 0.4, 0.4, 0.524, 0.4, 0.5, 0.6, 0.4],
    [0.6, 0.5, 0.3, 0.3, 0.688, 0.5, 0.4, 0.8, 0.1],
    [0.9, 0.4, 0.6, 0.6, 0.675, 0.5, 0.6, 0.3, 0.5],
])
dims = [
    'novelty_score',
    'feasibility_score',
    'impact_score',
    'mechanistic_plausibility_score',
    'clinical_relevance_score',
    'data_availability_score',
    'reproducibility_score',
    'druggability_score',
    'safety_profile_score',
]
if not matrix.size:
    print('No score data available')
else:
    # Column captions: drop the '_score' suffix and title-case the rest.
    dim_captions = [d.replace('_score','').replace('_',' ').title() for d in dims]

    fig, ax = plt.subplots(figsize=(10, 5))
    # Colour scale is pinned to 0..1 via vmin/vmax.
    im = ax.imshow(matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
    ax.set_xticks(range(len(dims)))
    ax.set_xticklabels(dim_captions, rotation=45, ha='right', fontsize=8)
    ax.set_yticks(range(len(labels)))
    ax.set_yticklabels(labels, fontsize=7)
    ax.set_title('Score dimensions — top hypotheses')
    plt.colorbar(im, ax=ax, shrink=0.8)
    plt.tight_layout()
    plt.show()

def _pubmed_table(hid):
    """Build the literature table for one hypothesis from its cached PubMed hits.

    Args:
        hid: Hypothesis id; the cache entry read is ``pubmed_<hid>``.

    Returns:
        A DataFrame. Normally the hits restricted to whichever of
        year / journal / title / pmid are present, with title and journal
        shortened and rows ordered newest first when a year column exists.
        When none of those columns exist, the first five raw records.
        When the cache entry is empty or not a list, a single-row note.
    """
    papers = load(f'pubmed_{hid}')
    if not (isinstance(papers, list) and papers):
        return pd.DataFrame([{'note':'no PubMed results'}])

    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if not cols:
        return pd.DataFrame(papers[:5])

    lit = lit[cols].copy()
    # Every optional column is checked before use: a payload that has, say,
    # only `pmid` must not raise KeyError on `title` or `year`.
    for col, width in (('title', 80), ('journal', 30)):
        if col in lit.columns:
            lit[col] = lit[col].str[:width]
    if 'year' in lit.columns:
        lit = lit.sort_values('year', ascending=False)
    return lit


# One table per hypothesis id; each ends with a bare `display_df` expression.
hid = 'h-d40d2659'
display_df = _pubmed_table(hid)
display_df

hid = 'h-76888762'
display_df = _pubmed_table(hid)
display_df

hid = 'h-57862f8a'
display_df = _pubmed_table(hid)
display_df

hid = 'h-e12109e3'
display_df = _pubmed_table(hid)
display_df

hid = 'h-5ff6c5ca'
display_df = _pubmed_table(hid)
display_df

hid = 'h-aa8b4952'
display_df = _pubmed_table(hid)
display_df

hid = 'h-d2df6eaf'
display_df = _pubmed_table(hid)
display_df

# Knowledge-graph edges embedded in the notebook: source, relation, target
# and a numeric strength per edge.
edge_data = [
    {'source': 'MAP6', 'relation': 'regulates', 'target': 'microtubule_stabilization', 'strength': 0.8},
    {'source': 'microtubule_stabilization', 'relation': 'enables', 'target': 'axonal_transport', 'strength': 0.8},
    {'source': 'axonal_transport', 'relation': 'prevents', 'target': 'neurodegeneration_protection', 'strength': 0.8},
    {'source': 'PPARGC1A', 'relation': 'encodes', 'target': 'PGC1A_protein', 'strength': 0.8},
    {'source': 'PGC1A_protein', 'relation': 'activates', 'target': 'mitochondrial_biogenesis', 'strength': 0.8},
    {'source': 'mitochondrial_biogenesis', 'relation': 'promotes', 'target': 'perforant_path_protection', 'strength': 0.8},
    {'source': 'RELN', 'relation': 'encodes', 'target': 'reelin_protein', 'strength': 0.8},
    {'source': 'reelin_protein', 'relation': 'phosphorylates', 'target': 'DAB1', 'strength': 0.8},
    {'source': 'DAB1', 'relation': 'promotes', 'target': 'cytoskeletal_stability', 'strength': 0.8},
    {'source': 'HCN1', 'relation': 'encodes', 'target': 'HCN1_channel', 'strength': 0.8},
    {'source': 'HCN1_channel', 'relation': 'mediates', 'target': 'membrane_resonance', 'strength': 0.8},
    {'source': 'membrane_resonance', 'relation': 'enables', 'target': 'grid_cell_oscillations', 'strength': 0.8},
    {'source': 'entorhinal_cortex_layer_II', 'relation': 'early_vulnerability', 'target': 'alzheimers_disease', 'strength': 0.8},
    {'source': 'MAP6', 'relation': 'regulates', 'target': 'Tau-Independent Microtubule St', 'strength': 0.7},
    {'source': 'PPARGC1A', 'relation': 'regulates', 'target': 'Perforant Path Presynaptic Ter', 'strength': 0.7},
    {'source': 'RELN', 'relation': 'regulates', 'target': 'Reelin-Mediated Cytoskeletal S', 'strength': 0.7},
    {'source': 'HCN1', 'relation': 'regulates', 'target': 'HCN1-Mediated Resonance Freque', 'strength': 0.7},
    {'source': 'SLC16A2', 'relation': 'regulates', 'target': 'Astrocytic Lactate Shuttle Enh', 'strength': 0.7},
    {'source': 'IDH2', 'relation': 'regulates', 'target': 'Grid Cell-Specific Metabolic R', 'strength': 0.7},
    {'source': 'MCU', 'relation': 'regulates', 'target': 'Mitochondrial Calcium Bufferin', 'strength': 0.7},
    {'source': 'Tau-Independent Microtubule St', 'relation': 'therapeutic_target', 'target': "Alzheimer's Disease", 'strength': 0.65},
    {'source': 'Perforant Path Presynaptic Ter', 'relation': 'therapeutic_target', 'target': "Alzheimer's Disease", 'strength': 0.65},
    {'source': 'Reelin-Mediated Cytoskeletal S', 'relation': 'therapeutic_target', 'target': "Alzheimer's Disease", 'strength': 0.65},
    {'source': 'HCN1-Mediated Resonance Freque', 'relation': 'therapeutic_target', 'target': "Alzheimer's Disease", 'strength': 0.65},
    {'source': 'Astrocytic Lactate Shuttle Enh', 'relation': 'therapeutic_target', 'target': "Alzheimer's Disease", 'strength': 0.65},
]
# Stays None when there are no edges to tabulate.
edge_table = None
if edge_data:
    edge_table = pd.DataFrame(edge_data).head(25)
else:
    print('No KG edge data available')
# A bare expression inside the if-branch is silently discarded; ending the
# cell with the table follows the display convention of the other cells.
edge_table

Selective vulnerability of entorhinal cortex layer II neurons in AD — Analysis Notebook

Selective vulnerability of entorhinal cortex layer II neurons in AD

Research question

Approach

Debate Summary

1. Forge tool provenance

2. Target gene annotations

3. GO Biological Process enrichment (Enrichr)

4. KEGG pathway enrichment

5. STRING protein interaction network

6. Reactome pathway footprint

7. Allen Brain Atlas ISH regional expression

8. Hypothesis ranking (7 hypotheses)

9. Score dimension heatmap (top 10)

10. PubMed evidence per hypothesis

Hypothesis 1: HCN1-Mediated Resonance Frequency Stabilization Therapy

Hypothesis 2: Perforant Path Presynaptic Terminal Protection Strategy

Hypothesis 3: Grid Cell-Specific Metabolic Reprogramming via IDH2 Enhancement

Hypothesis 4: Tau-Independent Microtubule Stabilization via MAP6 Enhancement

Hypothesis 5: Astrocytic Lactate Shuttle Enhancement for Grid Cell Bioenergetics

Hypothesis 6: Mitochondrial Calcium Buffering Enhancement via MCU Modulation

Hypothesis 7: Reelin-Mediated Cytoskeletal Stabilization Protocol

11. Knowledge graph edges (117 total)

12. Caveats