Mechanistic role of APOE in neurodegeneration¶
Notebook ID: nb-sda-2026-04-01-gap-auto-fd6b1635d9 · Analysis: sda-2026-04-01-gap-auto-fd6b1635d9 · Generated: 2026-04-10
Research question¶
What is the mechanistic role of APOE in neurodegeneration?
Approach¶
This notebook is generated programmatically from real Forge tool calls and SciDEX debate data. Code cells load cached evidence bundles from data/forge_cache/seaad/*.json and query live data from scidex.db. Re-run python3 scripts/regenerate_notebooks.py --analysis sda-2026-04-01-gap-auto-fd6b1635d9 --force to refresh.
6 hypotheses were generated and debated. The knowledge graph has 35 edges.
Debate Summary¶
Quality score: 0.54 · Rounds: 4 · Personas: Theorist, Skeptic, Domain_Expert, Synthesizer
1. Forge tool provenance¶
import json, sys, sqlite3
from pathlib import Path
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Figure defaults applied to every plot created after this point.
matplotlib.rcParams.update({
    'figure.dpi': 110,
    'figure.facecolor': 'white',
})
# Repository root is taken from the current working directory and placed
# first on sys.path.
REPO = Path('.').resolve()
sys.path.insert(0, str(REPO))

# Sub-directory of data/forge_cache that holds the cached JSON bundles.
CACHE_SUB = 'seaad'
CACHE = REPO / 'data' / 'forge_cache' / CACHE_SUB


def load(name):
    """Return the parsed JSON bundle stored at ``CACHE/<name>.json``.

    Args:
        name: Bundle file name without the ``.json`` suffix.

    Returns:
        The decoded JSON value, or an empty dict when no such file exists.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    p = CACHE / f'{name}.json'
    # is_file() also rejects a directory that happens to be named *.json.
    if not p.is_file():
        return {}
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    return json.loads(p.read_text(encoding='utf-8'))
# Summarise tool calls recorded in the last 30 days, grouped by skill and
# status. Any failure (missing database, missing table, ...) is reported as a
# message instead of aborting the notebook.
db_path = Path('/home/ubuntu/scidex/scidex.db')
prov_view = None
try:
    db = sqlite3.connect(str(db_path))
    try:
        prov = pd.read_sql_query('''
            SELECT skill_id, status, COUNT(*) AS n_calls,
                   ROUND(AVG(duration_ms),0) AS mean_ms
            FROM tool_calls
            WHERE created_at >= date('now','-30 days')
            GROUP BY skill_id, status
            ORDER BY n_calls DESC
        ''', db)
    finally:
        # Close the connection even when the query raises.
        db.close()
    prov['tool'] = prov['skill_id'].str.replace('tool_', '', regex=False)
    print(f'{len(prov)} tool-call aggregates (last 30 days):')
    prov_view = prov[['tool', 'status', 'n_calls', 'mean_ms']].head(20)
except Exception as e:
    print(f'Provenance unavailable: {e}')
# A notebook only renders the last top-level expression of a cell; an
# expression nested inside the try block above would be silently dropped.
prov_view
77 tool-call aggregates (last 30 days):
2. Target gene annotations¶
def _fmt_field(value, limit=55):
    """Render one annotation value as a short display string.

    Lists are reduced to their first two entries joined by ', '; any other
    value is stringified. The text is cut to ``limit`` characters, and an
    empty or missing value becomes the '—' placeholder so every column marks
    missing data the same way.
    """
    if isinstance(value, list):
        text = ', '.join(str(item) for item in value[:2])
    else:
        text = str(value) if value else ''
    return text[:limit] or '—'


# One summary row per target gene, built from the cached MyGene and HPA
# bundles.
ann_rows = []
for g in ['APOE', 'MTOR']:
    mg = load(f'mygene_{g}')
    hpa = load(f'hpa_{g}')
    # A bundle that is not a JSON object has no fields to read.
    mg = mg if isinstance(mg, dict) else {}
    hpa = hpa if isinstance(hpa, dict) else {}
    ann_rows.append({
        'gene': g,
        'name': _fmt_field(mg.get('name')),
        'protein_class': _fmt_field(hpa.get('protein_class')),
        'disease_involvement': _fmt_field(hpa.get('disease_involvement')),
    })
pd.DataFrame(ann_rows)
| | gene | name | protein_class | disease_involvement |
|---|---|---|---|---|
| 0 | APOE | apolipoprotein E | Cancer-related genes, Candidate cardiovascular... | Alzheimer disease, Amyloidosis |
| 1 | MTOR | — | — | — |
3. GO Biological Process enrichment (Enrichr)¶
# Tabulate the ten leading GO Biological Process terms from the cached
# Enrichr bundle.
go_bp = load('enrichr_GO_Biological_Process')
go_view = None
if isinstance(go_bp, list) and go_bp:
    go_df = pd.DataFrame(go_bp[:10])[['term', 'p_value', 'odds_ratio', 'genes']].copy()
    go_df['p_value'] = go_df['p_value'].apply(lambda p: f'{p:.2e}')
    go_df['odds_ratio'] = go_df['odds_ratio'].round(1)
    go_df['term'] = go_df['term'].str[:60]
    # Count the hits before the gene list is flattened to a string.
    go_df['n_hits'] = go_df['genes'].apply(len)
    go_df['genes'] = go_df['genes'].apply(lambda g: ', '.join(g))
    go_view = go_df[['term', 'n_hits', 'p_value', 'odds_ratio', 'genes']]
else:
    print('No GO:BP enrichment data')
# A notebook only renders the last top-level expression of a cell; an
# expression nested inside the if-branch above would be silently dropped.
go_view
# Horizontal bar chart of -log10(p) for the eight leading GO:BP terms.
go_bp = load('enrichr_GO_Biological_Process')
if not (isinstance(go_bp, list) and go_bp):
    print('No GO:BP data to plot')
else:
    top = go_bp[:8]
    # Walk the terms in reverse so the first entry is plotted last.
    terms = []
    neglogp = []
    for entry in reversed(top):
        terms.append(entry['term'][:45])
        # Floor the p-value so log10 never receives zero.
        neglogp.append(-np.log10(max(entry['p_value'], 1e-300)))
    fig, ax = plt.subplots(figsize=(9, 4.5))
    ax.barh(terms, neglogp, color='#4fc3f7')
    ax.set_xlabel('-log10(p-value)')
    ax.set_title('Top GO:BP enrichment (Enrichr)')
    ax.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plt.show()
4. KEGG pathway enrichment¶
# Tabulate the ten leading KEGG pathways from the cached Enrichr bundle.
kegg = load('enrichr_KEGG_Pathways')
kegg_view = None
if isinstance(kegg, list) and kegg:
    kegg_df = pd.DataFrame(kegg[:10])[['term', 'p_value', 'odds_ratio', 'genes']].copy()
    kegg_df['genes'] = kegg_df['genes'].apply(lambda g: ', '.join(g))
    kegg_df['p_value'] = kegg_df['p_value'].apply(lambda p: f'{p:.2e}')
    kegg_df['odds_ratio'] = kegg_df['odds_ratio'].round(1)
    kegg_view = kegg_df
else:
    print('No KEGG enrichment data')
# A notebook only renders the last top-level expression of a cell; an
# expression nested inside the if-branch above would be silently dropped.
kegg_view
No KEGG enrichment data
5. STRING protein interaction network¶
# List the strongest STRING interactions from the cached network bundle.
ppi = load('string_network')
ppi_view = None
if isinstance(ppi, list) and ppi:
    ppi_df = pd.DataFrame(ppi)
    # Sort only when the score column exists, matching the presence check
    # applied to the displayed columns below.
    if 'score' in ppi_df.columns:
        ppi_df = ppi_df.sort_values('score', ascending=False)
    display_cols = [c for c in ['protein1', 'protein2', 'score', 'escore', 'tscore']
                    if c in ppi_df.columns]
    print(f'{len(ppi_df)} STRING edges')
    ppi_view = ppi_df[display_cols].head(20)
else:
    print('No STRING edges returned')
# A notebook only renders the last top-level expression of a cell; an
# expression nested inside the if-branch above would be silently dropped.
ppi_view
11 STRING edges
# Draw the STRING network with the proteins spaced evenly on a unit circle.
ppi = load('string_network')
if not (isinstance(ppi, list) and ppi):
    print('No STRING data to visualize')
else:
    import math

    # Collect every protein that appears on either end of an edge.
    nodes = sorted({p for e in ppi for p in (e['protein1'], e['protein2'])})
    n = len(nodes)
    pos = {}
    for i, n_ in enumerate(nodes):
        pos[n_] = (math.cos(2*math.pi*i/n), math.sin(2*math.pi*i/n))

    fig, ax = plt.subplots(figsize=(7, 7))

    # Edges: opacity and line width both grow with the interaction score.
    for e in ppi:
        x1, y1 = pos[e['protein1']]
        x2, y2 = pos[e['protein2']]
        ax.plot(
            [x1, x2], [y1, y2],
            color='#888',
            alpha=0.3+0.5*e['score'],
            linewidth=0.5+2*e['score'],
        )

    # Nodes: one labelled marker per protein, drawn above the edges.
    for name, (x, y) in pos.items():
        ax.scatter([x], [y], s=450, color='#ffd54f', edgecolors='#333', zorder=3)
        ax.annotate(name, (x, y), ha='center', va='center',
                    fontsize=8, fontweight='bold', zorder=4)

    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(f'STRING PPI network ({len(ppi)} edges)')
    plt.tight_layout()
    plt.show()
6. Reactome pathway footprint¶
# Per-gene Reactome summary: number of cached pathways and the first one.
pw_rows = []
for g in ('APOE', 'MTOR'):
    pws = load(f'reactome_{g}')
    if isinstance(pws, list) and pws:
        n_pathways = len(pws)
        top_pathway = pws[0]['name'][:70]
    else:
        # Missing bundle, non-list payload, or empty list.
        n_pathways = 0
        top_pathway = '—'
    pw_rows.append({'gene': g, 'n_pathways': n_pathways, 'top_pathway': top_pathway})
pd.DataFrame(pw_rows).sort_values('n_pathways', ascending=False)
| | gene | n_pathways | top_pathway |
|---|---|---|---|
| 0 | APOE | 8 | Nuclear signaling by ERBB4 |
| 1 | MTOR | 0 | — |
7. Allen Brain Atlas ISH regional expression¶
# Per-gene Allen Brain Atlas ISH summary: number of regions and the first one.
ish_rows = []
for g in ('APOE', 'MTOR'):
    ish = load(f'allen_ish_{g}')
    regions = []
    if isinstance(ish, dict):
        regions = ish.get('regions') or []
    n_ish_regions = len(regions)
    if regions:
        first = regions[0]
        top_region = first.get('structure', '')[:45]
        top_energy = round(first.get('expression_energy', 0), 2)
    else:
        # No regions cached for this gene.
        top_region = '—'
        top_energy = None
    ish_rows.append({
        'gene': g,
        'n_ish_regions': n_ish_regions,
        'top_region': top_region,
        'top_energy': top_energy,
    })
pd.DataFrame(ish_rows)
| | gene | n_ish_regions | top_region | top_energy |
|---|---|---|---|---|
| 0 | APOE | 0 | — | — |
| 1 | MTOR | 0 | — | — |
8. Hypothesis ranking (6 hypotheses)¶
# (title, composite score) pairs, highest score first.
hyp_data = [
    ('APOE-Dependent Autophagy Restoration', 0.594),
    ('APOE4-Selective Lipid Nanoemulsion Therapy', 0.585),
    ('APOE-TREM2 Interaction Modulation', 0.583),
    ('APOE Isoform Conversion Therapy', 0.582),
    ('Proteostasis Enhancement via APOE Chaperone Targeting', 0.559),
    ('APOE-Mediated Synaptic Lipid Raft Stabilization', 0.491),
]

# Reverse the order for plotting.
titles = []
scores = []
for title, score in reversed(hyp_data):
    titles.append(title)
    scores.append(score)

# Colour bands: >= 0.6, >= 0.5, and everything below.
colors = []
for s in scores:
    if s >= 0.6:
        colors.append('#ef5350')
    elif s >= 0.5:
        colors.append('#ffa726')
    else:
        colors.append('#66bb6a')

fig, ax = plt.subplots(figsize=(10, max(8, len(titles)*0.4)))
ax.barh(range(len(titles)), scores, color=colors)
ax.set_yticks(range(len(titles)))
ax.set_yticklabels(titles, fontsize=7)
ax.set_xlabel('Composite Score')
ax.set_title('Mechanistic role of APOE in neurodegeneration')
ax.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()
9. Score dimension heatmap (top 10)¶
# Row labels: one hypothesis title per matrix row.
labels = [
    'APOE-Dependent Autophagy Restoration',
    'APOE4-Selective Lipid Nanoemulsion Thera',
    'APOE-TREM2 Interaction Modulation',
    'APOE Isoform Conversion Therapy',
    'Proteostasis Enhancement via APOE Chaper',
    'APOE-Mediated Synaptic Lipid Raft Stabil',
]
# One row per hypothesis, one column per entry of `dims`.
matrix = np.array([
    [0.6, 0.9, 0.8, 0.85, 0.09, 0.85, 0.8, 0.95, 0.7],
    [0.9, 0.3, 0.75, 0.7, 0.257, 0.55, 0.45, 0.35, 0.5],
    [0.85, 0.45, 0.85, 0.85, 0.247, 0.75, 0.7, 0.4, 0.6],
    [0.95, 0.15, 0.85, 0.75, 0.13, 0.4, 0.35, 0.2, 0.3],
    [0.7, 0.85, 0.75, 0.75, 0.13, 0.7, 0.75, 0.9, 0.65],
    [0.75, 0.5, 0.65, 0.6, 0.385, 0.45, 0.4, 0.6, 0.45],
])
dims = [
    'novelty_score',
    'feasibility_score',
    'impact_score',
    'mechanistic_plausibility_score',
    'clinical_relevance_score',
    'data_availability_score',
    'reproducibility_score',
    'druggability_score',
    'safety_profile_score',
]

if not matrix.size:
    print('No score data available')
else:
    # Turn e.g. 'safety_profile_score' into 'Safety Profile' for the axis.
    tick_text = []
    for d in dims:
        tick_text.append(d.replace('_score','').replace('_',' ').title())

    fig, ax = plt.subplots(figsize=(10, 5))
    im = ax.imshow(matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
    ax.set_xticks(range(len(dims)))
    ax.set_xticklabels(tick_text, rotation=45, ha='right', fontsize=8)
    ax.set_yticks(range(len(labels)))
    ax.set_yticklabels(labels, fontsize=7)
    ax.set_title('Score dimensions — top hypotheses')
    plt.colorbar(im, ax=ax, shrink=0.8)
    plt.tight_layout()
    plt.show()
10. PubMed evidence per hypothesis¶
Hypothesis 1: APOE-Dependent Autophagy Restoration¶
Target genes: MTOR · Composite score: 0.594
APOE-Dependent Autophagy Restoration proposes targeting the mechanistic link between apolipoprotein E4 (APOE4) genotype and impaired macroautophagy as a precision therapeutic strategy for Alzheimer's disease. APOE4, carried by ~25% of the population and present in ~65% of AD patients, disrupts autophagosome biogenesis, lysosomal acidification, and autophagic flux through multiple converging mechanisms. Restoring autophagy specifically in APOE4 carriers represents an isoform-targeted approach tha
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-51e7234f'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Every column is optional, so truncate and sort only on the ones
        # that are present instead of assuming 'title' and 'year' exist.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns: show the first raw records instead.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 2: APOE4-Selective Lipid Nanoemulsion Therapy¶
Target genes: APOE · Composite score: 0.585
Background and Rationale
Apolipoprotein E (APOE) represents one of the most significant genetic risk factors for Alzheimer's disease, with the APOE4 allele conferring a 3-fold increased risk in heterozygotes and up to 15-fold in homozygotes compared to the protective APOE2 and neutral APOE3 variants. The APOE protein functions as a critical lipid transport molecule in the central nervous system, facilitating cholesterol and phospholipid redistribution between neurons, astrocytes, and microg
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-c9c79e3e'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Every column is optional, so truncate and sort only on the ones
        # that are present instead of assuming 'title' and 'year' exist.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns: show the first raw records instead.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 3: APOE-TREM2 Interaction Modulation¶
Target genes: TREM2 · Composite score: 0.583
The interaction between APOE and TREM2 on microglia determines neuroinflammatory responses in neurodegeneration. Developing small molecules that enhance APOE-TREM2 binding could promote protective microglial activation states while suppressing harmful inflammatory cascades through improved lipid sensing and phagocytic activity.
Molecular Basis of the APOE-TREM2 Interaction¶
TREM2 (Triggering Receptor Expressed on Myeloid Cells 2) is a transmembrane receptor expressed on microglia that functi
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-180807e5'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Every column is optional, so truncate and sort only on the ones
        # that are present instead of assuming 'title' and 'year' exist.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns: show the first raw records instead.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 4: APOE Isoform Conversion Therapy¶
Target genes: APOE · Composite score: 0.582
APOE Isoform Conversion Therapy proposes the direct in vivo conversion of the pathogenic APOE4 allele to the protective APOE3 or APOE2 sequence using base editing or prime editing CRISPR technologies. This approach addresses the root genetic cause of APOE4-associated Alzheimer's disease risk — the single nucleotide polymorphism encoding Arg112 (vs. Cys112 in APOE3) — rather than treating downstream consequences of the APOE4 protein's dysfunctional structure.
**Genetic Basis of APOE4 Pathogenicity**
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-15336069'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Every column is optional, so truncate and sort only on the ones
        # that are present instead of assuming 'title' and 'year' exist.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns: show the first raw records instead.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 5: Proteostasis Enhancement via APOE Chaperone Targeting¶
Target genes: HSPA1A · Composite score: 0.559
Background and Rationale
The apolipoprotein E epsilon 4 allele (APOE4) represents the strongest genetic risk factor for late-onset Alzheimer's disease, increasing risk 3-fold in heterozygotes and 8-15-fold in homozygotes. While traditional research has focused on APOE4's effects on amyloid-β clearance and lipid transport, emerging evidence suggests that the structural instability of APOE4 itself creates a fundamental proteostasis crisis that drives neurodegeneration through multiple converg
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-5d943bfc'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Every column is optional, so truncate and sort only on the ones
        # that are present instead of assuming 'title' and 'year' exist.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns: show the first raw records instead.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 6: APOE-Mediated Synaptic Lipid Raft Stabilization¶
Target genes: SPTLC1 · Composite score: 0.491
Background and Rationale
Apolipoprotein E (APOE) genotype represents the strongest genetic risk factor for late-onset Alzheimer's disease, with the APOE4 allele conferring a 3-15 fold increased risk compared to the more common APOE3 variant. While extensive research has focused on APOE's role in amyloid-β clearance and tau pathology, emerging evidence suggests that APOE4's pathogenic effects extend to fundamental alterations in synaptic membrane composition and function. Lipid rafts, specia
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-58e655ee'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Every column is optional, so truncate and sort only on the ones
        # that are present instead of assuming 'title' and 'year' exist.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns: show the first raw records instead.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
11. Knowledge graph edges (35 total)¶
# Knowledge-graph edges embedded in the notebook, written as
# (source, relation, target, strength) and expanded into dicts below.
_EDGE_FIELDS = ('source', 'relation', 'target', 'strength')
edge_data = [dict(zip(_EDGE_FIELDS, row)) for row in [
    ('SPTLC1', 'associated_with', 'neurodegeneration', 0.57),
    ('h-51e7234f', 'targets', 'MTOR', 0.5),
    ('h-180807e5', 'targets', 'TREM2', 0.5),
    ('h-c9c79e3e', 'targets', 'APOE', 0.5),
    ('h-58e655ee', 'targets', 'SPTLC1', 0.5),
    ('h-15336069', 'targets', 'APOE', 0.5),
    ('TREM2', 'co_discussed', 'APOE', 0.4),
    ('ULK1', 'co_discussed', 'APOE', 0.4),
    ('TREM2', 'co_discussed', 'HSPA1A', 0.4),
    ('HSPA1A', 'co_discussed', 'ULK1', 0.4),
    ('TFEB', 'co_discussed', 'APOE', 0.4),
    ('TFEB', 'co_discussed', 'HSPA1A', 0.4),
    ('HSPA1A', 'co_discussed', 'TREM2', 0.4),
    ('TREM2', 'co_discussed', 'TFEB', 0.4),
    ('ULK1', 'co_discussed', 'TFEB', 0.4),
    ('SPTLC1', 'co_discussed', 'TREM2', 0.4),
    ('SPTLC1', 'co_discussed', 'ULK1', 0.4),
    ('SPTLC1', 'co_discussed', 'MTOR', 0.4),
    ('SPTLC1', 'co_discussed', 'TFEB', 0.4),
    ('SPTLC1', 'co_discussed', 'HSPA1A', 0.4),
    ('SPTLC1', 'co_discussed', 'APOE', 0.4),
    ('TREM2', 'co_discussed', 'MTOR', 0.4),
    ('ULK1', 'co_discussed', 'MTOR', 0.4),
    ('ULK1', 'co_discussed', 'HSPA1A', 0.4),
    ('MTOR', 'co_discussed', 'HSPA1A', 0.4),
]]

edge_view = None
if edge_data:
    edge_view = pd.DataFrame(edge_data).head(25)
else:
    print('No KG edge data available')
# A notebook only renders the last top-level expression of a cell; an
# expression nested inside the if-branch above would be silently dropped.
edge_view
12. Caveats¶
This notebook uses real Forge tool calls cached from live APIs, but:
- Enrichment is against curated gene-set libraries, not genome-wide screens
- STRING/Reactome/HPA/MyGene reflect curated knowledge
- PubMed literature is search-relevance ranked, not systematic review
The cached evidence bundle is the minimum viable real-data analysis for this topic.