Synaptic pruning by microglia in early AD¶
Notebook ID: nb-sda-2026-04-01-gap-v2-691b42f1 · Analysis: sda-2026-04-01-gap-v2-691b42f1 · Generated: 2026-04-10
Research question¶
Synaptic pruning by microglia in early AD
Approach¶
This notebook is generated programmatically from real Forge tool calls and SciDEX debate data. Code cells load cached evidence bundles from `data/forge_cache/seaad/*.json` and query live data from `scidex.db`. To refresh, re-run `python3 scripts/regenerate_notebooks.py --analysis sda-2026-04-01-gap-v2-691b42f1 --force`.
7 hypotheses were generated and debated. The knowledge graph has 75 edges.
Debate Summary¶
Quality score: 0.53 · Rounds: 4 · Personas: Theorist, Skeptic, Domain_Expert, Synthesizer
1. Forge tool provenance¶
import json, sys, sqlite3
from pathlib import Path
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Figure defaults shared by every plot in this notebook.
matplotlib.rcParams.update({'figure.dpi': 110, 'figure.facecolor': 'white'})

# Resolve the working directory once and prepend it to sys.path.
REPO = Path('.').resolve()
sys.path.insert(0, str(REPO))

# Cached evidence bundles live under data/forge_cache/<CACHE_SUB>/.
CACHE_SUB = 'seaad'
CACHE = REPO.joinpath('data', 'forge_cache', CACHE_SUB)
def load(name):
    """Return the parsed JSON stored at CACHE/<name>.json, or {} if that file is absent."""
    cache_file = CACHE / f'{name}.json'
    if not cache_file.exists():
        return {}
    return json.loads(cache_file.read_text())
# Aggregate the tool_calls table of scidex.db over the last 30 days,
# grouped by skill and status.
db_path = Path('/home/ubuntu/scidex/scidex.db')
prov_table = None
try:
    db = sqlite3.connect(str(db_path))
    try:
        prov = pd.read_sql_query('''
            SELECT skill_id, status, COUNT(*) AS n_calls,
                   ROUND(AVG(duration_ms),0) AS mean_ms
            FROM tool_calls
            WHERE created_at >= date('now','-30 days')
            GROUP BY skill_id, status
            ORDER BY n_calls DESC
        ''', db)
    finally:
        # Release the connection even when the query itself fails.
        db.close()
    prov['tool'] = prov['skill_id'].str.replace('tool_', '', regex=False)
    print(f'{len(prov)} tool-call aggregates (last 30 days):')
    prov_table = prov[['tool','status','n_calls','mean_ms']].head(20)
except Exception as e:
    # Best-effort cell: the notebook should still run without the database.
    print(f'Provenance unavailable: {e}')
# A bare expression nested inside try/except is not rendered by Jupyter;
# only a trailing top-level expression is, so the table is surfaced here.
prov_table
2. Target gene annotations¶
def _brief(value, fallback):
    """Shorten an HPA field: join the first two list items, else str(value or fallback); cap at 55 chars."""
    if isinstance(value, list):
        return ', '.join(value[:2])[:55]
    return str(value or fallback)[:55]


# The generated cell iterated over an empty list, so the table was always empty.
# NOTE(review): genes copied from the "Target genes" lines of section 10 —
# confirm this is the intended set for the cached bundle.
target_genes = ['HK2', 'TREM2', 'P2RY12', 'C1QA', 'CX3CR1', 'ANXA1']

ann_rows = []
for g in target_genes:
    mg = load(f'mygene_{g}')
    hpa = load(f'hpa_{g}')
    if not mg and not hpa:
        # Nothing cached for this gene: emit a placeholder row.
        ann_rows.append({'gene': g, 'name': '—', 'protein_class': '—',
                         'disease_involvement': '—'})
        continue
    ann_rows.append({
        'gene': g,
        'name': (mg.get('name') or '')[:55],
        'protein_class': _brief(hpa.get('protein_class'), '—'),
        'disease_involvement': _brief(hpa.get('disease_involvement'), ''),
    })
# Explicit columns keep the header row even if the gene list is emptied again.
pd.DataFrame(ann_rows, columns=['gene', 'name', 'protein_class', 'disease_involvement'])
3. GO Biological Process enrichment (Enrichr)¶
# Tabulate the first ten cached Enrichr GO Biological Process terms.
go_bp = load('enrichr_GO_Biological_Process')
go_table = None
if isinstance(go_bp, list) and go_bp:
    # .copy() so the column rewrites below act on an independent frame.
    go_df = pd.DataFrame(go_bp[:10])[['term','p_value','odds_ratio','genes']].copy()
    go_df['p_value'] = go_df['p_value'].apply(lambda p: f'{p:.2e}')
    go_df['odds_ratio'] = go_df['odds_ratio'].round(1)
    go_df['term'] = go_df['term'].str[:60]
    # Count hits before the gene collection is flattened into a string.
    go_df['n_hits'] = go_df['genes'].apply(len)
    go_df['genes'] = go_df['genes'].apply(lambda g: ', '.join(g))
    go_table = go_df[['term','n_hits','p_value','odds_ratio','genes']]
else:
    print('No GO:BP enrichment data')
# An expression nested inside `if` is not rendered by Jupyter; only a
# trailing top-level expression is (None renders nothing).
go_table
# Horizontal bar chart of -log10(p) for the first eight GO:BP terms.
go_bp = load('enrichr_GO_Biological_Process')
if not (isinstance(go_bp, list) and go_bp):
    print('No GO:BP data to plot')
else:
    top = go_bp[:8]
    # barh draws its first item at the bottom, so reverse the order to put
    # the first cached term on top.
    terms = [entry['term'][:45] for entry in reversed(top)]
    # Floor p at 1e-300 so log10 never receives zero.
    neglogp = [-np.log10(max(entry['p_value'], 1e-300)) for entry in reversed(top)]
    fig, ax = plt.subplots(figsize=(9, 4.5))
    ax.barh(terms, neglogp, color='#4fc3f7')
    ax.set_xlabel('-log10(p-value)')
    ax.set_title('Top GO:BP enrichment (Enrichr)')
    ax.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plt.show()
4. KEGG pathway enrichment¶
# Tabulate the first ten cached Enrichr KEGG pathway terms.
kegg = load('enrichr_KEGG_Pathways')
kegg_table = None
if isinstance(kegg, list) and kegg:
    # .copy() so the column rewrites below act on an independent frame.
    kegg_df = pd.DataFrame(kegg[:10])[['term','p_value','odds_ratio','genes']].copy()
    kegg_df['genes'] = kegg_df['genes'].apply(lambda g: ', '.join(g))
    kegg_df['p_value'] = kegg_df['p_value'].apply(lambda p: f'{p:.2e}')
    kegg_df['odds_ratio'] = kegg_df['odds_ratio'].round(1)
    kegg_table = kegg_df
else:
    print('No KEGG enrichment data')
# An expression nested inside `if` is not rendered by Jupyter; only a
# trailing top-level expression is (None renders nothing).
kegg_table
5. STRING protein interaction network¶
# List the 20 highest-scoring cached STRING edges.
ppi = load('string_network')
ppi_table = None
if isinstance(ppi, list) and ppi:
    ppi_df = pd.DataFrame(ppi).sort_values('score', ascending=False)
    # Show only the score columns that are actually present in the cache.
    display_cols = [c for c in ['protein1','protein2','score','escore','tscore'] if c in ppi_df.columns]
    print(f'{len(ppi_df)} STRING edges')
    ppi_table = ppi_df[display_cols].head(20)
else:
    print('No STRING edges returned')
# An expression nested inside `if` is not rendered by Jupyter; only a
# trailing top-level expression is (None renders nothing).
ppi_table
# Circular-layout drawing of the cached STRING network.
ppi = load('string_network')
if not (isinstance(ppi, list) and ppi):
    print('No STRING data to visualize')
else:
    import math

    # Collect every protein that appears at either end of an edge.
    endpoints = set()
    for edge in ppi:
        endpoints.update((edge['protein1'], edge['protein2']))
    nodes = sorted(endpoints)
    n = len(nodes)

    # Spread the nodes evenly around the unit circle.
    pos = {}
    for i, node in enumerate(nodes):
        angle = 2*math.pi*i/n
        pos[node] = (math.cos(angle), math.sin(angle))

    fig, ax = plt.subplots(figsize=(7, 7))
    for edge in ppi:
        (x1, y1), (x2, y2) = pos[edge['protein1']], pos[edge['protein2']]
        score = edge['score']
        # Opacity and width both grow with the edge score.
        ax.plot([x1, x2], [y1, y2], color='#888', alpha=0.3+0.5*score,
                linewidth=0.5+2*score)
    for name, (x, y) in pos.items():
        ax.scatter([x], [y], s=450, color='#ffd54f', edgecolors='#333', zorder=3)
        ax.annotate(name, (x, y), ha='center', va='center', fontsize=8,
                    fontweight='bold', zorder=4)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(f'STRING PPI network ({len(ppi)} edges)')
    plt.tight_layout()
    plt.show()
6. Reactome pathway footprint¶
# Count cached Reactome pathways per target gene.
# The generated cell iterated over an empty list, which left pw_rows empty and
# made sort_values('n_pathways') raise KeyError on a column-less frame.
# NOTE(review): genes copied from the "Target genes" lines of section 10 —
# confirm this is the intended set for the cached bundle.
target_genes = ['HK2', 'TREM2', 'P2RY12', 'C1QA', 'CX3CR1', 'ANXA1']

pw_rows = []
for g in target_genes:
    pws = load(f'reactome_{g}')
    if isinstance(pws, list):
        pw_rows.append({'gene': g, 'n_pathways': len(pws),
                        'top_pathway': (pws[0]['name'] if pws else '—')[:70]})
    else:
        # load() returns {} when nothing is cached for this gene.
        pw_rows.append({'gene': g, 'n_pathways': 0, 'top_pathway': '—'})
# Explicit columns keep the sort valid even when there are no rows.
pw_df = pd.DataFrame(pw_rows, columns=['gene', 'n_pathways', 'top_pathway'])
pw_df.sort_values('n_pathways', ascending=False)
7. Allen Brain Atlas ISH regional expression¶
# Summarise cached Allen Brain Atlas ISH regions per target gene.
# The generated cell iterated over an empty list, so the table was always empty.
# NOTE(review): genes copied from the "Target genes" lines of section 10 —
# confirm this is the intended set for the cached bundle.
target_genes = ['HK2', 'TREM2', 'P2RY12', 'C1QA', 'CX3CR1', 'ANXA1']

ish_rows = []
for g in target_genes:
    ish = load(f'allen_ish_{g}')
    regions = (ish.get('regions') or []) if isinstance(ish, dict) else []
    first = regions[0] if regions else {}
    ish_rows.append({
        'gene': g,
        'n_ish_regions': len(regions),
        # `or` fallbacks keep a null structure/energy value from crashing
        # the slice or round().
        'top_region': ((first.get('structure') or '') if regions else '—')[:45],
        'top_energy': round(first.get('expression_energy') or 0, 2) if regions else None,
    })
# Explicit columns keep the header row even when there are no rows.
pd.DataFrame(ish_rows, columns=['gene', 'n_ish_regions', 'top_region', 'top_energy'])
8. Hypothesis ranking (7 hypotheses)¶
# (truncated title, composite score) pairs, in the order supplied by the generator.
hyp_data = [('Metabolic Reprogramming via Microglial Glycolysis Inhib', 0.584), ('TREM2 Conformational Stabilizers for Synaptic Discrimin', 0.576), ('Purinergic P2Y12 Inverse Agonist Therapy', 0.57), ('Complement C1q Mimetic Decoy Therapy', 0.561), ('Optogenetic Microglial Deactivation via Engineered Inhi', 0.558), ('Synaptic Phosphatidylserine Masking via Annexin A1 Mime', 0.556), ('Fractalkine Axis Amplification via CX3CR1 Positive Allo', 0.553)]

# barh draws its first item at the bottom, so reverse to put the first entry on top.
ordered = hyp_data[::-1]
titles = [title for title, _ in ordered]
scores = [score for _, score in ordered]

# Colour bands by composite score: >= 0.6, >= 0.5, below 0.5.
colors = []
for s in scores:
    if s >= 0.6:
        colors.append('#ef5350')
    elif s >= 0.5:
        colors.append('#ffa726')
    else:
        colors.append('#66bb6a')

fig, ax = plt.subplots(figsize=(10, max(8, len(titles)*0.4)))
y_positions = range(len(titles))
ax.barh(y_positions, scores, color=colors)
ax.set_yticks(y_positions)
ax.set_yticklabels(titles, fontsize=7)
ax.set_xlabel('Composite Score')
ax.set_title('Synaptic pruning by microglia in early AD')
ax.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()
9. Score dimension heatmap (top 10)¶
# One row per hypothesis (labels), one column per score dimension (dims).
labels = ['Metabolic Reprogramming via Microglial G', 'TREM2 Conformational Stabilizers for Syn', 'Purinergic P2Y12 Inverse Agonist Therapy', 'Complement C1q Mimetic Decoy Therapy', 'Optogenetic Microglial Deactivation via ', 'Synaptic Phosphatidylserine Masking via ', 'Fractalkine Axis Amplification via CX3CR']
matrix = np.array([[0.6, 0.45, 0.4, 0.35, 0.535, 0.5, 0.4, 0.5, 0.3], [0.9, 0.25, 0.7, 0.4, 0.585, 0.55, 0.4, 0.3, 0.45], [0.8, 0.7, 0.72, 0.75, 0.623, 0.6, 0.58, 0.85, 0.55], [0.82, 0.62, 0.78, 0.75, 0.623, 0.72, 0.58, 0.58, 0.65], [0.95, 0.15, 0.65, 0.5, 0.533, 0.35, 0.3, 0.2, 0.25], [0.75, 0.5, 0.6, 0.55, 0.518, 0.45, 0.5, 0.55, 0.5], [0.8, 0.5, 0.7, 0.65, 0.13, 0.6, 0.55, 0.75, 0.55]])
dims = ['novelty_score', 'feasibility_score', 'impact_score', 'mechanistic_plausibility_score', 'clinical_relevance_score', 'data_availability_score', 'reproducibility_score', 'druggability_score', 'safety_profile_score']

if not matrix.size:
    print('No score data available')
else:
    # Human-readable axis labels: drop the "_score" suffix and title-case.
    dim_labels = []
    for d in dims:
        dim_labels.append(d.replace('_score','').replace('_',' ').title())

    fig, ax = plt.subplots(figsize=(10, 5))
    # Fixed 0..1 colour range.
    im = ax.imshow(matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
    ax.set_xticks(range(len(dims)))
    ax.set_xticklabels(dim_labels, rotation=45, ha='right', fontsize=8)
    ax.set_yticks(range(len(labels)))
    ax.set_yticklabels(labels, fontsize=7)
    ax.set_title('Score dimensions — top hypotheses')
    plt.colorbar(im, ax=ax, shrink=0.8)
    plt.tight_layout()
    plt.show()
10. PubMed evidence per hypothesis¶
Hypothesis 1: Metabolic Reprogramming via Microglial Glycolysis Inhibition¶
Target genes: HK2 · Composite score: 0.584
Molecular Mechanism and Rationale¶
The therapeutic strategy of metabolic reprogramming through microglial glycolysis inhibition represents a novel approach to neurodegeneration that exploits the fundamental metabolic differences between inflammatory M1 and anti-inflammatory M2 microglial phenotypes. At the molecular level, this intervention targets hexokinase 2 (HK2), the rate-limiting enzyme in glycolysis that catalyzes the phosphorylation of glucose to glucose-6-phosphate. HK2 is partic
# Cached PubMed hits for this hypothesis, newest first when a year is available.
hid = 'h-38292315'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() so the column rewrites below act on an independent frame.
        lit = lit[cols].copy()
        # Truncate text columns only when present: a record set lacking
        # 'title' must not raise KeyError.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        # Same guard for the sort key.
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
Hypothesis 2: TREM2 Conformational Stabilizers for Synaptic Discrimination¶
Target genes: TREM2 · Composite score: 0.576
Molecular Mechanism and Rationale
TREM2 (Triggering Receptor Expressed on Myeloid cells 2) serves as a critical immunoreceptor on microglia that orchestrates the balance between neuroprotection and neurodegeneration through its sophisticated recognition and signaling mechanisms. The receptor exists in multiple conformational states that dictate its binding specificity and downstream signaling cascades. In healthy brain tissue, TREM2 recognizes phosphatidylserine (PS) exposure on apoptotic n
# Cached PubMed hits for this hypothesis, newest first when a year is available.
hid = 'h-044ee057'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() so the column rewrites below act on an independent frame.
        lit = lit[cols].copy()
        # Truncate text columns only when present: a record set lacking
        # 'title' must not raise KeyError.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        # Same guard for the sort key.
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
Hypothesis 3: Purinergic P2Y12 Inverse Agonist Therapy¶
Target genes: P2RY12 · Composite score: 0.57
Molecular Mechanism and Rationale
The P2Y12 receptor, encoded by the P2RY12 gene, represents a critical component of microglial surveillance and activation machinery in the central nervous system. This Gi/Go-coupled purinergic receptor responds to extracellular adenosine diphosphate (ADP) and adenosine triphosphate (ATP) released from neurons and other glial cells. Under physiological conditions, P2Y12 receptors maintain microglial processes in a dynamic, highly motile state that enables co
# Cached PubMed hits for this hypothesis, newest first when a year is available.
hid = 'h-f99ce4ca'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() so the column rewrites below act on an independent frame.
        lit = lit[cols].copy()
        # Truncate text columns only when present: a record set lacking
        # 'title' must not raise KeyError.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        # Same guard for the sort key.
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
Hypothesis 4: Complement C1q Mimetic Decoy Therapy¶
Target genes: C1QA · Composite score: 0.561
Molecular Mechanism and Rationale
The complement component 1q (C1q) represents a critical molecular bridge between innate immunity and synaptic plasticity in the central nervous system. C1q is a hexameric glycoprotein composed of three distinct polypeptide chains (C1qA, C1qB, and C1qC) that forms the recognition component of the classical complement pathway. Under physiological conditions, C1q is constitutively expressed by microglia and plays essential roles in developmental synaptic pruni
# Cached PubMed hits for this hypothesis, newest first when a year is available.
hid = 'h-1fe4ba9b'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() so the column rewrites below act on an independent frame.
        lit = lit[cols].copy()
        # Truncate text columns only when present: a record set lacking
        # 'title' must not raise KeyError.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        # Same guard for the sort key.
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
Hypothesis 5: Optogenetic Microglial Deactivation via Engineered Inhibitory Opsins¶
Target genes: CX3CR1 · Composite score: 0.558
Molecular Mechanism and Rationale
The optogenetic microglial deactivation strategy exploits the selective expression of inhibitory opsins in microglia through CX3CR1-targeted delivery systems to achieve precise temporal and spatial control over microglial activation states. CX3CR1, the fractalkine receptor exclusively expressed on microglia within the central nervous system, serves as an ideal molecular target for cell-type-specific interventions. The fractalkine signaling axis (CX3CL1-CX3C
# Cached PubMed hits for this hypothesis, newest first when a year is available.
hid = 'h-782b40b1'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() so the column rewrites below act on an independent frame.
        lit = lit[cols].copy()
        # Truncate text columns only when present: a record set lacking
        # 'title' must not raise KeyError.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        # Same guard for the sort key.
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
Hypothesis 6: Synaptic Phosphatidylserine Masking via Annexin A1 Mimetics¶
Target genes: ANXA1 · Composite score: 0.556
Molecular Mechanism and Rationale
The fundamental mechanism underlying this therapeutic approach centers on the precise molecular orchestration of synaptic maintenance through phosphatidylserine (PS) exposure regulation. Under normal physiological conditions, PS is actively maintained on the inner leaflet of the plasma membrane through the action of ATP-dependent aminophospholipid translocases, particularly ATP11C and CDC50A. However, during synaptic stress—whether induced by oxidative dama
# Cached PubMed hits for this hypothesis, newest first when a year is available.
hid = 'h-513a633f'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() so the column rewrites below act on an independent frame.
        lit = lit[cols].copy()
        # Truncate text columns only when present: a record set lacking
        # 'title' must not raise KeyError.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        # Same guard for the sort key.
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
Hypothesis 7: Fractalkine Axis Amplification via CX3CR1 Positive Allosteric Modulators¶
Target genes: CX3CR1 · Composite score: 0.553
Molecular Mechanism and Rationale
The fractalkine/CX3CR1 signaling axis represents a critical communication pathway between neurons and microglia that maintains homeostatic brain function through precise regulation of microglial activity states. Fractalkine (CX3CL1) is a unique chemokine that exists in both membrane-bound and soluble forms, with the membrane-bound form serving as the primary ligand for the CX3CR1 receptor exclusively expressed on microglia in the central nervous system. Und
# Cached PubMed hits for this hypothesis, newest first when a year is available.
hid = 'h-ba3a948a'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() so the column rewrites below act on an independent frame.
        lit = lit[cols].copy()
        # Truncate text columns only when present: a record set lacking
        # 'title' must not raise KeyError.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        # Same guard for the sort key.
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
11. Knowledge graph edges (75 total; first 25 listed)¶
# Knowledge-graph edges embedded by the generator (25 records).
edge_data = [
    {'source': 'P2RY12', 'relation': 'regulates', 'target': 'microglial_process_motility', 'strength': 0.8},
    {'source': 'CX3CR1', 'relation': 'mediates', 'target': 'fractalkine_signaling', 'strength': 0.8},
    {'source': 'fractalkine_signaling', 'relation': 'maintains', 'target': 'microglial_surveillance', 'strength': 0.8},
    {'source': 'HK2', 'relation': 'rate_limits', 'target': 'glycolysis', 'strength': 0.8},
    {'source': 'glycolysis', 'relation': 'fuels', 'target': 'microglial_activation', 'strength': 0.8},
    {'source': 'TREM2', 'relation': 'regulates', 'target': 'microglial_phagocytosis', 'strength': 0.8},
    {'source': 'ANXA1', 'relation': 'mediates', 'target': 'phosphatidylserine_masking', 'strength': 0.8},
    {'source': 'phosphatidylserine_masking', 'relation': 'suppresses', 'target': 'eat_me_signals', 'strength': 0.8},
    {'source': 'h-1fe4ba9b', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.68},
    {'source': 'h-f99ce4ca', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.65},
    {'source': 'CX3CR1', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.6},
    {'source': 'h-ba3a948a', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.6},
    {'source': 'SYNAPTIC PRUNING', 'relation': 'contributes_to', 'target': 'COGNITIVE DECLINE', 'strength': 0.6},
    {'source': 'ANXA1', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.54},
    {'source': 'TREM2', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.53},
    {'source': 'h-044ee057', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.5},
    {'source': 'CX3CR1', 'relation': 'participates_in', 'target': 'Fractalkine receptor / microgl', 'strength': 0.46},
    {'source': 'h-513a633f', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.45},
    {'source': 'ANXA1', 'relation': 'participates_in', 'target': 'Synaptic function / plasticity', 'strength': 0.42},
    {'source': 'HK2', 'relation': 'co_discussed', 'target': 'TREM2', 'strength': 0.4},
    {'source': 'HK2', 'relation': 'co_discussed', 'target': 'P2RY12', 'strength': 0.4},
    {'source': 'HK2', 'relation': 'co_discussed', 'target': 'C1Q', 'strength': 0.4},
    {'source': 'HK2', 'relation': 'co_discussed', 'target': 'C1QA', 'strength': 0.4},
    {'source': 'HK2', 'relation': 'co_discussed', 'target': 'CX3CR1', 'strength': 0.4},
    {'source': 'TREM2', 'relation': 'co_discussed', 'target': 'P2RY12', 'strength': 0.4},
]
edge_table = None
if edge_data:
    edge_table = pd.DataFrame(edge_data).head(25)
else:
    print('No KG edge data available')
# An expression nested inside `if` is not rendered by Jupyter; only a
# trailing top-level expression is (None renders nothing).
edge_table
12. Caveats¶
This notebook uses real Forge tool calls cached from live APIs, but:
- Enrichment is against curated gene-set libraries, not genome-wide screens
- STRING/Reactome/HPA/MyGene reflect curated knowledge
- PubMed literature is search-relevance ranked, not systematic review
The cached evidence bundle is the minimum viable real-data analysis for this topic.