Perivascular spaces and glymphatic clearance failure in AD¶
Notebook ID: nb-sda-2026-04-01-gap-v2-ee5a5023 · Analysis: sda-2026-04-01-gap-v2-ee5a5023 · Generated: 2026-04-10
Research question¶
Perivascular spaces and glymphatic clearance failure in AD
Approach¶
This notebook is generated programmatically from real Forge tool calls and SciDEX debate data. Code cells load cached evidence bundles from data/forge_cache/seaad/*.json and query live data from scidex.db. Re-run python3 scripts/regenerate_notebooks.py --analysis sda-2026-04-01-gap-v2-ee5a5023 --force to refresh.
7 hypotheses were generated and debated. The knowledge graph has 142 edges.
Debate Summary¶
Quality score: 0.67 · Rounds: 4 · Personas: Theorist, Skeptic, Domain_Expert, Synthesizer
1. Forge tool provenance¶
import json, sys, sqlite3
from pathlib import Path
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Notebook-wide figure defaults: 110 dpi rendering on a white figure background.
matplotlib.rcParams.update({
    'figure.dpi': 110,
    'figure.facecolor': 'white',
})
# Treat the current working directory as the repository root, make it
# importable, and point CACHE at this analysis' cached evidence bundle.
REPO = Path('.').resolve()
sys.path.insert(0, str(REPO))
CACHE_SUB = 'seaad'
CACHE = REPO / 'data' / 'forge_cache' / CACHE_SUB


def load(name):
    """Return the parsed JSON stored at ``CACHE / '<name>.json'``.

    Args:
        name: Cache entry name without the ``.json`` suffix.

    Returns:
        The decoded JSON value, or an empty dict when the file is missing or
        does not contain valid JSON. Callers in this notebook already treat
        ``{}`` as "no data".
    """
    path = CACHE / f'{name}.json'
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        text = path.read_text(encoding='utf-8')
    except FileNotFoundError:
        return {}
    try:
        return json.loads(text)
    except json.JSONDecodeError as exc:
        # A truncated or corrupt cache entry is reported and treated as absent
        # instead of aborting the whole notebook run.
        print(f'Ignoring unreadable cache file {path.name}: {exc}')
        return {}
# Summarise the tool calls recorded in scidex.db over the last 30 days,
# grouped by skill and status.
db_path = Path('/home/ubuntu/scidex/scidex.db')
# Table to render at the end of the cell; stays None when the query fails.
prov_table = None
try:
    db = sqlite3.connect(str(db_path))
    try:
        prov = pd.read_sql_query('''
            SELECT skill_id, status, COUNT(*) AS n_calls,
                   ROUND(AVG(duration_ms),0) AS mean_ms
            FROM tool_calls
            WHERE created_at >= date('now','-30 days')
            GROUP BY skill_id, status
            ORDER BY n_calls DESC
        ''', db)
    finally:
        # Release the connection even when the query itself raises.
        db.close()
    prov['tool'] = prov['skill_id'].str.replace('tool_', '', regex=False)
    print(f'{len(prov)} tool-call aggregates (last 30 days):')
    prov_table = prov[['tool', 'status', 'n_calls', 'mean_ms']].head(20)
except Exception as e:
    # Best-effort cell: provenance is informational, so any failure is
    # reported and the rest of the notebook continues.
    print(f'Provenance unavailable: {e}')
# Jupyter renders only a cell's final top-level expression, so the table is
# surfaced here instead of inside the try block where it would be discarded.
prov_table
77 tool-call aggregates (last 30 days):
2. Target gene annotations¶
def _brief(value, fallback):
    """Format an HPA field for display, capped at 55 characters.

    A list is shown as its first two items joined by ', '; anything else is
    shown as ``str(value or fallback)``.
    """
    if isinstance(value, list):
        return ', '.join(value[:2])[:55]
    return str(value or fallback)[:55]


# One summary row per target gene, combining the cached MyGene and HPA records.
ann_rows = []
for g in ['PDGFRB']:
    mg = load(f'mygene_{g}')
    hpa = load(f'hpa_{g}')
    if mg or hpa:
        row = {
            'gene': g,
            'name': (mg.get('name') or '')[:55],
            'protein_class': _brief(hpa.get('protein_class'), '—'),
            'disease_involvement': _brief(hpa.get('disease_involvement'), ''),
        }
    else:
        # Neither source has a cached record: emit a placeholder row.
        row = {'gene': g, 'name': '—', 'protein_class': '—',
               'disease_involvement': '—'}
    ann_rows.append(row)
pd.DataFrame(ann_rows)
| | gene | name | protein_class | disease_involvement |
|---|---|---|---|---|
| 0 | PDGFRB | platelet derived growth factor receptor beta | Cancer-related genes, CD markers | Cancer-related genes, Disease variant |
3. GO Biological Process enrichment (Enrichr)¶
# Tabulate the first ten cached Enrichr GO Biological Process terms.
go_bp = load('enrichr_GO_Biological_Process')
# Table to render at the end of the cell; stays None when there is no data.
go_table = None
if isinstance(go_bp, list) and go_bp:
    go_df = pd.DataFrame(go_bp[:10])[['term', 'p_value', 'odds_ratio', 'genes']].copy()
    go_df['p_value'] = go_df['p_value'].apply(lambda p: f'{p:.2e}')
    go_df['odds_ratio'] = go_df['odds_ratio'].round(1)
    go_df['term'] = go_df['term'].str[:60]
    # Count the hits before the gene list is flattened into a string.
    go_df['n_hits'] = go_df['genes'].apply(len)
    go_df['genes'] = go_df['genes'].apply(lambda g: ', '.join(g))
    go_table = go_df[['term', 'n_hits', 'p_value', 'odds_ratio', 'genes']]
else:
    print('No GO:BP enrichment data')
# Jupyter renders only a cell's final top-level expression, so the table is
# surfaced here instead of inside the if-branch where it would be discarded.
go_table
# Visualize top GO BP enrichment
go_bp = load('enrichr_GO_Biological_Process')
if not (isinstance(go_bp, list) and go_bp):
    print('No GO:BP data to plot')
else:
    # barh draws its first entry at the bottom, so the first eight cached
    # terms are reversed to put the first one at the top of the chart.
    ranked = go_bp[:8][::-1]
    terms = [entry['term'][:45] for entry in ranked]
    # Clamp p-values at 1e-300 so log10 never receives zero.
    neglogp = [-np.log10(max(entry['p_value'], 1e-300)) for entry in ranked]
    fig, ax = plt.subplots(figsize=(9, 4.5))
    ax.barh(terms, neglogp, color='#4fc3f7')
    ax.set_xlabel('-log10(p-value)')
    ax.set_title('Top GO:BP enrichment (Enrichr)')
    ax.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plt.show()
4. KEGG pathway enrichment¶
# Tabulate the first ten cached Enrichr KEGG pathway terms.
kegg = load('enrichr_KEGG_Pathways')
# Table to render at the end of the cell; stays None when there is no data.
kegg_table = None
if isinstance(kegg, list) and kegg:
    kegg_df = pd.DataFrame(kegg[:10])[['term', 'p_value', 'odds_ratio', 'genes']].copy()
    kegg_df['genes'] = kegg_df['genes'].apply(lambda g: ', '.join(g))
    kegg_df['p_value'] = kegg_df['p_value'].apply(lambda p: f'{p:.2e}')
    kegg_df['odds_ratio'] = kegg_df['odds_ratio'].round(1)
    kegg_table = kegg_df
else:
    print('No KEGG enrichment data')
# Jupyter renders only a cell's final top-level expression, so the table is
# surfaced here instead of inside the if-branch where it would be discarded.
kegg_table
No KEGG enrichment data
5. STRING protein interaction network¶
# List the cached STRING interaction edges, highest score first.
ppi = load('string_network')
# Table to render at the end of the cell; stays None when there are no edges.
ppi_table = None
if isinstance(ppi, list) and ppi:
    ppi_df = pd.DataFrame(ppi).sort_values('score', ascending=False)
    # Show only the score columns that the cached records actually contain.
    display_cols = [c for c in ['protein1', 'protein2', 'score', 'escore', 'tscore']
                    if c in ppi_df.columns]
    print(f'{len(ppi_df)} STRING edges')
    ppi_table = ppi_df[display_cols].head(20)
else:
    print('No STRING edges returned')
# Jupyter renders only a cell's final top-level expression, so the table is
# surfaced here instead of inside the if-branch where it would be discarded.
ppi_table
11 STRING edges
# Network figure
ppi = load('string_network')
if not (isinstance(ppi, list) and ppi):
    print('No STRING data to visualize')
else:
    import math

    # Place every distinct protein, in sorted order, evenly on the unit circle.
    nodes = sorted({p for e in ppi for p in (e['protein1'], e['protein2'])})
    n = len(nodes)
    pos = {}
    for idx, protein in enumerate(nodes):
        angle = 2 * math.pi * idx / n
        pos[protein] = (math.cos(angle), math.sin(angle))

    fig, ax = plt.subplots(figsize=(7, 7))
    # Edges: opacity and line width both grow with the edge's score.
    for edge in ppi:
        x1, y1 = pos[edge['protein1']]
        x2, y2 = pos[edge['protein2']]
        weight = edge['score']
        ax.plot([x1, x2], [y1, y2], color='#888',
                alpha=0.3 + 0.5 * weight, linewidth=0.5 + 2 * weight)
    # Nodes are drawn above the edges, with labels above the node markers.
    for name, (x, y) in pos.items():
        ax.scatter([x], [y], s=450, color='#ffd54f', edgecolors='#333', zorder=3)
        ax.annotate(name, (x, y), ha='center', va='center',
                    fontsize=8, fontweight='bold', zorder=4)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(f'STRING PPI network ({len(ppi)} edges)')
    plt.tight_layout()
    plt.show()
6. Reactome pathway footprint¶
# One row per gene: number of cached Reactome pathways and the first one's name.
pw_rows = []
for g in ['PDGFRB']:
    pws = load(f'reactome_{g}')
    if not isinstance(pws, list):
        # No list-shaped pathway data cached for this gene.
        pw_rows.append({'gene': g, 'n_pathways': 0, 'top_pathway': '—'})
        continue
    top_name = pws[0]['name'] if pws else '—'
    pw_rows.append({'gene': g, 'n_pathways': len(pws),
                    'top_pathway': top_name[:70]})
pd.DataFrame(pw_rows).sort_values('n_pathways', ascending=False)
| | gene | n_pathways | top_pathway |
|---|---|---|---|
| 0 | PDGFRB | 6 | PIP3 activates AKT signaling |
7. Allen Brain Atlas ISH regional expression¶
# One row per gene summarising the cached Allen Brain Atlas ISH record.
ish_rows = []
for g in ['PDGFRB']:
    ish = load(f'allen_ish_{g}')
    # Only a dict-shaped record can carry regions; everything else means none.
    regions = []
    if isinstance(ish, dict):
        regions = ish.get('regions') or []
    if regions:
        first = regions[0]
        top_region = first.get('structure', '')[:45]
        top_energy = round(first.get('expression_energy', 0), 2)
    else:
        top_region = '—'
        top_energy = None
    ish_rows.append({
        'gene': g,
        'n_ish_regions': len(regions),
        'top_region': top_region,
        'top_energy': top_energy,
    })
pd.DataFrame(ish_rows)
| | gene | n_ish_regions | top_region | top_energy |
|---|---|---|---|---|
| 0 | PDGFRB | 0 | — | — |
8. Hypothesis ranking (7 hypotheses)¶
# (truncated title, composite score) pairs, in the order supplied by the generator.
hyp_data = [
    ('Circadian Glymphatic Entrainment via Targeted Orexin Re', 0.653),
    ('Astroglial Gap Junction Coordination via Connexin-43 Ph', 0.636),
    ('Endothelial Glycocalyx Regeneration via Syndecan-1 Upre', 0.627),
    ('Pericyte Contractility Reset via Selective PDGFR-β Agon', 0.614),
    ('Osmotic Gradient Restoration via Selective AQP1 Enhance', 0.607),
    ('Aquaporin-4 Polarization Enhancement via TREK-1 Channel', 0.606),
    ('Matrix Stiffness Normalization via Targeted Lysyl Oxida', 0.604),
]


def _score_color(score):
    """Map a composite score to its bar colour band (>= 0.6, >= 0.5, below)."""
    if score >= 0.6:
        return '#ef5350'
    if score >= 0.5:
        return '#ffa726'
    return '#66bb6a'


# barh draws its first entry at the bottom, so the list is reversed to put
# the first hypothesis at the top of the chart.
ordered = hyp_data[::-1]
titles = [pair[0] for pair in ordered]
scores = [pair[1] for pair in ordered]
fig, ax = plt.subplots(figsize=(10, max(8, len(titles)*0.4)))
colors = [_score_color(s) for s in scores]
bar_positions = range(len(titles))
ax.barh(bar_positions, scores, color=colors)
ax.set_yticks(bar_positions)
ax.set_yticklabels(titles, fontsize=7)
ax.set_xlabel('Composite Score')
ax.set_title('Perivascular spaces and glymphatic clearance failure in AD')
ax.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()
9. Score dimension heatmap (all 7 hypotheses)¶
# Row labels (truncated hypothesis titles), one per matrix row.
labels = [
    'Circadian Glymphatic Entrainment via Tar',
    'Astroglial Gap Junction Coordination via',
    'Endothelial Glycocalyx Regeneration via ',
    'Pericyte Contractility Reset via Selecti',
    'Osmotic Gradient Restoration via Selecti',
    'Aquaporin-4 Polarization Enhancement via',
    'Matrix Stiffness Normalization via Targe',
]
# Scores: one row per label above, one column per entry of `dims` below.
matrix = np.array([
    [0.75, 0.9, 0.8, 0.85, 0.34, 0.85, 0.8, 0.95, 0.7],
    [0.68, 0.58, 0.7, 0.75, 0.436, 0.78, 0.68, 0.55, 0.65],
    [0.9, 0.5, 0.75, 0.75, 0.623, 0.65, 0.7, 0.45, 0.6],
    [0.9, 0.3, 0.6, 0.5, 0.525, 0.55, 0.5, 0.25, 0.35],
    [0.7, 0.25, 0.6, 0.55, 0.518, 0.5, 0.55, 0.2, 0.45],
    [0.85, 0.45, 0.5, 0.35, 0.436, 0.4, 0.35, 0.5, 0.6],
    [0.8, 0.75, 0.7, 0.7, 0.13, 0.7, 0.75, 0.8, 0.5],
])
dims = [
    'novelty_score',
    'feasibility_score',
    'impact_score',
    'mechanistic_plausibility_score',
    'clinical_relevance_score',
    'data_availability_score',
    'reproducibility_score',
    'druggability_score',
    'safety_profile_score',
]
if not matrix.size:
    print('No score data available')
else:
    # Column captions: strip the '_score' suffix and title-case the remainder.
    tick_labels = [d.replace('_score', '').replace('_', ' ').title() for d in dims]
    fig, ax = plt.subplots(figsize=(10, 5))
    # Fixed 0-1 colour range so colours are comparable across notebooks.
    im = ax.imshow(matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
    ax.set_xticks(range(len(dims)))
    ax.set_xticklabels(tick_labels, rotation=45, ha='right', fontsize=8)
    ax.set_yticks(range(len(labels)))
    ax.set_yticklabels(labels, fontsize=7)
    ax.set_title('Score dimensions — top hypotheses')
    plt.colorbar(im, ax=ax, shrink=0.8)
    plt.tight_layout()
    plt.show()
10. PubMed evidence per hypothesis¶
Hypothesis 1: Circadian Glymphatic Entrainment via Targeted Orexin Receptor Modulation¶
Target genes: HCRTR1/HCRTR2 · Composite score: 0.653
Overview
This therapeutic hypothesis proposes leveraging orexin (hypocretin) receptor modulation to enhance glymphatic system function through strengthening circadian rhythms in Alzheimer's disease. The glymphatic system—a brain-wide cerebrospinal fluid (CSF) clearance pathway most active during sleep—shows dysfunction in AD, leading to impaired clearance of toxic protein aggregates including Aβ and tau. By targeting orexin receptors (OX1R and OX2R), this approach aims to restore circadian-
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-9e9fee95'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Any of these columns may be absent from the cached records, so each
        # one is checked before it is truncated or used as the sort key.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records as-is.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 2: Astroglial Gap Junction Coordination via Connexin-43 Phosphorylation M¶
Target genes: GJA1 · Composite score: 0.636
Molecular Mechanism and Rationale¶
The connexin-43 (Cx43) protein, encoded by the GJA1 gene, forms the structural basis of gap junctions between astrocytes in the central nervous system, creating a highly interconnected glial network essential for brain homeostasis and waste clearance. The molecular mechanism underlying this therapeutic hypothesis centers on the phosphorylation-dependent regulation of Cx43 gap junction permeability and the consequent coordination of calcium signaling that
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-3a901ec3'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Any of these columns may be absent from the cached records, so each
        # one is checked before it is truncated or used as the sort key.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records as-is.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 3: Endothelial Glycocalyx Regeneration via Syndecan-1 Upregulation¶
Target genes: SDC1 · Composite score: 0.627
Molecular Mechanism and Rationale
The endothelial glycocalyx represents a critical interface between the vascular endothelium and the central nervous system's fluid dynamics, particularly in the context of glymphatic system function and cerebrospinal fluid (CSF) flow. Syndecan-1 (SDC1), a transmembrane heparan sulfate proteoglycan, serves as a primary structural component of this glycocalyx layer, anchoring a complex network of glycosaminoglycans, proteoglycans, and plasma proteins that cre
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-fb56c8a0'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Any of these columns may be absent from the cached records, so each
        # one is checked before it is truncated or used as the sort key.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records as-is.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 4: Pericyte Contractility Reset via Selective PDGFR-β Agonism¶
Target genes: PDGFRB · Composite score: 0.614
Molecular Mechanism and Rationale¶
Pericytes are contractile cells that wrap around capillaries and play a crucial role in maintaining blood-brain barrier (BBB) integrity, regulating cerebral blood flow, and facilitating interstitial fluid drainage through the glymphatic system. In neurodegenerative diseases, pericyte dysfunction manifests as loss of contractile tone, altered perivascular space dimensions, and compromised vascular integrity. The platelet-derived growth factor receptor-β (
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-73e4340b'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Any of these columns may be absent from the cached records, so each
        # one is checked before it is truncated or used as the sort key.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records as-is.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 5: Osmotic Gradient Restoration via Selective AQP1 Enhancement in Choroid¶
Target genes: AQP1 · Composite score: 0.607
Molecular Mechanism and Rationale
Aquaporin-1 (AQP1) represents a critical water channel protein predominantly expressed in the apical membrane of choroid plexus epithelial cells, where it facilitates the bulk water transport necessary for cerebrospinal fluid (CSF) production. The molecular mechanism underlying AQP1-mediated CSF formation involves the coordinated function of multiple transport proteins and ion channels within choroid plexus epithelial cells. AQP1 works in concert with the N
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-0dea0ed5'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Any of these columns may be absent from the cached records, so each
        # one is checked before it is truncated or used as the sort key.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records as-is.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 6: Aquaporin-4 Polarization Enhancement via TREK-1 Channel Modulation¶
Target genes: KCNK2 · Composite score: 0.606
Molecular Mechanism and Rationale
The molecular foundation of this therapeutic hypothesis centers on the intricate relationship between TREK-1 potassium channels (encoded by KCNK2) and aquaporin-4 (AQP4) water channel polarization in astrocytic endfeet. TREK-1 channels are mechanosensitive, two-pore domain potassium channels that play crucial roles in maintaining astrocyte membrane potential and cellular homeostasis. Under physiological conditions, these channels facilitate potassium efflux
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-9eae33ba'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Any of these columns may be absent from the cached records, so each
        # one is checked before it is truncated or used as the sort key.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records as-is.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 7: Matrix Stiffness Normalization via Targeted Lysyl Oxidase Inhibition¶
Target genes: LOX/LOXL1-4 · Composite score: 0.604
Molecular Mechanism and Rationale¶
The lysyl oxidase (LOX) family comprises five enzymes—LOX, LOXL1, LOXL2, LOXL3, and LOXL4—that catalyze the oxidative deamination of lysine and hydroxylysine residues in collagen and elastin, generating aldehydes (allysine and hydroxyallysine) that spontaneously condense to form covalent cross-links. These cross-links, including aldol condensation products, pyridinium compounds (pyridinoline and pyrrole), and advanced pyridoxine and pyrrole cross-links, a
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-82922df8'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Any of these columns may be absent from the cached records, so each
        # one is checked before it is truncated or used as the sort key.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records as-is.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
11. Knowledge graph edges (142 total; 25 shown)¶
# Knowledge-graph edges embedded by the notebook generator.
edge_data = [
    {'source': 'h-9e9fee95', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.8},
    {'source': 'HCRTR1', 'relation': 'encodes', 'target': 'orexin_receptor_1', 'strength': 0.8},
    {'source': 'orexin_receptor_1', 'relation': 'regulates', 'target': 'sleep_wake_regulation', 'strength': 0.8},
    {'source': 'sleep_wake_regulation', 'relation': 'controls', 'target': 'glymphatic_clearance', 'strength': 0.8},
    {'source': 'SDC1', 'relation': 'encodes', 'target': 'syndecan_1', 'strength': 0.8},
    {'source': 'syndecan_1', 'relation': 'maintains', 'target': 'endothelial_glycocalyx', 'strength': 0.8},
    {'source': 'endothelial_glycocalyx', 'relation': 'facilitates', 'target': 'paravascular_flow', 'strength': 0.8},
    {'source': 'LOX', 'relation': 'encodes', 'target': 'lysyl_oxidase', 'strength': 0.8},
    {'source': 'lysyl_oxidase', 'relation': 'catalyzes', 'target': 'collagen_crosslinking', 'strength': 0.8},
    {'source': 'collagen_crosslinking', 'relation': 'increases', 'target': 'tissue_stiffness', 'strength': 0.8},
    {'source': 'tissue_stiffness', 'relation': 'causes', 'target': 'glymphatic_dysfunction', 'strength': 0.8},
    {'source': 'GJA1', 'relation': 'encodes', 'target': 'connexin_43', 'strength': 0.8},
    {'source': 'connexin_43', 'relation': 'mediates', 'target': 'astrocyte_coupling', 'strength': 0.8},
    {'source': 'astrocyte_coupling', 'relation': 'enables', 'target': 'calcium_wave_coordination', 'strength': 0.8},
    {'source': 'calcium_wave_coordination', 'relation': 'drives', 'target': 'perivascular_pumping', 'strength': 0.8},
    {'source': 'h-3a901ec3', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.72},
    {'source': 'h-fb56c8a0', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.7},
    {'source': 'HCRTR1', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.7},
    {'source': 'HCRTR1', 'relation': 'interacts_with', 'target': 'HCRTR2', 'strength': 0.7},
    {'source': 'HCRTR2', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.7},
    {'source': 'HCRTR2', 'relation': 'interacts_with', 'target': 'HCRTR1', 'strength': 0.7},
    {'source': 'h-82922df8', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.65},
    {'source': 'LOX', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.64},
    {'source': 'LOX', 'relation': 'interacts_with', 'target': 'LOXL1-4', 'strength': 0.64},
    {'source': 'LOXL1-4', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.64},
]
# Table to render at the end of the cell; stays None when there are no edges.
edge_table = None
if edge_data:
    edge_table = pd.DataFrame(edge_data).head(25)
else:
    print('No KG edge data available')
# Jupyter renders only a cell's final top-level expression, so the table is
# surfaced here instead of inside the if-branch where it would be discarded.
edge_table
12. Caveats¶
This notebook uses real Forge tool calls cached from live APIs, but:
- Enrichment is against curated gene-set libraries, not genome-wide screens
- STRING/Reactome/HPA/MyGene reflect curated knowledge
- PubMed literature is ranked by search relevance; it is not a systematic review
The cached evidence bundle is the minimum viable real-data analysis for this topic.