Sleep disruption as cause and consequence of neurodegeneration¶
Notebook ID: nb-sda-2026-04-01-gap-v2-18cf98ca · Analysis: sda-2026-04-01-gap-v2-18cf98ca · Generated: 2026-04-10
Research question¶
How does sleep disruption act as both a cause and a consequence of neurodegeneration?
Approach¶
This notebook is generated programmatically from real Forge tool calls and SciDEX debate data. Code cells load cached evidence bundles from data/forge_cache/seaad/*.json and query live data from scidex.db. Re-run python3 scripts/regenerate_notebooks.py --analysis sda-2026-04-01-gap-v2-18cf98ca --force to refresh.
7 hypotheses were generated and debated. The knowledge graph has 191 edges.
Debate Summary¶
Quality score: 0.59 · Rounds: 4 · Personas: Theorist, Skeptic, Domain_Expert, Synthesizer
1. Forge tool provenance¶
import json, sys, sqlite3
from pathlib import Path
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Figure defaults applied to every plot rendered below.
matplotlib.rcParams.update({
    'figure.dpi': 110,
    'figure.facecolor': 'white',
})

# The resolved working directory is put first on the import path and is
# also the root under which the cached evidence bundle is looked up.
REPO = Path('.').resolve()
sys.path.insert(0, str(REPO))

CACHE_SUB = 'seaad'
CACHE = REPO.joinpath('data', 'forge_cache', CACHE_SUB)
def load(name, cache_dir=None):
    """Return the parsed JSON bundle ``<name>.json`` from the cache directory.

    Args:
        name: File stem of the bundle, without the ``.json`` suffix.
        cache_dir: Directory to read from. When omitted, the module-level
            ``CACHE`` directory is used, so ``load(name)`` behaves as before.

    Returns:
        The decoded JSON value, or an empty dict when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    base = CACHE if cache_dir is None else Path(cache_dir)
    try:
        # Read as UTF-8 explicitly instead of relying on the locale encoding.
        text = (base / f'{name}.json').read_text(encoding='utf-8')
    except (FileNotFoundError, NotADirectoryError):
        # A missing bundle is an expected condition: callers test the result
        # for emptiness / type before using it.
        return {}
    return json.loads(text)
db_path = Path('/home/ubuntu/scidex/scidex.db')


def tool_call_provenance(db_file):
    """Aggregate the last 30 days of ``tool_calls`` rows by skill and status.

    Args:
        db_file: Path of the SQLite database holding a ``tool_calls`` table.

    Returns:
        DataFrame with the query columns ``skill_id``, ``status``,
        ``n_calls`` and ``mean_ms`` plus a derived ``tool`` column
        (``skill_id`` with every ``tool_`` substring removed), ordered by
        ``n_calls`` descending.

    Raises:
        Whatever ``sqlite3`` / ``pandas`` raise when the database cannot be
        opened or the query fails; the connection is closed in either case.
    """
    conn = sqlite3.connect(str(db_file))
    try:
        frame = pd.read_sql_query('''
            SELECT skill_id, status, COUNT(*) AS n_calls,
            ROUND(AVG(duration_ms),0) AS mean_ms
            FROM tool_calls
            WHERE created_at >= date('now','-30 days')
            GROUP BY skill_id, status
            ORDER BY n_calls DESC
            ''', conn)
    finally:
        # Release the handle even when the query raises (e.g. missing table).
        conn.close()
    frame['tool'] = frame['skill_id'].str.replace('tool_', '', regex=False)
    return frame


# The table is bound to a name and evaluated as the final top-level
# expression: Jupyter auto-renders only a trailing top-level expression, so a
# bare expression nested inside ``try`` would be silently discarded.
prov_table = None
try:
    prov = tool_call_provenance(db_path)
    print(f'{len(prov)} tool-call aggregates (last 30 days):')
    prov_table = prov[['tool','status','n_calls','mean_ms']].head(20)
except Exception as e:
    # Kept best-effort so the remaining cells still run without the database.
    print(f'Provenance unavailable: {e}')
prov_table
77 tool-call aggregates (last 30 days):
2. Target gene annotations¶
def _short(value, limit=55, placeholder='—'):
    """Render one annotation field as a short table cell.

    A list contributes its first two items joined by ", "; any other truthy
    value is stringified. The text is cut to ``limit`` characters, and an
    empty result is replaced by ``placeholder`` so that every missing field
    is shown the same way.
    """
    if isinstance(value, list):
        text = ', '.join(str(item) for item in value[:2])
    else:
        text = str(value) if value else ''
    return text[:limit] or placeholder


ann_rows = []
for g in ['CLOCK', 'HCRT']:
    mg = load(f'mygene_{g}')
    hpa = load(f'hpa_{g}')
    # Only JSON objects expose named fields; treat any other payload as absent.
    mg = mg if isinstance(mg, dict) else {}
    hpa = hpa if isinstance(hpa, dict) else {}
    ann_rows.append({
        'gene': g,
        'name': _short(mg.get('name')),
        'protein_class': _short(hpa.get('protein_class')),
        'disease_involvement': _short(hpa.get('disease_involvement')),
    })
pd.DataFrame(ann_rows)
| gene | name | protein_class | disease_involvement | |
|---|---|---|---|---|
| 0 | CLOCK | — | — | — |
| 1 | HCRT | — | — | — |
3. GO Biological Process enrichment (Enrichr)¶
# Top-10 GO Biological Process enrichment terms as a table.
go_bp = load('enrichr_GO_Biological_Process')
go_table = None
if isinstance(go_bp, list) and go_bp:
    go_df = pd.DataFrame(go_bp[:10])[['term','p_value','odds_ratio','genes']].copy()
    go_df['p_value'] = go_df['p_value'].apply(lambda p: f'{p:.2e}')
    go_df['odds_ratio'] = go_df['odds_ratio'].round(1)
    go_df['term'] = go_df['term'].str[:60]
    # Count hits before the gene list is flattened into a display string.
    go_df['n_hits'] = go_df['genes'].apply(len)
    go_df['genes'] = go_df['genes'].apply(lambda g: ', '.join(g))
    go_table = go_df[['term','n_hits','p_value','odds_ratio','genes']]
else:
    print('No GO:BP enrichment data')
# Jupyter renders only a trailing top-level expression; an expression nested
# inside the ``if`` branch would never be shown.
go_table
# Horizontal bar chart of the strongest GO:BP enrichment terms.
go_bp = load('enrichr_GO_Biological_Process')
if not (isinstance(go_bp, list) and go_bp):
    print('No GO:BP data to plot')
else:
    top = go_bp[:8]
    # Both series are built in reverse order of ``top``.
    terms = [t['term'][:45] for t in reversed(top)]
    # p-values are floored at 1e-300 before taking the logarithm.
    neglogp = [-np.log10(max(t['p_value'], 1e-300)) for t in reversed(top)]
    fig, ax = plt.subplots(figsize=(9, 4.5))
    ax.barh(terms, neglogp, color='#4fc3f7')
    ax.set_xlabel('-log10(p-value)')
    ax.set_title('Top GO:BP enrichment (Enrichr)')
    ax.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plt.show()
4. KEGG pathway enrichment¶
# Top-10 KEGG pathway enrichment terms as a table.
kegg = load('enrichr_KEGG_Pathways')
kegg_table = None
if isinstance(kegg, list) and kegg:
    kegg_df = pd.DataFrame(kegg[:10])[['term','p_value','odds_ratio','genes']].copy()
    kegg_df['genes'] = kegg_df['genes'].apply(lambda g: ', '.join(g))
    kegg_df['p_value'] = kegg_df['p_value'].apply(lambda p: f'{p:.2e}')
    kegg_df['odds_ratio'] = kegg_df['odds_ratio'].round(1)
    kegg_table = kegg_df
else:
    print('No KEGG enrichment data')
# Jupyter renders only a trailing top-level expression; an expression nested
# inside the ``if`` branch would never be shown.
kegg_table
No KEGG enrichment data
5. STRING protein interaction network¶
# STRING interaction edges, strongest first.
ppi = load('string_network')
ppi_table = None
if isinstance(ppi, list) and ppi:
    ppi_df = pd.DataFrame(ppi)
    # The display columns are filtered by presence below, so the sort key is
    # treated the same way instead of raising KeyError when it is absent.
    if 'score' in ppi_df.columns:
        ppi_df = ppi_df.sort_values('score', ascending=False)
    display_cols = [c for c in ['protein1','protein2','score','escore','tscore'] if c in ppi_df.columns]
    print(f'{len(ppi_df)} STRING edges')
    ppi_table = ppi_df[display_cols].head(20)
else:
    print('No STRING edges returned')
# Jupyter renders only a trailing top-level expression; an expression nested
# inside the ``if`` branch would never be shown.
ppi_table
11 STRING edges
# Network figure: proteins placed evenly on a unit circle, one line per edge.
ppi = load('string_network')
if isinstance(ppi, list) and ppi:
    import math
    nodes = sorted({p for e in ppi for p in (e['protein1'], e['protein2'])})
    n = len(nodes)
    pos = {n_: (math.cos(2*math.pi*i/n), math.sin(2*math.pi*i/n)) for i, n_ in enumerate(nodes)}
    fig, ax = plt.subplots(figsize=(7, 7))
    for e in ppi:
        x1, y1 = pos[e['protein1']]
        x2, y2 = pos[e['protein2']]
        # Matplotlib rejects alpha values outside [0, 1]. Clamp the score so
        # a missing or out-of-range value cannot abort the whole figure.
        # NOTE(review): assumes scores are on a 0-1 scale — confirm against
        # the cached STRING payload.
        weight = min(max(float(e.get('score') or 0.0), 0.0), 1.0)
        ax.plot([x1, x2], [y1, y2], color='#888', alpha=0.3 + 0.5*weight,
                linewidth=0.5 + 2*weight)
    for name, (x, y) in pos.items():
        ax.scatter([x], [y], s=450, color='#ffd54f', edgecolors='#333', zorder=3)
        ax.annotate(name, (x, y), ha='center', va='center', fontsize=8, fontweight='bold', zorder=4)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(f'STRING PPI network ({len(ppi)} edges)')
    plt.tight_layout()
    plt.show()
else:
    print('No STRING data to visualize')
6. Reactome pathway footprint¶
# One summary row per target gene: pathway count and first pathway name.
pw_rows = []
for g in ['CLOCK', 'HCRT']:
    pws = load(f'reactome_{g}')
    if not isinstance(pws, list):
        # Anything other than a list is reported as "no pathways".
        pw_rows.append({'gene': g, 'n_pathways': 0, 'top_pathway': '—'})
        continue
    first_name = pws[0]['name'] if pws else '—'
    pw_rows.append({'gene': g, 'n_pathways': len(pws),
                    'top_pathway': first_name[:70]})
pd.DataFrame(pw_rows).sort_values('n_pathways', ascending=False)
| gene | n_pathways | top_pathway | |
|---|---|---|---|
| 0 | CLOCK | 0 | — |
| 1 | HCRT | 0 | — |
7. Allen Brain Atlas ISH regional expression¶
# One summary row per target gene from the cached Allen ISH bundle.
ish_rows = []
for g in ['CLOCK', 'HCRT']:
    ish = load(f'allen_ish_{g}')
    # Only a dict payload can carry a 'regions' list; everything else is empty.
    if isinstance(ish, dict):
        regions = ish.get('regions') or []
    else:
        regions = []
    row = {'gene': g, 'n_ish_regions': len(regions)}
    if regions:
        row['top_region'] = regions[0].get('structure','')[:45]
        row['top_energy'] = round(regions[0].get('expression_energy',0), 2)
    else:
        row['top_region'] = '—'[:45]
        row['top_energy'] = None
    ish_rows.append(row)
pd.DataFrame(ish_rows)
| gene | n_ish_regions | top_region | top_energy | |
|---|---|---|---|---|
| 0 | CLOCK | 0 | — | — |
| 1 | HCRT | 0 | — | — |
8. Hypothesis ranking (7 hypotheses)¶
# (title, composite score) pairs for the horizontal ranking chart.
hyp_data = [('Circadian Clock-Autophagy Synchronization', 0.623), ('Orexin-Microglia Modulation Therapy', 0.623), ('Circadian Glymphatic Rescue Therapy (Melatonin-focused)', 0.606), ('Noradrenergic-Tau Propagation Blockade', 0.604), ('Sleep Spindle-Synaptic Plasticity Enhancement', 0.604), ('Hypocretin-Neurogenesis Coupling Therapy', 0.569), ('Adenosine-Astrocyte Metabolic Reset', 0.558)]


def _bar_color(score):
    """Map a composite score to its bar colour (thresholds at 0.6 and 0.5)."""
    if score >= 0.6:
        return '#ef5350'
    if score >= 0.5:
        return '#ffa726'
    return '#66bb6a'


# Both series are taken in reverse order of ``hyp_data``.
titles = [pair[0] for pair in reversed(hyp_data)]
scores = [pair[1] for pair in reversed(hyp_data)]
fig, ax = plt.subplots(figsize=(10, max(8, len(titles)*0.4)))
colors = [_bar_color(s) for s in scores]
positions = range(len(titles))
ax.barh(positions, scores, color=colors)
ax.set_yticks(positions)
ax.set_yticklabels(titles, fontsize=7)
ax.set_xlabel('Composite Score')
ax.set_title('Sleep disruption as cause and consequence of neurodegeneration')
ax.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()
9. Score dimension heatmap (all 7 hypotheses)¶
# Row labels, score matrix (one row per label, one column per dimension)
# and dimension names for the heatmap.
labels = ['Circadian Clock-Autophagy Synchronizatio', 'Orexin-Microglia Modulation Therapy', 'Circadian Glymphatic Rescue Therapy (Mel', 'Noradrenergic-Tau Propagation Blockade', 'Sleep Spindle-Synaptic Plasticity Enhanc', 'Hypocretin-Neurogenesis Coupling Therapy', 'Adenosine-Astrocyte Metabolic Reset']
matrix = np.array([[0.65, 0.6, 0.7, 0.75, 0.597, 0.65, 0.6, 0.55, 0.5], [0.8, 0.4, 0.6, 0.6, 0.673, 0.65, 0.55, 0.8, 0.3], [0.8, 0.85, 0.65, 0.6, 0.45, 0.7, 0.6, 0.9, 0.9], [0.75, 0.7, 0.55, 0.5, 0.634, 0.7, 0.6, 0.85, 0.5], [0.7, 0.5, 0.55, 0.55, 0.436, 0.6, 0.5, 0.6, 0.25], [0.85, 0.25, 0.4, 0.35, 0.669, 0.5, 0.25, 0.6, 0.3], [0.7, 0.8, 0.7, 0.75, 0.533, 0.8, 0.75, 0.9, 0.75]])
dims = ['novelty_score', 'feasibility_score', 'impact_score', 'mechanistic_plausibility_score', 'clinical_relevance_score', 'data_availability_score', 'reproducibility_score', 'druggability_score', 'safety_profile_score']
if not matrix.size:
    print('No score data available')
else:
    fig, ax = plt.subplots(figsize=(10, 5))
    im = ax.imshow(matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
    # Dimension names are prettified for the tick labels:
    # drop the '_score' suffix, turn underscores into spaces, title-case.
    pretty_dims = [d.replace('_score','').replace('_',' ').title() for d in dims]
    ax.set_xticks(range(len(dims)))
    ax.set_xticklabels(pretty_dims, rotation=45, ha='right', fontsize=8)
    ax.set_yticks(range(len(labels)))
    ax.set_yticklabels(labels, fontsize=7)
    ax.set_title('Score dimensions — top hypotheses')
    plt.colorbar(im, ax=ax, shrink=0.8)
    plt.tight_layout()
    plt.show()
10. PubMed evidence per hypothesis¶
Hypothesis 1: Circadian Clock-Autophagy Synchronization¶
Target genes: CLOCK · Composite score: 0.623
Molecular Mechanism and Rationale
The circadian clock machinery represents a fundamental cellular timing system that coordinates temporal regulation of autophagy, a critical cellular quality control mechanism essential for neuronal survival. The core circadian transcriptional complex consists of CLOCK (Circadian Locomotor Output Cycles Kaput) and BMAL1 (Brain and Muscle ARNT-Like 1) proteins, which form heterodimers that bind to E-box elements in promoter regions of clock-controlled genes.
# PubMed evidence table for this hypothesis, read from the cached bundle.
hid = 'h-b7898b79'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # Independent copy so the column edits below act on this frame only.
        lit = lit[cols].copy()
        # Any of the four columns may be missing from the cached records, so
        # every per-column step is guarded (previously only 'journal' was,
        # and a record set without 'title' or 'year' raised KeyError).
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| note | |
|---|---|
| 0 | no PubMed results |
Hypothesis 2: Orexin-Microglia Modulation Therapy¶
Target genes: HCRTR2 · Composite score: 0.623
Molecular Mechanism and Rationale
The orexin system, comprising orexin-A (hypocretin-1) and orexin-B (hypocretin-2) neuropeptides and their cognate G-protein coupled receptors OX1R (HCRTR1) and OX2R (HCRTR2), represents a critical nexus between sleep-wake regulation and immune modulation in the central nervous system. The HCRTR2 gene encodes the orexin receptor 2 (OX2R), which exhibits predominant expression in hypothalamic nuclei, brainstem arousal centers, and notably, on microglial cells
# PubMed evidence table for this hypothesis, read from the cached bundle.
hid = 'h-8597755b'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # Independent copy so the column edits below act on this frame only.
        lit = lit[cols].copy()
        # Any of the four columns may be missing from the cached records, so
        # every per-column step is guarded (previously only 'journal' was,
        # and a record set without 'title' or 'year' raised KeyError).
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| note | |
|---|---|
| 0 | no PubMed results |
Hypothesis 3: Circadian Glymphatic Rescue Therapy (Melatonin-focused)¶
Target genes: MTNR1A · Composite score: 0.606
Molecular Mechanism and Rationale
The circadian glymphatic rescue therapy centers on the intricate molecular interplay between melatonin receptor signaling, circadian clock machinery, and glymphatic system components. Melatonin exerts its effects primarily through two G-protein coupled receptors: MTNR1A (MT1) and MTNR1B (MT2), both highly expressed in suprachiasmatic nucleus (SCN) neurons and astrocytes throughout the brain parenchyma. Upon binding to MT1 receptors, melatonin activates Gi/G
# PubMed evidence table for this hypothesis, read from the cached bundle.
hid = 'h-de579caf'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # Independent copy so the column edits below act on this frame only.
        lit = lit[cols].copy()
        # Any of the four columns may be missing from the cached records, so
        # every per-column step is guarded (previously only 'journal' was,
        # and a record set without 'title' or 'year' raised KeyError).
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| note | |
|---|---|
| 0 | no PubMed results |
Hypothesis 4: Noradrenergic-Tau Propagation Blockade¶
Target genes: ADRA2A · Composite score: 0.604
Molecular Mechanism and Rationale
The α2A-adrenergic receptor (ADRA2A) represents a critical nexus in the pathophysiology of neurodegenerative diseases, particularly through its dual regulation of sleep architecture and tau protein propagation. The locus coeruleus (LC), the brain's primary noradrenergic nucleus, exhibits selective vulnerability in Alzheimer's disease and related tauopathies, with neuronal loss beginning decades before clinical symptom onset. The ADRA2A receptor functions as
# PubMed evidence table for this hypothesis, read from the cached bundle.
hid = 'h-4113b0e8'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # Independent copy so the column edits below act on this frame only.
        lit = lit[cols].copy()
        # Any of the four columns may be missing from the cached records, so
        # every per-column step is guarded (previously only 'journal' was,
        # and a record set without 'title' or 'year' raised KeyError).
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| note | |
|---|---|
| 0 | no PubMed results |
Hypothesis 5: Sleep Spindle-Synaptic Plasticity Enhancement¶
Target genes: CACNA1G · Composite score: 0.604
Molecular Mechanism and Rationale
The CACNA1G gene encodes the Cav3.1 T-type calcium channel α1G subunit, which plays a fundamental role in generating sleep spindles through its expression in thalamic reticular nucleus (TRN) neurons. These low-voltage-activated calcium channels are uniquely positioned to orchestrate the rhythmic burst firing patterns essential for sleep spindle generation, operating through a precise molecular mechanism involving voltage-dependent activation and inactivatio
# PubMed evidence table for this hypothesis, read from the cached bundle.
hid = 'h-8d270062'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # Independent copy so the column edits below act on this frame only.
        lit = lit[cols].copy()
        # Any of the four columns may be missing from the cached records, so
        # every per-column step is guarded (previously only 'journal' was,
        # and a record set without 'title' or 'year' raised KeyError).
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| note | |
|---|---|
| 0 | no PubMed results |
Hypothesis 6: Hypocretin-Neurogenesis Coupling Therapy¶
Target genes: HCRT · Composite score: 0.569
Molecular Mechanism and Rationale
The hypocretin-neurogenesis coupling therapy targets the intricate molecular network connecting the hypocretin (orexin) system to adult hippocampal neurogenesis through multiple converging pathways. Hypocretin-1 (HCRT-1) and hypocretin-2 (HCRT-2), derived from the HCRT gene, are neuropeptides produced exclusively by approximately 10,000-20,000 neurons in the lateral hypothalamus. These peptides bind to two G-protein coupled receptors: hypocretin receptor 1
# PubMed evidence table for this hypothesis, read from the cached bundle.
hid = 'h-6b84d8e2'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # Independent copy so the column edits below act on this frame only.
        lit = lit[cols].copy()
        # Any of the four columns may be missing from the cached records, so
        # every per-column step is guarded (previously only 'journal' was,
        # and a record set without 'title' or 'year' raised KeyError).
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| note | |
|---|---|
| 0 | no PubMed results |
Hypothesis 7: Adenosine-Astrocyte Metabolic Reset¶
Target genes: ADORA2A · Composite score: 0.558
Molecular Mechanism and Rationale
The molecular underpinnings of adenosine A2A receptor (ADORA2A) modulation in astrocytic metabolism represent a sophisticated interplay of cellular signaling, metabolic regulation, and neuroenergetic optimization. At the core of this hypothesis lies a complex molecular mechanism that integrates multiple cellular processes through a nuanced receptor-mediated signaling cascade.
ADORA2A activation triggers a multi-step molecular response that begins with G-pr
# PubMed evidence table for this hypothesis, read from the cached bundle.
hid = 'h-41bc2d38'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # Independent copy so the column edits below act on this frame only.
        lit = lit[cols].copy()
        # Any of the four columns may be missing from the cached records, so
        # every per-column step is guarded (previously only 'journal' was,
        # and a record set without 'title' or 'year' raised KeyError).
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| note | |
|---|---|
| 0 | no PubMed results |
11. Knowledge graph edges (191 total)¶
# Knowledge-graph edges embedded in the notebook (source, relation, target,
# strength). One record per line so no string literal can be split by wrapping.
edge_data = [
    {'source': 'ADORA2A', 'relation': 'regulates', 'target': 'adenosine_metabolism', 'strength': 0.8},
    {'source': 'adenosine_metabolism', 'relation': 'controls', 'target': 'sleep_homeostasis', 'strength': 0.8},
    {'source': 'MTNR1A', 'relation': 'regulates_expression', 'target': 'AQP4', 'strength': 0.8},
    {'source': 'AQP4', 'relation': 'mediates', 'target': 'glymphatic_clearance', 'strength': 0.8},
    {'source': 'glymphatic_clearance', 'relation': 'promotes', 'target': 'amyloid_beta_clearance', 'strength': 0.8},
    {'source': 'HCRTR2', 'relation': 'modulates_via_microglia', 'target': 'CX3CR1', 'strength': 0.8},
    {'source': 'CX3CR1', 'relation': 'controls', 'target': 'microglial_activation', 'strength': 0.8},
    {'source': 'ADRA2A', 'relation': 'regulates_propagation', 'target': 'MAPT', 'strength': 0.8},
    {'source': 'MAPT', 'relation': 'causes', 'target': 'tau_pathology', 'strength': 0.8},
    {'source': 'CLOCK', 'relation': 'co_regulates', 'target': 'TFEB', 'strength': 0.8},
    {'source': 'CACNA1G', 'relation': 'generates', 'target': 'sleep_spindles', 'strength': 0.8},
    {'source': 'sleep_spindles', 'relation': 'promotes', 'target': 'memory_consolidation', 'strength': 0.8},
    {'source': 'h-b7898b79', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.7},
    {'source': 'h-41bc2d38', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.65},
    {'source': 'ADORA2A', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.65},
    {'source': 'ADRA2A', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.57},
    {'source': 'h-de579caf', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.55},
    {'source': 'ADORA2A', 'relation': 'participates_in', 'target': 'Astrocyte reactivity signaling', 'strength': 0.52},
    {'source': 'CACNA1G', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.51},
    # This target was split across two physical lines inside the literal,
    # which is an unterminated string; it is rejoined here.
    # NOTE(review): assumes the line break was a wrapping artifact rather
    # than a newline in the stored name — confirm against the KG source.
    {'source': 'MTNR1A', 'relation': 'participates_in', 'target': 'Circadian rhythm / glymphatic ', 'strength': 0.51},
    {'source': 'h-8597755b', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.5},
    {'source': 'CACNA1G', 'relation': 'participates_in', 'target': 'Synaptic function / plasticity', 'strength': 0.48},
    {'source': 'ADRA2A', 'relation': 'participates_in', 'target': 'Tau protein / microtubule-asso', 'strength': 0.47},
    {'source': 'HCRT', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.47},
    {'source': 'h-4113b0e8', 'relation': 'implicated_in', 'target': 'neurodegeneration', 'strength': 0.45},
]
edge_table = None
if edge_data:
    edge_table = pd.DataFrame(edge_data).head(25)
else:
    print('No KG edge data available')
# Jupyter renders only a trailing top-level expression; an expression nested
# inside the ``if`` branch would never be shown.
edge_table
12. Caveats¶
This notebook uses real Forge tool calls cached from live APIs, but:
- Enrichment is against curated gene-set libraries, not genome-wide screens
- STRING/Reactome/HPA/MyGene reflect curated knowledge
- PubMed literature is search-relevance ranked, not systematic review
The cached evidence bundle is the minimum viable real-data analysis for this topic.