Digital biomarkers and AI-driven early detection of neurodegeneration¶
Notebook ID: nb-sda-2026-04-01-gap-012 · Analysis: sda-2026-04-01-gap-012 · Generated: 2026-04-10
Research question¶
Can speech, gait, retinal imaging, sleep, and smartphone data detect neurodegeneration 5-10 years before diagnosis?
Approach¶
This notebook is generated programmatically from real Forge tool calls and SciDEX debate data. Code cells load cached evidence bundles from data/forge_cache/seaad/*.json and query live data from scidex.db. Re-run python3 scripts/regenerate_notebooks.py --analysis sda-2026-04-01-gap-012 --force to refresh.
7 hypotheses were generated and debated. The knowledge graph has 309 edges.
Debate Summary¶
Quality score: 0.51 · Rounds: 4 · Personas: Theorist, Skeptic, Domain_Expert, Synthesizer
1. Forge tool provenance¶
import json, sys, sqlite3
from pathlib import Path
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Figure defaults applied to every plot produced later in the notebook.
matplotlib.rcParams.update({
    'figure.dpi': 110,
    'figure.facecolor': 'white',
})
# Cache location is resolved relative to the directory the notebook runs from.
REPO = Path('.').resolve()
sys.path.insert(0, str(REPO))
CACHE_SUB = 'seaad'
CACHE = REPO / 'data' / 'forge_cache' / CACHE_SUB


def load(name):
    """Return the parsed JSON stored at ``CACHE / f'{name}.json'``.

    Args:
        name: Cache entry name without the ``.json`` suffix.

    Returns:
        The decoded JSON value, or an empty dict when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    p = CACHE / f'{name}.json'
    if p.exists():
        # Decode explicitly as UTF-8 instead of the locale default, so
        # non-ASCII text in the cache reads the same on every machine.
        return json.loads(p.read_text(encoding='utf-8'))
    return {}
db_path = Path('/home/ubuntu/scidex/scidex.db')

# Call count and mean duration per (skill_id, status) over the last 30 days.
PROVENANCE_SQL = '''
SELECT skill_id, status, COUNT(*) AS n_calls,
ROUND(AVG(duration_ms),0) AS mean_ms
FROM tool_calls
WHERE created_at >= date('now','-30 days')
GROUP BY skill_id, status
ORDER BY n_calls DESC
'''

prov_table = None
try:
    db = sqlite3.connect(str(db_path))
    try:
        prov = pd.read_sql_query(PROVENANCE_SQL, db)
    finally:
        # Close the connection even when the query itself raises.
        db.close()
    prov['tool'] = prov['skill_id'].str.replace('tool_', '', regex=False)
    print(f'{len(prov)} tool-call aggregates (last 30 days):')
    prov_table = prov[['tool', 'status', 'n_calls', 'mean_ms']].head(20)
except Exception as e:
    # Best-effort cell: report the problem and let the rest of the notebook run.
    print(f'Provenance unavailable: {e}')

# Jupyter renders a bare expression only when it is the last top-level
# statement of the cell, so the table is surfaced here rather than in `try`.
prov_table
2. Target gene annotations¶
def _clip_annotation(value, placeholder):
    """Render an annotation field as text of at most 55 characters.

    A list is reduced to its first two entries joined by ', '. Any other
    value is stringified, with *placeholder* substituted for falsy values.
    """
    if isinstance(value, list):
        return ', '.join(value[:2])[:55]
    return str(value or placeholder)[:55]


# One row per target gene, built from the cached MyGene and HPA records.
# NOTE(review): the gene list is empty in this generated notebook, so the
# resulting table has no rows — confirm with the notebook generator.
ann_rows = []
for gene in []:
    mygene_rec = load(f'mygene_{gene}')
    hpa_rec = load(f'hpa_{gene}')
    if mygene_rec or hpa_rec:
        row = {
            'gene': gene,
            'name': (mygene_rec.get('name') or '')[:55],
            'protein_class': _clip_annotation(hpa_rec.get('protein_class'), '—'),
            'disease_involvement': _clip_annotation(
                hpa_rec.get('disease_involvement'), ''),
        }
    else:
        # Neither source has the gene cached: emit a placeholder row.
        row = {'gene': gene, 'name': '—', 'protein_class': '—',
               'disease_involvement': '—'}
    ann_rows.append(row)
pd.DataFrame(ann_rows)
3. GO Biological Process enrichment (Enrichr)¶
# Tabulate the first ten cached GO Biological Process enrichment records.
go_bp = load('enrichr_GO_Biological_Process')
go_table = None
if isinstance(go_bp, list) and go_bp:
    # .copy() gives an independent frame, so the column edits below do not
    # write into a slice of the temporary DataFrame.
    go_df = pd.DataFrame(go_bp[:10])[['term', 'p_value', 'odds_ratio', 'genes']].copy()
    go_df['p_value'] = go_df['p_value'].apply(lambda p: f'{p:.2e}')
    go_df['odds_ratio'] = go_df['odds_ratio'].round(1)
    go_df['term'] = go_df['term'].str[:60]
    # Count hits before the gene lists are flattened into strings.
    go_df['n_hits'] = go_df['genes'].apply(len)
    go_df['genes'] = go_df['genes'].apply(lambda g: ', '.join(g))
    go_table = go_df[['term', 'n_hits', 'p_value', 'odds_ratio', 'genes']]
else:
    print('No GO:BP enrichment data')

# Jupyter renders a bare expression only when it is the last top-level
# statement of the cell; nested inside `if` it would never be displayed.
go_table
# Visualize top GO BP enrichment
go_bp = load('enrichr_GO_Biological_Process')
if not (isinstance(go_bp, list) and go_bp):
    print('No GO:BP data to plot')
else:
    # barh draws the first entry at the bottom, so the first eight records
    # are reversed to put the first cached term at the top of the chart.
    top_hits = list(reversed(go_bp[:8]))
    terms = [hit['term'][:45] for hit in top_hits]
    # Clamp p-values at 1e-300 so log10 never receives zero.
    neglogp = [-np.log10(max(hit['p_value'], 1e-300)) for hit in top_hits]

    fig, ax = plt.subplots(figsize=(9, 4.5))
    ax.barh(terms, neglogp, color='#4fc3f7')
    ax.set_xlabel('-log10(p-value)')
    ax.set_title('Top GO:BP enrichment (Enrichr)')
    ax.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plt.show()
4. KEGG pathway enrichment¶
# Tabulate the first ten cached KEGG pathway enrichment records.
kegg = load('enrichr_KEGG_Pathways')
kegg_table = None
if isinstance(kegg, list) and kegg:
    # .copy() gives an independent frame, so the column edits below do not
    # write into a slice of the temporary DataFrame.
    kegg_df = pd.DataFrame(kegg[:10])[['term', 'p_value', 'odds_ratio', 'genes']].copy()
    kegg_df['genes'] = kegg_df['genes'].apply(lambda g: ', '.join(g))
    kegg_df['p_value'] = kegg_df['p_value'].apply(lambda p: f'{p:.2e}')
    kegg_df['odds_ratio'] = kegg_df['odds_ratio'].round(1)
    kegg_table = kegg_df
else:
    print('No KEGG enrichment data')

# Jupyter renders a bare expression only when it is the last top-level
# statement of the cell; nested inside `if` it would never be displayed.
kegg_table
5. STRING protein interaction network¶
# Tabulate the cached STRING interaction edges, strongest first.
ppi = load('string_network')
ppi_table = None
if isinstance(ppi, list) and ppi:
    ppi_df = pd.DataFrame(ppi)
    # Guard the sort the same way the displayed columns are guarded below:
    # a record set without 'score' is shown in its cached order.
    if 'score' in ppi_df.columns:
        ppi_df = ppi_df.sort_values('score', ascending=False)
    display_cols = [c for c in ['protein1', 'protein2', 'score', 'escore', 'tscore']
                    if c in ppi_df.columns]
    print(f'{len(ppi_df)} STRING edges')
    ppi_table = ppi_df[display_cols].head(20)
else:
    print('No STRING edges returned')

# Jupyter renders a bare expression only when it is the last top-level
# statement of the cell; nested inside `if` it would never be displayed.
ppi_table
# Network figure
ppi = load('string_network')
if not (isinstance(ppi, list) and ppi):
    print('No STRING data to visualize')
else:
    import math

    # Collect every protein that appears on either end of an edge.
    nodes = sorted({protein
                    for edge in ppi
                    for protein in (edge['protein1'], edge['protein2'])})
    n = len(nodes)
    # Place the nodes evenly around the unit circle.
    pos = {}
    for idx, node in enumerate(nodes):
        pos[node] = (math.cos(2*math.pi*idx/n), math.sin(2*math.pi*idx/n))

    fig, ax = plt.subplots(figsize=(7, 7))

    # Edges: opacity and line width both scale with the edge score.
    for edge in ppi:
        x1, y1 = pos[edge['protein1']]
        x2, y2 = pos[edge['protein2']]
        ax.plot([x1, x2], [y1, y2], color='#888',
                alpha=0.3+0.5*edge['score'],
                linewidth=0.5+2*edge['score'])

    # Nodes are drawn above the edges (zorder 3), labels above the nodes (zorder 4).
    for name, (x, y) in pos.items():
        ax.scatter([x], [y], s=450, color='#ffd54f', edgecolors='#333', zorder=3)
        ax.annotate(name, (x, y), ha='center', va='center',
                    fontsize=8, fontweight='bold', zorder=4)

    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(f'STRING PPI network ({len(ppi)} edges)')
    plt.tight_layout()
    plt.show()
6. Reactome pathway footprint¶
# One row per target gene: number of cached Reactome pathways and the first one.
# NOTE(review): the gene list is empty in this generated notebook — confirm
# with the notebook generator whether target genes should be listed here.
pw_rows = []
for g in []:
    pws = load(f'reactome_{g}')
    if isinstance(pws, list):
        pw_rows.append({'gene': g, 'n_pathways': len(pws),
                        'top_pathway': (pws[0]['name'] if pws else '—')[:70]})
    else:
        pw_rows.append({'gene': g, 'n_pathways': 0, 'top_pathway': '—'})

# Declare the columns explicitly: a DataFrame built from an empty list has no
# columns, and sort_values('n_pathways') would then raise KeyError.
pw_table = pd.DataFrame(
    pw_rows, columns=['gene', 'n_pathways', 'top_pathway'],
).sort_values('n_pathways', ascending=False)
pw_table
7. Allen Brain Atlas ISH regional expression¶
# One row per target gene summarising the cached Allen ISH record: how many
# regions it lists, plus the structure name and expression energy of the first.
ish_rows = []
for gene in []:
    payload = load(f'allen_ish_{gene}')
    if isinstance(payload, dict):
        regions = payload.get('regions') or []
    else:
        regions = []

    if regions:
        first = regions[0]
        top_region = first.get('structure', '')[:45]
        top_energy = round(first.get('expression_energy', 0), 2)
    else:
        top_region = '—'
        top_energy = None

    ish_rows.append({
        'gene': gene,
        'n_ish_regions': len(regions),
        'top_region': top_region,
        'top_energy': top_energy,
    })
pd.DataFrame(ish_rows)
8. Hypothesis ranking (7 hypotheses)¶
# (title, composite score) pairs, listed from highest to lowest score.
hyp_data = [
    ('Vocal Cord Neuroplasticity Stimulation', 0.593),
    ('Ocular Immune Privilege Extension', 0.572),
    ('Digital Twin-Guided Metabolic Reprogramming', 0.57),
    ('Retinal Vascular Microcirculation Rescue', 0.561),
    ('Circadian-Synchronized Proteostasis Enhancement', 0.536),
    ('Multi-Modal Stress Response Harmonization', 0.515),
    ('Smartphone-Detected Motor Variability Correction', 0.492),
]


def _score_color(score):
    """Map a composite score to its bar colour (cut-offs at 0.6 and 0.5)."""
    if score >= 0.6:
        return '#ef5350'
    if score >= 0.5:
        return '#ffa726'
    return '#66bb6a'


# barh draws the first entry at the bottom, so reverse to put the first
# hypothesis at the top of the chart.
titles = [title for title, _ in reversed(hyp_data)]
scores = [score for _, score in reversed(hyp_data)]

fig, ax = plt.subplots(figsize=(10, max(8, len(titles)*0.4)))
colors = [_score_color(s) for s in scores]
bar_positions = range(len(titles))
ax.barh(bar_positions, scores, color=colors)
ax.set_yticks(bar_positions)
ax.set_yticklabels(titles, fontsize=7)
ax.set_xlabel('Composite Score')
ax.set_title('Digital biomarkers and AI-driven early detection of neurodegeneration')
ax.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()
9. Score dimension heatmap (all 7 hypotheses)¶
# Row labels: one per hypothesis.
labels = [
    'Vocal Cord Neuroplasticity Stimulation',
    'Ocular Immune Privilege Extension',
    'Digital Twin-Guided Metabolic Reprogramm',
    'Retinal Vascular Microcirculation Rescue',
    'Circadian-Synchronized Proteostasis Enha',
    'Multi-Modal Stress Response Harmonizatio',
    'Smartphone-Detected Motor Variability Co',
]
# One row per label, one column per entry in `dims`.
matrix = np.array([
    [0.9, 0.2, 0.4, 0.3, 0.436, 0.3, 0.3, 0.3, 0.2],
    [0.8, 0.2, 0.3, 0.2, 0.676, 0.2, 0.2, 0.3, 0.3],
    [0.8, 0.8, 0.6, 0.7, 0.454, 0.6, 0.5, 0.6, 0.8],
    [0.7, 0.4, 0.6, 0.5, 0.436, 0.5, 0.4, 0.5, 0.5],
    [0.8, 0.6, 0.7, 0.7, 0.436, 0.6, 0.6, 0.7, 0.4],
    [0.7, 0.7, 0.7, 0.8, 0.453, 0.7, 0.6, 0.8, 0.5],
    [0.6, 0.8, 0.5, 0.6, 0.453, 0.7, 0.7, 0.9, 0.2],
])
dims = [
    'novelty_score',
    'feasibility_score',
    'impact_score',
    'mechanistic_plausibility_score',
    'clinical_relevance_score',
    'data_availability_score',
    'reproducibility_score',
    'druggability_score',
    'safety_profile_score',
]

if not matrix.size:
    print('No score data available')
else:
    # Turn 'mechanistic_plausibility_score' into 'Mechanistic Plausibility'.
    tick_text = [d.replace('_score', '').replace('_', ' ').title() for d in dims]

    fig, ax = plt.subplots(figsize=(10, 5))
    # Colour scale is pinned to [0, 1] rather than fitted to the data range.
    im = ax.imshow(matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
    ax.set_xticks(range(len(dims)))
    ax.set_xticklabels(tick_text, rotation=45, ha='right', fontsize=8)
    ax.set_yticks(range(len(labels)))
    ax.set_yticklabels(labels, fontsize=7)
    ax.set_title('Score dimensions — top hypotheses')
    plt.colorbar(im, ax=ax, shrink=0.8)
    plt.tight_layout()
    plt.show()
10. PubMed evidence per hypothesis¶
Hypothesis 1: Vocal Cord Neuroplasticity Stimulation¶
Target genes: CHR2/BDNF · Composite score: 0.593
Molecular Mechanism and Rationale
The proposed therapeutic approach centers on the fundamental understanding that vocal cord dysfunction represents an early manifestation of brainstem neurodegeneration, specifically involving the vagal motor complex and its downstream effector pathways. The recurrent laryngeal nerve, a branch of the vagus nerve (cranial nerve X), innervates the intrinsic laryngeal muscles responsible for vocal cord adduction, abduction, and tension regulation. Degeneration… [excerpt truncated]
# Cached PubMed records for this hypothesis, shown newest first.
hid = 'h-e0183502'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        # .copy() gives an independent frame, so the column edits below do
        # not write into a slice of the full record table.
        lit = lit[cols].copy()
        # Any of these columns may be absent from the cached records, so each
        # is guarded instead of raising KeyError.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
Hypothesis 2: Ocular Immune Privilege Extension¶
Target genes: FOXP3/TGFB1 · Composite score: 0.572
Molecular Mechanism and Rationale¶
The concept of ocular immune privilege extension leverages the unique immunoregulatory environment of the eye to establish systemic neuroprotection through engineered immune-regulatory cell therapy targeting FOXP3 and TGFB1 pathways. The eye maintains immune privilege through multiple molecular mechanisms, including the blood-retinal barrier, expression of immunosuppressive factors, and specialized antigen-presenting cell populations. Central to this pri… [excerpt truncated]
# Cached PubMed records for this hypothesis, shown newest first.
hid = 'h-6a065252'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        # .copy() gives an independent frame, so the column edits below do
        # not write into a slice of the full record table.
        lit = lit[cols].copy()
        # Any of these columns may be absent from the cached records, so each
        # is guarded instead of raising KeyError.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
Hypothesis 3: Digital Twin-Guided Metabolic Reprogramming¶
Target genes: PPARGC1A/PRKAA1 · Composite score: 0.57
Molecular Mechanism and Rationale
The digital twin-guided metabolic reprogramming approach targets the fundamental bioenergetic dysfunction underlying neurodegenerative diseases through precise modulation of the PGC-1α (PPARGC1A) and AMPK α1 (PRKAA1) signaling axis. PGC-1α serves as the master regulator of mitochondrial biogenesis and oxidative metabolism, orchestrating the transcription of nuclear respiratory factors NRF1 and NRF2, which subsequently activate mitochondrial transcription fa… [excerpt truncated]
# Cached PubMed records for this hypothesis, shown newest first.
hid = 'h-b0cda336'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        # .copy() gives an independent frame, so the column edits below do
        # not write into a slice of the full record table.
        lit = lit[cols].copy()
        # Any of these columns may be absent from the cached records, so each
        # is guarded instead of raising KeyError.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
Hypothesis 4: Retinal Vascular Microcirculation Rescue¶
Target genes: PDGFRB/ANGPT1 · Composite score: 0.561
Molecular Mechanism and Rationale
The blood-brain barrier (BBB) and blood-retinal barrier (BRB) share fundamental structural and functional similarities, particularly in their reliance on pericyte-endothelial cell interactions to maintain vascular integrity. This hypothesis centers on the critical role of pericyte dysfunction as a convergent mechanism underlying neurodegenerative diseases, with particular focus on the platelet-derived growth factor receptor beta (PDGFRB) and angiopoietin-1… [excerpt truncated]
# Cached PubMed records for this hypothesis, shown newest first.
hid = 'h-35f04e1b'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        # .copy() gives an independent frame, so the column edits below do
        # not write into a slice of the full record table.
        lit = lit[cols].copy()
        # Any of these columns may be absent from the cached records, so each
        # is guarded instead of raising KeyError.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
Hypothesis 5: Circadian-Synchronized Proteostasis Enhancement¶
Target genes: CLOCK/ULK1 · Composite score: 0.536
Detailed Scientific Description: Circadian-Synchronized Proteostasis Enhancement¶
Molecular Mechanism and Rationale¶
The circadian clock system exerts profound control over cellular proteostasis through coordinate regulation of autophagy, proteasomal degradation, and heat shock protein expression. At the molecular core of this system lies the CLOCK/BMAL1 heterodimer, which functions as the master transcriptional regulator of circadian gene expression. CLOCK (Circadian Locomotor Output Cycle… [excerpt truncated]
# Cached PubMed records for this hypothesis, shown newest first.
hid = 'h-0e0cc0c1'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        # .copy() gives an independent frame, so the column edits below do
        # not write into a slice of the full record table.
        lit = lit[cols].copy()
        # Any of these columns may be absent from the cached records, so each
        # is guarded instead of raising KeyError.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
Hypothesis 6: Multi-Modal Stress Response Harmonization¶
Target genes: NR3C1/CRH/TNFA · Composite score: 0.515
Molecular Mechanism and Rationale
The multi-modal stress response harmonization hypothesis centers on the interconnected dysregulation of three critical biological systems that converge to accelerate neurodegenerative processes. The primary molecular targets include the glucocorticoid receptor (NR3C1), corticotropin-releasing hormone (CRH), and tumor necrosis factor alpha (TNFA), which form a pathological triad driving neuronal dysfunction and death.
The hypothalamic-pituitary-adrenal (HPA… [excerpt truncated]
# Cached PubMed records for this hypothesis, shown newest first.
hid = 'h-1e564178'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        # .copy() gives an independent frame, so the column edits below do
        # not write into a slice of the full record table.
        lit = lit[cols].copy()
        # Any of these columns may be absent from the cached records, so each
        # is guarded instead of raising KeyError.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
Hypothesis 7: Smartphone-Detected Motor Variability Correction¶
Target genes: DRD2/SNCA · Composite score: 0.492
Molecular Mechanism and Rationale
The fundamental molecular mechanism underlying smartphone-detected motor variability correction centers on the intricate relationship between dopaminergic signaling and alpha-synuclein pathology within the basal ganglia circuitry. The dopamine D2 receptor (DRD2) serves as a critical mediator of motor control through its expression on medium spiny neurons in the striatum, particularly within the indirect pathway that regulates movement initiation and executi… [excerpt truncated]
# Cached PubMed records for this hypothesis, shown newest first.
hid = 'h-072b2f5d'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year', 'journal', 'title', 'pmid'] if c in lit.columns]
    if cols:
        # .copy() gives an independent frame, so the column edits below do
        # not write into a slice of the full record table.
        lit = lit[cols].copy()
        # Any of these columns may be absent from the cached records, so each
        # is guarded instead of raising KeyError.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note': 'no PubMed results'}])
display_df
11. Knowledge graph edges (309 total)¶
# Knowledge-graph edges embedded in the notebook (source, relation, target, strength).
edge_data = [
    {'source': 'CLOCK', 'relation': 'transcriptional_complex', 'target': 'BMAL1_protein', 'strength': 0.8},
    {'source': 'ULK1', 'relation': 'initiates', 'target': 'autophagy_pathway', 'strength': 0.8},
    {'source': 'autophagy_pathway', 'relation': 'prevents', 'target': 'neurodegeneration', 'strength': 0.8},
    {'source': 'NR3C1', 'relation': 'regulates', 'target': 'HPA_axis', 'strength': 0.8},
    {'source': 'CRH', 'relation': 'activates', 'target': 'stress_response', 'strength': 0.8},
    {'source': 'PPARGC1A', 'relation': 'master_regulator', 'target': 'mitochondrial_biogenesis', 'strength': 0.8},
    {'source': 'PRKAA1', 'relation': 'encodes', 'target': 'AMPK_signaling', 'strength': 0.8},
    {'source': 'PDGFRB', 'relation': 'maintains', 'target': 'pericyte_function', 'strength': 0.8},
    {'source': 'pericyte_function', 'relation': 'preserves', 'target': 'BBB_integrity', 'strength': 0.8},
    {'source': 'DRD2', 'relation': 'modulates', 'target': 'basal_ganglia_circuit', 'strength': 0.8},
    {'source': 'PPARGC1A', 'relation': 'interacts_with', 'target': 'PRKAA1', 'strength': 0.61},
    {'source': 'PRKAA1', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.61},
    {'source': 'PRKAA1', 'relation': 'interacts_with', 'target': 'PPARGC1A', 'strength': 0.61},
    {'source': 'NR3C1', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.61},
    {'source': 'NR3C1', 'relation': 'interacts_with', 'target': 'CRH', 'strength': 0.61},
    {'source': 'NR3C1', 'relation': 'interacts_with', 'target': 'TNFA', 'strength': 0.61},
    {'source': 'CRH', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.61},
    {'source': 'CRH', 'relation': 'interacts_with', 'target': 'NR3C1', 'strength': 0.61},
    {'source': 'CRH', 'relation': 'interacts_with', 'target': 'TNFA', 'strength': 0.61},
    {'source': 'TNFA', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.61},
    {'source': 'TNFA', 'relation': 'interacts_with', 'target': 'NR3C1', 'strength': 0.61},
    {'source': 'TNFA', 'relation': 'interacts_with', 'target': 'CRH', 'strength': 0.61},
    {'source': 'NR3C1', 'relation': 'participates_in', 'target': 'Glucocorticoid receptor / stre', 'strength': 0.61},
    {'source': 'CRH', 'relation': 'participates_in', 'target': 'Glucocorticoid receptor / stre', 'strength': 0.61},
    {'source': 'TNFA', 'relation': 'participates_in', 'target': 'Glucocorticoid receptor / stre', 'strength': 0.61},
]

edge_table = None
if edge_data:
    edge_table = pd.DataFrame(edge_data).head(25)
else:
    print('No KG edge data available')

# Jupyter renders a bare expression only when it is the last top-level
# statement of the cell; nested inside `if` it would never be displayed.
edge_table
12. Caveats¶
This notebook uses real Forge tool calls cached from live APIs, but:
- Enrichment is against curated gene-set libraries, not genome-wide screens
- STRING/Reactome/HPA/MyGene reflect curated knowledge
- PubMed literature is ranked by search relevance; it is not a systematic review
The cached evidence bundle is the minimum viable real-data analysis for this topic.