Epigenetic clocks and biological aging in neurodegeneration¶
Notebook ID: nb-sda-2026-04-01-gap-v2-bc5f270e · Analysis: sda-2026-04-01-gap-v2-bc5f270e · Generated: 2026-04-10
Research question¶
Epigenetic clocks and biological aging in neurodegeneration
Approach¶
This notebook is generated programmatically from real Forge tool calls and SciDEX debate data. Code cells load cached evidence bundles from `data/forge_cache/seaad/*.json` and query live data from `scidex.db`. To refresh the notebook, re-run `python3 scripts/regenerate_notebooks.py --analysis sda-2026-04-01-gap-v2-bc5f270e --force`.
6 hypotheses were generated and debated. The knowledge graph has 98 edges.
Debate Summary¶
Quality score: 0.51 · Rounds: 4 · Personas: Theorist, Skeptic, Domain_Expert, Synthesizer
1. Forge tool provenance¶
import json, sys, sqlite3
from pathlib import Path
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Notebook-wide figure defaults: 110 dpi rendering on a white figure background.
matplotlib.rcParams.update({
    'figure.dpi': 110,
    'figure.facecolor': 'white',
})
# The repository root is taken from the current working directory and placed
# first on the import path; cached evidence bundles live under
# data/forge_cache/<CACHE_SUB>/ inside it.
REPO = Path('.').resolve()
sys.path.insert(0, str(REPO))
CACHE_SUB = 'seaad'
CACHE = REPO / 'data' / 'forge_cache' / CACHE_SUB


def load(name):
    """Return the parsed JSON bundle stored at ``CACHE / f'{name}.json'``.

    Args:
        name: Bundle name without the ``.json`` suffix.

    Returns:
        The decoded JSON value, or ``{}`` when the file does not exist or
        cannot be decoded. Callers in this notebook treat an empty dict as
        "no data", so an unreadable bundle degrades to that instead of
        aborting the cell.
    """
    p = CACHE / f'{name}.json'
    try:
        # JSON is UTF-8 by specification; do not rely on the platform default.
        return json.loads(p.read_text(encoding='utf-8'))
    except FileNotFoundError:
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        print(f'Cache bundle {p.name} could not be parsed: {exc}')
        return {}
# Summarise tool calls recorded in the SciDEX database over the last 30 days,
# grouped by skill and status. Best-effort: any failure is reported and the
# cell continues without a table.
db_path = Path('/home/ubuntu/scidex/scidex.db')
prov_view = None
try:
    # Open read-only through a URI: a plain connect() would create an empty
    # database file when the path does not exist.
    db = sqlite3.connect(f'{db_path.as_uri()}?mode=ro', uri=True)
    try:
        prov = pd.read_sql_query('''
SELECT skill_id, status, COUNT(*) AS n_calls,
ROUND(AVG(duration_ms),0) AS mean_ms
FROM tool_calls
WHERE created_at >= date('now','-30 days')
GROUP BY skill_id, status
ORDER BY n_calls DESC
''', db)
    finally:
        # Release the connection even when the query itself fails.
        db.close()
    prov['tool'] = prov['skill_id'].str.replace('tool_', '', regex=False)
    print(f'{len(prov)} tool-call aggregates (last 30 days):')
    prov_view = prov[['tool','status','n_calls','mean_ms']].head(20)
except Exception as e:  # cell-level boundary: report and carry on
    print(f'Provenance unavailable: {e}')
# Jupyter renders only a cell's last top-level expression, so the table is
# surfaced here rather than inside the try block (None renders nothing).
prov_view
2. Target gene annotations¶
# One annotation row per target gene, combining the cached MyGene bundle
# (gene name) with the cached HPA bundle (protein class, disease involvement).
# NOTE(review): the gene list iterated below is an empty literal in this
# generated notebook, so the loop body never runs and the table has no rows.


def _first_two(value, fallback):
    """Join the first two items of a list field; otherwise stringify it (or the fallback)."""
    if isinstance(value, list):
        return ', '.join(value[:2])[:55]
    return str(value or fallback)[:55]


ann_rows = []
for g in []:
    mg, hpa = load(f'mygene_{g}'), load(f'hpa_{g}')
    if mg or hpa:
        ann_rows.append({
            'gene': g,
            'name': (mg.get('name') or '')[:55],
            'protein_class': _first_two(hpa.get('protein_class'), '—'),
            'disease_involvement': _first_two(hpa.get('disease_involvement'), ''),
        })
    else:
        # Neither bundle is cached for this gene: emit a placeholder row.
        ann_rows.append({'gene': g, 'name': '—', 'protein_class': '—',
                         'disease_involvement': '—'})
pd.DataFrame(ann_rows)
3. GO Biological Process enrichment (Enrichr)¶
# Table of the ten leading GO Biological Process enrichment results from the
# cached Enrichr bundle.
go_bp = load('enrichr_GO_Biological_Process')
go_view = None
if isinstance(go_bp, list) and go_bp:
    # .copy() detaches the column subset from its parent frame so the column
    # assignments below do not trigger pandas' SettingWithCopyWarning.
    go_df = pd.DataFrame(go_bp[:10])[['term','p_value','odds_ratio','genes']].copy()
    go_df['p_value'] = go_df['p_value'].apply(lambda p: f'{p:.2e}')
    go_df['odds_ratio'] = go_df['odds_ratio'].round(1)
    go_df['term'] = go_df['term'].str[:60]
    # Count hits before the gene lists are flattened into strings.
    go_df['n_hits'] = go_df['genes'].apply(len)
    go_df['genes'] = go_df['genes'].apply(lambda g: ', '.join(g))
    go_view = go_df[['term','n_hits','p_value','odds_ratio','genes']]
else:
    print('No GO:BP enrichment data')
# Jupyter renders only a cell's last top-level expression; an expression
# nested inside the `if` would be evaluated and discarded (None renders nothing).
go_view
# Visualize top GO BP enrichment
go_bp = load('enrichr_GO_Biological_Process')
if not (isinstance(go_bp, list) and go_bp):
    print('No GO:BP data to plot')
else:
    # Work on the first eight results in reverse order, so the first result
    # ends up last in the plotted sequence.
    ranked = list(reversed(go_bp[:8]))
    terms = [entry['term'][:45] for entry in ranked]
    # Clamp p-values at 1e-300 so log10 never receives zero.
    neglogp = [-np.log10(max(entry['p_value'], 1e-300)) for entry in ranked]
    fig, ax = plt.subplots(figsize=(9, 4.5))
    ax.barh(terms, neglogp, color='#4fc3f7')
    ax.set_xlabel('-log10(p-value)')
    ax.set_title('Top GO:BP enrichment (Enrichr)')
    ax.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plt.show()
4. KEGG pathway enrichment¶
# Table of the ten leading KEGG pathway enrichment results from the cached
# Enrichr bundle.
kegg = load('enrichr_KEGG_Pathways')
kegg_view = None
if isinstance(kegg, list) and kegg:
    # .copy() detaches the column subset from its parent frame so the column
    # assignments below do not trigger pandas' SettingWithCopyWarning.
    kegg_df = pd.DataFrame(kegg[:10])[['term','p_value','odds_ratio','genes']].copy()
    kegg_df['genes'] = kegg_df['genes'].apply(lambda g: ', '.join(g))
    kegg_df['p_value'] = kegg_df['p_value'].apply(lambda p: f'{p:.2e}')
    kegg_df['odds_ratio'] = kegg_df['odds_ratio'].round(1)
    kegg_view = kegg_df
else:
    print('No KEGG enrichment data')
# Jupyter renders only a cell's last top-level expression; an expression
# nested inside the `if` would be evaluated and discarded (None renders nothing).
kegg_view
5. STRING protein interaction network¶
# Table of the 20 highest-scoring edges in the cached STRING network bundle.
ppi = load('string_network')
ppi_view = None
if isinstance(ppi, list) and ppi:
    ppi_df = pd.DataFrame(ppi)
    # Guard the sort the same way the display columns are guarded, so a
    # bundle without a 'score' field still produces a table.
    if 'score' in ppi_df.columns:
        ppi_df = ppi_df.sort_values('score', ascending=False)
    display_cols = [c for c in ['protein1','protein2','score','escore','tscore'] if c in ppi_df.columns]
    print(f'{len(ppi_df)} STRING edges')
    ppi_view = ppi_df[display_cols].head(20)
else:
    print('No STRING edges returned')
# Jupyter renders only a cell's last top-level expression; an expression
# nested inside the `if` would be evaluated and discarded (None renders nothing).
ppi_view
# Network figure
ppi = load('string_network')
if not (isinstance(ppi, list) and ppi):
    print('No STRING data to visualize')
else:
    import math

    # Every protein that appears at either end of an edge, in sorted order.
    nodes = sorted({name for edge in ppi for name in (edge['protein1'], edge['protein2'])})
    n = len(nodes)
    # Space the nodes evenly around the unit circle.
    pos = {}
    for i, node in enumerate(nodes):
        pos[node] = (math.cos(2*math.pi*i/n), math.sin(2*math.pi*i/n))

    fig, ax = plt.subplots(figsize=(7, 7))
    for edge in ppi:
        (x1, y1), (x2, y2) = pos[edge['protein1']], pos[edge['protein2']]
        weight = edge['score']
        # Higher-scoring edges are drawn thicker and more opaque.
        # NOTE(review): presumably scores lie in [0, 1]; confirm against the cached bundle.
        ax.plot([x1, x2], [y1, y2], color='#888', alpha=0.3+0.5*weight,
                linewidth=0.5+2*weight)
    for name, (x, y) in pos.items():
        ax.scatter([x], [y], s=450, color='#ffd54f', edgecolors='#333', zorder=3)
        ax.annotate(name, (x, y), ha='center', va='center', fontsize=8, fontweight='bold', zorder=4)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(f'STRING PPI network ({len(ppi)} edges)')
    plt.tight_layout()
    plt.show()
6. Reactome pathway footprint¶
# Count the cached Reactome pathways for each target gene and show the first
# pathway name, sorted by pathway count (descending).
# NOTE(review): the gene list iterated below is an empty literal in this
# generated notebook, so no rows are produced.
pw_columns = ['gene', 'n_pathways', 'top_pathway']
pw_rows = []
for g in []:
    pws = load(f'reactome_{g}')
    if isinstance(pws, list):
        pw_rows.append({'gene': g, 'n_pathways': len(pws),
                        'top_pathway': (pws[0]['name'] if pws else '—')[:70]})
    else:
        pw_rows.append({'gene': g, 'n_pathways': 0, 'top_pathway': '—'})
# Pass the columns explicitly: a frame built from an empty row list has no
# columns at all, and sorting it by 'n_pathways' raises KeyError.
pw_table = pd.DataFrame(pw_rows, columns=pw_columns).sort_values('n_pathways', ascending=False)
pw_table
7. Allen Brain Atlas ISH regional expression¶
# One row per target gene summarising the cached Allen ISH bundle: number of
# regions plus the structure name and expression energy of the first region.
# NOTE(review): the gene list iterated below is an empty literal in this
# generated notebook, so the table has no rows.
ish_rows = []
for g in []:
    ish = load(f'allen_ish_{g}')
    # Only a dict bundle carries a 'regions' list; anything else means none.
    regions = (ish.get('regions') or []) if isinstance(ish, dict) else []
    if regions:
        lead = regions[0]
        top_region = lead.get('structure', '')[:45]
        top_energy = round(lead.get('expression_energy', 0), 2)
    else:
        top_region = '—'
        top_energy = None
    ish_rows.append({
        'gene': g,
        'n_ish_regions': len(regions),
        'top_region': top_region,
        'top_energy': top_energy,
    })
pd.DataFrame(ish_rows)
8. Hypothesis ranking (6 hypotheses)¶
# Horizontal bar chart of the composite score of each hypothesis.
hyp_data = [
    ('TET2-Mediated Demethylation Rejuvenation Therapy', 0.568),
    ('KDM6A-Mediated H3K27me3 Rejuvenation', 0.568),
    ('HDAC3-Selective Inhibition for Clock Reset', 0.561),
    ('DNMT1-Targeting Antisense Oligonucleotide Reset', 0.556),
    ('FOXO3-Longevity Pathway Epigenetic Reprogramming', 0.548),
    ('SIRT6-NAD+ Axis Enhancement Therapy', 0.544),
]
# Walk the list backwards so titles and scores are both in reversed order.
titles, scores = [], []
for title, score in reversed(hyp_data):
    titles.append(title)
    scores.append(score)

# Colour bands by score: >= 0.6, >= 0.5, and everything below.
colors = []
for s in scores:
    if s >= 0.6:
        colors.append('#ef5350')
    elif s >= 0.5:
        colors.append('#ffa726')
    else:
        colors.append('#66bb6a')

fig, ax = plt.subplots(figsize=(10, max(8, len(titles)*0.4)))
bar_positions = range(len(titles))
ax.barh(bar_positions, scores, color=colors)
ax.set_yticks(bar_positions)
ax.set_yticklabels(titles, fontsize=7)
ax.set_xlabel('Composite Score')
ax.set_title('Epigenetic clocks and biological aging in neurodegeneration')
ax.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()
9. Score dimension heatmap (top 10)¶
# Heatmap with one row per hypothesis (labels) and one column per score
# dimension (dims), colour-scaled over the fixed range 0..1.
labels = [
    'TET2-Mediated Demethylation Rejuvenation',
    'KDM6A-Mediated H3K27me3 Rejuvenation',
    'HDAC3-Selective Inhibition for Clock Res',
    'DNMT1-Targeting Antisense Oligonucleotid',
    'FOXO3-Longevity Pathway Epigenetic Repro',
    'SIRT6-NAD+ Axis Enhancement Therapy',
]
matrix = np.array([
    [0.72, 0.58, 0.71, 0.75, 0.532, 0.65, 0.62, 0.62, 0.45],
    [0.8, 0.3, 0.3, 0.4, 0.661, 0.4, 0.3, 0.3, 0.3],
    [0.8, 0.6, 0.5, 0.7, 0.614, 0.6, 0.5, 0.8, 0.4],
    [0.6, 0.4, 0.3, 0.3, 0.517, 0.4, 0.3, 0.5, 0.2],
    [0.7, 0.2, 0.4, 0.4, 0.512, 0.5, 0.4, 0.2, 0.3],
    [0.7, 0.5, 0.4, 0.5, 0.47, 0.4, 0.3, 0.4, 0.6],
])
dims = [
    'novelty_score',
    'feasibility_score',
    'impact_score',
    'mechanistic_plausibility_score',
    'clinical_relevance_score',
    'data_availability_score',
    'reproducibility_score',
    'druggability_score',
    'safety_profile_score',
]
if not matrix.size:
    print('No score data available')
else:
    # Turn 'novelty_score' into 'Novelty' and so on for the x-axis.
    dim_titles = [d.replace('_score','').replace('_',' ').title() for d in dims]
    fig, ax = plt.subplots(figsize=(10, 5))
    im = ax.imshow(matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
    ax.set_xticks(range(len(dims)))
    ax.set_xticklabels(dim_titles, rotation=45, ha='right', fontsize=8)
    ax.set_yticks(range(len(labels)))
    ax.set_yticklabels(labels, fontsize=7)
    ax.set_title('Score dimensions — top hypotheses')
    plt.colorbar(im, ax=ax, shrink=0.8)
    plt.tight_layout()
    plt.show()
10. PubMed evidence per hypothesis¶
Hypothesis 1: TET2-Mediated Demethylation Rejuvenation Therapy¶
Target genes: TET2 · Composite score: 0.568
Molecular Mechanism and Rationale¶
The TET2-mediated demethylation rejuvenation therapy operates through the strategic restoration of epigenetic homeostasis in neurodegenerative conditions by targeting aberrant DNA methylation patterns that accumulate during pathological aging. TET2 (Ten-eleven translocation methylcytosine dioxygenase 2) belongs to the TET family of α-ketoglutarate-dependent dioxygenases that catalyze the oxidation of 5-methylcytosine (5mC) to 5-hydroxymethylcytosine (5hmC)…
# Cached PubMed hits for this hypothesis, trimmed to the display columns that
# are actually present and sorted newest first when a year is available.
hid = 'h-d7121bcc'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() detaches the column subset from its parent frame so the
        # assignments below do not trigger pandas' SettingWithCopyWarning.
        lit = lit[cols].copy()
        # Each field is optional in the bundle; guard them all alike.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected fields exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
Hypothesis 2: KDM6A-Mediated H3K27me3 Rejuvenation¶
Target genes: KDM6A · Composite score: 0.568
Molecular Mechanism and Rationale
The lysine demethylase 6A (KDM6A), also known as UTX (Ubiquitously Transcribed Tetratricopeptide Repeat, X chromosome), represents a critical epigenetic regulator that catalyzes the removal of repressive histone H3 lysine 27 trimethylation (H3K27me3) marks through its Jumonji C (JmjC) domain-containing demethylase activity. This chromatin-modifying enzyme functions as part of the larger COMPASS-like complexes and operates in direct opposition to the Polycom
# Cached PubMed hits for this hypothesis, trimmed to the display columns that
# are actually present and sorted newest first when a year is available.
hid = 'h-881362dc'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() detaches the column subset from its parent frame so the
        # assignments below do not trigger pandas' SettingWithCopyWarning.
        lit = lit[cols].copy()
        # Each field is optional in the bundle; guard them all alike.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected fields exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
Hypothesis 3: HDAC3-Selective Inhibition for Clock Reset¶
Target genes: HDAC3 · Composite score: 0.561
Molecular Mechanism and Rationale
Histone deacetylase 3 (HDAC3) represents a critical epigenetic regulator that orchestrates circadian rhythms and metabolic homeostasis through its role in chromatin remodeling. HDAC3 functions as the catalytic subunit of the nuclear receptor co-repressor (NCoR/SMRT) complex, which removes acetyl groups from specific lysine residues on histones H3 and H4, leading to chromatin condensation and transcriptional repression. The molecular mechanism underlying HDA
# Cached PubMed hits for this hypothesis, trimmed to the display columns that
# are actually present and sorted newest first when a year is available.
hid = 'h-a9571dbb'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() detaches the column subset from its parent frame so the
        # assignments below do not trigger pandas' SettingWithCopyWarning.
        lit = lit[cols].copy()
        # Each field is optional in the bundle; guard them all alike.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected fields exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
Hypothesis 4: DNMT1-Targeting Antisense Oligonucleotide Reset¶
Target genes: DNMT1 · Composite score: 0.556
Molecular Mechanism and Rationale
DNA methyltransferase 1 (DNMT1) serves as the primary maintenance methyltransferase in mammalian cells, responsible for preserving DNA methylation patterns during cell division by adding methyl groups to hemimethylated CpG dinucleotides. In the context of neurodegeneration, DNMT1 dysregulation leads to aberrant hypermethylation of critical neuronal genes, particularly at promoter regions containing CpG islands. This pathological methylation silences neuropr
# Cached PubMed hits for this hypothesis, trimmed to the display columns that
# are actually present and sorted newest first when a year is available.
hid = 'h-782e55f6'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() detaches the column subset from its parent frame so the
        # assignments below do not trigger pandas' SettingWithCopyWarning.
        lit = lit[cols].copy()
        # Each field is optional in the bundle; guard them all alike.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected fields exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
Hypothesis 5: FOXO3-Longevity Pathway Epigenetic Reprogramming¶
Target genes: FOXO3 · Composite score: 0.548
Molecular Mechanism and Rationale
The FOXO3 (Forkhead Box O3) transcription factor represents a pivotal regulatory node in cellular longevity pathways that becomes progressively silenced during neurodegeneration through epigenetic modifications. FOXO3 belongs to the forkhead family of transcription factors and functions as a master regulator of stress resistance, DNA repair, autophagy, and mitochondrial biogenesis—all processes that decline during neurodegenerative disease progression. The
# Cached PubMed hits for this hypothesis, trimmed to the display columns that
# are actually present and sorted newest first when a year is available.
hid = 'h-fd52a7a0'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() detaches the column subset from its parent frame so the
        # assignments below do not trigger pandas' SettingWithCopyWarning.
        lit = lit[cols].copy()
        # Each field is optional in the bundle; guard them all alike.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected fields exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
Hypothesis 6: SIRT6-NAD+ Axis Enhancement Therapy¶
Target genes: SIRT6 · Composite score: 0.544
Molecular Mechanism and Rationale
The SIRT6-NAD+ axis represents a critical regulatory network governing cellular aging, DNA repair, and chromatin homeostasis, with profound implications for neurodegeneration. SIRT6, a member of the sirtuin family of NAD+-dependent deacetylases, functions as a chromatin-associated enzyme that modulates histone acetylation patterns at telomeres and throughout the genome. The molecular mechanism centers on SIRT6's ability to deacetylate histone H3 lysine 9 (H
# Cached PubMed hits for this hypothesis, trimmed to the display columns that
# are actually present and sorted newest first when a year is available.
hid = 'h-50a535f9'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        # .copy() detaches the column subset from its parent frame so the
        # assignments below do not trigger pandas' SettingWithCopyWarning.
        lit = lit[cols].copy()
        # Each field is optional in the bundle; guard them all alike.
        if 'title' in lit.columns:
            lit['title'] = lit['title'].str[:80]
        if 'journal' in lit.columns:
            lit['journal'] = lit['journal'].str[:30]
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected fields exist: show the first raw records.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
11. Knowledge graph edges (98 total)¶
# Knowledge-graph edges embedded in the notebook, one dict per edge with the
# keys source / relation / target / strength.
edge_data = [
    dict(zip(('source', 'relation', 'target', 'strength'), row))
    for row in [
        ('TET2', 'demethylates', 'DNA_methylation', 0.8),
        ('HDAC3', 'deacetylates', 'H3K27_acetylation', 0.8),
        ('SIRT6', 'requires', 'NAD+_pathway', 0.8),
        ('KDM6A', 'demethylates', 'H3K27me3', 0.8),
        ('FOXO3', 'activates', 'autophagy_pathway', 0.8),
        ('DNMT1', 'methylates', 'CpG_methylation', 0.8),
        ('epigenetic_clock', 'predicts', 'neurodegeneration', 0.8),
        ('circadian_rhythm', 'regulated_by', 'HDAC3', 0.8),
        ('h-d7121bcc', 'implicated_in', 'neurodegeneration', 0.68),
        ('h-a9571dbb', 'implicated_in', 'neurodegeneration', 0.6),
        ('HDAC3', 'associated_with', 'neurodegeneration', 0.57),
        ('SIRT6', 'associated_with', 'neurodegeneration', 0.46),
        ('HDAC3', 'participates_in', 'Classical complement cascade', 0.45),
        ('KDM6A', 'associated_with', 'neurodegeneration', 0.42),
        ('FOXO3', 'associated_with', 'neurodegeneration', 0.41),
        ('HDAC3', 'co_discussed', 'TET2', 0.4),
        ('HDAC3', 'co_discussed', 'KDM6A', 0.4),
        ('HDAC3', 'co_discussed', 'SIRT6', 0.4),
        ('HDAC3', 'co_discussed', 'DNMT1', 0.4),
        ('HDAC3', 'co_discussed', 'FOXO3', 0.4),
        ('TET2', 'co_discussed', 'KDM6A', 0.4),
        ('TET2', 'co_discussed', 'SIRT6', 0.4),
        ('TET2', 'co_discussed', 'DNMT1', 0.4),
        ('TET2', 'co_discussed', 'FOXO3', 0.4),
        ('KDM6A', 'co_discussed', 'SIRT6', 0.4),
    ]
]
edge_view = None
if edge_data:
    edge_view = pd.DataFrame(edge_data).head(25)
else:
    print('No KG edge data available')
# Jupyter renders only a cell's last top-level expression; an expression
# nested inside the `if` would be evaluated and discarded (None renders nothing).
edge_view
12. Caveats¶
This notebook uses real Forge tool calls cached from live APIs, but its evidence has the following limitations:
- Enrichment is against curated gene-set libraries, not genome-wide screens
- STRING/Reactome/HPA/MyGene reflect curated knowledge
- PubMed literature is ranked by search relevance; it is not a systematic review
The cached evidence bundle is the minimum viable real-data analysis for this topic.