Microglia-astrocyte crosstalk amplification loops in neurodegeneration¶
Notebook ID: nb-sda-2026-04-01-gap-009 · Analysis: sda-2026-04-01-gap-009 · Generated: 2026-04-10
Research question¶
Microglia activate astrocytes via IL-1alpha/TNF/C1q, and reactive astrocytes feed back to microglia via complement/chemokines.
Approach¶
This notebook is generated programmatically from real Forge tool calls and SciDEX debate data. Code cells load cached evidence bundles from data/forge_cache/seaad/*.json and query live data from scidex.db. Re-run python3 scripts/regenerate_notebooks.py --analysis sda-2026-04-01-gap-009 --force to refresh.
7 hypotheses were generated and debated. The knowledge graph has 110 edges.
Debate Summary¶
Quality score: 0.54 · Rounds: 4 · Personas: Theorist, Skeptic, Domain_Expert, Synthesizer
1. Forge tool provenance¶
import json, sys, sqlite3
from pathlib import Path
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# Figure defaults shared by every plot in this notebook.
matplotlib.rcParams.update({
    'figure.dpi': 110,
    'figure.facecolor': 'white',
})

# The repository root is taken from the current working directory and put
# first on sys.path so project modules can be imported from notebook cells.
REPO = Path('.').resolve()
sys.path.insert(0, str(REPO))

# Cached evidence bundles are read from data/forge_cache/<CACHE_SUB>/.
CACHE_SUB = 'seaad'
CACHE = REPO.joinpath('data', 'forge_cache', CACHE_SUB)
def load(name):
    """Return the parsed JSON bundle stored at ``CACHE/<name>.json``.

    Args:
        name: Cache file stem (without the ``.json`` suffix).

    Returns:
        The decoded JSON value, or an empty dict when the file is missing,
        unreadable, or not valid JSON, so callers can treat every
        "no data" situation the same way.
    """
    p = CACHE / f'{name}.json'
    if not p.exists():
        return {}
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        return json.loads(p.read_text(encoding='utf-8'))
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f'Skipping unreadable cache file {p.name}: {exc}')
        return {}
# Summarise the last 30 days of tool calls, grouped by skill and status.
db_path = Path('/home/ubuntu/scidex/scidex.db')
prov_table = None  # stays None when the database cannot be queried
try:
    db = sqlite3.connect(str(db_path))
    try:
        prov = pd.read_sql_query('''
            SELECT skill_id, status, COUNT(*) AS n_calls,
                   ROUND(AVG(duration_ms),0) AS mean_ms
            FROM tool_calls
            WHERE created_at >= date('now','-30 days')
            GROUP BY skill_id, status
            ORDER BY n_calls DESC
        ''', db)
    finally:
        # Release the connection even when the query itself fails.
        db.close()
    prov['tool'] = prov['skill_id'].str.replace('tool_', '', regex=False)
    print(f'{len(prov)} tool-call aggregates (last 30 days):')
    prov_table = prov[['tool','status','n_calls','mean_ms']].head(20)
except Exception as e:
    # Best effort: the notebook should still render without the database.
    print(f'Provenance unavailable: {e}')
# Jupyter only renders a trailing top-level expression, so the table is
# surfaced here rather than inside the try block.
prov_table
77 tool-call aggregates (last 30 days):
2. Target gene annotations¶
def _brief(value, limit=55):
    """Render one annotation field as a short string, '—' when empty.

    Lists are reduced to their first two entries joined by ', '; any other
    value is stringified. The result is cut to ``limit`` characters.
    """
    if isinstance(value, list):
        text = ', '.join(str(v) for v in value[:2])
    else:
        text = str(value) if value else ''
    return text[:limit] or '—'


# One row per target gene, combining the cached MyGene and HPA annotations.
ann_rows = []
for g in ['CLOCK', 'CNO']:
    mg = load(f'mygene_{g}')
    hpa = load(f'hpa_{g}')
    # Only dict payloads carry named fields; treat anything else as no data.
    mg = mg if isinstance(mg, dict) else {}
    hpa = hpa if isinstance(hpa, dict) else {}
    if not mg and not hpa:
        ann_rows.append({'gene': g, 'name': '—', 'protein_class': '—',
                         'disease_involvement': '—'})
        continue
    ann_rows.append({
        'gene': g,
        'name': _brief(mg.get('name')),
        'protein_class': _brief(hpa.get('protein_class')),
        'disease_involvement': _brief(hpa.get('disease_involvement')),
    })
pd.DataFrame(ann_rows)
| | gene | name | protein_class | disease_involvement |
|---|---|---|---|---|
| 0 | CLOCK | — | — | — |
| 1 | CNO | — | — | — |
3. GO Biological Process enrichment (Enrichr)¶
# Tabulate the ten leading GO Biological Process terms from the Enrichr cache.
go_bp = load('enrichr_GO_Biological_Process')
go_table = None  # stays None when no enrichment data is cached
if isinstance(go_bp, list) and go_bp:
    go_df = pd.DataFrame(go_bp[:10])[['term','p_value','odds_ratio','genes']].copy()
    go_df['p_value'] = go_df['p_value'].apply(lambda p: f'{p:.2e}')
    go_df['odds_ratio'] = go_df['odds_ratio'].round(1)
    go_df['term'] = go_df['term'].str[:60]
    # Count hits before the gene list is flattened into a display string.
    go_df['n_hits'] = go_df['genes'].apply(len)
    go_df['genes'] = go_df['genes'].apply(lambda g: ', '.join(g))
    go_table = go_df[['term','n_hits','p_value','odds_ratio','genes']]
else:
    print('No GO:BP enrichment data')
# Jupyter only renders a trailing top-level expression, so the table is
# surfaced here rather than inside the if-branch.
go_table
# Visualize top GO BP enrichment
go_bp = load('enrichr_GO_Biological_Process')
if not (isinstance(go_bp, list) and go_bp):
    print('No GO:BP data to plot')
else:
    top = go_bp[:8]
    # Both series are built from the cached order and then reversed.
    terms = [hit['term'][:45] for hit in top]
    terms.reverse()
    # p-values are floored at 1e-300 so log10 never receives zero.
    neglogp = [-np.log10(max(hit['p_value'], 1e-300)) for hit in top]
    neglogp.reverse()
    fig, ax = plt.subplots(figsize=(9, 4.5))
    ax.barh(terms, neglogp, color='#4fc3f7')
    ax.set_xlabel('-log10(p-value)')
    ax.set_title('Top GO:BP enrichment (Enrichr)')
    ax.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plt.show()
4. KEGG pathway enrichment¶
# Tabulate the ten leading KEGG pathway terms from the Enrichr cache.
kegg = load('enrichr_KEGG_Pathways')
kegg_table = None  # stays None when no enrichment data is cached
if isinstance(kegg, list) and kegg:
    kegg_df = pd.DataFrame(kegg[:10])[['term','p_value','odds_ratio','genes']].copy()
    kegg_df['genes'] = kegg_df['genes'].apply(lambda g: ', '.join(g))
    kegg_df['p_value'] = kegg_df['p_value'].apply(lambda p: f'{p:.2e}')
    kegg_df['odds_ratio'] = kegg_df['odds_ratio'].round(1)
    kegg_table = kegg_df
else:
    print('No KEGG enrichment data')
# Jupyter only renders a trailing top-level expression, so the table is
# surfaced here rather than inside the if-branch.
kegg_table
No KEGG enrichment data
5. STRING protein interaction network¶
# List the strongest cached STRING interactions, highest score first.
ppi = load('string_network')
ppi_table = None  # stays None when no edges are cached
if isinstance(ppi, list) and ppi:
    ppi_df = pd.DataFrame(ppi).sort_values('score', ascending=False)
    # Show only the score columns that the cached records actually contain.
    display_cols = [c for c in ['protein1','protein2','score','escore','tscore'] if c in ppi_df.columns]
    print(f'{len(ppi_df)} STRING edges')
    ppi_table = ppi_df[display_cols].head(20)
else:
    print('No STRING edges returned')
# Jupyter only renders a trailing top-level expression, so the table is
# surfaced here rather than inside the if-branch.
ppi_table
11 STRING edges
# Network figure
# Draw the cached STRING edges on a circular layout; edge opacity and width
# grow with the interaction score.
ppi = load('string_network')
if isinstance(ppi, list) and ppi:
    import math
    nodes = sorted({p for e in ppi for p in (e['protein1'], e['protein2'])})
    n = len(nodes)
    # Place the nodes evenly around the unit circle.
    pos = {n_: (math.cos(2*math.pi*i/n), math.sin(2*math.pi*i/n)) for i, n_ in enumerate(nodes)}
    fig, ax = plt.subplots(figsize=(7, 7))
    for e in ppi:
        x1, y1 = pos[e['protein1']]
        x2, y2 = pos[e['protein2']]
        # matplotlib rejects alpha outside 0..1, so clamp the score before
        # deriving the edge styling from it.
        weight = min(max(e['score'], 0.0), 1.0)
        ax.plot([x1, x2], [y1, y2], color='#888', alpha=0.3 + 0.5*weight,
                linewidth=0.5 + 2*weight)
    for name, (x, y) in pos.items():
        ax.scatter([x], [y], s=450, color='#ffd54f', edgecolors='#333', zorder=3)
        ax.annotate(name, (x, y), ha='center', va='center', fontsize=8, fontweight='bold', zorder=4)
    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(f'STRING PPI network ({len(ppi)} edges)')
    plt.tight_layout()
    plt.show()
else:
    print('No STRING data to visualize')
6. Reactome pathway footprint¶
# Per-gene Reactome summary: pathway count and the first pathway's name.
pw_rows = []
for g in ['CLOCK', 'CNO']:
    pws = load(f'reactome_{g}')
    if not isinstance(pws, list):
        # Anything other than a list counts as "no pathways".
        pw_rows.append({'gene': g, 'n_pathways': 0, 'top_pathway': '—'})
        continue
    first_name = pws[0]['name'] if pws else '—'
    pw_rows.append({
        'gene': g,
        'n_pathways': len(pws),
        'top_pathway': first_name[:70],
    })
pd.DataFrame(pw_rows).sort_values('n_pathways', ascending=False)
| | gene | n_pathways | top_pathway |
|---|---|---|---|
| 0 | CLOCK | 0 | — |
| 1 | CNO | 0 | — |
7. Allen Brain Atlas ISH regional expression¶
# Per-gene Allen ISH summary: region count plus the first listed region.
ish_rows = []
for g in ['CLOCK', 'CNO']:
    ish = load(f'allen_ish_{g}')
    regions = []
    if isinstance(ish, dict):
        regions = ish.get('regions') or []
    if regions:
        lead = regions[0]
        top_region = lead.get('structure', '')
        top_energy = round(lead.get('expression_energy', 0), 2)
    else:
        top_region = '—'
        top_energy = None
    ish_rows.append({
        'gene': g,
        'n_ish_regions': len(regions),
        'top_region': top_region[:45],
        'top_energy': top_energy,
    })
pd.DataFrame(ish_rows)
| | gene | n_ish_regions | top_region | top_energy |
|---|---|---|---|---|
| 0 | CLOCK | 0 | — | — |
| 1 | CNO | 0 | — | — |
8. Hypothesis ranking (7 hypotheses)¶
# (title, composite score) pairs in the order they were generated.
hyp_data = [
    ('Phase-Separated Organelle Targeting', 0.593),
    ('Metabolic Circuit Breaker via Lipid Droplet Modulation', 0.576),
    ('Extracellular Matrix Stiffness Modulation', 0.569),
    ('Biorhythmic Interference via Controlled Sleep Oscillati', 0.56),
    ('Synthetic Biology Rewiring via Orthogonal Receptors', 0.556),
    ('Quantum Coherence Disruption in Cellular Communication', 0.542),
    ('Temporal Decoupling via Circadian Clock Reset', 0.524),
]
# Both series are plotted in reverse of the listed order.
titles = [title for title, _ in reversed(hyp_data)]
scores = [score for _, score in reversed(hyp_data)]
fig, ax = plt.subplots(figsize=(10, max(8, len(titles)*0.4)))
# Bar colour is chosen by score band, with cut-offs at 0.6 and 0.5.
colors = []
for s in scores:
    if s >= 0.6:
        colors.append('#ef5350')
    elif s >= 0.5:
        colors.append('#ffa726')
    else:
        colors.append('#66bb6a')
ax.barh(range(len(titles)), scores, color=colors)
ax.set_yticks(range(len(titles)))
ax.set_yticklabels(titles, fontsize=7)
ax.set_xlabel('Composite Score')
ax.set_title('Microglia-astrocyte crosstalk amplification loops in neurodegeneration')
ax.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()
9. Score dimension heatmap (top 10)¶
# Row labels: one (pre-truncated) hypothesis title per matrix row.
labels = [
    'Phase-Separated Organelle Targeting',
    'Metabolic Circuit Breaker via Lipid Drop',
    'Extracellular Matrix Stiffness Modulatio',
    'Biorhythmic Interference via Controlled ',
    'Synthetic Biology Rewiring via Orthogona',
    'Quantum Coherence Disruption in Cellular',
    'Temporal Decoupling via Circadian Clock ',
]
# One row per hypothesis, one column per entry of `dims`.
matrix = np.array([
    [0.7, 0.75, 0.8, 0.85, 0.078, 0.85, 0.75, 0.65, 0.6],
    [0.8, 0.7, 0.7, 0.7, 0.09, 0.8, 0.7, 0.6, 0.5],
    [0.7, 0.3, 0.5, 0.6, 0.463, 0.5, 0.5, 0.4, 0.4],
    [0.8, 0.5, 0.5, 0.4, 0.48, 0.5, 0.4, 0.2, 0.6],
    [0.9, 0.3, 0.6, 0.7, 0.623, 0.4, 0.3, 0.4, 0.4],
    [1.0, 0.1, 0.2, 0.1, 0.41, 0.2, 0.1, 0.1, 0.3],
    [0.65, 0.55, 0.68, 0.75, 0.597, 0.65, 0.5, 0.6, 0.45],
])
dims = [
    'novelty_score',
    'feasibility_score',
    'impact_score',
    'mechanistic_plausibility_score',
    'clinical_relevance_score',
    'data_availability_score',
    'reproducibility_score',
    'druggability_score',
    'safety_profile_score',
]
if not matrix.size:
    print('No score data available')
else:
    fig, ax = plt.subplots(figsize=(10, 5))
    # Colour scale is pinned to the 0..1 range.
    im = ax.imshow(matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)
    # Column captions: drop the "_score" suffix and title-case the rest.
    pretty_dims = []
    for d in dims:
        pretty_dims.append(d.replace('_score','').replace('_',' ').title())
    ax.set_xticks(range(len(dims)))
    ax.set_xticklabels(pretty_dims, rotation=45, ha='right', fontsize=8)
    ax.set_yticks(range(len(labels)))
    ax.set_yticklabels(labels, fontsize=7)
    ax.set_title('Score dimensions — top hypotheses')
    plt.colorbar(im, ax=ax, shrink=0.8)
    plt.tight_layout()
    plt.show()
10. PubMed evidence per hypothesis¶
Hypothesis 1: Phase-Separated Organelle Targeting¶
Target genes: G3BP1 · Composite score: 0.593
Molecular Mechanism and Rationale¶
Stress granules (SGs) are membraneless, phase-separated ribonucleoprotein organelles that form through liquid-liquid phase separation in response to cellular stress, representing a critical intersection between RNA metabolism and neuroinflammation in neurodegenerative diseases. The formation and persistence of pathological stress granules is orchestrated primarily by G3BP1 (GTPase-activating protein SH3 domain-binding protein 1) and its paralog G3BP2, which
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-ec731b7a'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Shorten text columns, but only those the cached records provide.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        # Sorting is skipped when the records carry no publication year.
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the raw records instead.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 2: Metabolic Circuit Breaker via Lipid Droplet Modulation¶
Target genes: PLIN2 · Composite score: 0.576
Molecular Mechanism and Rationale¶
The molecular foundation of this therapeutic strategy centers on perilipin-2 (PLIN2), a member of the perilipin family of lipid droplet coat proteins that orchestrates the dynamic interface between lipid storage and cellular metabolism. PLIN2 functions as a critical gatekeeper controlling the accessibility of stored triacylglycerols and cholesteryl esters within cytoplasmic lipid droplets. Under physiological conditions, PLIN2 coating prevents premature
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-3d993b5d'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Shorten text columns, but only those the cached records provide.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        # Sorting is skipped when the records carry no publication year.
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the raw records instead.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 3: Extracellular Matrix Stiffness Modulation¶
Target genes: PIEZO1 · Composite score: 0.569
Molecular Mechanism and Rationale
The extracellular matrix (ECM) undergoes progressive stiffening during neurodegeneration, creating a pathological mechanical microenvironment that perpetuates inflammatory responses through mechanotransduction pathways. This hypothesis centers on the mechanosensitive ion channels Piezo1 and TRPV4, which serve as primary mechanotransducers converting mechanical stimuli into intracellular calcium signaling cascades. Piezo1, a mechanically-activated cation cha
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-725c62e9'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Shorten text columns, but only those the cached records provide.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        # Sorting is skipped when the records carry no publication year.
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the raw records instead.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 4: Biorhythmic Interference via Controlled Sleep Oscillations¶
Target genes: GABRA1 · Composite score: 0.56
Molecular Mechanism and Rationale
The therapeutic enhancement of sleep spindles through targeted GABRA1 modulation represents a novel approach to neurodegeneration that leverages the fundamental relationship between sleep architecture and glial-neuronal communication networks. Sleep spindles, generated by the thalamic reticular nucleus (TRN) through rhythmic bursts of GABAergic inhibition, are critically dependent on GABRA1-containing receptors that mediate fast synaptic transmission. The G
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-49791706'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Shorten text columns, but only those the cached records provide.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        # Sorting is skipped when the records carry no publication year.
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the raw records instead.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 5: Synthetic Biology Rewiring via Orthogonal Receptors¶
Target genes: CNO · Composite score: 0.556
Molecular Mechanism and Rationale
The orthogonal receptor hijacking approach leverages Designer Receptors Exclusively Activated by Designer Drugs (DREADDs) to create synthetic biology circuits that can precisely redirect inflammatory signaling cascades in neurodegenerative diseases. At the molecular level, this strategy involves engineering modified muscarinic acetylcholine receptors, specifically hM3Dq and hM4Di variants, that respond exclusively to clozapine-N-oxide (CNO) while remaining
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-e3506e5a'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Shorten text columns, but only those the cached records provide.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        # Sorting is skipped when the records carry no publication year.
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the raw records instead.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 6: Quantum Coherence Disruption in Cellular Communication¶
Target genes: TUBB3 · Composite score: 0.542
Molecular Mechanism and Rationale
The quantum coherence disruption hypothesis proposes that neurodegeneration results from interference with quantum coherent networks that facilitate long-range cellular communication within neural tissues. At the molecular level, this mechanism centers on the microtubule protein TUBB3 (β-tubulin III), which forms the structural backbone of microtubules in neuronal cells. TUBB3 differs from other tubulin isoforms through its unique C-terminal domain and spec
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-4a31c1e0'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Shorten text columns, but only those the cached records provide.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        # Sorting is skipped when the records carry no publication year.
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the raw records instead.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
Hypothesis 7: Temporal Decoupling via Circadian Clock Reset¶
Target genes: CLOCK · Composite score: 0.524
Molecular Mechanism and Rationale
The core molecular mechanism underlying temporal decoupling via circadian clock reset centers on disrupting pathological microglia-astrocyte feedback loops through targeted modulation of the master circadian transcription factors CLOCK and BMAL1. Under normal physiological conditions, CLOCK and BMAL1 form heterodimeric complexes that bind to E-box elements in gene promoters, driving rhythmic expression of approximately 10-15% of the mammalian genome. Howeve
# Show the cached PubMed hits for this hypothesis, newest first.
hid = 'h-019ad538'
papers = load(f'pubmed_{hid}')
if isinstance(papers, list) and papers:
    lit = pd.DataFrame(papers)
    cols = [c for c in ['year','journal','title','pmid'] if c in lit.columns]
    if cols:
        lit = lit[cols].copy()
        # Shorten text columns, but only those the cached records provide.
        for col, width in (('title', 80), ('journal', 30)):
            if col in lit.columns:
                lit[col] = lit[col].str[:width]
        # Sorting is skipped when the records carry no publication year.
        if 'year' in lit.columns:
            lit = lit.sort_values('year', ascending=False)
        display_df = lit
    else:
        # None of the expected columns exist: show the raw records instead.
        display_df = pd.DataFrame(papers[:5])
else:
    display_df = pd.DataFrame([{'note':'no PubMed results'}])
display_df
| | note |
|---|---|
| 0 | no PubMed results |
11. Knowledge graph edges (110 total)¶
# Knowledge-graph edges embedded by the notebook generator.
edge_data = [
    {'source': 'PLIN2', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.61},
    {'source': 'CLOCK', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.56},
    {'source': 'CNO', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.55},
    {'source': 'GABRA1', 'relation': 'associated_with', 'target': 'neurodegeneration', 'strength': 0.5},
    {'source': 'PLIN2', 'relation': 'participates_in', 'target': 'Insulin/IGF metabolic signalin', 'strength': 0.47},
    {'source': 'CLOCK', 'relation': 'participates_in', 'target': 'Circadian clock / CLOCK-BMAL1 ', 'strength': 0.45},
    {'source': 'CNO', 'relation': 'participates_in', 'target': 'Synthetic biology / chemogenet', 'strength': 0.41},
    {'source': 'GABRA1', 'relation': 'participates_in', 'target': 'GABA-A receptor / inhibitory n', 'strength': 0.4},
    {'source': 'PIEZO1', 'relation': 'participates_in', 'target': 'Iron homeostasis / ferroptosis', 'strength': 0.4},
    {'source': 'BMAL1', 'relation': 'co_discussed', 'target': 'PLIN2', 'strength': 0.4},
    {'source': 'BMAL1', 'relation': 'co_discussed', 'target': 'G3BP1', 'strength': 0.4},
    {'source': 'CLOCK', 'relation': 'co_discussed', 'target': 'PLIN2', 'strength': 0.4},
    {'source': 'CLOCK', 'relation': 'co_discussed', 'target': 'G3BP1', 'strength': 0.4},
    {'source': 'PLIN2', 'relation': 'co_discussed', 'target': 'G3BP1', 'strength': 0.4},
    {'source': 'CLOCK', 'relation': 'co_discussed', 'target': 'DGAT1', 'strength': 0.4},
    {'source': 'PLIN2', 'relation': 'co_discussed', 'target': 'DGAT1', 'strength': 0.4},
    {'source': 'DGAT1', 'relation': 'co_discussed', 'target': 'G3BP1', 'strength': 0.4},
    {'source': 'BMAL1', 'relation': 'co_discussed', 'target': 'CNO', 'strength': 0.4},
    {'source': 'BMAL1', 'relation': 'co_discussed', 'target': 'TUBB3', 'strength': 0.4},
    {'source': 'BMAL1', 'relation': 'co_discussed', 'target': 'GABRA1', 'strength': 0.4},
    {'source': 'CNO', 'relation': 'co_discussed', 'target': 'TUBB3', 'strength': 0.4},
    {'source': 'CNO', 'relation': 'co_discussed', 'target': 'CLOCK', 'strength': 0.4},
    {'source': 'CNO', 'relation': 'co_discussed', 'target': 'PLIN2', 'strength': 0.4},
    {'source': 'CNO', 'relation': 'co_discussed', 'target': 'PIEZO1', 'strength': 0.4},
    {'source': 'CNO', 'relation': 'co_discussed', 'target': 'GABRA1', 'strength': 0.4},
]
edge_table = None  # stays None when no edges were embedded
if edge_data:
    edge_table = pd.DataFrame(edge_data).head(25)
else:
    print('No KG edge data available')
# Jupyter only renders a trailing top-level expression, so the table is
# surfaced here rather than inside the if-branch.
edge_table
12. Caveats¶
This notebook uses real Forge tool calls cached from live APIs, but:
- Enrichment is against curated gene-set libraries, not genome-wide screens
- STRING/Reactome/HPA/MyGene reflect curated knowledge, which can lag behind the primary literature
- PubMed literature is ranked by search relevance; it is not a systematic review
The cached evidence bundle is the minimum viable real-data analysis for this topic.