import json, sys, sqlite3
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams['figure.dpi'] = 110
matplotlib.rcParams['figure.facecolor'] = 'white'

# Locate the worktree root: the nearest directory (cwd first, then each
# ancestor in turn) that holds the AGENTS.md marker. Fall back to cwd when
# no directory on the path carries the marker.
_cwd = Path('.').resolve()
REPO = next(
    (_p for _p in (_cwd, *_cwd.parents) if (_p / 'AGENTS.md').exists()),
    _cwd,
)

# Directory holding the cached JSON payloads for this analysis.
CACHE = REPO.joinpath('data', 'forge_cache', 'seaad')

# TARGET_GENES: 11-gene AD vulnerability set (from forge/seaad_analysis.py)
TARGET_GENES = [
    "TREM2",
    "GFAP",
    "SLC17A7",
    "PDGFRA",
    "PDGFRB",
    "APOE",
    "MAPT",
    "APP",
    "PSEN1",
    "TYROBP",
    "CLU",
]

def load(name, _cache=CACHE):
    """Load the cached JSON payload stored as ``<name>.json``.

    Args:
        name: Cache entry name, without the ``.json`` suffix.
        _cache: Directory to read from; defaults to the module-level CACHE.

    Returns:
        The decoded JSON value, or an empty dict when the file is missing.
        The ``{}`` fallback is returned even for entries whose payload is a
        list, so callers that slice or index the result must guard for it.

    Raises:
        json.JSONDecodeError: If the file exists but does not hold valid JSON.
    """
    _path = _cache / f'{name}.json'
    if _path.exists():
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        return json.loads(_path.read_text(encoding='utf-8'))
    return {}

# Forge provenance: tool calls this session invoked
# NOTE(review): date('now','-1 day') evaluates to the previous calendar day
# (UTC), so the window can span more than 24h — confirm that is intended.
try:
    _db_path = REPO / 'scidex.db'
    # sqlite3.connect() would create an empty database file when none exists;
    # bail out first so a worktree without the DB is left untouched.
    if not _db_path.exists():
        raise FileNotFoundError(f'{_db_path} not found')
    db = sqlite3.connect(str(_db_path))
    try:
        prov = pd.read_sql_query("""
        SELECT skill_id, status, COUNT(*) AS n_calls,
               ROUND(AVG(duration_ms),0) AS mean_ms,
               MIN(created_at) AS first_call
        FROM tool_calls
        WHERE created_at >= date('now','-1 day')
        GROUP BY skill_id, status
        ORDER BY n_calls DESC
    """, db)
    finally:
        # Always release the connection, including when the query fails
        # (e.g. the tool_calls table is absent).
        db.close()
except Exception as e:
    # Best-effort cell: provenance is informational, so report and move on.
    print(f"Provenance query skipped: {e}")
    print("(tool_calls table may not exist in this worktree)")
else:
    prov = prov.rename(columns={'skill_id': 'tool'})
    prov['tool'] = prov['tool'].str.replace('tool_', '', regex=False)
    print(f'{len(prov)} tool-call aggregates from the last 24h of Forge provenance:')
    # A bare expression inside a block is never rendered by the notebook;
    # display() (already used elsewhere in this notebook) shows the table.
    display(prov.head(20))

/home/ubuntu/.config/matplotlib is not a writable directory

Matplotlib created a temporary cache directory at /tmp/matplotlib-854iswv9 because there was an issue with the default path (/home/ubuntu/.config/matplotlib); it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.

Provenance query skipped: Execution failed on sql '
        SELECT skill_id, status, COUNT(*) AS n_calls,
               ROUND(AVG(duration_ms),0) AS mean_ms,
               MIN(created_at) AS first_call
        FROM tool_calls
        WHERE created_at >= date('now','-1 day')
        GROUP BY skill_id, status
        ORDER BY n_calls DESC
    ': no such table: tool_calls
(tool_calls table may not exist in this worktree)

anno = load('mygene_TREM2')  # probe one


def _short_field(value, n_items=2, width=55):
    """Render an annotation field as a short string.

    A list/tuple is reduced to its first ``n_items`` entries joined by ', ';
    any other value is stringified (falsy values become ''). The result is
    truncated to ``width`` characters.
    """
    if isinstance(value, (list, tuple)):
        text = ', '.join(str(v) for v in value[:n_items])
    else:
        # A scalar must not be sliced like a list (that would join characters).
        text = str(value or '')
    return text[:width]


# One row per target gene, combining the cached MyGene and HPA payloads.
ann_rows = []
for g in TARGET_GENES:
    mg = load(f'mygene_{g}')
    hpa = load(f'hpa_{g}')
    ann_rows.append({
        'gene': g,
        'name': (mg.get('name') or '')[:55],
        'protein_class': _short_field(hpa.get('protein_class')),
        'disease_involvement': _short_field(hpa.get('disease_involvement')),
        'ensembl_id': hpa.get('ensembl_id') or '',
    })
anno_df = pd.DataFrame(ann_rows)
anno_df

# Top-10 GO Biological Process enrichment terms as a display table.
_go_cols = ['term', 'p_value', 'odds_ratio', 'genes']
# load() returns {} when the cache file is missing; `or []` keeps the slice valid.
go_bp = (load('enrichr_GO_Biological_Process') or [])[:10]
if go_bp:
    # .copy() gives an independent frame so the column assignments below do
    # not write into a slice of the temporary DataFrame.
    go_df = pd.DataFrame(go_bp)[_go_cols].copy()
    go_df['p_value'] = go_df['p_value'].apply(lambda p: f'{p:.2e}')
    go_df['odds_ratio'] = go_df['odds_ratio'].round(1)
    go_df['term'] = go_df['term'].str[:60]
    # Count hits before the gene list is flattened to a string.
    go_df['n_hits'] = go_df['genes'].apply(len)
    go_df['genes'] = go_df['genes'].apply(lambda g: ', '.join(g))
else:
    print('No GO Biological Process enrichment found in cache')
    go_df = pd.DataFrame(columns=_go_cols + ['n_hits'])
go_df[['term','n_hits','p_value','odds_ratio','genes']]

# Visualize top GO BP enrichment (−log10 p-value bar chart)
import numpy as np
# load() returns {} when the cache file is missing; `or []` keeps the slice valid.
go_bp = (load('enrichr_GO_Biological_Process') or [])[:8]
if go_bp:
    # Reverse both lists so the first cached term ends up at the top of the chart.
    terms = [t['term'][:45] for t in go_bp][::-1]
    neglogp = [-np.log10(t['p_value']) for t in go_bp][::-1]
    fig, ax = plt.subplots(figsize=(9, 4.5))
    ax.barh(terms, neglogp, color='#4fc3f7')
    ax.set_xlabel('-log10(p-value)')
    ax.set_title('Top GO:BP enrichment for SEA-AD vulnerability gene set (Enrichr)')
    ax.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plt.show()
else:
    print('No GO Biological Process enrichment found in cache; skipping chart')

# Top-10 CellMarker cell-type enrichment terms as a display table.
_cm_cols = ['term', 'p_value', 'odds_ratio', 'genes']
# load() returns {} when the cache file is missing; `or []` keeps the slice valid.
cm = (load('enrichr_CellMarker_Cell_Types') or [])[:10]
if cm:
    # .copy() gives an independent frame so the column assignments below do
    # not write into a slice of the temporary DataFrame.
    cm_df = pd.DataFrame(cm)[_cm_cols].copy()
    cm_df['genes'] = cm_df['genes'].apply(lambda g: ', '.join(g))
    cm_df['p_value'] = cm_df['p_value'].apply(lambda p: f'{p:.2e}')
    cm_df['odds_ratio'] = cm_df['odds_ratio'].round(1)
else:
    print('No CellMarker enrichment found in cache')
    cm_df = pd.DataFrame(columns=_cm_cols)
cm_df

# STRING edges as a table, strongest interactions first.
ppi = load('string_network')
ppi_df = pd.DataFrame(ppi)
if not ppi_df.empty:
    ppi_df = ppi_df.sort_values('score', ascending=False)
    # Only show the score columns that are actually present in the payload.
    display_cols = [c for c in ['protein1','protein2','score','escore','tscore'] if c in ppi_df.columns]
    n_proteins = len(set(ppi_df['protein1']) | set(ppi_df['protein2']))
    print(f'{len(ppi_df)} STRING edges among {n_proteins} proteins')
    # A bare expression inside a block is never rendered by the notebook;
    # display() (already used elsewhere in this notebook) shows the table.
    display(ppi_df[display_cols].head(20))
else:
    print('No STRING edges returned (API may be rate-limited)')

11 STRING edges among 9 proteins

# Simple network figure using matplotlib (no networkx dep)
ppi = load('string_network')
if ppi:
    import math

    # Lay the proteins out evenly on the unit circle, in sorted name order.
    nodes = sorted({p for e in ppi for p in (e['protein1'], e['protein2'])})
    n = len(nodes)
    pos = {}
    for i, n_ in enumerate(nodes):
        angle = 2*math.pi*i/n
        pos[n_] = (math.cos(angle), math.sin(angle))

    fig, ax = plt.subplots(figsize=(7, 7))

    # Edges first: opacity and line width both grow with the edge score.
    for e in ppi:
        x1, y1 = pos[e['protein1']]
        x2, y2 = pos[e['protein2']]
        score = e['score']
        ax.plot([x1, x2], [y1, y2], color='#888', alpha=0.3+0.5*score, linewidth=0.5+2*score)

    # Nodes and their labels are drawn above the edges (higher zorder).
    for name, (x, y) in pos.items():
        ax.scatter([x], [y], s=450, color='#ffd54f', edgecolors='#333', zorder=3)
        ax.annotate(name, (x, y), ha='center', va='center', fontsize=9, fontweight='bold', zorder=4)

    ax.set_aspect('equal')
    ax.axis('off')
    ax.set_title(f'STRING physical PPI network ({len(ppi)} edges, score ≥ 0.4)')
    plt.tight_layout()
    plt.show()

# Reactome footprint per gene: pathway count plus the first listed pathway.
pw_rows = []
# Iterate the shared TARGET_GENES constant rather than a separate literal
# copy, so the gene set cannot drift between cells.
for g in TARGET_GENES:
    pws = load(f'reactome_{g}')
    # load() yields an empty container when nothing is cached for the gene.
    top_name = pws[0]['name'] if pws else '—'
    pw_rows.append({'gene': g, 'n_pathways': len(pws),
                    'top_pathway': top_name[:70]})
pw_df = pd.DataFrame(pw_rows).sort_values('n_pathways', ascending=False)
pw_df

from collections import Counter
ac = load('allen_celltypes_TREM2')  # same for any gene (not gene-filtered at API level)
ct = pd.DataFrame(ac.get('cell_types', []))
# Show the first 15 rows, or a blank placeholder frame when nothing is cached.
ct_display = pd.DataFrame() if ct.empty else ct.head(15)
ct_display

# One row per target gene summarising the cached Allen ISH payload.
ish_rows = []
for g in TARGET_GENES:
    ish = load(f'allen_ish_{g}')
    regions = ish.get('regions') or []
    if regions:
        # Only the first listed region is summarised.
        first = regions[0]
        top_region = first.get('structure', '')
        top_energy = round(first.get('expression_energy', 0), 2)
    else:
        top_region = '—'
        top_energy = None
    ish_rows.append({
        'gene': g,
        'n_ish_regions': len(regions),
        'top_region': top_region[:45],
        'top_energy': top_energy,
        'note': (ish.get('note') or '')[:60],
    })
ish_df = pd.DataFrame(ish_rows)
ish_df

def _literature_table(papers):
    """Build the display table for one hypothesis' cached PubMed hits.

    Args:
        papers: Decoded cache payload; expected to be a list of records with
            'year', 'journal', 'title' and 'pmid' keys. Any falsy value
            (including the {} that load() returns for a missing file) is
            treated as "no results".

    Returns:
        A DataFrame sorted by year, newest first, with titles cut to 80
        characters and journal names to 30; or a one-row placeholder frame
        when there are no papers.
    """
    if not papers:
        return pd.DataFrame([{'note':'no PubMed results for this hypothesis query'}])
    # .copy() gives an independent frame so the edits below do not write into
    # a slice of the temporary DataFrame.
    lit = pd.DataFrame(papers)[['year','journal','title','pmid']].copy()
    lit['title'] = lit['title'].str[:80]
    lit['journal'] = lit['journal'].str[:30]
    return lit.sort_values('year', ascending=False)


hid = 'h-seaad-5b3cb8ea'
papers = load(f'pubmed_{hid}')
display_df = _literature_table(papers)
display_df

hid = 'h-seaad-51323624'
papers = load(f'pubmed_{hid}')
display_df = _literature_table(papers)
display_df

hid = 'h-seaad-7f15df4c'
papers = load(f'pubmed_{hid}')
display_df = _literature_table(papers)
display_df

hid = 'h-seaad-fa5ea82d'
papers = load(f'pubmed_{hid}')
display_df = _literature_table(papers)
display_df

hid = 'h-seaad-56fa6428'
papers = load(f'pubmed_{hid}')
display_df = _literature_table(papers)
display_df

from pathlib import Path
# Mechanistic differential-expression bundle written by an earlier analysis step.
bundle_path = REPO / 'data/analysis_outputs/analysis-SEAAD-20260402/mechanistic_de/bundle.json'
if bundle_path.exists():
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    mech_bundle = json.loads(bundle_path.read_text(encoding='utf-8'))
    print("Mechanistic highlights:")
    for item in mech_bundle.get('mechanistic_highlights', []):
        print(f"- {item}")
    # One row per gene; `or [{}]` / `or {}` supply an empty stand-in so a
    # missing or empty section produces '' / 0 instead of raising.
    mech_df = pd.DataFrame([
        {
            'gene': gene,
            'dx_hits': len((payload.get('differential_expression') or {}).get('experiments', [])),
            'top_pathway': ((payload.get('reactome_pathways') or [{}])[0].get('name', '')),
            'top_paper': ((payload.get('literature') or [{}])[0].get('title', '')),
        }
        for gene, payload in mech_bundle.get('per_gene', {}).items()
    ])
    # A bare expression inside a block is never rendered by the notebook;
    # display() (already used elsewhere in this notebook) shows the table.
    display(mech_df)
else:
    print(f"Missing mechanistic evidence bundle: {bundle_path}")

Mechanistic highlights:
- C1QA: 12 human differential-expression experiments matched the disease filter; top hit E-GEOD-67333 (Transcriptomics profiling of Alzheimer's disease reveal novel molecular targets).
- TREM2: 12 human differential-expression experiments matched the disease filter; top hit E-GEOD-67333 (Transcriptomics profiling of Alzheimer's disease reveal novel molecular targets).
- SLC17A7: 12 human differential-expression experiments matched the disease filter; top hit E-GEOD-67333 (Transcriptomics profiling of Alzheimer's disease reveal novel molecular targets).
- APOE: 12 human differential-expression experiments matched the disease filter; top hit E-GEOD-67333 (Transcriptomics profiling of Alzheimer's disease reveal novel molecular targets).
- GFAP: 12 human differential-expression experiments matched the disease filter; top hit E-GEOD-67333 (Transcriptomics profiling of Alzheimer's disease reveal novel molecular targets).
- STRING physical interactions support a shared mechanism network: APOE–TREM2 (0.99).
- GO_Biological_Process enrichment is led by 'Positive Regulation Of Amyloid-Beta Clearance (GO:1900223)' with p=7.496663581039455e-07, linking the gene set to a coherent pathway-level mechanism.
- Reactome enrichment is led by 'Nuclear Signaling By ERBB4 R-HSA-1251985' with p=2.4725579551199923e-05, linking the gene set to a coherent pathway-level mechanism.
- KEGG enrichment is led by 'Nicotine addiction' with p=0.009960969495176103, linking the gene set to a coherent pathway-level mechanism.

# Cross-dataset cell-type benchmark artifact consumed by the summary below.
benchmark_path = REPO / 'data/analysis_outputs/analysis-SEAAD-20260402/cell_type_benchmarks_v1.json'
# JSON text is UTF-8; do not depend on the platform's locale encoding.
benchmark = json.loads(benchmark_path.read_text(encoding='utf-8'))

def leave_one_out_interval(values):
    """Return the (lowest, highest) leave-one-out mean of *values*.

    Each leave-one-out mean is the average of all values except one; the
    interval spans the smallest and largest such mean.

    Args:
        values: Iterable of numbers (consumed once).

    Returns:
        Tuple ``(low, high)``. With a single value nothing can be left out,
        so the degenerate interval ``(value, value)`` is returned.

    Raises:
        ValueError: If *values* is empty.
    """
    vals = list(values)
    if not vals:
        raise ValueError('leave_one_out_interval() requires at least one value')
    if len(vals) == 1:
        # (total - v) / (n - 1) would divide by zero; no value can be dropped.
        return vals[0], vals[0]
    total = sum(vals)
    rest = len(vals) - 1
    # Dropping the largest value gives the lowest mean and vice versa, so the
    # two extremes are enough; no need to build every leave-one-out mean.
    return (total - max(vals)) / rest, (total - min(vals)) / rest

cell_type_scores = benchmark['cell_type_scores']

# Per-cell-type summary: mean study score plus leave-one-out sensitivity bounds.
summary_rows = []
for entry in cell_type_scores:
    scores = list(entry['study_scores'].values())
    loo_low, loo_high = leave_one_out_interval(scores)
    summary_rows.append(dict(
        cell_type=entry['display_name'],
        mean_vulnerability=round(sum(scores) / len(scores), 3),
        ci_low=round(loo_low, 3),
        ci_high=round(loo_high, 3),
        n_benchmarks=len(scores),
        top_markers=', '.join(entry['primary_markers'][:4]),
        rationale=entry['rationale'],
    ))
summary = pd.DataFrame(summary_rows).sort_values('mean_vulnerability', ascending=False)

# Index study scores and display names by the cell-type key that the
# candidate-driver records refer to.
score_lookup = {}
display_lookup = {}
for entry in cell_type_scores:
    key = entry['cell_type']
    score_lookup[key] = list(entry['study_scores'].values())
    display_lookup[key] = entry['display_name']

# Driver score = 0.55 * prior confidence + 0.45 * benchmark support; the
# interval swaps the mean support for the leave-one-out extremes.
driver_rows = []
for driver in benchmark['candidate_drivers']:
    scores = score_lookup[driver['cell_type']]
    loo_low, loo_high = leave_one_out_interval(scores)
    prior_term = 0.55 * driver['prior_confidence']
    driver_rows.append(dict(
        candidate_driver=driver['driver'],
        cell_type=display_lookup[driver['cell_type']],
        driver_score=round(prior_term + 0.45 * (sum(scores) / len(scores)), 3),
        ci_low=round(prior_term + 0.45 * loo_low, 3),
        ci_high=round(prior_term + 0.45 * loo_high, 3),
        genes=', '.join(driver['genes']),
        testable_direction=driver['expected_direction'],
    ))
driver_df = pd.DataFrame(driver_rows).sort_values('driver_score', ascending=False)

# Source studies behind the benchmark; pmid/doi are optional in the artifact.
source_df = pd.DataFrame([
    dict(
        benchmark=src['short_name'],
        pmid=src.get('pmid', ''),
        doi=src.get('doi', ''),
        design=src['region_or_design'],
    )
    for src in benchmark['sources']
])

print('Benchmark artifact:', benchmark_path.relative_to(REPO))
print('Interval method: leave-one-benchmark-out sensitivity over source-study support scores')
for table in (summary, driver_df, source_df):
    display(table)

Benchmark artifact: data/analysis_outputs/analysis-SEAAD-20260402/cell_type_benchmarks_v1.json
Interval method: leave-one-benchmark-out sensitivity over source-study support scores

# Candidate-driver test plan artifact consumed by the priority table below.
driver_plan_path = REPO / 'data/analysis_outputs/analysis-SEAAD-20260402/driver_candidate_tests_v2.json'
# JSON text is UTF-8; do not depend on the platform's locale encoding.
driver_plan = json.loads(driver_plan_path.read_text(encoding='utf-8'))

def sensitivity_interval_for_model(model):
    """Score one candidate-driver model and bound it by leave-one-source-out.

    Args:
        model: Mapping with 'benchmark_support_by_source' (source -> support
            score), 'prior_confidence' and 'assay_readiness'.

    Returns:
        Tuple ``(center, lo, hi)``: the rank score at the mean support, and
        the rank scores at the lowest and highest leave-one-source-out mean.
        With a single source no source can be left out, so ``lo`` and ``hi``
        both equal ``center``.

    Raises:
        ValueError: If the model lists no benchmark sources.
        KeyError: If one of the required keys is missing.
    """
    values = list(model['benchmark_support_by_source'].values())
    if not values:
        raise ValueError('model has no benchmark sources to score')
    prior = model['prior_confidence']
    readiness = model['assay_readiness']

    def rank(support):
        # Combine cross-dataset support, prior confidence, and assay readiness into a conservative
        # rank score. This is not a fitted causal effect; it prioritizes which test should be run first.
        return 0.45 * support + 0.35 * prior + 0.20 * readiness

    total = sum(values)
    center = rank(total / len(values))
    if len(values) == 1:
        # (total - v) / (n - 1) would divide by zero; collapse the interval.
        return center, center, center
    loo = [(total - v) / (len(values) - 1) for v in values]
    return center, rank(min(loo)), rank(max(loo))

def _plan_row(model):
    """Flatten one candidate-driver model into a row of the priority table."""
    center, lo, hi = sensitivity_interval_for_model(model)
    return {
        'candidate_driver': model['candidate_driver'],
        # Cell-type keys use underscores; show them as plain words.
        'cell_type': model['cell_type'].replace('_', ' '),
        'priority_score': round(center, 3),
        'interval_low': round(lo, 3),
        'interval_high': round(hi, 3),
        'assay_readiness': model['assay_readiness'],
        'module': ', '.join(model['marker_module']),
        'primary_endpoint': model['primary_endpoint'],
        'falsifying_pattern': model['falsifying_pattern'],
    }


plan_rows = [_plan_row(m) for m in driver_plan['candidate_driver_models']]
plan_df = pd.DataFrame(plan_rows).sort_values('priority_score', ascending=False)

print('Driver-test artifact:', driver_plan_path.relative_to(REPO))
print('Shared covariates:', ', '.join(driver_plan['shared_covariates']))

# Two views of the same plan: the scores first, then the readouts per driver.
score_cols = ['candidate_driver', 'cell_type', 'priority_score', 'interval_low', 'interval_high', 'assay_readiness', 'module']
readout_cols = ['candidate_driver', 'primary_endpoint', 'falsifying_pattern']
display(plan_df[score_cols])
display(plan_df[readout_cols])

# Dot-and-whisker chart of the priority scores, lowest score at the bottom.
plot_df = plan_df.sort_values('priority_score')
y = range(len(plot_df))
scores = plot_df['priority_score']
# Asymmetric whiskers: distance from the score down to / up to the interval ends.
whiskers = [scores - plot_df['interval_low'], plot_df['interval_high'] - scores]
highlighted = {'astrocytes', 'excitatory neurons'}
colors = ['#2d7dd2' if ct in highlighted else '#7a7a7a' for ct in plot_df['cell_type']]

fig, ax = plt.subplots(figsize=(8.5, 3.8))
ax.errorbar(scores, y, xerr=whiskers, fmt='o', color='#1f2933', ecolor='#8aa0b8', capsize=4)
# Coloured markers are drawn over the error-bar markers (higher zorder).
ax.scatter(scores, y, s=90, c=colors, zorder=3)
ax.set_yticks(list(y))
ax.set_yticklabels(plot_df['candidate_driver'])
ax.set_xlabel('Driver-test priority score with leave-one-source-out sensitivity')
ax.set_xlim(0.45, 0.90)
ax.axvline(0.70, color='#b23b3b', linestyle='--', linewidth=1, label='Exchange-ready threshold')
ax.grid(axis='x', alpha=0.25)
ax.legend(loc='lower right')
plt.tight_layout()
plt.show()

Driver-test artifact: data/analysis_outputs/analysis-SEAAD-20260402/driver_candidate_tests_v2.json
Shared covariates: tau_burden, amyloid_burden, donor_age, sex, brain_region, neuronal_fraction, astrocyte_module, microglial_DAM_score

	gene	name	protein_class	disease_involvement	ensembl_id
0	TREM2	triggering receptor expressed on myeloid cells 2	Disease related genes, Human disease related g...	Alzheimer disease, Amyloidosis	ENSG00000095970
1	GFAP	glial fibrillary acidic protein	Candidate cardiovascular disease genes, Diseas...	Disease variant, Leukodystrophy	ENSG00000131095
2	SLC17A7	solute carrier family 17 member 7	Metabolic proteins, Predicted membrane proteins		ENSG00000104888
3	PDGFRA	platelet derived growth factor receptor alpha	Cancer-related genes, CD markers	Cancer-related genes, Disease variant	ENSG00000134853
4	PDGFRB	platelet derived growth factor receptor beta	Cancer-related genes, CD markers	Cancer-related genes, Disease variant	ENSG00000113721
5	APOE	apolipoprotein E	Cancer-related genes, Candidate cardiovascular...	Alzheimer disease, Amyloidosis	ENSG00000130203
6	MAPT	microtubule associated protein tau
7	APP	amyloid beta precursor protein	Disease related genes, FDA approved drug targets	Alzheimer disease, Amyloidosis	ENSG00000142192
8	PSEN1	presenilin 1	Cancer-related genes, Disease related genes	Alzheimer disease, Amyloidosis	ENSG00000080815
9	TYROBP	transmembrane immune signaling adaptor TYROBP	Disease related genes, Human disease related g...		ENSG00000011600
10	CLU	clusterin	Cancer-related genes, Candidate cardiovascular...	Cancer-related genes	ENSG00000120885

	term	n_hits	p_value	odds_ratio	genes
0	Microglial Cell Activation (GO:0001774)	5	2.68e-13	1109.7	APP, TYROBP, TREM2, MAPT, CLU
1	Astrocyte Activation (GO:0048143)	4	2.44e-11	1427.2	APP, TREM2, MAPT, PSEN1
2	Astrocyte Development (GO:0014002)	4	6.74e-11	1037.8	APP, TREM2, MAPT, PSEN1
3	Regulation Of Amyloid Fibril Formation (GO:190...	4	6.74e-11	1037.8	APP, TREM2, PSEN1, CLU
4	Positive Regulation Of Supramolecular Fiber Or...	5	1.04e-09	182.2	APP, MAPT, APOE, PSEN1, CLU
5	Macrophage Activation (GO:0042116)	4	2.57e-09	367.9	APP, TREM2, MAPT, CLU
6	Negative Regulation Of Long-Term Synaptic Pote...	3	6.92e-09	1498.8	APP, TYROBP, APOE
7	Memory (GO:0007613)	4	2.07e-08	211.0	TREM2, MAPT, APOE, PSEN1
8	Positive Regulation Of ERK1 And ERK2 Cascade (...	5	2.40e-08	94.9	PDGFRB, APP, PDGFRA, TREM2, APOE
9	Regulation Of Amyloid-Beta Clearance (GO:1900221)	3	5.61e-08	624.3	TREM2, APOE, CLU

	term	p_value	odds_ratio	genes
0	Glial cell:Undefined	5.61e-08	624.3	PDGFRB, PDGFRA, GFAP
1	T Helper cell:Undefined	1.62e-06	182.5	PDGFRB, PDGFRA, APOE
2	Pericyte:Muscle	2.75e-06	1480.4	PDGFRB, PDGFRA
3	Fibroblast:Lung	9.88e-06	634.3	PDGFRB, PDGFRA
4	Megakaryocyte Erythroid cell:Undefined	1.42e-05	85.8	PDGFRB, PDGFRA, APOE
5	T cell:Undefined	1.67e-05	81.1	PDGFRB, PDGFRA, APOE
6	Cardiac Progenitor cell:Embryonic Stem Cell	1.94e-05	76.9	PDGFRB, APP, PDGFRA
7	Cardiomyocyte:Embryonic Stem Cell	1.94e-05	76.9	PDGFRB, APP, PDGFRA
8	Periosteum-derived Progenitor cell:Periosteum	2.00e-05	76.1	PDGFRB, PDGFRA, TREM2
9	Hepatoblast:Liver	2.06e-05	75.3	PDGFRB, PDGFRA, APOE

	brain_region	dendrite_type	layer	specimen_count
0	"middle temporal gyrus"	spiny	L3	47
1	"middle temporal gyrus"	aspiny	L3	8
2	"middle temporal gyrus"	spiny	L5	7
3	"frontal lobe"	spiny	L3	7
4	"middle temporal gyrus"	spiny	L4	6
5	"temporal lobe"	spiny	L3	6
6	"middle temporal gyrus"	aspiny	L4	3
7	"inferior frontal gyrus"	spiny	L3	2
8	"inferior frontal gyrus"	aspiny	L3	2
9	"middle temporal gyrus"	spiny	L2	2
10	"temporal lobe"	spiny	L2	2
11	"temporal lobe"	spiny	L5	2
12	"inferior temporal gyrus"	spiny	L5	1
13	"frontal lobe"	spiny	L6	1
14	"middle temporal gyrus"	aspiny	L5	1

Gap	Task
Bulk SEA-AD h5ad download + local cache	`19c06875`
Per-cell DE from SEA-AD in the debate loop	`70b96f50`
ABC Atlas + MERFISH spatial queries	`f9ba4c33`
Forge data-validation layer	`4bd2f9de`

SEA-AD Cell-Type Vulnerability Analysis

Notebook Overview

SEA-AD Cell-Type Vulnerability Analysis

Research question

Approach

1. Forge tool chain

2. Target gene annotations (MyGene.info + Human Protein Atlas)

3. GO Biological Process enrichment (Enrichr)

4. Cell-type enrichment (Enrichr CellMarker)

5. STRING physical protein interaction network

6. Reactome pathway footprint per gene

7. Allen Brain Cell Atlas — cell-type specimen metadata

8. Allen Brain Atlas ISH regional expression

9. Evidence bound to analysis hypotheses

Hypothesis 1: Complement C1QA Spatial Gradient in Cortical Layers

Molecular Mechanism of C1QA-Mediated Synaptic Elimination

Hypothesis 2: Cell-Type Specific TREM2 Upregulation in DAM Microglia

TREM2 Molecular Biology and Signaling

Hypothesis 3: Excitatory Neuron Vulnerability via SLC17A7 Downregulation

Molecular Function of SLC17A7/VGLUT1

Hypothesis 4: APOE Isoform Expression Across Glial Subtypes

Hypothesis 5: GFAP-Positive Reactive Astrocyte Subtype Delineation

GFAP Biology and the Astrocyte Reactivity Spectrum

10. Mechanistic differential-expression synthesis

11. Caveats & what's still aggregated

12. Cross-dataset benchmark: microglia, neurons, astrocytes, oligodendrocytes, and OPCs

13. Exchange-ready debate seed and falsification plan

What would falsify this

14. Candidate-driver test matrix with falsifiable readouts

Notebook Outline

Cell Navigator

💬 Discussion

	gene	n_pathways	top_pathway
8	PSEN1	8	Nuclear signaling by ERBB4
5	APOE	8	Nuclear signaling by ERBB4
3	PDGFRA	8	PIP3 activates AKT signaling
7	APP	8	Platelet degranulation
9	TYROBP	6	Immunoregulatory interactions between a Lympho...
4	PDGFRB	6	PIP3 activates AKT signaling
0	TREM2	4	Immunoregulatory interactions between a Lympho...
10	CLU	4	Platelet degranulation
6	MAPT	3	Caspase-mediated cleavage of cytoskeletal prot...
2	SLC17A7	2	Glutamate Neurotransmitter Release Cycle
1	GFAP	2	Nuclear signaling by ERBB4

	gene	top_region	top_energy	note
0	TREM2	—	—	No ISH data available; check portal for microa...
1	GFAP	—	—	No ISH data available; check portal for microa...
2	SLC17A7	—	—	No ISH data available; check portal for microa...
3	PDGFRA	—	—	No ISH data available; check portal for microa...
4	PDGFRB	—	—	No ISH data available; check portal for microa...
5	APOE	—	—	No ISH data available; check portal for microa...
6	MAPT	—	—	No ISH data available; check portal for microa...
7	APP	—	—	No ISH data available; check portal for microa...
8	PSEN1	—	—	No ISH data available; check portal for microa...
9	TYROBP	—	—	No ISH data available; check portal for microa...
10	CLU	—	—	No ISH data available; check portal for microa...

	cell_type	mean_vulnerability	ci_low	ci_high	n_benchmarks	top_markers	rationale
0	Excitatory neurons	0.854	0.837	0.882	5	SLC17A7, RORB, RELN, APP	Neuron loss and tau-bearing vulnerable excitat...
1	Astrocytes	0.716	0.690	0.750	5	APOE, GFAP, C3, S100B	Reactive and resilience-associated astrocyte p...
2	Microglia	0.674	0.638	0.730	5	TREM2, TYROBP, APOE, CST7	Disease-associated microglial activation is ro...
3	Oligodendrocytes	0.612	0.570	0.665	5	MBP, MOG, PLP1, MAG	Myelination and oligodendrocyte programs are r...
4	OPCs	0.448	0.425	0.473	5	PDGFRA, CSPG4, VCAN, SOX10	OPC expansion/differentiation stress is plausi...

	candidate_driver	cell_type	driver_score	ci_low	ci_high	genes	testable_direction
0	RORB/RELN excitatory-neuron vulnerability	Excitatory neurons	0.835	0.828	0.848	RORB, RELN, SLC17A7, MAPT	Higher RORB/RELN vulnerable-subtype depletion ...
1	APOE/GFAP/C3 reactive astrocyte transition	Astrocytes	0.729	0.718	0.745	APOE, GFAP, C3, CXCL10	APOE/GFAP/C3 modules should rise with plaque a...
2	TREM2/TYROBP disease-associated microglia	Microglia	0.688	0.672	0.714	TREM2, TYROBP, APOE, CST7, LPL	TREM2/TYROBP module strength should be highest...
3	MBP/MOG/PLP1 oligodendrocyte myelin stress	Oligodendrocytes	0.616	0.598	0.640	MBP, MOG, PLP1, MAG	Myelin-module suppression should predict vulne...
4	PDGFRA/CSPG4 OPC differentiation bottleneck	OPCs	0.471	0.461	0.482	PDGFRA, CSPG4, VCAN, SOX10	High OPC abundance coupled to low mature-oligo...

	benchmark	pmid	doi	design
0	SEA-AD multimodal atlas	39402332	10.1038/s43587-024-00719-8	Middle temporal gyrus and companion modalities...
1	Mathys ROSMAP PFC snRNA-seq	31042697	10.1038/s41586-019-1195-2	80,660 prefrontal-cortex single-nucleus transc...
2	Grubman entorhinal cortex scRNA-seq	31802004	10.1038/s41593-019-0539-4	Entorhinal-cortex single-cell atlas contrastin...
3	Leng vulnerable-neuron atlas	33432193	10.1038/s41593-020-00764-7	Caudal entorhinal cortex and superior frontal ...
4	Mathys multiregion AD atlas	39048816	10.1038/s41586-024-07606-7	1.3 million nuclei from six brain regions in 4...

Persona	Argument
Theorist	Astrocytes repeatedly carry APOE, GFAP, complement, and chemokine programs across AD atlases. If this module remains predictive after cell-fraction and pathology adjustment, it explains why bulk APOE/complement signals map to synaptic failure rather than only to plaque-associated microglia.
Skeptic	Reactive astrocyte signatures can be a downstream response to local neuron death, plaque load, or microglial cytokines. Without spatial or longitudinal perturbation, an independent regression coefficient may still reflect unmeasured disease severity.
Expert	The right test is not astrocyte activation versus no activation; it is whether astrocyte module strength adds out-of-sample predictive information beyond tau, amyloid, cell composition, donor covariates, and DAM score across regions and cohorts.

	candidate_driver	cell_type	priority_score	interval_low	interval_high	assay_readiness	module
0	RORB/RELN excitatory-neuron vulnerability	excitatory neurons	0.857	0.850	0.870	0.93	RORB, RELN, SLC17A7, MAPT
1	APOE/GFAP/C3 reactive astrocyte transition	astrocytes	0.761	0.750	0.777	0.90	APOE, GFAP, C3, CXCL10
2	TREM2/TYROBP disease-associated microglia	microglia	0.720	0.704	0.746	0.86	TREM2, TYROBP, APOE, CST7, LPL
3	MBP/MOG/PLP1 oligodendrocyte myelin stress	oligodendrocytes	0.648	0.630	0.672	0.78	MBP, MOG, PLP1, MAG
4	PDGFRA/CSPG4 OPC differentiation bottleneck	opcs	0.497	0.487	0.508	0.62	PDGFRA, CSPG4, VCAN, SOX10

	candidate_driver	primary_endpoint	falsifying_pattern
0	RORB/RELN excitatory-neuron vulnerability	Excitatory-neuron subtype depletion and SLC17A...	RORB/RELN depletion fails to track tau stage o...
1	APOE/GFAP/C3 reactive astrocyte transition	Out-of-sample prediction of excitatory-neuron ...	Astrocyte module coefficient is non-positive i...
2	TREM2/TYROBP disease-associated microglia	Plaque-proximal DAM module strength and mediat...	DAM module adds no explanatory power once plaq...
3	MBP/MOG/PLP1 oligodendrocyte myelin stress	Residual myelin-module suppression after adjus...	Myelin-module suppression vanishes after model...
4	PDGFRA/CSPG4 OPC differentiation bottleneck	OPC expansion paired with failed mature-oligod...	OPC abundance changes are not coupled to matur...