%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# SciDEX dark theme: dark figure/axes backgrounds, light text, muted tick
# marks, and 120 dpi for both on-screen and saved figures.
_SCIDEX_DARK_THEME = {
    # Backgrounds and frame
    'figure.facecolor': '#0a0a14',
    'axes.facecolor': '#151525',
    'axes.edgecolor': '#333',
    # Text and ticks
    'axes.labelcolor': '#e0e0e0',
    'text.color': '#e0e0e0',
    'xtick.color': '#888',
    'ytick.color': '#888',
    # Legend box
    'legend.facecolor': '#151525',
    'legend.edgecolor': '#333',
    # Resolution
    'figure.dpi': 120,
    'savefig.dpi': 120,
}
plt.rcParams.update(_SCIDEX_DARK_THEME)
print('Environment ready: numpy, pandas, matplotlib, scipy')

Environment ready: numpy, pandas, matplotlib, scipy

# One record per hypothesis: title, target gene, composite score, and the ten
# per-dimension scores.
hyp_data = [{"title": "TREM2-Dependent Microglial Senescence Transition", "gene": "TREM2", "composite": 0.85, "mech": 0.88, "evid": 0.82, "novel": 0.78, "feas": 0.72, "impact": 0.91, "drug": 0.65, "safety": 0.58, "comp": 0.7, "data": 0.85, "reprod": 0.75}, {"title": "Complement-Mediated Synaptic Pruning Dysregulation", "gene": "C1QA", "composite": 0.72, "mech": 0.5, "evid": 0.5, "novel": 0.5, "feas": 0.5, "impact": 0.5, "drug": 0.5, "safety": 0.5, "comp": 0.5, "data": 0.5, "reprod": 0.5}, {"title": "TFEB-PGC1\u03b1 Mitochondrial-Lysosomal Decoupling", "gene": "TFEB", "composite": 0.68, "mech": 0.5, "evid": 0.5, "novel": 0.5, "feas": 0.5, "impact": 0.5, "drug": 0.5, "safety": 0.5, "comp": 0.5, "data": 0.5, "reprod": 0.5}, {"title": "Oligodendrocyte White Matter Vulnerability", "gene": "MOG", "composite": 0.55, "mech": 0.5, "evid": 0.5, "novel": 0.5, "feas": 0.5, "impact": 0.5, "drug": 0.5, "safety": 0.5, "comp": 0.5, "data": 0.5, "reprod": 0.5}]

# Give the identifying columns reader-friendly names; the per-dimension
# columns keep their short keys.
column_renames = {'title': 'Hypothesis', 'gene': 'Target Gene', 'composite': 'Score'}
df = pd.DataFrame(hyp_data).rename(columns=column_renames)

# Trailing expression: the notebook renders this subset as the cell's output.
preview_columns = ['Hypothesis', 'Target Gene', 'Score',
                   'mech', 'evid', 'novel', 'feas', 'impact']
df[preview_columns].round(3)

# Hypothesis Ranking — Composite Score Bar Chart
# Horizontal bars, one per hypothesis, coloured by score band with the two
# band thresholds drawn as dashed guide lines.
hyp_data = [{"title": "TREM2-Dependent Microglial Senescence Transition", "gene": "TREM2", "composite": 0.85, "mech": 0.88, "evid": 0.82, "novel": 0.78, "feas": 0.72, "impact": 0.91, "drug": 0.65, "safety": 0.58, "comp": 0.7, "data": 0.85, "reprod": 0.75}, {"title": "Complement-Mediated Synaptic Pruning Dysregulation", "gene": "C1QA", "composite": 0.72, "mech": 0.5, "evid": 0.5, "novel": 0.5, "feas": 0.5, "impact": 0.5, "drug": 0.5, "safety": 0.5, "comp": 0.5, "data": 0.5, "reprod": 0.5}, {"title": "TFEB-PGC1\u03b1 Mitochondrial-Lysosomal Decoupling", "gene": "TFEB", "composite": 0.68, "mech": 0.5, "evid": 0.5, "novel": 0.5, "feas": 0.5, "impact": 0.5, "drug": 0.5, "safety": 0.5, "comp": 0.5, "data": 0.5, "reprod": 0.5}, {"title": "Oligodendrocyte White Matter Vulnerability", "gene": "MOG", "composite": 0.55, "mech": 0.5, "evid": 0.5, "novel": 0.5, "feas": 0.5, "impact": 0.5, "drug": 0.5, "safety": 0.5, "comp": 0.5, "data": 0.5, "reprod": 0.5}]

# Tick labels are the first 40 characters of each title; a record without a
# composite score is plotted as 0.
titles = [h['title'][:40] for h in hyp_data]
scores = [h.get('composite', 0) for h in hyp_data]

# Colour bands: >= 0.6 blue, >= 0.4 orange, otherwise red.
colors = []
for s in scores:
    if s >= 0.6:
        colors.append('#4fc3f7')
    elif s >= 0.4:
        colors.append('#ff8a65')
    else:
        colors.append('#ef5350')

# Figure height grows with the number of hypotheses, with a floor of 4 inches.
fig_height = max(4, len(hyp_data) * 0.8)
fig, ax = plt.subplots(figsize=(14, fig_height))

y_positions = range(len(titles))
bars = ax.barh(y_positions, scores, color=colors, alpha=0.85, edgecolor='#333')
ax.set_yticks(y_positions)
ax.set_yticklabels(titles, fontsize=9)
ax.set_xlabel('Composite Score', fontsize=11)
ax.set_xlim(0, 1)
ax.set_title('Hypothesis Ranking by Composite Score', fontsize=14,
             color='#4fc3f7', fontweight='bold')

# Guide lines at the same cut-offs used for the bar colours.
ax.axvline(x=0.6, color='#81c784', linestyle='--', alpha=0.5, label='Strong threshold')
ax.axvline(x=0.4, color='#ffd54f', linestyle='--', alpha=0.5, label='Moderate threshold')
ax.legend(fontsize=8, facecolor='#151525', edgecolor='#333', labelcolor='#e0e0e0')

# Print each score just past the end of its bar, vertically centred on it.
for bar, score in zip(bars, scores):
    label_y = bar.get_y() + bar.get_height()/2
    ax.text(score + 0.01, label_y, f'{score:.3f}',
            va='center', fontsize=9, color='#e0e0e0')

plt.tight_layout()
plt.show()

# Score Heatmap — All Hypotheses x Dimensions
# One row per hypothesis, one column per scoring dimension, colour-mapped on a
# fixed 0..1 scale with each cell annotated by its numeric value.
hyp_data = [{"title": "TREM2-Dependent Microglial Senescence Transition", "gene": "TREM2", "composite": 0.85, "mech": 0.88, "evid": 0.82, "novel": 0.78, "feas": 0.72, "impact": 0.91, "drug": 0.65, "safety": 0.58, "comp": 0.7, "data": 0.85, "reprod": 0.75}, {"title": "Complement-Mediated Synaptic Pruning Dysregulation", "gene": "C1QA", "composite": 0.72, "mech": 0.5, "evid": 0.5, "novel": 0.5, "feas": 0.5, "impact": 0.5, "drug": 0.5, "safety": 0.5, "comp": 0.5, "data": 0.5, "reprod": 0.5}, {"title": "TFEB-PGC1\u03b1 Mitochondrial-Lysosomal Decoupling", "gene": "TFEB", "composite": 0.68, "mech": 0.5, "evid": 0.5, "novel": 0.5, "feas": 0.5, "impact": 0.5, "drug": 0.5, "safety": 0.5, "comp": 0.5, "data": 0.5, "reprod": 0.5}, {"title": "Oligodendrocyte White Matter Vulnerability", "gene": "MOG", "composite": 0.55, "mech": 0.5, "evid": 0.5, "novel": 0.5, "feas": 0.5, "impact": 0.5, "drug": 0.5, "safety": 0.5, "comp": 0.5, "data": 0.5, "reprod": 0.5}]

# dim_keys and dim_labels are parallel lists: record key -> axis label.
dim_keys = ['mech', 'evid', 'novel', 'feas', 'impact', 'drug', 'safety', 'comp', 'data', 'reprod']
dim_labels = ['Mechanistic', 'Evidence', 'Novelty', 'Feasibility', 'Impact',
              'Druggability', 'Safety', 'Competition', 'Data Avail.', 'Reproducibility']

# Rows are hypotheses, columns are dimensions; a missing key is scored as 0.
matrix = np.array([[h.get(k, 0) for k in dim_keys] for h in hyp_data])
titles = [h['title'][:40] for h in hyp_data]  # truncated for the y tick labels

fig, ax = plt.subplots(figsize=(14, max(4, len(hyp_data) * 0.8)))
im = ax.imshow(matrix, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)

ax.set_xticks(range(len(dim_labels)))
ax.set_xticklabels(dim_labels, rotation=45, ha='right', fontsize=9)
ax.set_yticks(range(len(titles)))
ax.set_yticklabels(titles, fontsize=9)

# RdYlGn is a diverging map: palest around the midpoint and darkest at both
# ends. Choosing the label colour from the score itself (val > 0.5) put white
# text on the pale-yellow 0.50 cells, so pick it from the perceived brightness
# of the colour each cell is actually drawn with instead.
cell_rgba = im.cmap(im.norm(matrix))
cell_luminance = cell_rgba[..., :3] @ np.array([0.299, 0.587, 0.114])

for i in range(len(titles)):
    for j in range(len(dim_labels)):
        val = matrix[i, j]
        color = '#000' if cell_luminance[i, j] > 0.5 else '#fff'
        ax.text(j, i, f'{val:.2f}', ha='center', va='center', fontsize=7, color=color)

cbar = plt.colorbar(im, ax=ax, shrink=0.6)
cbar.set_label('Score', fontsize=10, color='#e0e0e0')
cbar.ax.yaxis.set_tick_params(color='#888')

ax.set_title('Score Heatmap: Hypotheses x Dimensions', fontsize=14,
             color='#4fc3f7', fontweight='bold')
plt.tight_layout()
plt.show()

# Multi-Dimensional Score Radar Chart
# Overlays up to five hypotheses on one polar axis, one spoke per dimension.
hyp_data = [{"title": "TREM2-Dependent Microglial Senescence Transition", "gene": "TREM2", "composite": 0.85, "mech": 0.88, "evid": 0.82, "novel": 0.78, "feas": 0.72, "impact": 0.91, "drug": 0.65, "safety": 0.58, "comp": 0.7, "data": 0.85, "reprod": 0.75}, {"title": "Complement-Mediated Synaptic Pruning Dysregulation", "gene": "C1QA", "composite": 0.72, "mech": 0.5, "evid": 0.5, "novel": 0.5, "feas": 0.5, "impact": 0.5, "drug": 0.5, "safety": 0.5, "comp": 0.5, "data": 0.5, "reprod": 0.5}, {"title": "TFEB-PGC1\u03b1 Mitochondrial-Lysosomal Decoupling", "gene": "TFEB", "composite": 0.68, "mech": 0.5, "evid": 0.5, "novel": 0.5, "feas": 0.5, "impact": 0.5, "drug": 0.5, "safety": 0.5, "comp": 0.5, "data": 0.5, "reprod": 0.5}, {"title": "Oligodendrocyte White Matter Vulnerability", "gene": "MOG", "composite": 0.55, "mech": 0.5, "evid": 0.5, "novel": 0.5, "feas": 0.5, "impact": 0.5, "drug": 0.5, "safety": 0.5, "comp": 0.5, "data": 0.5, "reprod": 0.5}]

# Parallel lists: record key -> spoke label.
dim_keys = ['mech', 'evid', 'novel', 'feas', 'impact', 'drug', 'safety', 'comp', 'data', 'reprod']
dimensions = ['Mechanistic', 'Evidence', 'Novelty', 'Feasibility', 'Impact',
              'Druggability', 'Safety', 'Competition', 'Data Avail.', 'Reproducibility']

# Evenly spaced spoke angles; the first angle is repeated at the end so each
# outline closes back on its starting point.
angles = np.linspace(0, 2 * np.pi, len(dimensions), endpoint=False).tolist()
angles.append(angles[0])

fig, ax = plt.subplots(figsize=(10, 8), subplot_kw=dict(polar=True))

colors = ['#4fc3f7', '#81c784', '#ff8a65', '#ce93d8', '#ffd54f', '#ef5350', '#a5d6a7']
for i, h in enumerate(hyp_data[:5]):
    line_color = colors[i % len(colors)]
    # A missing dimension is drawn as 0; the first value is repeated to match
    # the closed angle list.
    values = [h.get(k, 0) for k in dim_keys]
    values.append(values[0])
    ax.plot(angles, values, 'o-', linewidth=2, color=line_color,
            label=h['title'][:35], alpha=0.8)
    ax.fill(angles, values, alpha=0.1, color=line_color)

# The duplicated closing angle gets no tick of its own.
ax.set_xticks(angles[:-1])
ax.set_xticklabels(dimensions, fontsize=8)
ax.set_ylim(0, 1)
ax.set_title('Hypothesis Score Radar', fontsize=14, color='#4fc3f7',
             fontweight='bold', pad=20)
ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1), fontsize=7,
          facecolor='#151525', edgecolor='#333', labelcolor='#e0e0e0')
plt.tight_layout()
plt.show()

# Differential Gene Expression Analysis
# Simulated expression data based on literature-reported fold changes
# (all values below are drawn from a seeded RNG; nothing is measured).
np.random.seed(42)
genes = ["C1QA", "TFEB", "TREM2", "MOG"]
n_samples = 20

results = []
for gene in genes:
    # Draw order matters for reproducibility: control samples, then the
    # disease-group mean shift, then the disease samples.
    control = np.random.normal(loc=8.0, scale=0.8, size=n_samples)
    shift = np.random.uniform(-2, 2)
    disease = np.random.normal(loc=8.0 + shift, scale=1.2, size=n_samples)

    control_mean = np.mean(control)
    disease_mean = np.mean(disease)
    t_stat, p_val = stats.ttest_ind(control, disease)

    # Values are treated as log2 expression, so the difference of group means
    # is reported as the log2 fold change.
    log2fc = disease_mean - control_mean
    record = {
        'gene': gene,
        'log2fc': log2fc,
        'p_value': p_val,
        # p is floored at 1e-10 so the -log10 value is capped at 10.
        'neg_log10_p': -np.log10(max(p_val, 1e-10)),
        'control_mean': control_mean,
        'disease_mean': disease_mean,
    }
    results.append(record)

# Volcano plot + Expression comparison
# Left panel: log2 fold change vs -log10(p) for each gene in `results`.
# Right panel: control vs disease mean expression per gene.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

log2fcs = [r['log2fc'] for r in results]
neg_log_ps = [r['neg_log10_p'] for r in results]
gene_labels = [r['gene'] for r in results]

# Single source of truth for the significance cut-offs. The horizontal line is
# labelled "p=0.05" and the summary table tests p < 0.05, so the y threshold is
# derived from that p-value instead of the rounded literal 1.3.
fc_cutoff = 0.5
p_cutoff = 0.05
nlp_cutoff = -np.log10(p_cutoff)

# Red = passes both the fold-change and the p-value cut-off; grey otherwise.
colors = ['#ef5350' if abs(fc) > fc_cutoff and nlp > nlp_cutoff else '#888888'
          for fc, nlp in zip(log2fcs, neg_log_ps)]

ax1.scatter(log2fcs, neg_log_ps, c=colors, s=100, alpha=0.8, edgecolors='#333')
for i, gene in enumerate(gene_labels):
    ax1.annotate(gene, (log2fcs[i], neg_log_ps[i]), fontsize=8, color='#e0e0e0',
                 xytext=(5, 5), textcoords='offset points')
ax1.axhline(y=nlp_cutoff, color='#ffd54f', linestyle='--', alpha=0.5, label='p=0.05')
ax1.axvline(x=-fc_cutoff, color='#888', linestyle='--', alpha=0.3)
ax1.axvline(x=fc_cutoff, color='#888', linestyle='--', alpha=0.3)
ax1.set_xlabel('log2(Fold Change)', fontsize=11)
ax1.set_ylabel('-log10(p-value)', fontsize=11)
ax1.set_title('Volcano Plot: Differential Expression', fontsize=13,
              color='#4fc3f7', fontweight='bold')
ax1.legend(fontsize=8, facecolor='#151525', edgecolor='#333', labelcolor='#e0e0e0')

# Expression barplot
# Bar positions and tick labels both come from `results`, so each pair of bars
# is always labelled with the gene it was computed for.
x = np.arange(len(gene_labels))
width = 0.35
ctrl_means = [r['control_mean'] for r in results]
dis_means = [r['disease_mean'] for r in results]

# Paired bars: control shifted half a width left, disease half a width right.
ax2.bar(x - width/2, ctrl_means, width, label='Control', color='#4fc3f7', alpha=0.8)
ax2.bar(x + width/2, dis_means, width, label='Disease', color='#ef5350', alpha=0.8)
ax2.set_xticks(x)
ax2.set_xticklabels(gene_labels, rotation=45, ha='right', fontsize=9)
ax2.set_ylabel('Expression Level (log2)', fontsize=11)
ax2.set_title('Gene Expression: Control vs Disease', fontsize=13,
              color='#4fc3f7', fontweight='bold')
ax2.legend(fontsize=9, facecolor='#151525', edgecolor='#333', labelcolor='#e0e0e0')

plt.tight_layout()
plt.show()

# Statistical summary
# Text table of the per-gene results, most significant (smallest p) first.
summary_lines = [
    "\nDifferential Expression Summary",
    "=" * 70,
    f"{'Gene':<15} {'log2FC':>10} {'p-value':>12} {'Significant':>12}",
    "-" * 70,
]
for r in sorted(results, key=lambda row: row['p_value']):
    # A gene is flagged only when it clears both the effect-size and the
    # p-value threshold.
    passes_both = r['p_value'] < 0.05 and abs(r['log2fc']) > 0.5
    sig = 'YES' if passes_both else 'no'
    summary_lines.append(
        f"{r['gene']:<15} {r['log2fc']:>10.3f} {r['p_value']:>12.2e} {sig:>12}"
    )
print("\n".join(summary_lines))

Differential Expression Summary
======================================================================
Gene                log2FC      p-value  Significant
----------------------------------------------------------------------
MOG                 -1.782     1.02e-07          YES
TREM2                1.539     3.97e-07          YES
TFEB                -0.366     2.80e-01           no
C1QA                 0.247     4.65e-01           no

# Pathway Enrichment Analysis
# Enrichment scores, p-values and gene counts are all drawn from a seeded RNG;
# the pathways are then ranked by enrichment score, highest first.
np.random.seed(42)
genes = ["C1QA", "TFEB", "TREM2", "MOG"]

pathways = [
    'Neuroinflammation Signaling',
    'Protein Aggregation Response',
    'Synaptic Plasticity',
    'Autophagy-Lysosome Pathway',
    'Mitochondrial Dysfunction',
    'Oxidative Stress Response',
    'Apoptosis Regulation',
    'Cytokine Signaling',
    'Calcium Homeostasis',
    'Lipid Metabolism',
    'DNA Damage Response',
    'Proteasome Degradation',
]
n_pathways = len(pathways)

# Draw order is fixed (scores, then p-values, then counts) so the seeded
# results are reproducible.
enrichment_scores = 1 + np.random.exponential(2, n_pathways)
p_values = 10 ** (-np.random.uniform(1, 8, n_pathways))
# randint's upper bound is exclusive, so counts fall in [2, max(3, len(genes))).
gene_counts = np.random.randint(2, max(3, len(genes)), n_pathways)

# Reorder every per-pathway sequence by descending enrichment score.
idx = np.argsort(enrichment_scores)[::-1]
pathways = [pathways[k] for k in idx]
enrichment_scores, p_values, gene_counts = (
    values[idx] for values in (enrichment_scores, p_values, gene_counts)
)

# Two side-by-side views of the ranked pathways: a dot plot of enrichment
# score (left) and a bar chart of -log10(p) (right).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 8))

# Shared by both panels: one y slot per pathway and the transformed p-values.
y_slots = range(len(pathways))
neg_log_p = -np.log10(p_values)

# Dot plot
# Marker area scales with gene count; marker colour encodes -log10(p).
sizes = gene_counts * 30
colors = neg_log_p
scatter = ax1.scatter(enrichment_scores, y_slots, s=sizes,
                      c=colors, cmap='YlOrRd', alpha=0.8, edgecolors='#333')
ax1.set_yticks(y_slots)
ax1.set_yticklabels(pathways, fontsize=9)
ax1.set_xlabel('Enrichment Score', fontsize=11)
ax1.set_title('Pathway Enrichment Analysis', fontsize=13,
              color='#4fc3f7', fontweight='bold')
cbar = plt.colorbar(scatter, ax=ax1, shrink=0.6)
cbar.set_label('-log10(p-value)', fontsize=9, color='#e0e0e0')
cbar.ax.yaxis.set_tick_params(color='#888')


# Significance bars
def _significance_color(p):
    """Return the bar colour for the significance tier that p falls into."""
    if p < 0.001:
        return '#ef5350'
    if p < 0.01:
        return '#ff8a65'
    if p < 0.05:
        return '#ffd54f'
    return '#888'


bar_colors = [_significance_color(p) for p in p_values]
ax2.barh(y_slots, neg_log_p, color=bar_colors, alpha=0.8, edgecolor='#333')
ax2.set_yticks(y_slots)
ax2.set_yticklabels(pathways, fontsize=9)
ax2.set_xlabel('-log10(p-value)', fontsize=11)
ax2.set_title('Statistical Significance', fontsize=13,
              color='#4fc3f7', fontweight='bold')
# Reference lines at the p = 0.05 and p = 0.001 levels.
ax2.axvline(x=-np.log10(0.05), color='#ffd54f', linestyle='--', alpha=0.7, label='p=0.05')
ax2.axvline(x=-np.log10(0.001), color='#ef5350', linestyle='--', alpha=0.7, label='p=0.001')
ax2.legend(fontsize=8, facecolor='#151525', edgecolor='#333', labelcolor='#e0e0e0')

plt.tight_layout()
plt.show()

# Text table of the ranked pathways, in the same order as the plots.
table_lines = [
    "\nPathway Enrichment Summary",
    "=" * 80,
    f"{'Pathway':<35} {'Enrichment':>12} {'p-value':>12} {'Genes':>8}",
    "-" * 80,
]
table_lines.extend(
    f"{pw:<35} {es:>12.2f} {pv:>12.2e} {gc:>8}"
    for pw, es, pv, gc in zip(pathways, enrichment_scores, p_values, gene_counts)
)
print("\n".join(table_lines))

Pathway Enrichment Summary
================================================================================
Pathway                               Enrichment      p-value    Genes
--------------------------------------------------------------------------------
Proteasome Degradation                      8.01     2.73e-04        2
Protein Aggregation Response                7.02     3.26e-03        3
Cytokine Signaling                          5.02     9.15e-04        3
Synaptic Plasticity                         3.63     5.34e-03        2
Lipid Metabolism                            3.46     1.06e-02        2
Calcium Homeostasis                         2.84     5.21e-06        3
Autophagy-Lysosome Pathway                  2.83     5.20e-03        3
Neuroinflammation Signaling                 1.94     1.49e-07        3
Mitochondrial Dysfunction                   1.34     7.42e-04        2
Oxidative Stress Response                   1.34     2.12e-05        3
Apoptosis Regulation                        1.12     9.47e-05        2
DNA Damage Response                         1.04     9.02e-04        2

# Knowledge Graph Network — Edges from this analysis
# Reads the 30 strongest edges recorded for this analysis from the SciDEX
# SQLite database, prints them as a table, and draws the top 15 on a circle.
import sqlite3
import json
from contextlib import closing

# closing() releases the connection even if the query raises; sqlite3's own
# context manager only commits or rolls back and does not close.
with closing(sqlite3.connect('/home/ubuntu/scidex/scidex.db', timeout=30)) as db:
    db.row_factory = sqlite3.Row
    edges = db.execute('''
        SELECT source_id, target_id, relation, evidence_strength
        FROM knowledge_edges WHERE analysis_id = ?
        ORDER BY evidence_strength DESC LIMIT 30
    ''', ('SDA-2026-04-02-gap-aging-mouse-brain-v5-20260402',)).fetchall()

# Convert each Row to a plain dict once so .get() is available below.
edge_dicts = [dict(row) for row in edges]

if edge_dicts:
    print(f"Top Knowledge Graph Edges ({len(edge_dicts)} shown)")
    print("=" * 80)
    print(f"{'Source':<25} {'Relation':<20} {'Target':<25} {'Conf':>6}")
    print("-" * 80)
    for e in edge_dicts:
        # A NULL evidence_strength is printed as 0.00.
        print(f"{e['source_id'][:24]:<25} {e['relation'][:19]:<20} "
              f"{e['target_id'][:24]:<25} {e.get('evidence_strength', 0) or 0:>6.2f}")

    # Simple network visualization
    # Only the 15 strongest edges are drawn; a NULL strength is drawn as 0.5.
    top_edges = edge_dicts[:15]
    sources = [e['source_id'] for e in top_edges]
    targets = [e['target_id'] for e in top_edges]
    confs = [e.get('evidence_strength', 0.5) or 0.5 for e in top_edges]

    # De-duplicate in first-seen order. A set() here would order string nodes
    # by their randomised hashes, so the layout would change between runs.
    all_nodes = list(dict.fromkeys(sources + targets))

    # Place the nodes evenly around the unit circle.
    node_pos = {}
    n = len(all_nodes)
    for i, node in enumerate(all_nodes):
        angle = 2 * np.pi * i / n
        node_pos[node] = (np.cos(angle), np.sin(angle))

    fig, ax = plt.subplots(figsize=(12, 10))

    for s, t, c in zip(sources, targets, confs):
        x = [node_pos[s][0], node_pos[t][0]]
        y = [node_pos[s][1], node_pos[t][1]]
        # Stronger evidence draws a thicker, more opaque line. matplotlib
        # rejects alpha outside [0, 1], so clamp in case a stored strength
        # falls outside the expected range.
        edge_alpha = min(1.0, max(0.0, c * 0.8))
        ax.plot(x, y, '-', color='#4fc3f7', alpha=edge_alpha, linewidth=c * 3)

    for node, (x, y) in node_pos.items():
        # zorder=5 keeps the node markers on top of the edge lines.
        ax.scatter(x, y, s=200, c='#81c784', edgecolors='#333', zorder=5)
        ax.annotate(node[:20], (x, y), fontsize=7, color='#e0e0e0',
                    ha='center', va='bottom', xytext=(0, 8), textcoords='offset points')

    ax.set_title('Knowledge Graph: Key Relationships', fontsize=14,
                 color='#4fc3f7', fontweight='bold')
    # Limits leave a margin around the unit circle for the node labels.
    ax.set_xlim(-1.4, 1.4)
    ax.set_ylim(-1.4, 1.4)
    ax.set_aspect('equal')
    ax.axis('off')
    plt.tight_layout()
    plt.show()
else:
    print("No knowledge graph edges found for this analysis.")

Top Knowledge Graph Edges (30 shown)
================================================================================
Source                    Relation             Target                      Conf
--------------------------------------------------------------------------------
MOG                       studied_in           neurodegeneration           0.60
TFEB                      associated_with      neurodegeneration           0.50
MOG                       associated_with      neurodegeneration           0.50
TREM2                     co_discussed         LAMP1                       0.40
TREM2                     co_discussed         NLGN1                       0.40
C3                        co_discussed         C1QA                        0.40
C3                        co_discussed         LAMP1                       0.40
C3                        co_discussed         NLGN1                       0.40
C3                        co_discussed         ACSL4                       0.40
C1QA                      co_discussed         LAMP1                       0.40
C1QA                      co_discussed         NLGN1                       0.40
C1QA                      co_discussed         ACSL4                       0.40
LAMP1                     co_discussed         NLGN1                       0.40
LAMP1                     co_discussed         ACSL4                       0.40
NLGN1                     co_discussed         ACSL4                       0.40
ACSL4                     co_discussed         MOG                         0.40
ACSL4                     co_discussed         LAMP1                       0.40
ACSL4                     co_discussed         C1QA                        0.40
ACSL4                     co_discussed         NLGN1                       0.40
ACSL4                     co_discussed         TFEB                        0.40
ACSL4                     co_discussed         C3                          0.40
MOG                       co_discussed         LAMP1                       0.40
MOG                       co_discussed         C1QA                        0.40
MOG                       co_discussed         NLGN1                       0.40
MOG                       co_discussed         TFEB                        0.40
MOG                       co_discussed         TREM2                       0.40
MOG                       co_discussed         C3                          0.40
LAMP1                     co_discussed         C1QA                        0.40
LAMP1                     co_discussed         TREM2                       0.40
LAMP1                     co_discussed         C3                          0.40

# Statistical Analysis of Hypothesis Scores
# Prints four text reports for the hypothesis scores: per-dimension summary
# statistics, a correlation matrix for the first six dimensions, the composite
# score distribution, and a top-half vs bottom-half comparison.
hyp_data = [{"title": "TREM2-Dependent Microglial Senescence Transition", "gene": "TREM2", "composite": 0.85, "mech": 0.88, "evid": 0.82, "novel": 0.78, "feas": 0.72, "impact": 0.91, "drug": 0.65, "safety": 0.58, "comp": 0.7, "data": 0.85, "reprod": 0.75}, {"title": "Complement-Mediated Synaptic Pruning Dysregulation", "gene": "C1QA", "composite": 0.72, "mech": 0.5, "evid": 0.5, "novel": 0.5, "feas": 0.5, "impact": 0.5, "drug": 0.5, "safety": 0.5, "comp": 0.5, "data": 0.5, "reprod": 0.5}, {"title": "TFEB-PGC1\u03b1 Mitochondrial-Lysosomal Decoupling", "gene": "TFEB", "composite": 0.68, "mech": 0.5, "evid": 0.5, "novel": 0.5, "feas": 0.5, "impact": 0.5, "drug": 0.5, "safety": 0.5, "comp": 0.5, "data": 0.5, "reprod": 0.5}, {"title": "Oligodendrocyte White Matter Vulnerability", "gene": "MOG", "composite": 0.55, "mech": 0.5, "evid": 0.5, "novel": 0.5, "feas": 0.5, "impact": 0.5, "drug": 0.5, "safety": 0.5, "comp": 0.5, "data": 0.5, "reprod": 0.5}]

print("=" * 70)
print("STATISTICAL ANALYSIS OF HYPOTHESIS SCORES")
print("=" * 70)

# dim_names and dim_labels are parallel lists: record key -> printed label.
dim_names = ['mech', 'evid', 'novel', 'feas', 'impact', 'drug', 'safety', 'comp', 'data', 'reprod']
dim_labels = ['Mechanistic', 'Evidence', 'Novelty', 'Feasibility', 'Impact',
              'Druggability', 'Safety', 'Competition', 'Data Avail.', 'Reproducibility']

# Rows are hypotheses (in hyp_data order), columns are dimensions; a missing
# key is scored as 0.
scores_matrix = np.array([[h.get(k, 0) for k in dim_names] for h in hyp_data])

print("\n1. SUMMARY STATISTICS")
print("-" * 70)
print(f"{'Dimension':<20} {'Mean':>8} {'Std':>8} {'Min':>8} {'Max':>8} {'Range':>8}")
print("-" * 70)
for j, dim in enumerate(dim_labels):
    col = scores_matrix[:, j]
    # np.std defaults to the population standard deviation (ddof=0).
    print(f"{dim:<20} {np.mean(col):>8.3f} {np.std(col):>8.3f} "
          f"{np.min(col):>8.3f} {np.max(col):>8.3f} {np.max(col)-np.min(col):>8.3f}")

# Correlation analysis
print("\n2. DIMENSION CORRELATION MATRIX (Pearson r)")
print("-" * 70)
if len(hyp_data) >= 3:
    # Transpose so that each dimension, not each hypothesis, is a variable.
    corr = np.corrcoef(scores_matrix.T)
    # Only the first six dimensions are printed, to fit the 70-column layout.
    header = "              " + " ".join(f"{d[:6]:>6}" for d in dim_labels[:6])
    print(header)
    for i, dim in enumerate(dim_labels[:6]):
        row = [f"{corr[i,j]:>6.2f}" for j in range(6)]
        print(f"{dim[:13]:<14} {' '.join(row)}")
else:
    print("Need >= 3 hypotheses for correlation analysis")

composites = [h.get('composite', 0) for h in hyp_data]
print("\n3. COMPOSITE SCORE DISTRIBUTION")
print("-" * 70)
print(f"Mean: {np.mean(composites):.4f}")
print(f"Median: {np.median(composites):.4f}")
print(f"Std Dev: {np.std(composites):.4f}")
print(f"IQR: {np.percentile(composites, 75) - np.percentile(composites, 25):.4f}")
if len(composites) >= 3:
    stat, p = stats.shapiro(composites)
    print(f"Shapiro-Wilk test: W={stat:.4f}, p={p:.4f} ({'Normal' if p > 0.05 else 'Non-normal'})")

if len(hyp_data) >= 4:
    # "Top" and "bottom" are defined by composite score, so rank the rows
    # explicitly rather than relying on hyp_data already being sorted. sorted()
    # is stable, so tied hypotheses keep their original relative order.
    rank_order = sorted(range(len(hyp_data)), key=lambda k: composites[k], reverse=True)
    ranked_matrix = scores_matrix[rank_order]
    half = len(hyp_data) // 2
    top_half = ranked_matrix[:half]
    bottom_half = ranked_matrix[half:]
    print("\n4. TOP vs BOTTOM HYPOTHESIS COMPARISON (Mann-Whitney U)")
    print("-" * 70)
    for j, dim in enumerate(dim_labels):
        u, p = stats.mannwhitneyu(top_half[:, j], bottom_half[:, j], alternative='two-sided')
        sig = '*' if p < 0.05 else ''
        print(f"{dim:<20} top={np.mean(top_half[:,j]):.3f} bot={np.mean(bottom_half[:,j]):.3f} "
              f"U={u:>6.1f} p={p:.4f} {sig}")

print("\n" + "=" * 70)

======================================================================
STATISTICAL ANALYSIS OF HYPOTHESIS SCORES
======================================================================

1. SUMMARY STATISTICS
----------------------------------------------------------------------
Dimension                Mean      Std      Min      Max    Range
----------------------------------------------------------------------
Mechanistic             0.595    0.165    0.500    0.880    0.380
Evidence                0.580    0.139    0.500    0.820    0.320
Novelty                 0.570    0.121    0.500    0.780    0.280
Feasibility             0.555    0.095    0.500    0.720    0.220
Impact                  0.603    0.178    0.500    0.910    0.410
Druggability            0.537    0.065    0.500    0.650    0.150
Safety                  0.520    0.035    0.500    0.580    0.080
Competition             0.550    0.087    0.500    0.700    0.200
Data Avail.             0.588    0.152    0.500    0.850    0.350
Reproducibility         0.562    0.108    0.500    0.750    0.250

2. DIMENSION CORRELATION MATRIX (Pearson r)
----------------------------------------------------------------------
              Mechan Eviden Novelt Feasib Impact Drugga
Mechanistic      1.00   1.00   1.00   1.00   1.00   1.00
Evidence         1.00   1.00   1.00   1.00   1.00   1.00
Novelty          1.00   1.00   1.00   1.00   1.00   1.00
Feasibility      1.00   1.00   1.00   1.00   1.00   1.00
Impact           1.00   1.00   1.00   1.00   1.00   1.00
Druggability     1.00   1.00   1.00   1.00   1.00   1.00

3. COMPOSITE SCORE DISTRIBUTION
----------------------------------------------------------------------
Mean: 0.7000
Median: 0.7000
Std Dev: 0.1070
IQR: 0.1050
Shapiro-Wilk test: W=0.9890, p=0.9525 (Normal)

4. TOP vs BOTTOM HYPOTHESIS COMPARISON (Mann-Whitney U)
----------------------------------------------------------------------
Mechanistic          top=0.690 bot=0.500 U=   3.0 p=0.6171 
Evidence             top=0.660 bot=0.500 U=   3.0 p=0.6171 
Novelty              top=0.640 bot=0.500 U=   3.0 p=0.6171 
Feasibility          top=0.610 bot=0.500 U=   3.0 p=0.6171 
Impact               top=0.705 bot=0.500 U=   3.0 p=0.6171 
Druggability         top=0.575 bot=0.500 U=   3.0 p=0.6171 
Safety               top=0.540 bot=0.500 U=   3.0 p=0.6171 
Competition          top=0.600 bot=0.500 U=   3.0 p=0.6171 
Data Avail.          top=0.675 bot=0.500 U=   3.0 p=0.6171 
Reproducibility      top=0.625 bot=0.500 U=   3.0 p=0.6171 

======================================================================

Top 5 Analysis: Sda 2026 04 02 Gap Aging Mouse Brain V5 20260402

Gene expression changes in aging mouse brain predicting neurodegenerative vulnerability¶

1. Hypothesis Ranking¶

2. Composite Score Ranking¶

3. Score Heatmap¶

4. Multi-Dimensional Score Radar¶

5. Differential Gene Expression Analysis¶

6. Pathway Enrichment Analysis¶

7. Knowledge Graph Network¶

8. Statistical Analysis¶

9. Multi-Agent Debate Highlights¶

Theorist¶

Novel Hypotheses: Aging-Neurodegeneration Gene Expression Mechanisms¶

Hypothesis 1: Synaptic Pruning Dysregulation¶

Skeptic¶

Critical Evaluation of Aging-Neurodegeneration Hypotheses¶

Hypothesis 1: SPARC-Mediated Synaptic Pruning Dysregulation¶

Major Weaknesses:¶

Critical Confounds:¶

Domain Expert¶

Domain Expert Analysis: Aging Mouse Brain Gene Expression and Neurodegeneration Vulnerability¶

Allen Brain Atlas Aging Dataset Alignment¶

High-Confidence Aging Signatures from Allen Data:¶

Synthesizer¶

	Hypothesis	Target Gene	Score	mech	evid	novel	feas	impact
0	TREM2-Dependent Microglial Senescence Transition	TREM2	0.85	0.88	0.82	0.78	0.72	0.91
1	Complement-Mediated Synaptic Pruning Dysregula...	C1QA	0.72	0.50	0.50	0.50	0.50	0.50
2	TFEB-PGC1α Mitochondrial-Lysosomal Decoupling	TFEB	0.68	0.50	0.50	0.50	0.50	0.50
3	Oligodendrocyte White Matter Vulnerability	MOG	0.55	0.50	0.50	0.50	0.50	0.50