# Environment Setup
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# SciDEX dark theme: dark figure/axes backgrounds with light text, tick and
# grid colours, installed globally so every figure created afterwards uses it.
_scidex_dark_theme = {
    # Backgrounds (on screen and when saved to disk)
    'figure.facecolor': '#0a0a14',
    'axes.facecolor': '#151525',
    'savefig.facecolor': '#0a0a14',
    'savefig.edgecolor': '#0a0a14',
    # Text, ticks and grid
    'text.color': '#e0e0e0',
    'axes.labelcolor': '#e0e0e0',
    'xtick.color': '#a0a0a0',
    'ytick.color': '#a0a0a0',
    'grid.color': '#252535',
    # Default canvas size in inches
    'figure.figsize': (14, 6),
}
plt.rcParams.update(_scidex_dark_theme)

print(
    'Environment ready: NumPy, Pandas, Matplotlib, SciPy',
    'Analysis: SDA-2026-04-01-gap-006',
    sep='\n',
)

Environment ready: NumPy, Pandas, Matplotlib, SciPy
Analysis: SDA-2026-04-01-gap-006

# Scored hypotheses for this analysis: one record per hypothesis with its
# target gene, composite score and six per-dimension scores.
hyp_data = [
  {
    "title": "Heat Shock Protein 70 Disaggregase Amplification",
    "gene": "HSPA1A",
    "composite": 0.506,
    "mech": 0.8,
    "evid": 1.0,
    "novel": 0.6,
    "feas": 0.9,
    "impact": 0.7,
    "conf": 0.7
  },
  {
    "title": "PARP1 Inhibition Therapy",
    "gene": "PARP1",
    "composite": 0.498,
    "mech": 0.4,
    "evid": 1.0,
    "novel": 0.7,
    "feas": 1.0,
    "impact": 0.6,
    "conf": 0.5
  },
  {
    "title": "Arginine Methylation Enhancement Therapy",
    "gene": "PRMT1",
    "composite": 0.466,
    "mech": 0.6,
    "evid": 0.6,
    "novel": 0.9,
    "feas": 0.5,
    "impact": 0.8,
    "conf": 0.6
  },
  {
    "title": "RNA Granule Nucleation Site Modulation",
    "gene": "G3BP1",
    "composite": 0.458,
    "mech": 0.75,
    "evid": 0.55,
    "novel": 0.65,
    "feas": 0.6,
    "impact": 0.7,
    "conf": 0.7
  },
  {
    "title": "Glycine-Rich Domain Competitive Inhibition",
    "gene": "TARDBP",
    "composite": 0.424,
    "mech": 0.65,
    "evid": 0.5,
    "novel": 0.7,
    "feas": 0.45,
    "impact": 0.6,
    "conf": 0.55
  },
  {
    "title": "Serine/Arginine-Rich Protein Kinase Modulation",
    "gene": "SRPK1",
    "composite": 0.418,
    "mech": 0.5,
    "evid": 0.7,
    "novel": 0.7,
    "feas": 0.6,
    "impact": 0.5,
    "conf": 0.4
  },
  {
    "title": "Low Complexity Domain Cross-Linking Inhibition",
    "gene": "TGM2",
    "composite": 0.41,
    "mech": 0.4,
    "evid": 0.8,
    "novel": 0.6,
    "feas": 0.7,
    "impact": 0.5,
    "conf": 0.3
  }
]

# Explicit record-key -> display-label mapping. Renaming by key (instead of
# assigning a positional list to df.columns) keeps every label attached to the
# right data even if the key order inside the records changes.
# NOTE(review): the 'evid' key is displayed as 'Druggability' -- confirm that
# this label is the intended one.
column_labels = {
    'title': 'Hypothesis',
    'gene': 'Target Gene',
    'composite': 'Composite',
    'mech': 'Mechanistic',
    'evid': 'Druggability',
    'novel': 'Novelty',
    'feas': 'Feasibility',
    'impact': 'Impact',
    'conf': 'Confidence',
}

# Select the columns in display order, relabel them, rank by composite score
# (best first) and number the rows from 1 so the index reads as a rank.
df = pd.DataFrame(hyp_data)[list(column_labels)].rename(columns=column_labels)
df = df.sort_values('Composite', ascending=False).reset_index(drop=True)
df.index = df.index + 1
df

# Per-dimension scores for the hypotheses drawn on the radar chart.
scores = [
  {
    "gene": "HSPA1A",
    "title": "Heat Shock Protein 70 Disaggregase Ampli",
    "mech": 0.8,
    "evid": 1.0,
    "novel": 0.6,
    "feas": 0.9,
    "impact": 0.7,
    "conf": 0.7
  },
  {
    "gene": "PARP1",
    "title": "PARP1 Inhibition Therapy",
    "mech": 0.4,
    "evid": 1.0,
    "novel": 0.7,
    "feas": 1.0,
    "impact": 0.6,
    "conf": 0.5
  },
  {
    "gene": "PRMT1",
    "title": "Arginine Methylation Enhancement Therapy",
    "mech": 0.6,
    "evid": 0.6,
    "novel": 0.9,
    "feas": 0.5,
    "impact": 0.8,
    "conf": 0.6
  },
  {
    "gene": "G3BP1",
    "title": "RNA Granule Nucleation Site Modulation",
    "mech": 0.75,
    "evid": 0.55,
    "novel": 0.65,
    "feas": 0.6,
    "impact": 0.7,
    "conf": 0.7
  },
  {
    "gene": "TARDBP",
    "title": "Glycine-Rich Domain Competitive Inhibiti",
    "mech": 0.65,
    "evid": 0.5,
    "novel": 0.7,
    "feas": 0.45,
    "impact": 0.6,
    "conf": 0.55
  }
]

# Axis labels, listed in the same order as the score keys read in the loop.
categories = ['Mechanistic', 'Druggability', 'Novelty', 'Feasibility', 'Impact', 'Confidence']
N = len(categories)

# One spoke per category, evenly spaced around the circle; the first angle is
# repeated at the end so each outline closes on itself.
angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist()
angles = angles + [angles[0]]

fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(polar=True))
fig.patch.set_facecolor('#0a0a14')
ax.set_facecolor('#151525')

colors = ['#ff6b6b', '#4ecdc4', '#45b7d1', '#96ceb4', '#ffeaa7']
score_keys = ('mech', 'evid', 'novel', 'feas', 'impact', 'conf')
for idx, hyp in enumerate(scores):
    # Cycle through the palette if there are more hypotheses than colours.
    line_color = colors[idx % len(colors)]
    outline = [hyp[key] for key in score_keys]
    outline.append(outline[0])  # close the polygon
    ax.plot(angles, outline, 'o-', linewidth=2, color=line_color,
            label=f"{hyp['gene']}: {hyp['title']}")
    ax.fill(angles, outline, alpha=0.1, color=line_color)

# Category labels on the spokes (the duplicated closing angle is skipped).
ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories, color='#e0e0e0', fontsize=11)

# Radial axis spans the 0-1 score range with labelled rings every 0.2.
radial_ticks = [0.2, 0.4, 0.6, 0.8]
ax.set_ylim(0, 1)
ax.set_yticks(radial_ticks)
ax.set_yticklabels([str(tick) for tick in radial_ticks], color='#a0a0a0', fontsize=9)

ax.spines['polar'].set_color('#404060')
ax.grid(color='#303050', alpha=0.5)

# The legend is anchored outside the plot so it does not cover the polygons.
ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1), fontsize=8,
          facecolor='#151525', edgecolor='#404060', labelcolor='#e0e0e0')
ax.set_title('Hypothesis Scoring Dimensions', color='#e0e0e0', fontsize=14, pad=20)
fig.tight_layout()
plt.show()
print('Radar chart: Multi-dimensional hypothesis comparison')

Radar chart: Multi-dimensional hypothesis comparison

# Differential expression demo.
# NOTE: the expression values are simulated with NumPy's seeded global RNG;
# they are illustrative placeholders, not measurements.
np.random.seed(42)
genes = ["HSPA1A", "PARP1", "PRMT1", "G3BP1", "TARDBP", "SRPK1", "TGM2"]
n_samples = 20


def _significance_label(p_value):
    """Return the star annotation for a p-value: '***', '**', '*' or 'ns'."""
    if p_value < 0.001:
        return '***'
    if p_value < 0.01:
        return '**'
    if p_value < 0.05:
        return '*'
    return 'ns'


def _point_color(p_value):
    """Return the scatter colour for a p-value (red < 0.01, orange < 0.05, else grey)."""
    if p_value < 0.01:
        return '#ff4444'
    if p_value < 0.05:
        return '#ffaa44'
    return '#888888'


results = []
for gene in genes:
    # Draw order is kept fixed (control, random disease shift, disease) so the
    # seeded sequence reproduces the same numbers on every run.
    control = np.random.normal(loc=8.0, scale=0.8, size=n_samples)
    shift = np.random.uniform(0.5, 3.0)
    disease = np.random.normal(loc=8.0 + shift, scale=1.0, size=n_samples)
    t_stat, p_val = stats.ttest_ind(control, disease)
    control_mean = np.mean(control)
    disease_mean = np.mean(disease)
    results.append({
        'gene': gene,
        'control_mean': control_mean,
        'disease_mean': disease_mean,
        # Difference of group means, disease minus control.
        'fold_change': disease_mean - control_mean,
        'p_value': p_val,
        't_stat': t_stat,
    })

fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Volcano-style plot: significance on the x axis, effect size on the y axis.
ax = axes[0]
for r in results:
    neg_log_p = -np.log10(r['p_value'])
    ax.scatter(neg_log_p, r['fold_change'], c=_point_color(r['p_value']), s=120,
               edgecolors='white', linewidth=0.5, zorder=5)
    ax.annotate(r['gene'], (neg_log_p, r['fold_change']),
                fontsize=9, ha='left', va='bottom', color='#e0e0e0')
ax.axhline(y=0, color='#555', linestyle='--', alpha=0.5)
ax.axvline(x=-np.log10(0.05), color='#ff4444', linestyle='--', alpha=0.3, label='p=0.05')
ax.set_xlabel('-log10(p-value)', fontsize=11)
ax.set_ylabel('Log2 Fold Change (Disease vs Control)', fontsize=11)
ax.set_title('Differential Expression: Target Genes', fontsize=13)
ax.legend(facecolor='#151525', edgecolor='#404060', labelcolor='#e0e0e0')
ax.grid(True, alpha=0.2)

# Bar chart: control and disease means side by side for each gene.
ax = axes[1]
x = np.arange(len(genes))
w = 0.35
ax.bar(x - w/2, [r['control_mean'] for r in results], w, label='Control', color='#4488ff', alpha=0.8)
ax.bar(x + w/2, [r['disease_mean'] for r in results], w, label='Disease', color='#ff4444', alpha=0.8)
for i, r in enumerate(results):
    # Significance stars sit just above the taller bar of each pair.
    ax.text(i, max(r['control_mean'], r['disease_mean']) + 0.3,
            _significance_label(r['p_value']),
            ha='center', color='#ffcc00', fontsize=11)
ax.set_xticks(x)
ax.set_xticklabels(genes, rotation=45, ha='right', fontsize=9)
ax.set_ylabel('Expression Level (log2 TPM)', fontsize=11)
ax.set_title('Gene Expression: Control vs Disease', fontsize=13)
ax.legend(facecolor='#151525', edgecolor='#404060', labelcolor='#e0e0e0')
ax.grid(True, alpha=0.2, axis='y')

plt.tight_layout()
plt.show()

# Text summary of the same results, one row per gene.
print('\nDifferential Expression Results:')
print('-' * 65)
print(f'{"Gene":>10} {"Fold Change":>12} {"p-value":>12} {"Significance":>14}')
print('-' * 65)
for r in results:
    sig = _significance_label(r['p_value'])
    print(f"{r['gene']:>10} {r['fold_change']:>12.3f} {r['p_value']:>12.2e} {sig:>14}")

Differential Expression Results:
-----------------------------------------------------------------
      Gene  Fold Change      p-value   Significance
-----------------------------------------------------------------
    HSPA1A        2.016     4.13e-08            ***
     PARP1        1.612     2.57e-06            ***
     PRMT1        2.694     8.93e-15            ***
     G3BP1        0.535     3.44e-02              *
    TARDBP        2.534     2.77e-11            ***
     SRPK1        0.959     1.21e-03             **
      TGM2        1.555     1.54e-05            ***

# Pathway enrichment heatmap.
# NOTE: the enrichment scores are simulated with NumPy's seeded global RNG;
# they are illustrative placeholders, not computed enrichment statistics.
np.random.seed(123)
pathways = ["RNA processing", "Stress granule dynamics", "Ubiquitin-proteasome", "Autophagy-lysosome", "Nuclear transport", "Phase separation", "Mitochondrial function", "Apoptosis"]
genes = ["HSPA1A", "PARP1", "PRMT1", "G3BP1", "TARDBP", "SRPK1", "TGM2"]

# Baseline scores in [0, 4), then one boosted gene per pathway (the gene index
# wraps around because there are more pathways than genes).
enrichment = np.random.uniform(0, 4, size=(len(pathways), len(genes)))
for i in range(len(pathways)):
    enrichment[i, i % len(genes)] += np.random.uniform(1, 3)

fig, ax = plt.subplots(figsize=(14, 8))
im = ax.imshow(enrichment, cmap='YlOrRd', aspect='auto')

ax.set_xticks(np.arange(len(genes)))
ax.set_yticks(np.arange(len(pathways)))
ax.set_xticklabels(genes, rotation=45, ha='right', fontsize=10)
ax.set_yticklabels(pathways, fontsize=10)

# Annotate every cell with its value. YlOrRd runs from pale yellow (low) to
# dark red (high), so low cells need dark text and only the darkest cells get
# white text; the switch point is relative to the data range rather than a
# fixed score.
low, high = enrichment.min(), enrichment.max()
white_text_above = low + 0.65 * (high - low)
for i, row in enumerate(enrichment):
    for j, val in enumerate(row):
        text_color = 'white' if val > white_text_above else '#151525'
        ax.text(j, i, f'{val:.1f}', ha='center', va='center', color=text_color, fontsize=9)

cbar = plt.colorbar(im, ax=ax, shrink=0.8)
cbar.set_label('-log10(FDR)', color='#e0e0e0', fontsize=11)
cbar.ax.yaxis.set_tick_params(color='#a0a0a0')
plt.setp(cbar.ax.yaxis.get_ticklabels(), color='#a0a0a0')

ax.set_title('Pathway Enrichment Heatmap', fontsize=14, color='#e0e0e0', pad=15)
ax.set_xlabel('Target Genes', fontsize=12)
ax.set_ylabel('KEGG Pathways', fontsize=12)

plt.tight_layout()
plt.show()

print('Pathway enrichment analysis complete.')
print(f'Pathways analyzed: {len(pathways)}')
print(f'Genes analyzed: {len(genes)}')
# (row, column) position of the single largest score in the matrix.
most_enriched = np.unravel_index(np.argmax(enrichment), enrichment.shape)
print(f'Most enriched: {pathways[most_enriched[0]]} x {genes[most_enriched[1]]} (score: {enrichment[most_enriched]:.2f})')

Pathway enrichment analysis complete.
Pathways analyzed: 8
Genes analyzed: 7
Most enriched: Autophagy-lysosome x G3BP1 (score: 4.91)

# Scored hypotheses for this analysis: one record per hypothesis with its
# target gene, composite score and six per-dimension scores.
hyp_data = [
  {
    "title": "Heat Shock Protein 70 Disaggregase Amplification",
    "gene": "HSPA1A",
    "composite": 0.506,
    "mech": 0.8,
    "evid": 1.0,
    "novel": 0.6,
    "feas": 0.9,
    "impact": 0.7,
    "conf": 0.7
  },
  {
    "title": "PARP1 Inhibition Therapy",
    "gene": "PARP1",
    "composite": 0.498,
    "mech": 0.4,
    "evid": 1.0,
    "novel": 0.7,
    "feas": 1.0,
    "impact": 0.6,
    "conf": 0.5
  },
  {
    "title": "Arginine Methylation Enhancement Therapy",
    "gene": "PRMT1",
    "composite": 0.466,
    "mech": 0.6,
    "evid": 0.6,
    "novel": 0.9,
    "feas": 0.5,
    "impact": 0.8,
    "conf": 0.6
  },
  {
    "title": "RNA Granule Nucleation Site Modulation",
    "gene": "G3BP1",
    "composite": 0.458,
    "mech": 0.75,
    "evid": 0.55,
    "novel": 0.65,
    "feas": 0.6,
    "impact": 0.7,
    "conf": 0.7
  },
  {
    "title": "Glycine-Rich Domain Competitive Inhibition",
    "gene": "TARDBP",
    "composite": 0.424,
    "mech": 0.65,
    "evid": 0.5,
    "novel": 0.7,
    "feas": 0.45,
    "impact": 0.6,
    "conf": 0.55
  },
  {
    "title": "Serine/Arginine-Rich Protein Kinase Modulation",
    "gene": "SRPK1",
    "composite": 0.418,
    "mech": 0.5,
    "evid": 0.7,
    "novel": 0.7,
    "feas": 0.6,
    "impact": 0.5,
    "conf": 0.4
  },
  {
    "title": "Low Complexity Domain Cross-Linking Inhibition",
    "gene": "TGM2",
    "composite": 0.41,
    "mech": 0.4,
    "evid": 0.8,
    "novel": 0.6,
    "feas": 0.7,
    "impact": 0.5,
    "conf": 0.3
  }
]

df = pd.DataFrame(hyp_data)
# Score columns and their display labels, in matching order.
# NOTE(review): the 'evid' column is displayed as 'Druggability' -- confirm
# that this label is the intended one.
score_cols = ['mech', 'evid', 'novel', 'feas', 'impact', 'conf']
score_names = ['Mechanistic', 'Druggability', 'Novelty', 'Feasibility', 'Impact', 'Confidence']

if len(df) >= 3:
    corr_matrix = df[score_cols].corr()
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    # Left panel: pairwise correlations between the score dimensions.
    ax = axes[0]
    im = ax.imshow(corr_matrix.values, cmap='RdBu_r', vmin=-1, vmax=1)
    ax.set_xticks(range(len(score_names)))
    ax.set_yticks(range(len(score_names)))
    ax.set_xticklabels(score_names, rotation=45, ha='right', fontsize=9)
    ax.set_yticklabels(score_names, fontsize=9)
    for i, row in enumerate(corr_matrix.values):
        for j, corr in enumerate(row):
            # RdBu_r is nearly white around zero, so weak correlations need
            # dark text; only the saturated red/blue cells take white text.
            color = 'white' if abs(corr) > 0.5 else '#151525'
            ax.text(j, i, f'{corr:.2f}', ha='center', va='center',
                    color=color, fontsize=9)
    plt.colorbar(im, ax=ax, shrink=0.8)
    ax.set_title('Score Dimension Correlations', fontsize=13)

    # Right panel: composite score per hypothesis, coloured by score band.
    ax = axes[1]
    composites = df['composite'].values
    colors = ['#ff6b6b' if c > 0.5 else '#4ecdc4' if c > 0.4 else '#45b7d1' for c in composites]
    ax.barh(range(len(df)), composites, color=colors)
    ax.set_yticks(range(len(df)))
    # Labels are built from the same frame the bars come from, so each label
    # always matches its bar.
    ax.set_yticklabels(
        [f"{gene}: {title[:30]}" for gene, title in zip(df['gene'], df['title'])],
        fontsize=8,
    )
    ax.set_xlabel('Composite Score', fontsize=11)
    ax.set_title('Hypothesis Composite Scores', fontsize=13)
    ax.axvline(x=0.5, color='#ff4444', linestyle='--', alpha=0.5, label='High confidence')
    ax.legend(facecolor='#151525', edgecolor='#404060', labelcolor='#e0e0e0')
    ax.grid(True, alpha=0.2, axis='x')

    plt.tight_layout()
    plt.show()
else:
    print('Not enough hypotheses for correlation analysis')

print('\n=== Statistical Summary ===')
print(f'Total hypotheses: {len(df)}')
print(f'Mean composite score: {df["composite"].mean():.3f}')
print(f'Std composite score: {df["composite"].std():.3f}')
# Pick the top hypothesis by its score instead of assuming the first row is
# the best one; the input records are not sorted in this cell.
top = df.loc[df['composite'].idxmax()]
print(f'Top hypothesis: {top["title"]} ({top["composite"]:.3f})')
if len(df) >= 2:
    # One-sample t-test of the composite scores against a reference mean of 0.3.
    t, p = stats.ttest_1samp(df['composite'].values, 0.3)
    print(f'\nOne-sample t-test (H0: mean = 0.3): t={t:.3f}, p={p:.2e}')
    print(f'Result: {"Reject H0" if p < 0.05 else "Fail to reject H0"} at alpha=0.05')

=== Statistical Summary ===
Total hypotheses: 7
Mean composite score: 0.454
Std composite score: 0.039
Top hypothesis: Heat Shock Protein 70 Disaggregase Amplification (0.506)

One-sample t-test (H0: mean = 0.3): t=10.580, p=4.20e-05
Result: Reject H0 at alpha=0.05

import networkx as nx

# [source, target, relation] triples that make up the knowledge graph.
edges = [
    ["HSPA1A", "neurodegeneration", "associated_with"],
    ["PARP1", "neurodegeneration", "associated_with"],
    ["PRMT1", "neurodegeneration", "associated_with"],
    ["G3BP1", "neurodegeneration", "associated_with"],
    ["SRPK1", "neurodegeneration", "associated_with"],
    ["TGM2", "neurodegeneration", "associated_with"],
    ["TARDBP", "neurodegeneration", "associated_with"],
    ["TGM2", "PRMT1", "co_discussed"],
    ["TGM2", "PARP1", "co_discussed"],
    ["TGM2", "HSPA1A", "co_discussed"],
    ["TGM2", "G3BP1", "co_discussed"],
    ["TGM2", "SRPK1", "co_discussed"],
    ["PRMT1", "PARP1", "co_discussed"],
    ["PRMT1", "HSPA1A", "co_discussed"],
    ["PRMT1", "G3BP1", "co_discussed"],
    ["PRMT1", "SRPK1", "co_discussed"],
    ["PARP1", "HSPA1A", "co_discussed"],
    ["PARP1", "G3BP1", "co_discussed"],
    ["PARP1", "SRPK1", "co_discussed"],
    ["HSPA1A", "G3BP1", "co_discussed"],
    ["HSPA1A", "SRPK1", "co_discussed"],
    ["G3BP1", "SRPK1", "co_discussed"],
    ["TGM2", "TARDBP", "co_discussed"],
    ["TARDBP", "PRMT1", "co_discussed"],
    ["TARDBP", "PARP1", "co_discussed"],
]

# Build a directed graph, storing each relation as the edge's 'label'.
G = nx.DiGraph()
for source, target, relation in edges:
    G.add_edge(source, target, label=relation)

fig, ax = plt.subplots(figsize=(16, 12))
# A fixed seed keeps the spring layout identical between runs.
pos = nx.spring_layout(G, k=2.5, iterations=50, seed=42)

degrees = dict(G.degree())


def _node_color(degree):
    """Return the display colour for a node with the given degree."""
    if degree > 3:
        return '#ff6b6b'
    if degree > 1:
        return '#4ecdc4'
    return '#45b7d1'


# Colour and size both scale with how connected a node is.
node_colors = [_node_color(degrees[node]) for node in G.nodes()]
node_sizes = [300 + 200 * degrees[node] for node in G.nodes()]

nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=node_sizes,
                       alpha=0.9, ax=ax)
nx.draw_networkx_labels(G, pos, font_size=8, font_color='#e0e0e0', ax=ax)
nx.draw_networkx_edges(G, pos, edge_color='#606080', arrows=True,
                       arrowsize=15, connectionstyle='arc3,rad=0.1', alpha=0.7, ax=ax)

# Relation names, cut to at most 15 characters, drawn along the edges.
edge_labels = {}
for source, target, attrs in G.edges(data=True):
    edge_labels[(source, target)] = attrs['label'][:15]
nx.draw_networkx_edge_labels(G, pos, edge_labels, font_size=7, font_color='#a0a0b0', ax=ax)

node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
ax.set_title(f'Knowledge Graph ({node_count} nodes, {edge_count} edges)',
             fontsize=14, color='#e0e0e0')
ax.axis('off')
plt.tight_layout()
plt.show()

print(f'Knowledge graph: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges')
hub_nodes = [node for node, degree in degrees.items() if degree > 2]
print(f'Hub nodes (degree > 2): {hub_nodes}')

Knowledge graph: 8 nodes, 25 edges
Hub nodes (degree > 2): ['HSPA1A', 'neurodegeneration', 'PARP1', 'PRMT1', 'G3BP1', 'SRPK1', 'TGM2', 'TARDBP']

SciDEX Analysis: 2026 04 01 Gap 006

TDP-43 phase separation therapeutics for ALS-FTD

Research Question

Hypothesis Rankings

Multi-Dimensional Hypothesis Scoring

Differential Gene Expression Analysis

Pathway Enrichment Analysis

Statistical Tests & Correlation Analysis

Knowledge Graph Visualization

Summary

	Hypothesis	Target Gene	Composite	Mechanistic	Druggability	Novelty	Feasibility	Impact	Confidence
1	Heat Shock Protein 70 Disaggregase Amplification	HSPA1A	0.506	0.80	1.00	0.60	0.90	0.7	0.70
2	PARP1 Inhibition Therapy	PARP1	0.498	0.40	1.00	0.70	1.00	0.6	0.50
3	Arginine Methylation Enhancement Therapy	PRMT1	0.466	0.60	0.60	0.90	0.50	0.8	0.60
4	RNA Granule Nucleation Site Modulation	G3BP1	0.458	0.75	0.55	0.65	0.60	0.7	0.70
5	Glycine-Rich Domain Competitive Inhibition	TARDBP	0.424	0.65	0.50	0.70	0.45	0.6	0.55
6	Serine/Arginine-Rich Protein Kinase Modulation	SRPK1	0.418	0.50	0.70	0.70	0.60	0.5	0.40
7	Low Complexity Domain Cross-Linking Inhibition	TGM2	0.410	0.40	0.80	0.60	0.70	0.5	0.30