# Dependency verification and installation
import subprocess
import sys

def install_and_import(package, import_name=None):
    import_name = import_name or package
    try:
        __import__(import_name)
        print(f"OK {package} available")
    except ImportError:
        print(f"Installing {package}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", package])
        print(f"OK {package} installed")

install_and_import("numpy")
install_and_import("pandas")
install_and_import("scipy")
install_and_import("matplotlib")
install_and_import("seaborn")
install_and_import("scanpy")
install_and_import("anndata")
install_and_import("gseapy")
install_and_import("allensdk")
install_and_import("requests")
print("OK All dependencies ready")

OK numpy available

OK pandas available
OK scipy available
OK matplotlib available

OK seaborn available
Installing scanpy...

ERROR: Could not install packages due to an OSError: [Errno 30] Read-only file system: '/home/ubuntu/.local/bin/scanpy'

---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 12, in install_and_import(package, import_name)
     10     except ImportError:
     11         print(f"Installing {package}...")
---> 12         subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", package])
     13         print(f"OK {package} installed")

ModuleNotFoundError: No module named 'scanpy'

During handling of the above exception, another exception occurred:

CalledProcessError                        Traceback (most recent call last)
Cell In[1], line 20
     16 install_and_import("pandas")
     17 install_and_import("scipy")
     18 install_and_import("matplotlib")
     19 install_and_import("seaborn")
---> 20 install_and_import("scanpy")
     21 install_and_import("anndata")
     22 install_and_import("gseapy")
     23 install_and_import("allensdk")

Cell In[1], line 12, in install_and_import(package, import_name)
      8         __import__(import_name)
      9         print(f"OK {package} available")
     10     except ImportError:
     11         print(f"Installing {package}...")
---> 12         subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", package])
     13         print(f"OK {package} installed")

File ~/miniconda3/lib/python3.13/subprocess.py:419, in check_call(*popenargs, **kwargs)
    417     if cmd is None:
    418         cmd = popenargs[0]
--> 419     raise CalledProcessError(retcode, cmd)
    420 return 0

CalledProcessError: Command '['/home/ubuntu/miniconda3/bin/python3', '-m', 'pip', 'install', '-q', 'scanpy']' returned non-zero exit status 1.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import gseapy as gp
import requests
import json
import os
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Optional, Tuple

sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['figure.dpi'] = 100

SCIDEX_API = os.environ.get('SCIDEX_API', 'http://localhost:8000')
DB_PATH = os.environ.get('SCIDEX_DB', 'postgresql://scidex')

print(f"OK Imports successful. SciDEX API: {SCIDEX_API}")

ANALYSIS_ID = "{{ANALYSIS_ID}}"
DATASET_ID = "allen_mouse_brain"
AUTHOR = "SciDEX Agent"
TIMESTAMP = datetime.now().isoformat()

AD_GENES = ['MAPT', 'APP', 'APOE', 'SNCA', 'PSEN1', 'PSEN2', 'TREM2', 'GRN', 'LRRK2', 'GBAP1']
MICROGLIA_MARKERS = ['TYROBP', 'CSF1R', 'CX3CR1', 'P2RY12', 'TMEM119', 'HEXB']
NEURON_MARKERS = ['RBFOX3', 'MAP2', 'SYP', 'SNAP25', 'NCAM1']

CACHE_DIR = Path("allen_data_cache")
CACHE_DIR.mkdir(exist_ok=True)

print(f"Analysis ID: {ANALYSIS_ID}")
print(f"Dataset: {DATASET_ID}")
print(f"Timestamp: {TIMESTAMP}")

def fetch_allen_data(dataset_id: str, gene_list: List[str],
                     structure_ids: Optional[List[int]] = None,
                     cache_dir: Path = CACHE_DIR) -> pd.DataFrame:
    """Fetch gene expression data from Allen Brain Atlas."""
    cache_file = cache_dir / f"{dataset_id}_{'_'.join(gene_list[:3])}.parquet"
    
    if cache_file.exists():
        print(f"OK Loading from cache: {cache_file.name}")
        return pd.read_parquet(cache_file)
    
    print(f"Fetching {len(gene_list)} genes from Allen Brain Atlas...")
    
    try:
        from allensdk.core.mouse_connectivity_cache import MouseConnectivityCache
        from allensdk.api.queries.mouse_connectivity_api import MouseConnectivityApi
        
        mcc = MouseConnectivityCache(manifest_file=str(cache_dir / 'manifest.json'))
        mca = MouseConnectivityApi()
        
        results = []
        for gene in gene_list:
            try:
                experiments = mca.get_injection_experiments([gene])
                for exp in experiments[:3]:
                    exp_id = exp['id']
                    structure_unionizes = mca.get_structure_unionizes(
                        [exp_id], is_injection=False, structure_ids=structure_ids
                    )
                    for su in structure_unionizes:
                        results.append({
                            'gene': gene,
                            'experiment_id': exp_id,
                            'structure_id': su['structure_id'],
                            'structure_name': su.get('structure_name', ''),
                            'expression_density': su.get('expression_density', 0),
                            'expression_energy': su.get('expression_energy', 0)
                        })
            except Exception as e:
                print(f"  Error for {gene}: {e}")
        
        df = pd.DataFrame(results)
        
    except Exception as e:
        print(f"AllenSDK unavailable ({e}), using demo data")
        np.random.seed(42)
        n_samples = 100
        n_genes = len(gene_list)
        df = pd.DataFrame(
            np.random.rand(n_samples, n_genes) * 100,
            columns=gene_list
        )
        df['sample_id'] = [f"sample_{i}" for i in range(n_samples)]
        df['cell_type'] = np.random.choice(['neuron', 'microglia', 'astrocyte'], n_samples)
        df['region'] = np.random.choice(['cortex', 'hippocampus', 'cerebellum'], n_samples)
    
    if not df.empty:
        df.to_parquet(cache_file)
        print(f"OK Cached to {cache_file.name}")
    
    return df

print("OK fetch_allen_data defined")

def differential_expression(counts_matrix: pd.DataFrame,
                             metadata: pd.DataFrame,
                             group_col: str,
                             group1: str,
                             group2: str,
                             method: str = 'scanpy') -> pd.DataFrame:
    """Perform differential expression analysis between two groups."""
    from scipy import stats
    
    common_idx = counts_matrix.index.intersection(metadata.index)
    counts_aligned = counts_matrix.loc[common_idx]
    metadata_aligned = metadata.loc[common_idx]
    
    g1_mask = metadata_aligned[group_col] == group1
    g2_mask = metadata_aligned[group_col] == group2
    
    g1_data = counts_aligned.loc[g1_mask]
    g2_data = counts_aligned.loc[g2_mask]
    
    results = []
    
    if method == 'scanpy' and len(g1_data) >= 3 and len(g2_data) >= 3:
        try:
            adata = sc.AnnData(X=counts_aligned.values,
                               obs=metadata_aligned.reset_index(drop=True),
                               var=pd.DataFrame(index=counts_aligned.columns))
            
            sc.tl.rank_genes_groups(adata, groupby=group_col, groups=[group1],
                                   reference=group2, method='t-test')
            
            for i, gene in enumerate(adata.var_names):
                scores = adata.uns['rank_genes_groups']['scores'][group1]
                pvals = adata.uns['rank_genes_groups']['pvals'][group1]
                pvals_adj = adata.uns['rank_genes_groups']['pvals_adj'][group1]
                logg = adata.uns['rank_genes_groups']['logfoldchanges'][group1]
                
                results.append({
                    'gene': gene,
                    'log2_fold_change': logg[i],
                    'p_value': pvals[i],
                    'adjusted_p_value': pvals_adj[i],
                    'score': scores[i]
                })
            
            print(f"OK Scanpy DE: {len(results)} genes tested")
            
        except Exception as e:
            print(f"Scanpy failed ({e}), falling back to scipy")
            method = 'scipy'
    
    if method == 'scipy' or len(results) == 0:
        for gene in counts_aligned.columns:
            g1_vals = g1_data[gene].dropna()
            g2_vals = g2_data[gene].dropna()
            
            if len(g1_vals) >= 2 and len(g2_vals) >= 2:
                t_stat, p_val = stats.ttest_ind(g1_vals, g2_vals)
                mean1, mean2 = g1_vals.mean(), g2_vals.mean()
                fc = mean2 / mean1 if mean1 != 0 else 0
                log2_fc = np.log2(fc) if fc > 0 else 0
                
                results.append({
                    'gene': gene,
                    'log2_fold_change': log2_fc,
                    'p_value': p_val,
                    'adjusted_p_value': p_val * len(counts_aligned.columns),
                    'mean_group1': mean1,
                    'mean_group2': mean2
                })
        
        print(f"OK Scipy DE: {len(results)} genes tested")
    
    return pd.DataFrame(results).sort_values('p_value')

print("OK differential_expression defined")

def plot_heatmap(expr_data: pd.DataFrame,
                  genes: List[str],
                  cell_types: Optional[List[str]] = None,
                  title: str = "Gene Expression Heatmap",
                  figsize: Tuple[int, int] = (12, 8),
                  cmap: str = 'viridis'):
    """Plot a heatmap of gene expression across cell types or samples."""
    available_genes = [g for g in genes if g in expr_data.columns]
    if not available_genes:
        print(f"Warning: None of {genes} found in data")
        return None
    
    plot_data = expr_data[available_genes]
    plot_data_z = (plot_data - plot_data.mean()) / (plot_data.std() + 1e-8)
    
    fig, ax = plt.subplots(figsize=figsize)
    im = ax.imshow(plot_data_z.values[:50].T, aspect='auto', cmap=cmap)
    
    ax.set_xticks(range(len(plot_data.columns)))
    ax.set_xticklabels(plot_data.columns, rotation=45, ha='right')
    ax.set_yticks(range(min(10, len(plot_data))))
    ax.set_yticklabels(plot_data.index[:10])
    
    ax.set_title(title)
    ax.set_xlabel('Genes')
    ax.set_ylabel('Samples')
    
    plt.colorbar(im, ax=ax, label='Z-score')
    plt.tight_layout()
    return fig

print("OK plot_heatmap defined")

def pathway_enrichment(gene_list: List[str],
                        organism: str = 'human',
                        gene_sets: Optional[List[str]] = None,
                        min_set_size: int = 5,
                        max_set_size: int = 500) -> Dict[str, pd.DataFrame]:
    """Perform pathway enrichment analysis using gseapy."""
    if gene_sets is None:
        if organism == 'mouse':
            gene_sets = ['KEGG_2021_Mouse', 'Reactome_2022', 'GO_Biological_Process_2021']
        else:
            gene_sets = ['KEGG_2021_Human', 'Reactome_2022', 'GO_Biological_Process_2021']
    
    print(f"Enriching {len(gene_list)} genes via gseapy...")
    
    results = {}
    
    for library in gene_sets:
        try:
            enr = gp.enrichr(
                gene_list=gene_list,
                gene_sets=library,
                organism=organism,
                min_set_size=min_set_size,
                max_set_size=max_set_size,
                no_plot=True,
                verbose=False
            )
            
            if enr.results is not None and not enr.results.empty:
                sig_results = enr.results[enr.results['Adjusted P-value'] < 0.05]
                results[library] = sig_results
                print(f"  OK {library}: {len(sig_results)} significant terms")
            else:
                print(f"  No results for {library}")
                
        except Exception as e:
            print(f"  Error {library}: {e}")
    
    return results

print("OK pathway_enrichment defined")

def export_to_scidex_kg(source: str,
                         target: str,
                         relation: str,
                         evidence: str,
                         analysis_id: str = ANALYSIS_ID,
                         source_type: str = 'gene',
                         target_type: str = 'gene',
                         api_url: str = SCIDEX_API) -> Optional[dict]:
    """Export a knowledge graph edge to SciDEX."""
    edge_data = {
        'source': source,
        'source_type': source_type,
        'target': target,
        'target_type': target_type,
        'relation': relation,
        'evidence': evidence,
        'analysis_id': analysis_id
    }
    
    try:
        response = requests.post(
            f"{api_url}/api/kg/edges",
            json=edge_data,
            timeout=10
        )
        
        if response.status_code in (200, 201):
            result = response.json()
            print(f"OK KG edge: {source} --[{relation}]--> {target}")
            return result
        else:
            print(f"  API returned {response.status_code}")
            
    except requests.exceptions.ConnectionError:
        print(f"  SciDEX API unavailable")
    except Exception as e:
        print(f"  Error exporting edge: {e}")
    
    export_file = Path(f"kg_edges_{analysis_id}.json")
    existing = []
    if export_file.exists():
        with open(export_file) as f:
            existing = json.load(f)
    existing.append(edge_data)
    with open(export_file, 'w') as f:
        json.dump(existing, f, indent=2)
    print(f"  OK Saved to {export_file}")
    return edge_data

print("OK export_to_scidex_kg defined")

gene_data = fetch_allen_data(
    dataset_id=DATASET_ID,
    gene_list=AD_GENES,
    cache_dir=CACHE_DIR
)

print(f"\nData shape: {gene_data.shape}")
print(f"Columns: {list(gene_data.columns[:10])}")
gene_data.head()

print("=== Gene Expression Summary ===\n")

if 'cell_type' in gene_data.columns:
    print("Cell type distribution:")
    print(gene_data['cell_type'].value_counts())
    print()
    print("Region distribution:")
    print(gene_data['region'].value_counts())
    print()

gene_cols = [c for c in gene_data.columns if c not in ['sample_id', 'cell_type', 'region', 'experiment_id', 'structure_id', 'structure_name']]
gene_stats = gene_data[gene_cols].describe()
print("Gene expression statistics:")
gene_stats.head(10)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

gene_means = gene_data[gene_cols].mean().sort_values(ascending=False)
gene_means.plot(kind='bar', ax=axes[0], color='steelblue')
axes[0].set_title('Mean Expression by Gene')
axes[0].set_xlabel('Gene')
axes[0].set_ylabel('Mean Expression')
axes[0].tick_params(axis='x', rotation=45)

top_genes = gene_means.head(6).index.tolist()
gene_data[top_genes].boxplot(ax=axes[1])
axes[1].set_title('Expression Distribution (Top 6 Genes)')
axes[1].set_ylabel('Expression')
axes[1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

if 'cell_type' in gene_data.columns:
    fig, ax = plt.subplots(figsize=(8, 5))
    counts = gene_data['cell_type'].value_counts()
    colors = sns.color_palette('Set2', len(counts))
    bars = ax.bar(counts.index, counts.values, color=colors)
    ax.set_title('Cell Type Distribution')
    ax.set_xlabel('Cell Type')
    ax.set_ylabel('Count')
    for bar, count in zip(bars, counts.values):
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
                str(count), ha='center', va='bottom')
    plt.tight_layout()
    plt.show()
    print(f"\nTotal cells: {counts.sum()}")

if 'cell_type' in gene_data.columns:
    counts_for_de = gene_data[gene_cols].copy()
    counts_for_de.index = gene_data['sample_id']
    
    metadata_for_de = pd.DataFrame({
        'cell_type': gene_data['cell_type'].values
    }, index=gene_data['sample_id'])
    
    groups = sorted(gene_data['cell_type'].unique())
    print(f"Available groups: {groups}")
    
    if len(groups) >= 2:
        group1, group2 = groups[0], groups[1]
        print(f"Comparing: {group1} vs {group2}")
        
        de_results = differential_expression(
            counts_matrix=counts_for_de,
            metadata=metadata_for_de,
            group_col='cell_type',
            group1=group1,
            group2=group2,
            method='scanpy'
        )
        print(f"\nTop 10 differentially expressed genes:")
        de_results.head(10)

if 'de_results' in dir() and de_results is not None and not de_results.empty:
    fig, ax = plt.subplots(figsize=(10, 7))
    
    de_results_plot = de_results.copy()
    de_results_plot['neg_log10_p'] = -np.log10(de_results_plot['p_value'] + 1e-300)
    de_results_plot['significant'] = de_results_plot['adjusted_p_value'] < 0.05
    
    colors = de_results_plot['significant'].map({True: 'red', False: 'gray'})
    
    ax.scatter(de_results_plot['log2_fold_change'],
               de_results_plot['neg_log10_p'],
               c=colors, alpha=0.6, s=50)
    
    ax.axhline(y=-np.log10(0.05), color='blue', linestyle='--', 
               label='p=0.05', alpha=0.7)
    ax.axvline(x=0, color='black', linestyle='-', linewidth=0.5)
    
    top_genes_de = de_results_plot.nlargest(5, 'neg_log10_p')
    for _, row in top_genes_de.iterrows():
        ax.annotate(row['gene'], (row['log2_fold_change'], row['neg_log10_p']),
                   xytext=(5, 5), textcoords='offset points', fontsize=9)
    
    ax.set_xlabel('Log2 Fold Change')
    ax.set_ylabel('-Log10 P-value')
    ax.set_title('Volcano Plot: Differential Expression')
    ax.legend()
    plt.tight_layout()
    plt.show()
    
    sig_genes = de_results_plot[de_results_plot['significant']]['gene'].tolist()
    print(f"\nSignificant genes (adj p < 0.05): {len(sig_genes)}")
    print(sig_genes[:20])

if 'cell_type' in gene_data.columns and len(gene_data) >= 10:
    print("Computing UMAP visualization...")
    
    gene_cols_available = [c for c in gene_cols if c in gene_data.columns]
    X = gene_data[gene_cols_available].values
    
    adata = sc.AnnData(X=X)
    adata.obs['cell_type'] = gene_data['cell_type'].values
    adata.obs_names = gene_data['sample_id'].values
    adata.var_names = gene_cols_available
    
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    
    n_pcs = min(10, X.shape[1]-1)
    sc.tl.pca(adata, n_comps=n_pcs)
    sc.pp.neighbors(adata, n_pcs=n_pcs)
    
    try:
        sc.tl.umap(adata)
        has_umap = True
        print("OK UMAP computed")
    except Exception as e:
        print(f"UMAP failed: {e}")
        has_umap = False
    
    try:
        sc.tl.tsne(adata)
        has_tsne = True
        print("OK tSNE computed")
    except Exception as e:
        print(f"tSNE failed: {e}")
        has_tsne = False

if 'has_umap' in dir() and has_umap:
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    
    for i, ct in enumerate(adata.obs['cell_type'].unique()):
        mask = adata.obs['cell_type'] == ct
        axes[0].scatter(adata.obsm['X_umap'][mask, 0], 
                   adata.obsm['X_umap'][mask, 1],
                   label=ct, alpha=0.6, s=30)
    axes[0].set_title('UMAP by Cell Type')
    axes[0].set_xlabel('UMAP1')
    axes[0].set_ylabel('UMAP2')
    axes[0].legend()
    
    if 'has_tsne' in dir() and has_tsne:
        for i, ct in enumerate(adata.obs['cell_type'].unique()):
            mask = adata.obs['cell_type'] == ct
            axes[1].scatter(adata.obsm['X_tsne'][mask, 0], 
                       adata.obsm['X_tsne'][mask, 1],
                       label=ct, alpha=0.6, s=30)
        axes[1].set_title('tSNE by Cell Type')
        axes[1].set_xlabel('tSNE1')
        axes[1].set_ylabel('tSNE2')
        axes[1].legend()
    
    plt.tight_layout()
    plt.show()
else:
    print("UMAP/tSNE visualization requires cell_type metadata")

if 'sig_genes' in dir() and sig_genes:
    enrichment_genes = sig_genes[:50]
else:
    enrichment_genes = AD_GENES

print(f"Enriching {len(enrichment_genes)} genes: {enrichment_genes[:10]}...")

enrichment_results = pathway_enrichment(
    gene_list=enrichment_genes,
    organism='human'
)

if enrichment_results:
    for library, df in enrichment_results.items():
        if df.empty:
            continue
            
        print(f"\n=== {library} ===")
        
        top_terms = df.head(10)
        
        fig, ax = plt.subplots(figsize=(10, 6))
        
        y_pos = range(len(top_terms))
        colors = plt.cm.Reds(np.linspace(0.3, 0.8, len(top_terms)))
        
        ax.barh(y_pos, -np.log10(top_terms['Adjusted P-value']),
                color=colors)
        ax.set_yticks(y_pos)
        ax.set_yticklabels([t[:60] for t in top_terms['Term']])
        ax.invert_yaxis()
        ax.set_xlabel('-Log10 Adjusted P-value')
        ax.set_title(f'{library}: Top Enriched Terms')
        
        plt.tight_layout()
        plt.show()
        
        print(f"Top 5 terms:")
        for _, row in top_terms.head(5).iterrows():
            print(f"  {row['Term'][:50]}: p={row['Adjusted P-value']:.2e}")

def query_scidex_hypotheses(gene: str, limit: int = 5) -> List[dict]:
    """Query SciDEX for hypotheses involving a gene."""
    try:
        response = requests.get(
            f"{SCIDEX_API}/api/hypotheses",
            params={'target_gene': gene, 'limit': limit},
            timeout=10
        )
        if response.status_code == 200:
            return response.json().get('hypotheses', [])
    except Exception as e:
        print(f"Error querying hypotheses for {gene}: {e}")
    return []

related_hypotheses = []
if 'sig_genes' in dir() and sig_genes:
    for gene in sig_genes[:5]:
        hyps = query_scidex_hypotheses(gene)
        for h in hyps:
            h['linked_gene'] = gene
            related_hypotheses.append(h)
        
if related_hypotheses:
    print(f"Found {len(related_hypotheses)} related hypotheses in SciDEX:")
    for h in related_hypotheses[:5]:
        print(f"  [{h.get('linked_gene')}] {h.get('title', 'Untitled')[:60]} (score={h.get('composite_score', 'N/A')})")
else:
    print("No related hypotheses found (SciDEX API may be unavailable)")

if enrichment_results and 'Reactome_2022' in enrichment_results:
    reactome_df = enrichment_results['Reactome_2022']
    
    print("Linking Reactome pathways to SciDEX KG...\n")
    
    for _, row in reactome_df.head(5).iterrows():
        pathway_name = row['Term']
        genes_in_pathway = str(row.get('Genes', ''))[:100]
        p_val = row['Adjusted P-value']
        
        export_to_scidex_kg(
            source=pathway_name,
            target=ANALYSIS_ID,
            relation='enriched_in',
            evidence=f"Reactome enrichment analysis, p={p_val:.2e}. Genes: {genes_in_pathway}",
            source_type='pathway',
            target_type='analysis'
        )

output_data = {
    'analysis_metadata': {
        'analysis_id': ANALYSIS_ID,
        'dataset_id': DATASET_ID,
        'author': AUTHOR,
        'timestamp': TIMESTAMP
    },
    'genes_analyzed': gene_cols if 'gene_cols' in dir() else AD_GENES,
    'cell_count': int(gene_data.shape[0]) if gene_data is not None else 0,
    'differential_expression': de_results.head(20).to_dict('records') if 'de_results' in dir() and de_results is not None else [],
    'enrichment_summary': {},
    'kg_edges_file': f'kg_edges_{ANALYSIS_ID}.json'
}

for library, df in enrichment_results.items() if 'enrichment_results' in dir() else []:
    if not df.empty:
        output_data['enrichment_summary'][library] = df.head(10)[['Term', 'Adjusted P-value', 'Genes']].to_dict('records')

output_file = f"allen_analysis_results_{ANALYSIS_ID}.json"
with open(output_file, 'w') as f:
    json.dump(output_data, f, indent=2)

print(f"OK Results exported to {output_file}")
print(f"\nSummary:")
print(f"  - Analysis ID: {ANALYSIS_ID}")
print(f"  - Genes analyzed: {len(output_data['genes_analyzed'])}")
print(f"  - Cell count: {output_data['cell_count']}")
print(f"  - DE results: {len(output_data['differential_expression'])} genes")
print(f"  - Enrichment libraries: {len(output_data['enrichment_summary'])}")

Allen Institute Data Analysis Template

Allen Institute Data Analysis Template¶

1. Environment Setup¶

2. Configuration and Metadata¶

3. Utility Functions¶

4. Data Loading¶

5. Exploratory Data Analysis¶

6. Differential Expression Analysis¶

7. Visualization: UMAP/tSNE¶

8. Pathway Enrichment Analysis¶

9. SciDEX Integration: Hypothesis Generation¶

10. Export Results¶

Summary and Next Steps¶