{
"model_family": "biophysical",
"equations": ["dA/dt = k_prod - k_clear * A - k_phago * M * A"],
"species": ["amyloid_beta", "microglia", "cytokines"],
"parameters": {
"k_prod": {"value": 0.1, "units": "uM/hr", "source": "PMID:12345"},
"k_clear": {"value": 0.05, "units": "1/hr", "source": "fitted"},
"k_phago": {"value": 0.02, "units": "1/(cells*hr)", "source": "PMID:67890"}
},
"reactions": ["production", "clearance", "phagocytosis"],
"steady_states": {"amyloid_beta": 2.0, "microglia": 100},
"solver": "scipy.integrate.solve_ivp",
"solver_config": {"method": "RK45", "t_span": [0, 100]}
}

{
"model_family": "deep_learning",
"framework": "pytorch",
"architecture": "transformer",
"layer_config": {"n_layers": 6, "hidden_dim": 512, "n_heads": 8},
"parameter_count": 25000000,
"training_config": {
"optimizer": "adamw",
"learning_rate": 0.001,
"batch_size": 32,
"epochs": 100,
"early_stopping_patience": 10
},
"training_metrics": {
"final_loss": 0.023,
"best_val_loss": 0.031,
"training_time_hours": 2.5
},
"checkpoint_path": "/models/checkpoint_epoch_87.pt",
"input_schema": "gene expression matrix (genes x samples)",
"output_schema": "cell type probabilities"
}

{
"model_family": "statistical",
"model_type": "logistic_regression",
"framework": "sklearn",
"features": ["gene_expression", "age", "sex", "apoe_genotype"],
"target": "disease_status",
"coefficients": {"gene_expression": 0.45, "age": 0.02},
"fit_metrics": {
"auc_roc": 0.87,
"accuracy": 0.82,
"f1_score": 0.79,
"n_samples": 500,
"cross_validation": "5-fold"
}
def register_model(model_family, title, metadata,
                   trained_on_dataset_id=None,
                   tests_hypothesis_id=None,
                   produced_by_analysis_id=None):
    """Register a model artifact and auto-link it to related artifacts.

    The artifact is created through ``register_artifact`` with
    ``artifact_type="model"`` and a metadata dict that carries
    ``model_family`` alongside whatever family-specific fields the caller
    supplied.

    Args:
        model_family: Family label stored under the ``"model_family"``
            metadata key (it overrides any value already present there).
        title: Title passed through to ``register_artifact``.
        metadata: Mapping of additional metadata fields, or ``None`` for
            no extra fields. The caller's mapping is never modified.
        trained_on_dataset_id: If truthy, a ``"derives_from"`` link is
            created from the new model to this artifact.
        tests_hypothesis_id: If truthy, a ``"supports"`` link is created
            from the new model to this artifact.
        produced_by_analysis_id: If truthy, a ``"derives_from"`` link is
            created from the new model to this artifact.

    Returns:
        The value returned by ``register_artifact`` for the new model.
    """
    # Copy before adding model_family: writing into the caller's dict would
    # leak the key into an object that may be reused for other registrations,
    # and a None metadata argument would otherwise raise TypeError.
    model_metadata = dict(metadata or {})
    model_metadata["model_family"] = model_family

    artifact_id = register_artifact(
        artifact_type="model",
        title=title,
        metadata=model_metadata,
        quality_score=0.6,  # default, updated after evaluation
    )

    # Auto-create links; a falsy id (None, "", 0) means "no such relation".
    if trained_on_dataset_id:
        create_link(artifact_id, trained_on_dataset_id, "derives_from",
                    evidence=f"Model trained on dataset {trained_on_dataset_id}")
    if tests_hypothesis_id:
        create_link(artifact_id, tests_hypothesis_id, "supports",
                    evidence=f"Model tests hypothesis {tests_hypothesis_id}")
    if produced_by_analysis_id:
        create_link(artifact_id, produced_by_analysis_id, "derives_from",
                    evidence=f"Model produced by analysis {produced_by_analysis_id}")
    return artifact_id
register_model() creates model artifact with correct metadata

Task: Model artifact type: biophysical, deep learning, statistical models
Analysis:
register_model() exists in artifact_registry.py but was missing: trained_on_dataset_id, tests_hypothesis_id, produced_by_analysis_id
create_version() already merges metadata from parent, preserving model_family

scidex/atlas/artifact_registry.py:
- MODEL_FAMILIES = {'biophysical', 'deep_learning', 'statistical'} constant (line ~113)
- _validate_model_family(model_family) validation function
- validate_model_metadata(model_family, metadata) for family-specific schema validation
- register_model() with:
  - trained_on_dataset_id: creates 'derives_from' link to dataset artifact
  - tests_hypothesis_id: creates 'supports' link to hypothesis artifact
  - produced_by_analysis_id: creates 'derives_from' link to analysis artifact
  - metadata parameter for additional family-specific fields

Verification:
- Syntax check (python3 -m py_compile)
- _validate_model_family and validate_model_metadata passed
- create_version() metadata merge preserves model_family from parent
- register_model() creates model artifact with correct metadata