Use DiffUSE Hub programmatically in Jupyter notebooks, Python scripts, and data pipelines. The SDK provides a clean Python interface to all DiffUSE Hub functionality.
Run Python with the SDK using uvx:
# Quick start - run Python with the SDK available
uvx --with "diffuse-cli @ git+ssh://git@github.com/diff-use/webapp.git#subdirectory=cli" python
Or add to an existing project:
# uv
uv add "diffuse-cli @ git+ssh://git@github.com/diff-use/webapp.git#subdirectory=cli"
# pip
pip install "diffuse-cli @ git+ssh://git@github.com/diff-use/webapp.git#subdirectory=cli"
This example walks through authentication, creating an experiment with rich metadata, connecting related experiments, and searching.
Option A: If you don't have a token, use the built-in device flow (opens browser):
from diffuse_cli import login, Diffuse
# First time: authenticate via GitHub device flow
login() # Opens browser, prints code, waits for you to authorize
# Now create a client (uses stored token automatically)
diffuse = Diffuse()
print(diffuse.version()) # {'version': '0.2.1', ...}
Option B: If you already have a GitHub token:
from diffuse_cli import Diffuse
# Pass token directly (useful for CI/CD or existing tokens)
diffuse = Diffuse(token="ghp_xxxxxxxxxxxxxxxxxxxx")
# Or via environment variable
import os
diffuse = Diffuse(token=os.environ.get("GITHUB_TOKEN"))
# Create a new experiment
exp = diffuse.experiments.create(
title="Cryo-EM Structure of GPCR-Gαs Complex",
summary="High-resolution structure determination using single-particle cryo-EM",
tags=["cryo-em", "gpcr", "structural-biology", "2024-q4"],
)
print(f"Created: {exp['display_id']}") # e.g., EXP-42
# Add detailed metadata (fields must be pre-configured by an admin)
diffuse.metadata.update(exp["display_id"], {
"resolution_angstrom": 2.8,
"particle_count": 1_250_000,
"microscope": "Titan Krios G4",
"detector": "Falcon 4i",
"processing_software": "cryoSPARC v4.4",
"pdb_id": None, # To be deposited
})
# Find the data collection experiment to link
data_exps = diffuse.experiments.list(query="tag:data-collection AND tag:gpcr")
# Use the first match as the parent experiment (None if nothing matched)
parent = data_exps[0] if data_exps else None
if parent:
# Linking requires the relationships API (coming soon); for now, just report the parent
print(f"Parent experiment: {parent['display_id']}")
# Find high-resolution cryo-EM structures (better than 3.0 Å) created after 2024-01-01
results = diffuse.experiments.list(
query='tag:cryo-em AND resolution_angstrom:<3.0 AND created:>2024-01-01'
)
for exp in results:
meta = diffuse.metadata.get(exp["display_id"])
print(f"{exp['display_id']}: {exp['title']}")
print(f" Resolution: {meta.get('resolution_angstrom')} Å")
print(f" Particles: {meta.get('particle_count'):,}")
print()
# Boolean operators and grouping
diffuse.experiments.list(
query='(tag:xray OR tag:cryo-em) AND NOT tag:preliminary'
)
# Wildcard matching
diffuse.experiments.list(query='title:GPCR*')
# Exact phrase matching
diffuse.experiments.list(query='summary:"beta-arrestin complex"')
# List all experiments
experiments = diffuse.experiments.list()
# Filter with search query
experiments = diffuse.experiments.list(query="tag:crystal")
experiments = diffuse.experiments.list(query="tag:xray AND resolution:>1.5")
# Filter by visibility
public_exps = diffuse.experiments.list(public=True)
private_exps = diffuse.experiments.list(public=False)
# Sort results
experiments = diffuse.experiments.list(sort="recent") # Default: most recently updated
experiments = diffuse.experiments.list(sort="oldest") # Oldest first
experiments = diffuse.experiments.list(sort="title") # Alphabetical
# Pagination
experiments = diffuse.experiments.list(limit=50, offset=100)
# Get by display ID (recommended)
exp = diffuse.experiments.get("EXP-42")
# Get by UUID also works
exp = diffuse.experiments.get("550e8400-e29b-41d4-a716-446655440000")
print(exp["title"])
print(exp["summary"])
print(exp["tags"])
print(exp["artifact_count"])
# Minimal creation
exp = diffuse.experiments.create(title="My New Experiment")
# With all options
exp = diffuse.experiments.create(
title="Crystal Structure Analysis",
summary="Analyzing X-ray diffraction patterns from sample batch A",
tags=["xray", "crystal", "batch-a"],
markdown="# Overview\n\nDetailed notes here...",
type_id="protocol" # Optional: experiment type
)
print(f"Created: {exp['display_id']}")
# Update specific fields
exp = diffuse.experiments.update(
"EXP-42",
title="Updated Title",
summary="New summary",
tags=["new", "tags"]
)
# Update markdown content
exp = diffuse.experiments.update(
"EXP-42",
markdown="# Updated Content\n\nNew notes..."
)
diffuse.experiments.delete("EXP-42")
# Make experiment publicly accessible
exp = diffuse.experiments.publish("EXP-42")
print(exp["is_public"]) # True
# Make experiment private again
exp = diffuse.experiments.unpublish("EXP-42")
print(exp["is_public"]) # False
types = diffuse.experiments.types()
for t in types:
print(f"{t['slug']}: {t['name']}")
# Get recent activity for an experiment
activity = diffuse.experiments.activity("EXP-42", limit=20)
for event in activity:
print(f"{event['created_at']}: {event['action']}")
Artifacts in DiffUSE Hub follow a many-to-many model: a single artifact can be connected to multiple experiments, and experiments can share artifacts. This enables data reuse without duplication.
# List all artifacts
artifacts = diffuse.artifacts.list()
# Filter by experiment
artifacts = diffuse.artifacts.list(experiment_id="EXP-42")
# Pagination
artifacts = diffuse.artifacts.list(limit=100, offset=0)
artifact = diffuse.artifacts.get("artifact-uuid")
print(artifact["filename"])
print(artifact["size_bytes"])
print(artifact["content_type"])
print(artifact["checksum_sha256"])
print(artifact["file_mtime"]) # Original file modification time (if available)
# Connect an existing artifact to another experiment
diffuse.artifacts.connect(
artifact_id="artifact-uuid",
experiment_id="EXP-99"
)
# Optionally specify a display path
diffuse.artifacts.connect(
artifact_id="artifact-uuid",
experiment_id="EXP-99",
path="data/shared-dataset.csv"
)
# Remove artifact from experiment (doesn't delete the artifact)
diffuse.artifacts.disconnect(
artifact_id="artifact-uuid",
experiment_id="EXP-42"
)
# Delete artifact (only works if it is connected to at most one experiment)
diffuse.artifacts.delete("artifact-uuid")
# Force delete even if connected to multiple experiments
diffuse.artifacts.delete("artifact-uuid", force=True)
Store arbitrary key-value metadata on experiments. Values can be strings, numbers, booleans, or nested objects.
# Get all metadata for an experiment
metadata = diffuse.metadata.get("EXP-42")
print(metadata) # {"resolution": 1.5, "temperature": 298, ...}
# Get a specific key
resolution = diffuse.metadata.get("EXP-42", key="resolution")
print(resolution) # 1.5
# Set a single key-value pair
diffuse.metadata.set("EXP-42", "resolution", 1.5)
diffuse.metadata.set("EXP-42", "temperature", 298)
diffuse.metadata.set("EXP-42", "validated", True)
# Complex values work too
diffuse.metadata.set("EXP-42", "parameters", {
"wavelength": 0.154,
"detector": "Pilatus 6M",
"exposure_time": 0.1
})
# Update multiple metadata fields at once
diffuse.metadata.update("EXP-42", {
"resolution": 1.8,
"temperature": 310,
"status": "complete"
})
# Remove a metadata field
diffuse.metadata.delete("EXP-42", "obsolete_field")
# Get available metadata field definitions
fields = diffuse.metadata.fields()
for field in fields:
print(f"{field['name']} ({field['field_type']})")
from diffuse_cli import Diffuse, DiffuseError
diffuse = Diffuse()
try:
exp = diffuse.experiments.get("nonexistent")
except DiffuseError as e:
print(f"Error: {e}")
print(f"Status code: {e.status_code}") # e.g., 404
print(f"Detail: {e.detail}") # Server error message
Use the built-in device flow for interactive login, pass a token directly, or read the token from an environment variable:
from diffuse_cli import login, Diffuse
# Option 1: Device flow (interactive, opens browser)
login()
diffuse = Diffuse()
# Option 2: Pass token directly
diffuse = Diffuse(token="ghp_xxxxxxxxxxxxxxxxxxxx")
# Option 3: Environment variable
import os
diffuse = Diffuse(token=os.environ.get("GITHUB_TOKEN"))
# Set the API URL directly
export DIFFUSE_API_URL="https://app.diffuse.science"
# Or use environment shortcuts
export DIFFUSE_ENV="prod" # https://app.diffuse.science (default)
export DIFFUSE_ENV="staging" # https://dev.diffuse.science
export DIFFUSE_ENV="local" # http://127.0.0.1:8000
# Automatically close the HTTP client when done (here `token` is your GitHub token, e.g. os.environ.get("GITHUB_TOKEN"))
with Diffuse(token=token) as diffuse:
experiments = diffuse.experiments.list()
# ... do work ...
# Set a longer timeout for slow connections
diffuse = Diffuse(token=token, timeout=60.0) # 60 seconds