Basic Usage Tutorial
This tutorial walks through a complete SAME analysis on a small dataset.
Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
# Add SAME to path: the parent of the current working directory is placed
# first on sys.path so it is searched first for the `src` package imported below.
# NOTE(review): presumably the tutorial is run from a subdirectory of the
# repository root (so that the parent contains `src`) — confirm.
import sys
sys.path.insert(0, str(Path.cwd().parent))
from src import run_same, init_optim_params, init_gurobi_params
Load Example Data
# Download example data (if not already present) through a pooch cache,
# then load both tables into DataFrames.
import pooch

DATA_URL = "https://zenodo.org/record/XXXXXX/files/"

# Registry of file name -> expected hash.
# NOTE(review): the record id in DATA_URL and the "sha256:..." values look
# like placeholders — confirm the real values before running.
example_registry = {
    "example_ref.csv": "sha256:...",
    "example_aligned.csv": "sha256:...",
}
data_cache = pooch.create(
    path=pooch.os_cache("same_data"),
    base_url=DATA_URL,
    registry=example_registry,
)

# Fetch and read each table in turn (reference first, then aligned)
ref_csv = data_cache.fetch("example_ref.csv")
ref_df = pd.read_csv(ref_csv)
aligned_csv = data_cache.fetch("example_aligned.csv")
aligned_df = pd.read_csv(aligned_csv)

print(f"Reference: {len(ref_df)} cells")
print(f"Aligned: {len(aligned_df)} cells")
Visualize Input Data
# Two side-by-side panels, each with one scatter series (and legend entry)
# per distinct value of the 'cell_type' column.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

panels = (
    ('Reference', ref_df),                     # left panel
    ('Aligned (to be matched)', aligned_df),   # right panel
)
for ax, (panel_title, cells) in zip(axes, panels):
    for ct in cells['cell_type'].unique():
        # Rows belonging to this cell type only
        subset = cells.loc[cells['cell_type'] == ct]
        ax.scatter(subset['X'], subset['Y'], label=ct, s=10, alpha=0.7)
    ax.set_title(panel_title)
    ax.legend()

plt.tight_layout()
plt.show()
Run SAME
# Define the cell type columns handed to run_same as `commonCT`
commonCT = ['TypeA', 'TypeB', 'TypeC']

# Configure optimisation parameters
optim_settings = {
    'radius': 300,             # Search radius
    'knn': 10,                 # Nearest neighbors
    'lazy_constraints': True,  # Memory efficient
}
optim = init_optim_params(**optim_settings)

# Configure Gurobi parameters
gurobi_settings = {
    'time_limit': 1800,  # 30 minutes max
    'mip_gap': 0.05,     # 5% gap tolerance
}
gurobi = init_gurobi_params(**gurobi_settings)

# Run optimization with outprefix set to 'results/'; the call returns the
# matches table plus a second value kept here as `var_out`.
matches, var_out = run_same(
    ref_df=ref_df,
    aligned_df=aligned_df,
    commonCT=commonCT,
    optim_params=optim,
    gurobi_params=gurobi,
    outprefix='results/',
)

print(f"Found {len(matches)} matches")
Analyze Results
# Summary statistics for the matching result.
n_matches = len(matches)
n_aligned = len(aligned_df)

# Match rate: number of matches relative to the number of aligned cells.
# Guarded so an empty aligned table prints 0.0% instead of raising
# ZeroDivisionError.
match_rate = n_matches / n_aligned * 100 if n_aligned else 0.0
print(f"Match rate: {match_rate:.1f}%")

# Cell type accuracy: look up the cell type on each side by row label and
# compare. Series.map with a Series argument looks values up in that Series'
# index, so each frame's 'cell_type' column can be used directly.
aligned_types = matches['aligned_idx'].map(aligned_df['cell_type'])
ref_types = matches['ref_idx'].map(ref_df['cell_type'])
matches['type_match'] = aligned_types == ref_types
# With no matches the mean is NaN and is printed as "nan%".
print(f"Cell type accuracy: {matches['type_match'].mean() * 100:.1f}%")

# Spatial violations: total of the 'triangle_violation' column, plus its
# share of all matches (0.0% when there are no matches at all).
n_violations = matches['triangle_violation'].sum()
violation_pct = n_violations / n_matches * 100 if n_matches else 0.0
print(f"Triangle violations: {n_violations} ({violation_pct:.1f}%)")
Visualize Matches
# Overlay both point sets on one axes and connect every match with a line.
fig, ax = plt.subplots(figsize=(10, 10))

# Reference cells in blue, aligned cells in red
ax.scatter(ref_df['X'], ref_df['Y'], c='blue', s=20, alpha=0.3, label='Reference')
ax.scatter(aligned_df['X'], aligned_df['Y'], c='red', s=20, alpha=0.3, label='Aligned')

# One green segment per row of `matches`, from (X, Y) to (ref_X, ref_Y)
segments = zip(matches['X'], matches['ref_X'], matches['Y'], matches['ref_Y'])
for x_from, x_to, y_from, y_to in segments:
    ax.plot([x_from, x_to], [y_from, y_to],
            'g-', alpha=0.3, linewidth=0.5)

ax.set_title(f'SAME Matches (n={len(matches)})')
ax.legend()
plt.show()
Save Results
# Make sure the output directory exists before writing; DataFrame.to_csv
# does not create missing parent directories.
Path('results').mkdir(parents=True, exist_ok=True)

# Save matches
matches.to_csv('results/matches.csv', index=False)

# Create aligned coordinates (SAME_X, SAME_Y): left-join the matched
# reference coordinates onto the aligned table, pairing the aligned table's
# index with the 'aligned_idx' column of `matches`. merge() returns a new
# frame, so aligned_df itself is left untouched.
aligned_with_same = aligned_df.merge(
    matches[['aligned_idx', 'ref_X', 'ref_Y']],
    left_index=True, right_on='aligned_idx', how='left',
).rename(columns={'ref_X': 'SAME_X', 'ref_Y': 'SAME_Y'})

# For unmatched cells (NaN after the left join), keep original coordinates
for same_col, orig_col in (('SAME_X', 'X'), ('SAME_Y', 'Y')):
    aligned_with_same[same_col] = aligned_with_same[same_col].fillna(
        aligned_with_same[orig_col]
    )

aligned_with_same.to_csv('results/aligned_with_same_coords.csv', index=False)
Next Steps
- Large Datasets Tutorial - Handle 10,000+ cells
- Paper Figures - Reproduce figures from the paper