Tutorial 4, Integration and downstream of spatial transcriptomics and proteomics of COAD#

import sys
sys.path.append(r"/home/wangheqi/PycharmProject/")

import numpy as np
import pandas as pd
import scanpy as sc
import anndata as ad
import numpy as np

import matplotlib.pyplot as plt
import spcoral
import random
import torch

random.seed(2030)
np.random.seed(2030)

Read the data#

The dataset was downloaded from SPATCH.

We selected the COAD dataset with transcriptomics (CosMx, 6175 genes) and proteomics (CODEX, 16 proteins) to test SPCoral.

adata_codex = sc.read_h5ad('/mnt/TrueNas/project/wangheqi_metabolomic_algorithms/CosMx/COAD/proteome/adata_codex.h5ad')
adata_cosmx = sc.read_h5ad('/mnt/TrueNas/project/wangheqi_metabolomic_algorithms/CosMx/COAD/transcriptome/adata.h5ad')
adata_codex = adata_codex[~pd.isna(adata_codex.obs['spatial_cluster']), ].copy()
adata_cosmx = adata_cosmx[~pd.isna(adata_cosmx.obs['spatial_cluster']), ].copy()

Show cell types of the raw data#

%matplotlib inline
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 13))

sc.pl.embedding(adata_codex, basis='spatial', color='annotation', ax=ax, s=6)
../_images/cab1821dfbd7373f9193a4a25c724a9f6cdec78103c9489193adf5eef14ea25f.png
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 13))

sc.pl.embedding(adata_cosmx, basis='spatial', color='annotation', ax=ax, s=6)
../_images/f76e5537077d2fcb8ee1bc336f387ba2a7f39bf018cbecd93fdd7d5090f36fc3.png

Preprocess 1: Scale the data#

# Normalizing to median total counts
sc.pp.normalize_total(adata_cosmx)
# Logarithmize the data
sc.pp.log1p(adata_cosmx)
sc.pp.pca(adata_cosmx, n_comps=100)
sc.pp.scale(adata_codex, max_value=5)
adata_cosmx.obsm['feat'] = adata_cosmx.obsm['X_pca']
adata_codex.obsm['feat'] = adata_codex.X

Preprocess 2: Segmentation#

This dataset contains more than 500,000 cells (CODEX: 248470, CosMx: 261731). After constructing the adjacency network, it is difficult to load it directly into the GPU. To address this, we developed a method similar to patch segmentation, dividing the original data into smaller patches and feeding them into the model one by one.

clip_results = spcoral.pp.clipping_patch(
    adata_omics1 = adata_cosmx,
    adata_omics2 = adata_codex,
    x_num=16,
    y_num=16,
    retain_edge=0.02,
    min_cells=50
)

Model training#

Model = spcoral.model.integrate_model_block(
    clip_results,
    epochs=100,
    device=torch.device('cuda:2'),
)
[Fast Mode] Seed=2020, cudnn.benchmark=True, multi-thread ON
Model.preprocess(
    graph_method_single='radius',
    radius_spatial_omics1=2,
    radius_spatial_omics2=2,
    num_processes=20,
    k_cross_omics=6,
    k_all_omics=12,
    use_obsm='spatial',
    g_all_auto=False
)
Model.train()
[Fast Mode] Seed=2020, cudnn.benchmark=True, multi-thread ON
adata_cosmx, adata_codex = Model.map_results_to_adata()
adata_cosmx, adata_codex = spcoral.analysis.cluster(adata_cosmx, adata_codex, cluster_method='louvain', resolution_louvain=1.0)

Visualization#

%matplotlib inline
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 13))

sc.pl.embedding(adata_codex, basis='spatial', color='domain', ax=ax, s=6)
../_images/bc1b776f6386341fcf0c50cf08f7086b11eb5d0d57073210c33ba14fc1c032fa.png
%matplotlib inline
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 13))

sc.pl.embedding(adata_cosmx, basis='spatial', color='domain', ax=ax, s=6)
../_images/92e6b61e432b49d0183b488d5f60a8d84784821ed4b056e72ad8854e65eff25b.png

Downstream 1: Interpolation & Prediction#

Based on the cross decoder of SPCoral, it can perform interpolation and prediction of features between two omics.

Here we present an example of predicting proteomic features in transcriptome slices.

adata_codex_prediction = sc.AnnData(adata_cosmx.obsm['cross_spcoral'], obs=adata_cosmx.obs, var=adata_codex.var, obsm=adata_cosmx.obsm)
adata_codex_prediction.var_names.tolist()
['CD8',
 'CD20',
 'CD3e',
 'CD56',
 'Pan-Cytokeratin',
 'CD4',
 'CD34',
 'SMA',
 'FOXP3',
 'CD163',
 'HLA-A',
 'CD11c',
 'MPO',
 'CD68',
 'HLA-DR',
 'IDO1']
%matplotlib inline
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(8, 10))

sc.pl.embedding(adata_codex_prediction, basis='spatial', color='CD68', ax=ax, s=6)
../_images/501d5ead1481499cf552b2d022c038c90a23619286393b350bfdc4b0b9cf2e73.png
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(8, 10))

sc.pl.embedding(adata_codex_prediction, basis='spatial', color='CD20', ax=ax, s=6)
../_images/4a4b6697e9d9ecda8a5e6c0557d600030957d663505de79f9d8532826ac636e9.png

Downstream 2: Correlation of cross-model features#

By utilizing the predicted features, we can calculate the correlation between the two omics data sets using various methods.

First, we calculated the global spatial correlation of the characteristic gene SPP1 and the marker protein CD68 in macrophages.

adata_cosmx.X = adata_cosmx.X.toarray()
spcoral.analysis.pearson(adata_cosmx, adata_codex, features_omics1='SPP1', features_omics2='CD68', used_rec=False)
{'features_omics1': ['SPP1'],
 'features_omics2': ['CD68'],
 'SPP1_CD68': {'P': 7.364066971998629e-13, 'corr': 0.014300842633701228}}
spcoral.analysis.bivariate_moran(adata_cosmx, adata_codex, features_omics1='SPP1', features_omics2='CD68', used_rec=False)
{'features_omics1': ['SPP1'],
 'features_omics2': ['CD68'],
 'SPP1_CD68': {'P': 0.001, 'I': 0.013148575831596034}}

Next, we used the local Moran index to identify the spatial locations with strong co-localization between two markers.

res_dict = spcoral.analysis.bivariate_local_moran(
    adata_cosmx, adata_codex, features_omics1='SPP1', features_omics2='CD68', used_rec=False
)
adata_cosmx.obs['SPP1_CD68'] = res_dict['SPP1_CD68']['I']
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(8, 10))

sc.pl.embedding(adata_cosmx, basis='spatial', color='SPP1_CD68', ax=ax, s=6, vmin=0, vmax=10, title='Local moran: SPP1_CD68')
../_images/3e7e3b6512d3253214b27821e45b140d1d1b11c0e49e03403c6ed329c4b7eb37.png