OSTA.data 0.99.2
library(VisiumIO)
library(OSTA.data)
library(DropletUtils)
library(SpatialExperimentIO)
OSTA.data
is a companion package for the “Orchestrating Spatial
Transcriptomics Analysis” (OSTA) with Bioconductor online book.
Throughout OSTA, we rely on a set of publicly available datasets that cover different sequencing- and imaging-based platforms, namely: Visium, Visium HD, Xenium (10x Genomics) and CosMx (NanoString). In addition, we rely on scRNA-seq (Chromium) data for tasks such as spot deconvolution and label transfer (i.e., supervised clustering).
These data have been deposited in an Open Science Framework (OSF) repository here, and can be easily queried and downloaded using functions from the osfr package.
For convenience, we have implemented OSTA.data
to:
A list of currently available datasets may be viewed via:
OSTA.data_list()
## [1] "Chromium_HumanBreast_Janesick" "Chromium_HumanColon_Oliveira"
## [3] "CosMx1k_MouseBrain1" "CosMx1k_MouseBrain2"
## [5] "CosMx6k_HumanBrain" "VisiumHD_HumanColon_Oliveira"
## [7] "Visium_HumanBreast_Janesick" "Visium_HumanColon_Oliveira"
## [9] "Xenium_HumanBreast1_Janesick" "Xenium_HumanColon_Oliveira"
Any of the above may be retrieved using OSTA.data_load()
.
For imaging-based spatial transcriptomics datasets (namely, CosMx and Xenium),
arguments pol
and mol
specify whether or not cell segmentation boundaries
and data on transcript molecules should be retrieved (both default to TRUE
).
id <- "Xenium_HumanColon_Oliveira"
pa <- OSTA.data_load(id)
basename(pa)
## [1] "7d05d3100a6c2_Xenium_HumanColon_Oliveira.zip"
Once we have downloaded the .zip archive, we can unpack it into a designated location (e.g., a working directory subfolder), or a temporary location:
# create temporary directory
dir.create(td <- tempfile())
unzip(pa, exdir=td) # unzip
list.files(td) # list files
## [1] "LICENSE.txt" "cell_boundaries.parquet"
## [3] "cell_feature_matrix.h5" "cells.parquet"
## [5] "experiment.xenium" "gene_panel.json"
## [7] "nucleus_boundaries.parquet" "transcripts.parquet"
The data are all set and can be read into R using a framework of our choice. Here, we demonstrate how to
both of which return a SpatialExperiment object.
(spe <- readXeniumSXE(td))
## class: SpatialExperiment
## dim: 422 340837
## metadata(4): experiment.xenium transcripts cell_boundaries
## nucleus_boundaries
## assays(1): counts
## rownames(422): ABCC8 ACP5 ... WFDC2 XCL2
## rowData names(3): ID Symbol Type
## colnames(340837): aaaadaba-1 aaaadgga-1 ... oikdmkkf-1 oikeebja-1
## colData names(10): cell_id transcript_counts ... nucleus_area sample_id
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(3): NegControlProbe UnassignedCodeword NegControlCodeword
## spatialCoords names(2) : x_centroid y_centroid
## imgData names(0):
# retrieval
id <- "CosMx1k_MouseBrain1"
pa <- OSTA.data_load(id, mol=FALSE)
dir.create(td <- tempfile())
unzip(pa, exdir=td)
# importing
(spe <- readCosmxSXE(td, addTx=FALSE))
## class: SpatialExperiment
## dim: 950 38996
## metadata(2): polygons fov_positions
## assays(1): counts
## rownames(950): Chrna4 Slc6a1 ... Cck Aqp4
## rowData names(0):
## colnames(38996): 1 2 ... 38995 38996
## colData names(19): fov cell_ID ... Max.DAPI sample_id
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(1): NegPrb
## spatialCoords names(2) : CenterX_global_px CenterY_global_px
## imgData names(0):
# retrieval
id <- "Visium_HumanColon_Oliveira"
pa <- OSTA.data_load(id)
## Requesting folder 'outs' from OSF
## Downloaded 4 file(s) from OSF folder 'outs'
dir.create(td <- tempfile())
unzip(pa, exdir=td)
# importing
obj <- TENxVisium(
spacerangerOut=td,
images="lowres",
format="h5")
(spe <- import(obj))
## class: SpatialExperiment
## dim: 18085 4269
## metadata(2): resources spatialList
## assays(1): counts
## rownames(18085): ENSG00000187634 ENSG00000188976 ... ENSG00000198695
## ENSG00000198727
## rowData names(3): ID Symbol Type
## colnames(4269): AACAATGTGCTCCGAG-1 AACACCATTCGCATAC-1 ...
## TGTTGGTGCGGAATCA-1 TGTTGGTGGACTCAGG-1
## colData names(4): in_tissue array_row array_col sample_id
## reducedDimNames(0):
## mainExpName: Gene Expression
## altExpNames(0):
## spatialCoords names(2) : pxl_col_in_fullres pxl_row_in_fullres
## imgData names(4): sample_id image_id data scaleFactor
# retrieval
id <- "VisiumHD_HumanColon_Oliveira"
pa <- OSTA.data_load(id)
## Requesting folder 'binned_outputs' from OSF
## Downloaded 9 file(s) from OSF folder 'binned_outputs'
dir.create(td <- tempfile())
unzip(pa, exdir=td)
# importing
obj <- TENxVisiumHD(
spacerangerOut=td,
images="lowres",
format="h5")
(spe <- import(obj))
## class: SpatialExperiment
## dim: 18085 545913
## metadata(2): resources spatialList
## assays(1): counts
## rownames(18085): ENSG00000187634 ENSG00000188976 ... ENSG00000198695
## ENSG00000198727
## rowData names(3): ID Symbol Type
## colnames(545913): s_008um_00301_00321-1 s_008um_00526_00291-1 ...
## s_008um_00353_00477-1 s_008um_00595_00611-1
## colData names(6): barcode in_tissue ... bin_size sample_id
## reducedDimNames(0):
## mainExpName: Gene Expression
## altExpNames(0):
## spatialCoords names(2) : pxl_col_in_fullres pxl_row_in_fullres
## imgData names(4): sample_id image_id data scaleFactor
# retrieval
id <- "Chromium_HumanBreast_Janesick"
pa <- OSTA.data_load(id)
dir.create(td <- tempfile())
unzip(pa, exdir=td)
# importing
h5 <- list.files(td, "h5$", full.names=TRUE)
(sce <- read10xCounts(h5))
## class: SingleCellExperiment
## dim: 18082 30365
## metadata(1): Samples
## assays(1): counts
## rownames(18082): ENSG00000187634 ENSG00000188976 ... ENSG00000198695
## ENSG00000198727
## rowData names(3): ID Symbol Type
## colnames: NULL
## colData names(2): Sample Barcode
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
sessionInfo()
## R Under development (unstable) (2025-02-19 r87757)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.2 LTS
##
## Matrix products: default
## BLAS: /home/biocbuild/bbs-3.21-bioc/R/lib/libRblas.so
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0 LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB LC_COLLATE=C
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: America/New_York
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] SpatialExperimentIO_0.99.8 DropletUtils_1.27.2
## [3] OSTA.data_0.99.2 VisiumIO_1.3.5
## [5] TENxIO_1.9.3 SingleCellExperiment_1.29.2
## [7] SummarizedExperiment_1.37.0 Biobase_2.67.0
## [9] GenomicRanges_1.59.1 GenomeInfoDb_1.43.4
## [11] IRanges_2.41.3 S4Vectors_0.45.4
## [13] BiocGenerics_0.53.6 generics_0.1.3
## [15] MatrixGenerics_1.19.1 matrixStats_1.5.0
## [17] BiocStyle_2.35.0
##
## loaded via a namespace (and not attached):
## [1] DBI_1.2.3 rlang_1.1.5
## [3] magrittr_2.0.3 compiler_4.5.0
## [5] RSQLite_2.3.9 DelayedMatrixStats_1.29.1
## [7] vctrs_0.6.5 httpcode_0.3.0
## [9] pkgconfig_2.0.3 SpatialExperiment_1.17.0
## [11] crayon_1.5.3 fastmap_1.2.0
## [13] dbplyr_2.5.0 magick_2.8.5
## [15] XVector_0.47.2 scuttle_1.17.0
## [17] rmarkdown_2.29 tzdb_0.4.0
## [19] UCSC.utils_1.3.1 purrr_1.0.4
## [21] bit_4.6.0 xfun_0.51
## [23] cachem_1.1.0 beachmat_2.23.6
## [25] jsonlite_1.9.1 blob_1.2.4
## [27] rhdf5filters_1.19.2 DelayedArray_0.33.6
## [29] Rhdf5lib_1.29.1 BiocParallel_1.41.2
## [31] parallel_4.5.0 R6_2.6.1
## [33] bslib_0.9.0 stringi_1.8.4
## [35] limma_3.63.8 jquerylib_0.1.4
## [37] Rcpp_1.0.14 bookdown_0.42
## [39] assertthat_0.2.1 knitr_1.49
## [41] triebeard_0.4.1 R.utils_2.13.0
## [43] readr_2.1.5 BiocBaseUtils_1.9.0
## [45] Matrix_1.7-2 tidyselect_1.2.1
## [47] abind_1.4-8 yaml_2.3.10
## [49] codetools_0.2-20 curl_6.2.1
## [51] lattice_0.22-6 tibble_3.2.1
## [53] withr_3.0.2 evaluate_1.0.3
## [55] BiocFileCache_2.15.1 pillar_1.10.1
## [57] BiocManager_1.30.25 filelock_1.0.3
## [59] hms_1.1.3 sparseMatrixStats_1.19.0
## [61] glue_1.8.0 tools_4.5.0
## [63] BiocIO_1.17.1 data.table_1.17.0
## [65] locfit_1.5-9.12 fs_1.6.5
## [67] rhdf5_2.51.2 grid_4.5.0
## [69] urltools_1.7.3 edgeR_4.5.8
## [71] GenomeInfoDbData_1.2.13 HDF5Array_1.35.15
## [73] cli_3.6.4 S4Arrays_1.7.3
## [75] arrow_19.0.1 dplyr_1.1.4
## [77] R.methodsS3_1.8.2 sass_0.4.9
## [79] digest_0.6.37 SparseArray_1.7.6
## [81] crul_1.5.0 dqrng_0.4.1
## [83] osfr_0.2.9 rjson_0.2.23
## [85] memoise_2.0.1 htmltools_0.5.8.1
## [87] R.oo_1.27.0 lifecycle_1.0.4
## [89] h5mread_0.99.4 httr_1.4.7
## [91] statmod_1.5.0 bit64_4.6.0-1