SingleCellExperience类用于表示单细胞测序数据。它继承自RangedSummarizedExperiment类,并以相同的方式使用。此外,该类还支持通过reducedDims存储降维结果(如PCA、t-SNE),并通过altExps存储替代特征类型(如spike-ins)。
Usage
SingleCellExperiment( ..., reducedDims = list(), altExps = list(), rowPairs = list(), colPairs = list(), mainExpName = NULL )
1. 生成SingleCellExperiment对象
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install("SingleCellExperiment")
library("SingleCellExperiment")
ls("package:SingleCellExperiment")
# 只有表达矩阵
counts <- matrix(rpois(100, lambda = 10), ncol=10, nrow=10)
sce <- SingleCellExperiment(counts)
sce <- SingleCellExperiment(list(counts=counts))
# 含有降纬数据
ncells <- 100
u <- matrix(rpois(20000, 5), ncol=ncells)
v <- log2(u + 1)
pca <- matrix(runif(ncells*5), ncells)
tsne <- matrix(rnorm(ncells*2), ncells)
sce <- SingleCellExperiment(assays=list(counts=u, logcounts=v),
reducedDims=SimpleList(PCA=pca, tSNE=tsne))
# 有SummarizedExperiment对象转化生成
se <- SummarizedExperiment(assays=list(counts=u, logcounts=v))
as(se, "SingleCellExperiment")
# 含有colData,rowData 以及metadata
pretend.cell.labels <- sample(letters, ncol(counts), replace=TRUE)
pretend.gene.lengths <- sample(10000, nrow(counts))
sce <- SingleCellExperiment(list(counts=counts),
colData=DataFrame(label=pretend.cell.labels),
rowData=DataFrame(length=pretend.gene.lengths),
metadata=list(study="GSE111111"))
2. 提取行列信息,表达矩阵等
library(scRNAseq)
sce <- ReprocessedAllenData("tophat_counts")
# 行:特征,列:单细胞名
dim(assay(sce))
dim(sce)
# 表达矩阵
assay(sce,1)
assays(sce)[[1]]
colnames(colData(sce))
rownames(rowData(sce))
# 元数据
metadata(sce)
# Alternative Experiment methods
altExpNames(sce)
altExp(sce, "ERCC")
altExp(sce, 1)
assay(altExp(sce, "ERCC"))
3. 取子集
library(scRNAseq)
sce <- MuraroPancreasData()
sce[1:2,1:3]
sce["A1BG__chr19",]
sce[,c("D28-1_1", "D28-1_2")]
# 也可以根据rowData(sce)以及colData(sce)进行选择
# 如果有NA,要去除
sce[,sce$donore=="D28"]
4. 其他
library(scRNAseq)
sce <- ReprocessedAllenData("tophat_counts")
## 1. 增加logcounts数据 norm + log
counts <- assay(sce, "tophat_counts")
libsizes <- colSums(counts)
size.factors <- libsizes/mean(libsizes)
logcounts(sce) <- log2(t(t(counts)/size.factors) + 1)
assayNames(sce)
## 2. 获得logcounts 表达谱数据 matrix
assay(sce, "logcounts")
logcounts(sce)
## 3. 增加PCA,TSNE数据
pca_data <- prcomp(t(logcounts(sce)), rank=50)
library(Rtsne)
set.seed(5252)
tsne_data <- Rtsne(pca_data$x[,1:50], pca = FALSE)
reducedDims(sce) <- list(PCA=pca_data$x, TSNE=tsne_data$Y)
sce
## 4. getting降纬数据
educedDim(sce, "PCA")
reducedDim(sce, "tSNE")
## 5. 增加Alternative Experiment 数据
## ----options, include=FALSE, echo=FALSE---------------------------------------
library(BiocStyle)
knitr::opts_chunk$set(warning=FALSE, error=FALSE, message=FALSE)
library(SingleCellExperiment)
counts <- matrix(rpois(100, lambda = 10), ncol=10, nrow=10)
sce <- SingleCellExperiment(counts)
# the same columns
altExp(sce, "Spike") <- SingleCellExperiment(matrix(rpois(20, lambda = 5), ncol=10, nrow=2))
altExp(sce, "Protein") <- SingleCellExperiment(matrix(rpois(50, lambda = 100), ncol=10, nrow=5))
altExp(sce, "CRISPR") <- SingleCellExperiment(matrix(rbinom(80, p=0.1, 1), ncol=10, nrow=8))
sce
## 6. 函数操作
## -----------------------------------------------------------------------------
totalCount <- function(x, i=1, multiplier=1, subset.row=NULL) {
mat <- assay(x, i)
if (!is.null(subset.row)) {
mat <- mat[subset.row,,drop=FALSE]
}
colSums(mat) * multiplier
}
## -----------------------------------------------------------------------------
totals <- applySCE(sce, FUN=totalCount)
totals
## -----------------------------------------------------------------------------
totals.manual <- list(
totalCount(sce),
Spike=totalCount(altExp(sce, "Spike")),
Protein=totalCount(altExp(sce, "Protein")),
CRISPR=totalCount(altExp(sce, "CRISPR"))
)
stopifnot(identical(totals, totals.manual))
## -----------------------------------------------------------------------------
totals10.manual <- list(
totalCount(sce, multiplier=10),
Spike=totalCount(altExp(sce, "Spike"), multiplier=10),
Protein=totalCount(altExp(sce, "Protein"), multiplier=10),
CRISPR=totalCount(altExp(sce, "CRISPR"), multiplier=10)
)
## -----------------------------------------------------------------------------
totals10.apply <- applySCE(sce, FUN=totalCount, multiplier=10)
stopifnot(identical(totals10.apply, totals10.manual))
## -----------------------------------------------------------------------------
totals10.lapply <- lapply(c(List(sce), altExps(sce)),
FUN=totalCount, multiplier=10)
stopifnot(identical(totals10.apply, totals10.lapply))
## -----------------------------------------------------------------------------
totals.custom <- applySCE(sce, FUN=totalCount, multiplier=10,
ALT.ARGS=list(Spike=list(subset.row=2),
Protein=list(subset.row=3:5)))
totals.custom
## -----------------------------------------------------------------------------
head.sce <- applySCE(sce, FUN=head, n=5)
head.sce
## -----------------------------------------------------------------------------
altExp(head.sce)
altExp(head.sce, "Protein")
altExp(head.sce, "CRISPR")
## -----------------------------------------------------------------------------
head.sce.list <- applySCE(sce, FUN=head, n=5, SIMPLIFY=FALSE)
head.sce.list
## -----------------------------------------------------------------------------
manual.head <- head(sce, n=5)
altExp(manual.head, "Spike") <- head(altExp(sce, "Spike"), n=5)
altExp(manual.head, "Protein") <- head(altExp(sce, "Protein"), n=5)
altExp(manual.head, "CRISPR") <- head(altExp(sce, "CRISPR"), n=5)
manual.head