Orchestrating Single-Cell Analysis with Bioconductor

OSCA – an amazing resource!

Differential expression

This script is derived from the OSCA Multi-Sample book, Chapter 4: DE analyses between conditions. See the book for full details.

Initial analysis

## load data
# Fetch samples 5 through 10 of the WT chimera dataset; the printed summary
# below shows the result is a SingleCellExperiment.
sce.chimera <- WTChimeraData(samples = 5:10)
#> class: SingleCellExperiment 
#> dim: 29453 20935 
#> metadata(0):
#> assays(1): counts
#> rownames(29453): ENSMUSG00000051951 ENSMUSG00000089699 ...
#>   ENSMUSG00000095742 tomato-td
#> rowData names(2): ENSEMBL SYMBOL
#> colnames(20935): cell_9769 cell_9770 ... cell_30702 cell_30703
#> colData names(11): cell barcode ... doub.density sizeFactor
#> reducedDimNames(2): pca.corrected.E7.5 pca.corrected.E8.5
#> mainExpName: NULL
#> altExpNames(0):

## feature annotation
# Replace the row names with names built from the ENSEMBL and SYMBOL columns
# of rowData via uniquifyFeatureNames().
# NOTE(review): presumably this yields gene symbols, falling back to Ensembl
# IDs where the symbol is missing or duplicated -- confirm against the scuttle
# documentation.
rownames(sce.chimera) <- uniquifyFeatureNames(
    rowData(sce.chimera)$ENSEMBL,
    rowData(sce.chimera)$SYMBOL
)

## qc
# Flag cells whose mapped cell type is "stripped" or "Doublet", then keep
# only the unflagged columns.
drop <- is.element(sce.chimera$celltype.mapped, c("stripped", "Doublet"))
sce.chimera <- sce.chimera[, !drop]

## normalization
# Run logNormCounts() on the filtered object and store the result back.
sce.chimera <- sce.chimera |> logNormCounts()

## variance modeling
# Model per-gene variance, blocking on sample; genes whose `bio` value is
# positive are marked as the chosen highly variable genes.
dec.chimera <- modelGeneVar(sce.chimera, block = sce.chimera$sample)
chosen.hvgs <- dec.chimera$bio > 0

## merge batches
# Batch-correct the samples, restricting the correction to the chosen HVGs.
# Downstream steps read the result from the "corrected" reducedDim of `merged`.
# The merge order first combines the replicates within each condition and
# then merges the two conditions.
# NOTE(review): the call was truncated in this copy; the first three arguments
# and the FastMnnParam(merge.order = ...) wrapper are reconstructed -- confirm
# against the OSCA multi-sample chapter.
# NOTE(review): the correction may involve randomized computations; consider
# calling set.seed() before this step for reproducibility -- TODO confirm.
merged <- correctExperiments(
    sce.chimera,
    batch = sce.chimera$sample,
    subset.row = chosen.hvgs,
    PARAM = FastMnnParam(
        merge.order = list(
            list(1, 3, 5), # WT (3 replicates)
            list(2, 4, 6)  # td-Tomato (3 replicates)
        )
    )
)

## cluster
# Build a shared-nearest-neighbour graph on the "corrected" reduced dimensions,
# run Louvain community detection on it, and store the memberships as the
# column labels of `merged`.
g <- buildSNNGraph(merged, use.dimred = "corrected")
clusters <- igraph::cluster_louvain(g)
colLabels(merged) <- factor(clusters$membership)

## dimensionality reduction
# Compute t-SNE and then UMAP embeddings, both from the "corrected"
# reduced dimensions.
merged <- merged |>
    runTSNE(dimred = "corrected", external_neighbors = TRUE) |>
    runUMAP(dimred = "corrected", external_neighbors = TRUE)

Initial observation – each cluster label contains both td-Tomato-positive and td-Tomato-negative cells, in broadly comparable numbers.

# Cross-tabulate cluster label against td-Tomato status: one row per label,
# one column per value of `tomato`, cells holding the counts.
colData(merged) |>
    dplyr::as_tibble() |>
    dplyr::count(label, tomato) |>
    tidyr::pivot_wider(names_from = tomato, values_from = n)
#> # A tibble: 26 × 3
#>    label `FALSE` `TRUE`
#>    <fct>   <int>  <int>
#>  1 1         129    264
#>  2 2         363    512
#>  3 3         417    632
#>  4 4        1255    973
#>  5 5        1444    753
#>  6 6         868   1092
#>  7 7         542    398
#>  8 8         476    405
#>  9 9         269    333
#> 10 10        608    604
#> # ℹ 16 more rows

# t-SNE plot coloured by td-Tomato status, with cluster labels drawn as text.
plotTSNE(merged, colour_by = "tomato", text_by = "label", point_size = 0.1)

Create pseudo-bulk samples

# Using 'celltype.mapped' and 'sample' as our two factors; each column of the
# output corresponds to one unique combination of these two factors.
# NOTE(review): the object being aggregated was missing from the truncated
# call; `merged` is restored here because the `id` columns are taken from
# colData(merged) -- confirm against the OSCA multi-sample chapter.
summed <- aggregateAcrossCells(
    merged,
    id = colData(merged)[, c("celltype.mapped", "sample")]
)
#> class: SingleCellExperiment 
#> dim: 14699 186 
#> metadata(2):
#> assays(1): counts
#> rownames(14699): Xkr4 Rp1 ... Vmn2r122 CAAA01147332.1
#> rowData names(3): rotation ENSEMBL SYMBOL
#> colnames: NULL
#> colData names(16): batch cell ... sample ncells
#> reducedDimNames(5): corrected pca.corrected.E7.5 pca.corrected.E8.5
#> mainExpName: NULL
#> altExpNames(0):


Check out the following for a careful and comprehensive evaluation of single-cell differential expression methods:

Soneson, C., Robinson, M. Bias, robustness and scalability in single-cell differential expression analysis. Nat Methods 15, 255–261 (2018).

