## Inspect the internal structure of our Seurat object
str(pbmc)
## Access the meta.data slot directly
pbmc@meta.data
## meta.data holds the per-cell metadata, identified by cell barcode; currently it contains nFeatures and nCounts
## The actual count data (what we previously had in `pbmc.data`) can be reached in several ways - lots of accessors here!
pbmc@assays$RNA@counts
# or
pbmc$RNA@counts
# or
pbmc[["RNA"]]@counts
## NOTE(review): direct `@counts` slot access presumably targets Seurat v3/v4 assay objects - confirm the Seurat version this workshop uses
## This is the same data as in pbmc.data, but it is now stored within the Seurat object.
## Show the counts of three genes of interest for the first 30 cells
pbmc@assays$RNA@counts[c("CD3D","TCL1A","MS4A1"), 1:30]
# The PBMC dataset is a gene-expression dataset and is stored in an assay called `RNA`. What other types of assays could we store in a Seurat object if we had a different type of dataset?
# We could have an ATAC assay if we had done single-cell ATAC-seq
# We could have an HTO assay if we had done cell hashtagging
# We could have a CSP assay if we had captured cell surface protein data
# Peek at the per-cell QC metrics of the first five cells
head(pbmc@meta.data, n = 5)
# Store, as new metadata columns, the percentage of features matching the
# large-subunit (RPL*) and small-subunit (RPS*) ribosomal gene name patterns
pbmc[["percent.riboL"]] <- PercentageFeatureSet(pbmc, pattern = "^RPL")
pbmc[["percent.riboS"]] <- PercentageFeatureSet(pbmc, pattern = "^RPS")
# Scatter the small-subunit percentage against the large-subunit percentage
plot1 <- FeatureScatter(
  pbmc,
  feature1 = "percent.riboS",
  feature2 = "percent.riboL"
)
plot1
# The percentages of large and small ribosomal subunit genes are correlated within each cell.
# What about their relationship with the mitochondrial percentage, and with the gene (feature) and RNA counts?
# Scatter the large-subunit ribosomal percentage against the mitochondrial
# percentage, then display the plot
plot2 <- FeatureScatter(
  pbmc,
  feature1 = "percent.riboL",
  feature2 = "percent.mt"
)
plot2
# There are cells with low ribosomal and low mitochondrial gene percentages, and some outliers too (low ribo, high mt).
# The outliers are the cells you may want to exclude.
# To highlight cells with a very low percentage of ribosomal genes,
# create a new column in the meta.data table and, with FeatureScatter,
# plot the RNA count against the mitochondrial percentage,
# grouping the cells by whether their ribosomal gene percentage is very low.
# Flag every cell whose large-subunit ribosomal percentage is at most 5
pbmc$lowRiboL <- (pbmc$percent.riboL <= 5)
# RNA count versus mitochondrial percentage, with cells grouped by that flag
plot1 <- FeatureScatter(
  pbmc,
  feature1 = "nCount_RNA",
  feature2 = "percent.mt",
  group.by = "lowRiboL"
)
plot1
# Rebuild the variable-features plot that was created earlier
plot1 <- VariableFeaturePlot(pbmc)
# Genes of interest that should be labelled on the plot
goi <- c(
  "IL8",
  "IDH2",
  "CXCL3"
)
# Add the requested labels, using repel = TRUE for label placement
plot3 <- LabelPoints(
  plot = plot1,
  points = goi,
  repel = TRUE
)
plot3
# Setting the resolution to 0.05 produces fewer clusters:
# Re-cluster into a separate object using a resolution of 0.05
pbmc2 <- FindClusters(
  pbmc,
  resolution = 0.05
)
# Show the result on the first two principal components ...
DimPlot(
  pbmc2,
  reduction = "pca",
  dims = c(1, 2)
)
# ... and on the UMAP embedding
DimPlot(
  pbmc2,
  reduction = "umap"
)
# When only the first two principal components are used to find neighbours, the clusters look muddled in the UMAP:
# Find neighbours from principal components 1 and 2 only, storing the
# result in a separate object
pbmc3 <- FindNeighbors(
  pbmc,
  dims = 1:2
)
# Cluster that object with a resolution of 0.5
pbmc3 <- FindClusters(
  pbmc3,
  resolution = 0.5
)
# Show the clusters on the first two principal components ...
DimPlot(
  pbmc3,
  reduction = "pca",
  dims = c(1, 2)
)
# ... and on the UMAP embedding
DimPlot(
  pbmc3,
  reduction = "umap"
)
# See if you can annotate the data with the fine labels from the Monaco reference dataset, and whether this improves the resolution of the cell type annotation.
# Fetch the Monaco immune reference dataset from celldex
monaco <- celldex::MonacoImmuneData()
# Run SingleR on `sce` against that reference, using its fine-grained labels
monaco.pred.fine <- SingleR::SingleR(
  test = sce,
  ref = monaco,
  labels = monaco$label.fine
)
# Store the predicted labels as a metadata column and colour the UMAP by it
pbmc$monaco_fine <- monaco.pred.fine$labels
DimPlot(
  pbmc,
  reduction = "umap",
  group.by = "monaco_fine"
)
# Do you lose any groups?