Marker weights improve cell type assignment
marker-weights-improves-results.Rmd
library(hitype)
#> The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
#> which was just loaded, were retired in October 2023.
#> Please refer to R-spatial evolution reports for details, especially
#> https://r-spatial.org/r/2023/05/15/evolution4.html.
#> It may be desirable to make the sf package available;
#> package maintainers should consider adding sf to Suggests:.
Unweighted markers:
markers <- data.frame(
cellName = c(
"Naive CD4+ T", "CD14+ Mono", "Memory CD4+", "B",
"CD8+ T", "FCFR3A+ Mono", "NK", "DC", "Platelet"
),
geneSymbolmore1 = c(
"IL7R,CCR7", "CD14,LYZ", "IL7R,S100A4", "MS4A1",
"CD8A", "FCGR3A,MS4A7", "GNLY,NKG7", "FCER1A,CST3", "PPBP"
),
geneSymbolmore2 = rep("", 9)
)
markers
#> cellName geneSymbolmore1 geneSymbolmore2
#> 1 Naive CD4+ T IL7R,CCR7
#> 2 CD14+ Mono CD14,LYZ
#> 3 Memory CD4+ IL7R,S100A4
#> 4 B MS4A1
#> 5 CD8+ T CD8A
#> 6 FCFR3A+ Mono FCGR3A,MS4A7
#> 7 NK GNLY,NKG7
#> 8 DC FCER1A,CST3
#> 9 Platelet PPBP
Prepare pbmc
data:
pbmc <- pbmc3k.SeuratData::pbmc3k
pbmc[["percent.mt"]] <- Seurat::PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- Seurat::NormalizeData(pbmc)
pbmc <- Seurat::FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
pbmc <- Seurat::ScaleData(pbmc, features = rownames(pbmc))
pbmc <- Seurat::RunPCA(pbmc, features = Seurat::VariableFeatures(object = pbmc))
pbmc <- Seurat::FindNeighbors(pbmc, dims = 1:10)
pbmc <- Seurat::FindClusters(pbmc, resolution = 0.5)
#> Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
#>
#> Number of nodes: 2638
#> Number of edges: 95927
#>
#> Running Louvain algorithm...
#> Maximum modularity in 10 random starts: 0.8728
#> Number of communities: 9
#> Elapsed time: 0 seconds
pbmc <- Seurat::RunUMAP(pbmc, dims = 1:10)
new_cluster_ids <- markers$cellName
names(new_cluster_ids) <- levels(pbmc)
pbmc <- Seurat::RenameIdents(pbmc, new_cluster_ids)
Use unweighted markers:
library(hitype)
gs <- gs_prepare(markers)
scores <- hitype_score(pbmc@assays$RNA@data, gs)
assigned <- hitype_assign(pbmc$seurat_clusters, scores, gs, threshold = 0.01)
out <- summary(assigned)
out$ManualAssignment <- markers$cellName
out
#> # A tibble: 9 × 5
#> Level Cluster CellType Score ManualAssignment
#> <int> <fct> <chr> <dbl> <chr>
#> 1 1 0 Naive CD4+ T 0.162 Naive CD4+ T
#> 2 1 1 CD14+ Mono 0.265 CD14+ Mono
#> 3 1 2 Memory CD4+ 0.154 Memory CD4+
#> 4 1 3 B 0.253 B
#> 5 1 4 NK 0.209 CD8+ T
#> 6 1 5 FCFR3A+ Mono 0.341 FCFR3A+ Mono
#> 7 1 6 NK 0.374 NK
#> 8 1 7 DC 0.483 DC
#> 9 1 8 Platelet 0.876 Platelet
Use trained weights:
gs <- gs_prepare(hitypedb_pbmc3k)
scores <- hitype_score(pbmc@assays$RNA@data, gs)
assigned <- hitype_assign(pbmc$seurat_clusters, scores, gs, threshold = 0.01)
out <- summary(assigned)
out$ManualAssignment <- markers$cellName
out
#> # A tibble: 9 × 5
#> Level Cluster CellType Score ManualAssignment
#> <int> <fct> <chr> <dbl> <chr>
#> 1 1 0 Naive CD4+ T 0.0798 Naive CD4+ T
#> 2 1 1 CD14+ Mono 0.124 CD14+ Mono
#> 3 1 2 Memory CD4+ 0.0873 Memory CD4+
#> 4 1 3 B 0.106 B
#> 5 1 4 CD8+ T 0.146 CD8+ T
#> 6 1 5 FCFR3A+ Mono 0.280 FCFR3A+ Mono
#> 7 1 6 NK 0.195 NK
#> 8 1 7 DC 0.435 DC
#> 9 1 8 Platelet 0.868 Platelet
Use unweighted markers on ifnb
dataset:
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(dplyr))
ifnb <- ifnb.SeuratData::ifnb
# Keep only the cells that exist in the pbmc dataset
ifnb <- subset(ifnb, subset = seurat_annotations %in% c(
"CD14 Mono",
"DC",
"CD4 Memory T",
"CD4 Naive T",
"CD8 T",
"B",
"NK"
))
ifnb@meta.data <- ifnb@meta.data %>%
mutate(seurat_clusters = case_when(
seurat_annotations == "CD14 Mono" ~ "CD14+ Mono",
seurat_annotations == "CD4 Memory T" ~ "Memory CD4+",
seurat_annotations == "CD4 Naive T" ~ "Naive CD4+ T",
seurat_annotations == "CD8 T" ~ "CD8+ T",
TRUE ~ seurat_annotations
))
gs <- gs_prepare(markers)
scores <- hitype_score(ifnb@assays$RNA@data, gs)
assigned <- hitype_assign(ifnb$seurat_clusters, scores, gs, threshold = 0.01)
summary(assigned)
#> # A tibble: 7 × 4
#> Level Cluster CellType Score
#> <int> <chr> <chr> <dbl>
#> 1 1 B B 0.0494
#> 2 1 CD14+ Mono FCFR3A+ Mono 0.0292
#> 3 1 CD8+ T CD8+ T 0.0460
#> 4 1 DC DC 0.0680
#> 5 1 Memory CD4+ Naive CD4+ T 0.0182
#> 6 1 NK NK 0.0959
#> 7 1 Naive CD4+ T Naive CD4+ T 0.0242
Use trained weights on ifnb
dataset:
gs <- gs_prepare(hitypedb_pbmc3k)
scores <- hitype_score(ifnb@assays$RNA@data, gs)
assigned <- hitype_assign(ifnb$seurat_clusters, scores, gs, threshold = 0.01)
summary(assigned)
#> # A tibble: 7 × 4
#> Level Cluster CellType Score
#> <int> <chr> <chr> <dbl>
#> 1 1 B B 0.0218
#> 2 1 CD14+ Mono FCFR3A+ Mono 0.0238
#> 3 1 CD8+ T CD8+ T 0.0421
#> 4 1 DC DC 0.0583
#> 5 1 Memory CD4+ Memory CD4+ 0.0121
#> 6 1 NK NK 0.0514
#> 7 1 Naive CD4+ T Naive CD4+ T 0.0143