Skip to contents
library(hitype)
#> The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
#> which was just loaded, were retired in October 2023.
#> Please refer to R-spatial evolution reports for details, especially
#> https://r-spatial.org/r/2023/05/15/evolution4.html.
#> It may be desirable to make the sf package available;
#> package maintainers should consider adding sf to Suggests:.

Unweighted markers:

markers <- data.frame(
    cellName = c(
        "Naive CD4+ T", "CD14+ Mono", "Memory CD4+", "B",
        "CD8+ T", "FCFR3A+ Mono", "NK", "DC", "Platelet"
    ),
    geneSymbolmore1 = c(
        "IL7R,CCR7",  "CD14,LYZ", "IL7R,S100A4", "MS4A1",
        "CD8A", "FCGR3A,MS4A7", "GNLY,NKG7", "FCER1A,CST3", "PPBP"
    ),
    geneSymbolmore2 = rep("", 9)
)

markers
#>       cellName geneSymbolmore1 geneSymbolmore2
#> 1 Naive CD4+ T       IL7R,CCR7                
#> 2   CD14+ Mono        CD14,LYZ                
#> 3  Memory CD4+     IL7R,S100A4                
#> 4            B           MS4A1                
#> 5       CD8+ T            CD8A                
#> 6 FCFR3A+ Mono    FCGR3A,MS4A7                
#> 7           NK       GNLY,NKG7                
#> 8           DC     FCER1A,CST3                
#> 9     Platelet            PPBP

Prepare pbmc data:

pbmc <- pbmc3k.SeuratData::pbmc3k
pbmc[["percent.mt"]] <- Seurat::PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- Seurat::NormalizeData(pbmc)
pbmc <- Seurat::FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
pbmc <- Seurat::ScaleData(pbmc, features = rownames(pbmc))
pbmc <- Seurat::RunPCA(pbmc, features = Seurat::VariableFeatures(object = pbmc))
pbmc <- Seurat::FindNeighbors(pbmc, dims = 1:10)
pbmc <- Seurat::FindClusters(pbmc, resolution = 0.5)
#> Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
#> 
#> Number of nodes: 2638
#> Number of edges: 95927
#> 
#> Running Louvain algorithm...
#> Maximum modularity in 10 random starts: 0.8728
#> Number of communities: 9
#> Elapsed time: 0 seconds
pbmc <- Seurat::RunUMAP(pbmc, dims = 1:10)
new_cluster_ids <- markers$cellName
names(new_cluster_ids) <- levels(pbmc)
pbmc <- Seurat::RenameIdents(pbmc, new_cluster_ids)

Use unweighted markers:

library(hitype)

gs <- gs_prepare(markers)
scores <- hitype_score(pbmc@assays$RNA@data, gs)
assigned <- hitype_assign(pbmc$seurat_clusters, scores, gs, threshold = 0.01)
out <- summary(assigned)
out$ManualAssignment <- markers$cellName
out
#> # A tibble: 9 × 5
#>   Level Cluster CellType     Score ManualAssignment
#>   <int> <fct>   <chr>        <dbl> <chr>           
#> 1     1 0       Naive CD4+ T 0.162 Naive CD4+ T    
#> 2     1 1       CD14+ Mono   0.265 CD14+ Mono      
#> 3     1 2       Memory CD4+  0.154 Memory CD4+     
#> 4     1 3       B            0.253 B               
#> 5     1 4       NK           0.209 CD8+ T          
#> 6     1 5       FCFR3A+ Mono 0.341 FCFR3A+ Mono    
#> 7     1 6       NK           0.374 NK              
#> 8     1 7       DC           0.483 DC              
#> 9     1 8       Platelet     0.876 Platelet

Use trained weights:

gs <- gs_prepare(hitypedb_pbmc3k)
scores <- hitype_score(pbmc@assays$RNA@data, gs)
assigned <- hitype_assign(pbmc$seurat_clusters, scores, gs, threshold = 0.01)
out <- summary(assigned)
out$ManualAssignment <- markers$cellName
out
#> # A tibble: 9 × 5
#>   Level Cluster CellType      Score ManualAssignment
#>   <int> <fct>   <chr>         <dbl> <chr>           
#> 1     1 0       Naive CD4+ T 0.0798 Naive CD4+ T    
#> 2     1 1       CD14+ Mono   0.124  CD14+ Mono      
#> 3     1 2       Memory CD4+  0.0873 Memory CD4+     
#> 4     1 3       B            0.106  B               
#> 5     1 4       CD8+ T       0.146  CD8+ T          
#> 6     1 5       FCFR3A+ Mono 0.280  FCFR3A+ Mono    
#> 7     1 6       NK           0.195  NK              
#> 8     1 7       DC           0.435  DC              
#> 9     1 8       Platelet     0.868  Platelet

Use unweighted markers on ifnb dataset:

suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(dplyr))

ifnb <- ifnb.SeuratData::ifnb

# Keep only the cells that exist in the pbmc dataset
ifnb <- subset(ifnb, subset = seurat_annotations %in% c(
  "CD14 Mono",
  "DC",
  "CD4 Memory T",
  "CD4 Naive T",
  "CD8 T",
  "B",
  "NK"
))

ifnb@meta.data <- ifnb@meta.data %>%
  mutate(seurat_clusters = case_when(
    seurat_annotations == "CD14 Mono" ~ "CD14+ Mono",
    seurat_annotations == "CD4 Memory T" ~ "Memory CD4+",
    seurat_annotations == "CD4 Naive T" ~ "Naive CD4+ T",
    seurat_annotations == "CD8 T" ~ "CD8+ T",
    TRUE ~ seurat_annotations
  ))

gs <- gs_prepare(markers)

scores <- hitype_score(ifnb@assays$RNA@data, gs)

assigned <- hitype_assign(ifnb$seurat_clusters, scores, gs, threshold = 0.01)

summary(assigned)
#> # A tibble: 7 × 4
#>   Level Cluster      CellType      Score
#>   <int> <chr>        <chr>         <dbl>
#> 1     1 B            B            0.0494
#> 2     1 CD14+ Mono   FCFR3A+ Mono 0.0292
#> 3     1 CD8+ T       CD8+ T       0.0460
#> 4     1 DC           DC           0.0680
#> 5     1 Memory CD4+  Naive CD4+ T 0.0182
#> 6     1 NK           NK           0.0959
#> 7     1 Naive CD4+ T Naive CD4+ T 0.0242

Use trained weights on ifnb dataset:

gs <- gs_prepare(hitypedb_pbmc3k)

scores <- hitype_score(ifnb@assays$RNA@data, gs)

assigned <- hitype_assign(ifnb$seurat_clusters, scores, gs, threshold = 0.01)

summary(assigned)
#> # A tibble: 7 × 4
#>   Level Cluster      CellType      Score
#>   <int> <chr>        <chr>         <dbl>
#> 1     1 B            B            0.0218
#> 2     1 CD14+ Mono   FCFR3A+ Mono 0.0238
#> 3     1 CD8+ T       CD8+ T       0.0421
#> 4     1 DC           DC           0.0583
#> 5     1 Memory CD4+  Memory CD4+  0.0121
#> 6     1 NK           NK           0.0514
#> 7     1 Naive CD4+ T Naive CD4+ T 0.0143