Skip to contents

You need to install the suggested packages for this vignette:

install.packages("keras")
devtools::install_github("pwwang/hitype")

keras::install_keras()
torch::install_torch()

To train the weights for the markers, firstly, we have to have the markers in the original ScType format:

markers <- data.frame(
    cellName = c(
        "Naive CD4+ T", "CD14+ Mono", "Memory CD4+", "B",
        "CD8+ T", "FCFR3A+ Mono", "NK", "DC", "Platelet"
    ),
    geneSymbolmore1 = c(
        "IL7R,CCR7",  "CD14,LYZ", "IL7R,S100A4", "MS4A1",
        "CD8A", "FCGR3A,MS4A7", "GNLY,NKG7", "FCER1A,CST3", "PPBP"
    ),
    geneSymbolmore2 = rep("", 9)
)

markers
#>       cellName geneSymbolmore1 geneSymbolmore2
#> 1 Naive CD4+ T       IL7R,CCR7                
#> 2   CD14+ Mono        CD14,LYZ                
#> 3  Memory CD4+     IL7R,S100A4                
#> 4            B           MS4A1                
#> 5       CD8+ T            CD8A                
#> 6 FCFR3A+ Mono    FCGR3A,MS4A7                
#> 7           NK       GNLY,NKG7                
#> 8           DC     FCER1A,CST3                
#> 9     Platelet            PPBP

Prepare the data for training:

pbmc <- pbmc3k.SeuratData::pbmc3k
pbmc[["percent.mt"]] <- Seurat::PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- Seurat::NormalizeData(pbmc)
pbmc <- Seurat::FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
pbmc <- Seurat::ScaleData(pbmc, features = rownames(pbmc))
pbmc <- Seurat::RunPCA(pbmc, features = Seurat::VariableFeatures(object = pbmc))
pbmc <- Seurat::FindNeighbors(pbmc, dims = 1:10)
pbmc <- Seurat::FindClusters(pbmc, resolution = 0.5)
#> Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
#> 
#> Number of nodes: 2638
#> Number of edges: 95927
#> 
#> Running Louvain algorithm...
#> Maximum modularity in 10 random starts: 0.8728
#> Number of communities: 9
#> Elapsed time: 0 seconds
pbmc <- Seurat::RunUMAP(pbmc, dims = 1:10)
new_cluster_ids <- markers$cellName
names(new_cluster_ids) <- levels(pbmc)
pbmc <- Seurat::RenameIdents(pbmc, new_cluster_ids)
library(hitype)

weights <- train_weights(path_to_gs = markers, exprs = pbmc)
#> Epoch 1/20
#> 
 1/58 [..............................] - ETA: 48s - loss: 2.2416 - accuracy: 0.1250
35/58 [=================>............] - ETA: 0s - loss: 1.8408 - accuracy: 0.4214 
58/58 [==============================] - 1s 6ms/step - loss: 1.6498 - accuracy: 0.5008 - val_loss: 1.1092 - val_accuracy: 0.7254
#> Epoch 2/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 1.2412 - accuracy: 0.7188
36/58 [=================>............] - ETA: 0s - loss: 0.9561 - accuracy: 0.7179
58/58 [==============================] - 0s 4ms/step - loss: 0.8765 - accuracy: 0.7277 - val_loss: 0.6174 - val_accuracy: 0.7727
#> Epoch 3/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.7128 - accuracy: 0.7812
35/58 [=================>............] - ETA: 0s - loss: 0.6177 - accuracy: 0.7643
58/58 [==============================] - 0s 2ms/step - loss: 0.5821 - accuracy: 0.7802 - val_loss: 0.4683 - val_accuracy: 0.8333
#> Epoch 4/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.4523 - accuracy: 0.8750
36/58 [=================>............] - ETA: 0s - loss: 0.4579 - accuracy: 0.8281
58/58 [==============================] - 0s 2ms/step - loss: 0.4334 - accuracy: 0.8457 - val_loss: 0.3717 - val_accuracy: 0.9318
#> Epoch 5/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.5769 - accuracy: 0.8438
35/58 [=================>............] - ETA: 0s - loss: 0.3458 - accuracy: 0.9062
58/58 [==============================] - 0s 3ms/step - loss: 0.3434 - accuracy: 0.9101 - val_loss: 0.2932 - val_accuracy: 0.9223
#> Epoch 6/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.3529 - accuracy: 0.9062
35/58 [=================>............] - ETA: 0s - loss: 0.2698 - accuracy: 0.9339
58/58 [==============================] - 0s 3ms/step - loss: 0.2655 - accuracy: 0.9361 - val_loss: 0.2240 - val_accuracy: 0.9470
#> Epoch 7/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.1739 - accuracy: 1.0000
35/58 [=================>............] - ETA: 0s - loss: 0.2130 - accuracy: 0.9491
58/58 [==============================] - 0s 3ms/step - loss: 0.1987 - accuracy: 0.9518 - val_loss: 0.1716 - val_accuracy: 0.9545
#> Epoch 8/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.1628 - accuracy: 0.9688
36/58 [=================>............] - ETA: 0s - loss: 0.1599 - accuracy: 0.9635
58/58 [==============================] - 0s 3ms/step - loss: 0.1507 - accuracy: 0.9664 - val_loss: 0.1379 - val_accuracy: 0.9545
#> Epoch 9/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.1102 - accuracy: 0.9688
36/58 [=================>............] - ETA: 0s - loss: 0.1191 - accuracy: 0.9722
58/58 [==============================] - 0s 3ms/step - loss: 0.1143 - accuracy: 0.9718 - val_loss: 0.1080 - val_accuracy: 0.9621
#> Epoch 10/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.0945 - accuracy: 0.9688
36/58 [=================>............] - ETA: 0s - loss: 0.0949 - accuracy: 0.9800
58/58 [==============================] - 0s 3ms/step - loss: 0.0934 - accuracy: 0.9800 - val_loss: 0.0858 - val_accuracy: 0.9943
#> Epoch 11/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.0684 - accuracy: 1.0000
36/58 [=================>............] - ETA: 0s - loss: 0.0835 - accuracy: 0.9835
58/58 [==============================] - 0s 2ms/step - loss: 0.0795 - accuracy: 0.9832 - val_loss: 0.0776 - val_accuracy: 0.9924
#> Epoch 12/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.0661 - accuracy: 1.0000
36/58 [=================>............] - ETA: 0s - loss: 0.0632 - accuracy: 0.9931
58/58 [==============================] - 0s 3ms/step - loss: 0.0598 - accuracy: 0.9919 - val_loss: 0.0568 - val_accuracy: 0.9943
#> Epoch 13/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.1411 - accuracy: 0.9688
37/58 [==================>...........] - ETA: 0s - loss: 0.0537 - accuracy: 0.9924
58/58 [==============================] - 0s 3ms/step - loss: 0.0502 - accuracy: 0.9930 - val_loss: 0.0480 - val_accuracy: 0.9962
#> Epoch 14/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.0669 - accuracy: 0.9688
36/58 [=================>............] - ETA: 0s - loss: 0.0471 - accuracy: 0.9922
58/58 [==============================] - 0s 3ms/step - loss: 0.0459 - accuracy: 0.9935 - val_loss: 0.0397 - val_accuracy: 0.9962
#> Epoch 15/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.0073 - accuracy: 1.0000
35/58 [=================>............] - ETA: 0s - loss: 0.0362 - accuracy: 0.9955
58/58 [==============================] - 0s 2ms/step - loss: 0.0364 - accuracy: 0.9962 - val_loss: 0.0349 - val_accuracy: 0.9962
#> Epoch 16/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.0431 - accuracy: 0.9688
23/58 [==========>...................] - ETA: 0s - loss: 0.0300 - accuracy: 0.9946
58/58 [==============================] - 0s 3ms/step - loss: 0.0323 - accuracy: 0.9946 - val_loss: 0.0312 - val_accuracy: 0.9962
#> Epoch 17/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.0193 - accuracy: 1.0000
36/58 [=================>............] - ETA: 0s - loss: 0.0310 - accuracy: 0.9965
58/58 [==============================] - 0s 3ms/step - loss: 0.0291 - accuracy: 0.9968 - val_loss: 0.0288 - val_accuracy: 0.9962
#> Epoch 18/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.0608 - accuracy: 0.9688
37/58 [==================>...........] - ETA: 0s - loss: 0.0272 - accuracy: 0.9958
58/58 [==============================] - 0s 3ms/step - loss: 0.0302 - accuracy: 0.9935 - val_loss: 0.0266 - val_accuracy: 0.9981
#> Epoch 19/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.0101 - accuracy: 1.0000
35/58 [=================>............] - ETA: 0s - loss: 0.0213 - accuracy: 0.9982
58/58 [==============================] - 0s 3ms/step - loss: 0.0221 - accuracy: 0.9978 - val_loss: 0.0245 - val_accuracy: 0.9981
#> Epoch 20/20
#> 
 1/58 [..............................] - ETA: 0s - loss: 0.0078 - accuracy: 1.0000
28/58 [=============>................] - ETA: 0s - loss: 0.0152 - accuracy: 0.9978
58/58 [==============================] - 0s 3ms/step - loss: 0.0195 - accuracy: 0.9968 - val_loss: 0.0237 - val_accuracy: 0.9981
#> Evaluating on test data
#> 
1/9 [==>...........................] - ETA: 0s - loss: 0.0046 - accuracy: 1.0000
9/9 [==============================] - 0s 1ms/step - loss: 0.0066 - accuracy: 1.0000
#> 
[1m
[22mSkipping Masking ...
#> 
[1m
[22mSkipping Dropout ...
#> 
[1m
[22mSkipping Dropout ...
#> 
[38;5;246m# A tibble: 9 × 4
[39m
#>   Level Cluster      CellType     Score
#>   
[3m
[38;5;246m<dbl>
[39m
[23m 
[3m
[38;5;246m<fct>
[39m
[23m        
[3m
[38;5;246m<chr>
[39m
[23m        
[3m
[38;5;246m<dbl>
[39m
[23m
#> 
[38;5;250m1
[39m     1 Naive CD4+ T Naive CD4+ T 0.113
#> 
[38;5;250m2
[39m     1 CD14+ Mono   CD14+ Mono   0.204
#> 
[38;5;250m3
[39m     1 Memory CD4+  Memory CD4+  0.140
#> 
[38;5;250m4
[39m     1 B            B            0.195
#> 
[38;5;250m5
[39m     1 CD8+ T       CD8+ T       0.241
#> 
[38;5;250m6
[39m     1 FCFR3A+ Mono FCFR3A+ Mono 0.461
#> 
[38;5;250m7
[39m     1 NK           NK           0.291
#> 
[38;5;250m8
[39m     1 DC           DC           0.811
#> 
[38;5;250m9
[39m     1 Platelet     Platelet     0.859
weights
#>       cellName level geneSymbolmore2       geneSymbolmore1
#> 1            B     1                               MS4A1++
#> 2   CD14+ Mono     1                           CD14++,LYZ+
#> 3       CD8+ T     1                             CD8A+++++
#> 4           DC     1                  FCER1A+++++,CST3++++
#> 5 FCFR3A+ Mono     1                 FCGR3A+++++,MS4A7++++
#> 6  Memory CD4+     1                      IL7R++++,S100A4+
#> 7 Naive CD4+ T     1                           IL7R+,CCR7+
#> 8           NK     1                         GNLY++,NKG7++
#> 9     Platelet     1                              PPBP++++

Use the trained weights on ifnb dataset:

suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(dplyr))

ifnb <- ifnb.SeuratData::ifnb

# Keep only the cells that exist in the pbmc dataset
ifnb <- subset(ifnb, subset = seurat_annotations %in% c(
  "CD14 Mono",
  "DC",
  "CD4 Memory T",
  "CD4 Naive T",
  "CD8 T",
  "B",
  "NK"
))

ifnb@meta.data <- ifnb@meta.data %>%
  mutate(seurat_clusters = case_when(
    seurat_annotations == "CD14 Mono" ~ "CD14+ Mono",
    seurat_annotations == "CD4 Memory T" ~ "Memory CD4+",
    seurat_annotations == "CD4 Naive T" ~ "Naive CD4+ T",
    seurat_annotations == "CD8 T" ~ "CD8+ T",
    TRUE ~ seurat_annotations
  ))

gs <- gs_prepare(weights)

scores <- hitype_score(ifnb@assays$RNA@data, gs)

assigned <- hitype_assign(ifnb$seurat_clusters, scores, gs, threshold = 0.01)

summary(assigned)
#> # A tibble: 7 × 4
#>   Level Cluster      CellType      Score
#>   <int> <chr>        <chr>         <dbl>
#> 1     1 B            B            0.0278
#> 2     1 CD14+ Mono   FCFR3A+ Mono 0.0238
#> 3     1 CD8+ T       CD8+ T       0.0421
#> 4     1 DC           DC           0.0583
#> 5     1 Memory CD4+  Memory CD4+  0.0121
#> 6     1 NK           NK           0.0514
#> 7     1 Naive CD4+ T Naive CD4+ T 0.0133