Skip to contents

An R package for enrichment (over-representation) analysis of gene sets. It reproduces the results of clusterProfiler and enrichr, with the following additional features:

  • Offline (no internet connection required)
  • Analysis against custom gene sets (GMT files)
  • Both styles (clusterProfiler and enrichr) supported
  • Seamless integration with scplotter for visualization

Installation

You can install the development version of enrichit from GitHub with:

# install.packages("devtools")
devtools::install_github("pwwang/enrichit")
# or remotes::install_github("pwwang/enrichit")

Usage


library(enrichit)

data(userlist)
kegg_gmt <- system.file("extdata", "KEGG_2021_Human.gmt.gz", package = "enrichit")
hallmark_gmt <- system.file("extdata", "MSigDB_Hallmark_2020.gmt.gz", package = "enrichit")

EnrichIt(userlist, c(kegg_gmt, hallmark_gmt))
#                                        Term Overlap      P.value
# 202                    Pancreatic secretion  29/102 1.462598e-13
# 161    Maturity onset diabetes of the young   14/26 1.403986e-11
# 230        Protein digestion and absorption  20/103 9.254422e-07
# 178 Neuroactive ligand-receptor interaction  41/341 3.071673e-06
# 45    Carbohydrate digestion and absorption   11/47 4.410201e-05
# 87             Fat digestion and absorption   10/43 1.037564e-04
# 501                     Pancreas Beta Cells   19/40 5.821496e-14
# 49                        KRAS Signaling Dn  20/200 8.805898e-03
# 451                             Pperoxisome   7/104 3.681308e-01
# 441                    Bile Acid Metabolism   7/112 4.427207e-01
# 22                       Hedgehog Signaling    2/36 6.087590e-01
# 401                            Angiogenesis    2/36 6.087590e-01
# ... ...
#     Adjusted.P.value Odds.Ratio Combined.Score
# 202     3.510235e-11  6.9503571    205.4066273
# 161     1.684783e-09 20.0200000    500.2821950
# 230     7.403538e-05  4.1479048     57.6268164
# 178     1.843004e-04  2.3870157     30.2990782
# 45      2.116896e-03  5.2050857     52.2018318
# 87      4.150256e-03  5.1550271     47.2894589
# 501     1.921094e-12 15.6424970    476.6993715
# 49      1.452973e-01  1.8945006      8.9654086
# 451     9.999916e-01  1.2139642      1.2131352
# 441     9.999916e-01  1.1205931      0.9130774
# 22      9.999916e-01  0.9875048      0.4901311
# 401     9.999916e-01  0.9875048      0.4901311
# ... ...
#                                         Genes Rank             Database
# 202                    AMY1B;PRSS2;SLC4A4;...    1      KEGG_2021_Human
# 161                  PDX1;BHLHA15;NEUROD1;...    2      KEGG_2021_Human
# 230                    PRSS2;PRSS1;SLC8A2;...    3      KEGG_2021_Human
# 178                        SST;AGTR2;CNR1;...    4      KEGG_2021_Human
# 45                         AMY1B;G6PC1;SI;...    5      KEGG_2021_Human
# 87                PLA2G2D;PLA2G2A;PLA2G4A;...    6      KEGG_2021_Human
# 501                    CHGA;ABCC8;NEUROD1;...    1 MSigDB_Hallmark_2020
# 49                         EGF;CNTFR;NOS1;...    2 MSigDB_Hallmark_2020
# 451   RXRG;CEL;CACNA1B;ABCC8;ALB;SERPINA6;TTR    3 MSigDB_Hallmark_2020
# 441     RXRG;NR0B2;SERPINA6;NR1H4;GNMT;TTR;GC    4 MSigDB_Hallmark_2020
# 22                               CNTFR;NKX6-1    5 MSigDB_Hallmark_2020
# 401                                  VTN;APOH    5 MSigDB_Hallmark_2020
# ... ...

EnrichIt(userlist, c(kegg_gmt, hallmark_gmt), style = "clusterProfiler")
#                          ID                             Description GeneRatio
# 202     KEGG_2021_Human_202                    Pancreatic secretion    29/279
# 178     KEGG_2021_Human_178 Neuroactive ligand-receptor interaction    41/279
# 161     KEGG_2021_Human_161    Maturity onset diabetes of the young    14/279
# 230     KEGG_2021_Human_230        Protein digestion and absorption    20/279
# 45       KEGG_2021_Human_45   Carbohydrate digestion and absorption    11/279
# 87       KEGG_2021_Human_87            Fat digestion and absorption    10/279
# 501 MSigDB_Hallmark_2020_50                     Pancreas Beta Cells    19/120
# 49  MSigDB_Hallmark_2020_49                       KRAS Signaling Dn    20/120
# 321 MSigDB_Hallmark_2020_32                   Xenobiotic Metabolism    10/120
# 451 MSigDB_Hallmark_2020_45                             Pperoxisome     7/120
# 441 MSigDB_Hallmark_2020_44                    Bile Acid Metabolism     7/120
# 231 MSigDB_Hallmark_2020_23                              Complement     9/120
#       BgRatio       pvalue     p.adjust       qvalue                  geneID
# 202 102/10922 7.021146e-23 1.685075e-20 1.345104e-20   AMY1B/PRSS2/PRSS1/...
# 178 341/10922 5.947771e-17 7.137325e-15 5.697338e-15     SST/AGTR2/PRSS2/...
# 161  26/10922 2.674313e-16 2.139450e-14 1.707807e-14  PDX1/BHLHA15/HNF1B/...
# 230 103/10922 1.091957e-12 6.551741e-11 5.229898e-11  PRSS2/PRSS1/ATP1A2/...
# 45   47/10922 1.910985e-08 9.172727e-07 7.322089e-07   AMY1B/G6PC1/G6PC2/...
# 87   43/10922 9.170990e-08 3.668396e-06 2.928281e-06 PLA2G2D/PLA2G2A/CEL/...
# 501  40/10922 1.471191e-27 4.854929e-26 3.097243e-26     CHGA/ABCC8/PDX1/...
# 49  200/10922 4.117838e-14 6.794432e-13 4.334566e-13    EGF/CNTFR/TENT5C/...
# 321 200/10922 6.837635e-05 7.521398e-04 4.798340e-04     RBP4/ITIH4/PDK4/...
# 451 104/10922 1.428239e-04 1.178297e-03 7.517049e-04    RXRG/CEL/CACNA1B/...
# 441 112/10922 2.265978e-04 1.495545e-03 9.540959e-04 RXRG/NR0B2/SERPINA6/...
# 231 200/10922 3.505544e-04 1.928049e-03 1.230015e-03    PRSS3/KLKB1/KLK1/...
#     Count             Database
# 202    29      KEGG_2021_Human
# 178    41      KEGG_2021_Human
# 161    14      KEGG_2021_Human
# 230    20      KEGG_2021_Human
# 45     11      KEGG_2021_Human
# 87     10      KEGG_2021_Human
# 501    19 MSigDB_Hallmark_2020
# 49     20 MSigDB_Hallmark_2020
# 321    10 MSigDB_Hallmark_2020
# 451     7 MSigDB_Hallmark_2020
# 441     7 MSigDB_Hallmark_2020
# 231     9 MSigDB_Hallmark_2020

Visualization

library(scplotter)
library(enrichit)

data(userlist)
kegg_gmt <- system.file("extdata", "KEGG_2021_Human.gmt.gz", package = "enrichit")
hallmark_gmt <- system.file("extdata", "MSigDB_Hallmark_2020.gmt.gz", package = "enrichit")

enrich_result <- EnrichIt(userlist, c(kegg_gmt, hallmark_gmt), style = "clusterProfiler")

EnrichmentPlot(enrich_result, split_by = "Database")

EnrichmentPlot(enrich_result, plot_type = "dot", split_by = "Database")

EnrichmentPlot(enrich_result[enrich_result$Database == "KEGG_2021_Human", ], plot_type = "network")

EnrichmentPlot(enrich_result, plot_type = "wordcloud")

Documentation

See the documentation for more details.