# Source code: biopipen.ns.scrna_metabolic_landscape

"""Metabolic landscape analysis for scRNA-seq data"""
from __future__ import annotations
from pathlib import Path
from typing import Type

from diot import Diot
from datar.tibble import tibble
from pipen.utils import mark
from pipen_args import ProcGroup
from pipen_annotate import annotate

from ..core.config import config
from ..core.proc import Proc


class MetabolicPathwayActivity(Proc):
    """This process calculates the pathway activities in different groups and subsets.

    The cells are first grouped by subsets and then the metabolic activities are
    examined for each group in different subsets.

    For each subset, a heatmap and a violin plot will be generated.
    The heatmap shows the pathway activities for each group and each metabolic pathway

    ![MetabolicPathwayActivity_heatmap](https://pwwang.github.io/immunopipe/latest/processes/images/MetabolicPathwayActivity_heatmap.png){: width="80%"}

    The violin plot shows the distribution of the pathway activities for each group

    ![MetabolicPathwayActivity_violin](https://pwwang.github.io/immunopipe/latest/processes/images/MetabolicPathwayActivity_violin.png){: width="45%"}

    Envs:
        ntimes (type=int): Number of times to do the permutation
        ncores (type=int;pgarg): Number of cores to use for parallelization
            Defaults to `ScrnaMetabolicLandscape.ncores`
        heatmap_devpars (ns): Device parameters for the heatmap
            - width (type=int): Width of the heatmap
            - height (type=int): Height of the heatmap
            - res (type=int): Resolution of the heatmap
        violin_devpars (ns): Device parameters for the violin plot
            - width (type=int): Width of the violin plot
            - height (type=int): Height of the violin plot
            - res (type=int): Resolution of the violin plot
        gmtfile (pgarg): The GMT file with the metabolic pathways.
            Defaults to `ScrnaMetabolicLandscape.gmtfile`
        grouping (type=auto;pgarg;readonly): Defines the basic groups to
            investigate the metabolic activity, typically the clusters.
            Defaults to `ScrnaMetabolicLandscape.grouping`
        grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to group
            names. For example, if we have `grouping_prefix = "cluster"` and
            we have `1` and `2` in the `grouping` column, the groups
            will be named as `cluster_1` and `cluster_2`.
            Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
        subsetting (type=auto;pgarg;readonly): How do we subset the data. Other
            columns in the metadata to do comparisons. For example,
            `"TimePoint"` or `["TimePoint", "Response"]`.
            Defaults to `ScrnaMetabolicLandscape.subsetting`
        subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
            subset names.
            For example, if we have `subsetting_prefix = "timepoint"` and
            we have `pre` and `post` in the `subsetting` column, the subsets
            will be named as `timepoint_pre` and `timepoint_post`.
            If `subsetting` is a list, then this should also be a
            same-length list. If a single string is given, it will be
            repeated to a list with the same length as `subsetting`.
            Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`

    Requires:
        r-scater:
            - check: {{proc.lang}} <(echo "library(scater)")
        r-reshape2:
            - check: {{proc.lang}} <(echo "library(reshape2)")
        r-rcolorbrewer:
            - check: {{proc.lang}} <(echo "library(RColorBrewer)")
        r-ggplot2:
            - check: {{proc.lang}} <(echo "library(ggplot2)")
        r-ggprism:
            - check: {{proc.lang}} <(echo "library(ggprism)")
        r-complexheatmap:
            - check: {{proc.lang}} <(echo "library(ComplexHeatmap)")
        r-parallel:
            - check: {{proc.lang}} <(echo "library(parallel)")
    """  # noqa: E501
    # A single input file, referenced as `in.sobjfile` in the output template
    input = "sobjfile:file"
    # Output directory named after the stem of the input file
    output = "outdir:dir:{{in.sobjfile | stem}}.pathwayactivity"
    # Standalone defaults. `ScrnaMetabolicLandscape.p_pathway_activity` passes
    # its own ncores, gmtfile, grouping* and subsetting* values when it builds
    # this process inside the group.
    envs = {
        "ntimes": 5000,
        "ncores": config.misc.ncores,
        "heatmap_devpars": {},
        "violin_devpars": {},
        "gmtfile": None,
        "grouping": None,
        "grouping_prefix": "",
        "subsetting": None,
        "subsetting_prefix": "",
    }
    # The script below is R code, so it runs with the configured Rscript
    lang = config.lang.rscript
    script = (
        "file://../scripts/"
        "scrna_metabolic_landscape/MetabolicPathwayActivity.R"
    )
    # Report template used to render this process's results
    plugin_opts = {
        "report": (
            "file://../reports/"
            "scrna_metabolic_landscape/MetabolicPathwayActivity.svelte"
        )
    }


class MetabolicFeatures(Proc):
    """This process performs enrichment analysis for the metabolic pathways
    for each group in each subset.

    The enrichment analysis is done with [`fgsea`](https://bioconductor.org/packages/release/bioc/html/fgsea.html)
    package or the [`GSEA_R`](https://github.com/GSEA-MSigDB/GSEA_R) package.

    Envs:
        ncores (type=int;pgarg): Number of cores to use for parallelization.
            Defaults to `ScrnaMetabolicLandscape.ncores`
        fgsea (flag): Whether to do fast gsea analysis using `fgsea` package.
            If `False`, the `GSEA_R` package will be used.
        prerank_method (choice): Method to use for gene preranking.
            Signal to noise: the larger the differences of the means
            (scaled by the standard deviations); that is, the more distinct
            the gene expression is in each phenotype and the more the gene
            acts as a “class marker”.
            Absolute signal to noise: the absolute value of the signal to
            noise.
            T test: Uses the difference of means scaled by the standard
            deviation and number of samples.
            Ratio of classes: Uses the ratio of class means to calculate
            fold change for natural scale data.
            Diff of classes: Uses the difference of class means to calculate
            fold change for natural scale data.
            Log2 ratio of classes: Uses the log2 ratio of class means to
            calculate fold change for natural scale data. This is the
            recommended statistic for calculating fold change for log scale
            data.
            - signal_to_noise: Signal to noise
            - s2n: Alias of signal_to_noise
            - abs_signal_to_noise: absolute signal to noise
            - abs_s2n: Alias of abs_signal_to_noise
            - t_test: T test
            - ratio_of_classes: Also referred to as fold change
            - diff_of_classes: Difference of class means
            - log2_ratio_of_classes: Log2 ratio of class means
        top (type=int): N top of enriched pathways to show
        gmtfile (pgarg): The GMT file with the metabolic pathways.
            Defaults to `ScrnaMetabolicLandscape.gmtfile`
        grouping (type=auto;pgarg;readonly): Defines the basic groups to
            investigate the metabolic activity.
            Defaults to `ScrnaMetabolicLandscape.grouping`
        grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to
            group names.
            Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
        subsetting (type=auto;pgarg;readonly): How do we subset the data.
            Another column(s) in the metadata.
            Defaults to `ScrnaMetabolicLandscape.subsetting`
        subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
            subset names.
            Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`

    Requires:
        r-parallel:
            - check: {{proc.lang}} <(echo "library(parallel)")
        r-fgsea:
            - check: {{proc.lang}} <(echo "library(fgsea)")
    """  # noqa: E501
    # A single input file, referenced as `in.sobjfile` in the output template
    input = "sobjfile:file"
    # Output directory named after the stem of the input file
    output = "outdir:dir:{{in.sobjfile | stem}}.pathwayfeatures"
    # The script below is R code, so it runs with the configured Rscript
    lang = config.lang.rscript
    # Standalone defaults. `ScrnaMetabolicLandscape.p_features` passes its own
    # ncores, gmtfile, grouping* and subsetting* values when it builds this
    # process inside the group.
    envs = {
        "ncores": config.misc.ncores,
        "fgsea": True,
        "prerank_method": "signal_to_noise",
        "top": 10,
        "gmtfile": None,
        "grouping": None,
        "grouping_prefix": "",
        "subsetting": None,
        "subsetting_prefix": "",
    }
    script = (
        "file://../scripts/scrna_metabolic_landscape/MetabolicFeatures.R"
    )
    # Report template used to render this process's results
    plugin_opts = {
        "report": (
            "file://../reports/"
            "scrna_metabolic_landscape/MetabolicFeatures.svelte"
        )
    }


class MetabolicFeaturesIntraSubset(Proc):
    """Intra-subset metabolic features - Enrichment analysis in details

    Similar to the [`MetabolicFeatures`](!!#biopipennsscrna_metabolic_landscapemetabolicfeatures)
    process, this process performs enrichment analysis for the metabolic pathways for
    each subset in each group, instead of each group in each subset.

    Envs:
        ncores (type=int;pgarg): Number of cores to use for parallelization
            Defaults to `ScrnaMetabolicLandscape.ncores`
        fgsea (flag): Whether to do fast gsea analysis
        prerank_method (choice): Method to use for gene preranking
            Signal to noise: the larger the differences of the means
            (scaled by the standard deviations); that is, the more distinct
            the gene expression is in each phenotype and the more the gene
            acts as a “class marker”.
            Absolute signal to noise: the absolute value of the signal to
            noise.
            T test: Uses the difference of means scaled by the standard
            deviation and number of samples.
            Ratio of classes: Uses the ratio of class means to calculate
            fold change for natural scale data.
            Diff of classes: Uses the difference of class means to calculate
            fold change for natural scale data.
            Log2 ratio of classes: Uses the log2 ratio of class means to
            calculate fold change for natural scale data. This is the
            recommended statistic for calculating fold change for log scale
            data.
            - signal_to_noise: Signal to noise
            - s2n: Alias of signal_to_noise
            - abs_signal_to_noise: absolute signal to noise
            - abs_s2n: Alias of abs_signal_to_noise
            - t_test: T test
            - ratio_of_classes: Also referred to as fold change
            - diff_of_classes: Difference of class means
            - log2_ratio_of_classes: Log2 ratio of class means
        top (type=int): N top of enriched pathways to show
        gmtfile (pgarg): The GMT file with the metabolic pathways.
            Defaults to `ScrnaMetabolicLandscape.gmtfile`
        grouping (type=auto;pgarg;readonly): Defines the basic groups to
            investigate the metabolic activity.
            Defaults to `ScrnaMetabolicLandscape.grouping`
        grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to group
            names.
            Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
        subsetting (type=auto;pgarg;readonly): How do we subset the data.
            Another column(s) in the metadata.
            Defaults to `ScrnaMetabolicLandscape.subsetting`
        subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
            subset names.
            Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`
        subsetting_comparison (type=json;pgarg;readonly): How do we compare the
            subsets.
            Defaults to `ScrnaMetabolicLandscape.subsetting_comparison`

    Requires:
        r-parallel:
            - check: {{proc.lang}} <(echo "library(parallel)")
        r-scater:
            - check: {{proc.lang}} <(echo "library(scater)")
        r-fgsea:
            - check: {{proc.lang}} <(echo "library(fgsea)")
    """  # noqa: E501
    # A single input file, referenced as `in.sobjfile` in the output template
    input = "sobjfile:file"
    # Output directory named after the stem of the input file
    output = (
        "outdir:dir:{{in.sobjfile | stem}}.intra-subset-pathwayfeatures"
    )
    # The script below is R code, so it runs with the configured Rscript
    lang = config.lang.rscript
    # Standalone defaults. `ScrnaMetabolicLandscape.p_features_intra_subset`
    # passes its own ncores, gmtfile, grouping*, subsetting* and
    # subsetting_comparison values when it builds this process inside the group.
    envs = {
        "ncores": config.misc.ncores,
        "gmtfile": None,
        "fgsea": True,
        "prerank_method": "signal_to_noise",
        "top": 10,
        "grouping": None,
        "grouping_prefix": "",
        "subsetting": None,
        "subsetting_prefix": "",
        "subsetting_comparison": {},
    }
    script = (
        "file://../scripts/scrna_metabolic_landscape/"
        "MetabolicFeaturesIntraSubset.R"
    )
    # Report template used to render this process's results
    plugin_opts = {
        "report": (
            "file://../reports/scrna_metabolic_landscape/"
            "MetabolicFeaturesIntraSubset.svelte"
        )
    }


class MetabolicPathwayHeterogeneity(Proc):
    """Calculate Metabolic Pathway heterogeneity.

    For each subset, the normalized enrichment score (NES) of each metabolic pathway
    is calculated for each group.
    The NES is calculated by comparing the enrichment score of the subset to the
    enrichment scores of the same subset in the permutations.
    The p-value is calculated by comparing the NES to the NESs of the same subset
    in the permutations.
    The heterogeneity can be reflected by the NES values and the p-values in
    different groups for the metabolic pathways.

    ![MetabolicPathwayHeterogeneity](https://pwwang.github.io/immunopipe/latest/processes/images/MetabolicPathwayHeterogeneity.png)

    Envs:
        gmtfile (pgarg): The GMT file with the metabolic pathways.
            Defaults to `ScrnaMetabolicLandscape.gmtfile`
        select_pcs (type=float): Select the PCs to use for the analysis.
        pathway_pval_cutoff (type=float): The p-value cutoff to select
            the enriched pathways
        ncores (type=int;pgarg): Number of cores to use for parallelization
            Defaults to `ScrnaMetabolicLandscape.ncores`
        bubble_devpars (ns): The devpars for the bubble plot
            - width (type=int): The width of the plot
            - height (type=int): The height of the plot
            - res (type=int): The resolution of the plot
        grouping (type=auto;pgarg;readonly): Defines the basic groups to
            investigate the metabolic activity.
            Defaults to `ScrnaMetabolicLandscape.grouping`
        grouping_prefix (type=auto;pgarg;readonly): Working as a prefix to group
            names.
            Defaults to `ScrnaMetabolicLandscape.grouping_prefix`
        subsetting (type=auto;pgarg;readonly): How do we subset the data.
            Another column(s) in the metadata.
            Defaults to `ScrnaMetabolicLandscape.subsetting`
        subsetting_prefix (type=auto;pgarg;readonly): Working as a prefix to
            subset names.
            Defaults to `ScrnaMetabolicLandscape.subsetting_prefix`

    Requires:
        r-gtools:
            - check: {{proc.lang}} <(echo "library(gtools)")
        r-ggplot2:
            - check: {{proc.lang}} <(echo "library(ggplot2)")
        r-ggprism:
            - check: {{proc.lang}} <(echo "library(ggprism)")
        r-parallel:
            - check: {{proc.lang}} <(echo "library(parallel)")
        r-dplyr:
            - check: {{proc.lang}} <(echo "library(dplyr)")
        r-tibble:
            - check: {{proc.lang}} <(echo "library(tibble)")
        r-enrichr:
            - check: {{proc.lang}} <(echo "library(enrichR)")
        r-data.table:
            - check: {{proc.lang}} <(echo "library(data.table)")
        r-fgsea:
            - check: {{proc.lang}} <(echo "library(fgsea)")
    """  # noqa: E501
    # A single input file, referenced as `in.sobjfile` in the output template
    input = "sobjfile:file"
    # Output directory named after the stem of the input file
    output = "outdir:dir:{{in.sobjfile | stem}}.pathwayhetero"
    # The script below is R code, so it runs with the configured Rscript
    lang = config.lang.rscript
    # Standalone defaults. `ScrnaMetabolicLandscape.p_pathway_heterogeneity`
    # passes its own ncores, gmtfile, grouping* and subsetting* values when it
    # builds this process inside the group.
    envs = {
        "gmtfile": None,
        "select_pcs": 0.8,
        "pathway_pval_cutoff": 0.01,
        "ncores": config.misc.ncores,
        "bubble_devpars": {},
        "grouping": None,
        "grouping_prefix": "",
        "subsetting": None,
        "subsetting_prefix": "",
    }
    script = (
        "file://../scripts/scrna_metabolic_landscape/"
        "MetabolicPathwayHeterogeneity.R"
    )
    # Report template used to render this process's results
    plugin_opts = {
        "report": (
            "file://../reports/scrna_metabolic_landscape/"
            "MetabolicPathwayHeterogeneity.svelte"
        )
    }


class ScrnaMetabolicLandscape(ProcGroup):
    """Metabolic landscape analysis for scRNA-seq data

    An abstract from
    <https://github.com/LocasaleLab/Single-Cell-Metabolic-Landscape>

    See docs here for more details
    <https://pwwang.github.io/biopipen/pipelines/scrna_metabolic_landscape>

    Reference:
        Xiao, Zhengtao, Ziwei Dai, and Jason W. Locasale.
        "Metabolic landscape of the tumor microenvironment at
        single cell resolution." Nature communications 10.1 (2019): 1-12.

    Args:
        metafile: Either a metafile or an rds file of a Seurat object.
            If it is a metafile, it should have two columns: `Sample` and
            `RNAData`. `Sample` should be the first column with unique
            identifiers for the samples and `RNAData` indicates where the
            barcodes, genes, expression matrices are. The data will be loaded
            and an unsupervised clustering will be done.
            Currently only 10X data is supported.
            If it is an rds file, the seurat object will be used directly
        is_seurat (flag): Whether the input `metafile` is a seurat object.
            If `metafile` is specified directly, this option will be ignored
            and will be inferred from the file extension. If `metafile` is
            not specified, meaning `<pipeline>.procs.MetabolicInput` is
            dependent on other processes, this option will be used to determine
            whether the input is a seurat object or not.
        noimpute (flag): Whether to skip imputation for the dropouts.
            If True, the values will be left as is.
        gmtfile: The GMT file with the metabolic pathways. The gene names should
            match the gene names in the gene list in RNAData or
            the Seurat object.
            You can also provide a URL to the GMT file.
            For example, from
            <https://download.baderlab.org/EM_Genesets/current_release/Human/symbol/>.
        grouping: Defines the basic groups to investigate the metabolic activity
            Typically the clusters.
        grouping_prefix: Working as a prefix to group names
            For example, if we have `grouping_prefix = "cluster"` and
            we have `1` and `2` in the `grouping` column, the groups
            will be named as `cluster_1` and `cluster_2`
        subsetting (type=auto): How do we subset the data. Other columns in the
            metadata to do comparisons. For example, `"TimePoint"` or
            `["TimePoint", "Response"]`
        subsetting_prefix (type=auto): Working as a prefix to subset names
            For example, if we have `subsetting_prefix = "timepoint"` and
            we have `pre` and `post` in the `subsetting` column, the subsets
            will be named as `timepoint_pre` and `timepoint_post`
            If `subsetting` is a list, then this should also be a same-length
            list. If a single string is given, it will be repeated to a list
            with the same length as `subsetting`
        subsetting_comparison (type=json): What kind of comparisons are we
            doing to compare cells from different subsets.
            It should be dict with keys as the names of the comparisons and
            values as the 2 comparison groups from the `subsetting` column.
            For example, if we have `pre` and `post` in the `subsetting` column,
            we could have
            `subsetting_comparison = {"pre_vs_post": ["post", "pre"]}`
            The second group will be the control group in the comparison.
            If we also have `1`, `2` and `3` in the `grouping` column,
            by default, the comparisons are done within each subset for
            each group. For example, for group `1`, groups `2` and `3`
            will be used as control, and for group `2`, groups `1` and `3`
            will be used as control, and for group `3`, groups `1` and `2`
            will be used as control. It is similar to `Seurat::FindMarkers`
            procedure. With this option, the comparisons are also done to
            compare cells from different subsets within each group. With the
            example above, we will have `pre_vs_post` comparisons within
            each group.
            If `subsetting` is a list, this must be a list of dicts with the
            same length.
        mutaters (type=json): Add new columns to the metadata for
            grouping/subsetting.
            They are passed to `sobj@meta.data |> mutate(...)`. For example,
            `{"timepoint": "if_else(treatment == 'control', 'pre', 'post')"}`
            will add a new column `timepoint` to the metadata with values of
            `pre` and `post` based on the `treatment` column.
        ncores (type=int): Number of cores to use for parallelization for
            each process
    """
    # Default values of the group options documented under `Args` above
    DEFAULTS = Diot(
        metafile=None,
        is_seurat=None,
        gmtfile=None,
        grouping=None,
        grouping_prefix="",
        subsetting=None,
        subsetting_prefix=None,
        subsetting_comparison={},
        mutaters=None,
        noimpute=False,
        ncores=config.misc.ncores,
    )

    def post_init(self):
        """Normalize and validate the group options.

        - `is_seurat` is inferred from the extension of `metafile` whenever
          `metafile` is given (any capitalization of `.rds` counts).
        - A non-list `subsetting` is wrapped into a one-element list.
        - A non-list `subsetting_prefix` is repeated once per `subsetting`
          entry.
        - With a single `subsetting` entry, a dict `subsetting_comparison` is
          wrapped into a one-element list.

        Nothing is changed for the subsetting options when `subsetting` is
        empty.

        Raises:
            ValueError: If more than one `subsetting` is given while
                `subsetting_comparison` is not a list, or if `subsetting` and
                `subsetting_comparison` end up with different lengths.
        """
        if self.opts.metafile:
            suffix = Path(self.opts.metafile).suffix
            # Compare case-insensitively so `.rds`, `.RDS`, `.Rds`, ... all
            # mark a Seurat object
            self.opts.is_seurat = suffix.lower() == ".rds"

        # Make sure `subsetting` is a list
        if self.opts.subsetting and not isinstance(self.opts.subsetting, list):
            self.opts.subsetting = [self.opts.subsetting]

        # Make sure `subsetting_prefix` is a list with the same length as
        # `subsetting` (a single value is repeated for every subsetting)
        if (
            self.opts.subsetting
            and not isinstance(self.opts.subsetting_prefix, list)
        ):
            self.opts.subsetting_prefix = [
                self.opts.subsetting_prefix
            ] * len(self.opts.subsetting)

        # Make sure `subsetting` and `subsetting_comparison` have the same
        # length
        if self.opts.subsetting:
            # A single dict is only unambiguous when there is one subsetting
            if len(self.opts.subsetting) == 1 and isinstance(
                self.opts.subsetting_comparison, dict
            ):
                self.opts.subsetting_comparison = [
                    self.opts.subsetting_comparison
                ]

            if len(self.opts.subsetting) > 1 and not isinstance(
                self.opts.subsetting_comparison, list
            ):
                raise ValueError(
                    "The length of `subsetting` is larger than 1, "
                    "but `subsetting_comparison` is not a list of dicts."
                )

            if len(self.opts.subsetting) != len(
                self.opts.subsetting_comparison
            ):
                raise ValueError(
                    "The length of `subsetting` and `subsetting_comparison` "
                    "are not the same"
                )

    @ProcGroup.add_proc
    def p_input(self) -> Type[Proc]:
        """Build MetabolicInput process

        Returns:
            The `MetabolicInput` process class. Its `input_data` is set to
            `[metafile]` only when the `metafile` option is given.
        """
        from .misc import File2Proc

        @mark(board_config_hidden=True)
        class MetabolicInput(File2Proc):
            """This process takes Seurat object as input and pass it to the next
            processes in the `ScrnaMetabolicLandscape` group.

            There is no configuration for this process.
            """

            # Without `metafile`, no input data is set on this process here
            if self.opts.metafile:
                input_data = [self.opts.metafile]

        return MetabolicInput

    @ProcGroup.add_proc
    def p_preparing(self) -> Type[Proc] | None:
        """Build SeuratPreparing process

        Returns:
            The `MetabolicSeuratPreparing` process class, or `None` when the
            input is already a Seurat object (`is_seurat` is true).
        """
        if self.opts.is_seurat:
            return None

        from .scrna import SeuratPreparing

        class MetabolicSeuratPreparing(SeuratPreparing):
            requires = self.p_input

        return MetabolicSeuratPreparing

    @ProcGroup.add_proc
    def p_clustering(self) -> Type[Proc]:
        """Build SeuratClustering process

        Returns:
            The `MetabolicSeuratClustering` process class, or the input
            process itself when the input is already a Seurat object.
        """
        if self.opts.is_seurat:
            return self.p_input

        from .scrna import SeuratClustering

        class MetabolicSeuratClustering(SeuratClustering):
            requires = self.p_preparing

        return MetabolicSeuratClustering

    @ProcGroup.add_proc
    def p_mutater(self) -> Type[Proc]:
        """Build SeuratMetadataMutater process

        Returns:
            The `MetabolicSeuratMetadataMutater` process class, or the
            clustering step unchanged when no `mutaters` are given.
        """
        if not self.opts.mutaters:
            return self.p_clustering

        from .scrna import SeuratMetadataMutater

        class MetabolicSeuratMetadataMutater(SeuratMetadataMutater):
            requires = self.p_clustering
            # Take the first column of the upstream channel as `srtobj` and
            # supply no metafile
            input_data = lambda ch: tibble(
                srtobj=ch.iloc[:, 0],
                metafile=[None],
            )
            envs = {"mutaters": self.opts.mutaters}

        return MetabolicSeuratMetadataMutater

    @ProcGroup.add_proc
    def p_expr_impute(self) -> Type[Proc]:
        """Build ExprImputation process

        Returns:
            The `MetabolicExprImputation` process class, or the mutater step
            unchanged when `noimpute` is true.
        """
        if self.opts.noimpute:
            return self.p_mutater

        from .scrna import ExprImputation

        @annotate.format_doc(indent=3)
        class MetabolicExprImputation(ExprImputation):
            """{{Summary}}

            You can turn off the imputation by setting the `noimpute` option
            of the process group to `True`.
            """
            requires = self.p_mutater

        return MetabolicExprImputation

    @ProcGroup.add_proc
    def p_pathway_activity(self) -> Type[Proc]:
        """Build MetabolicPathwayActivity process

        Returns:
            A `MetabolicPathwayActivity` process that requires the imputation
            step and receives the group's shared options as envs.
        """
        return Proc.from_proc(
            MetabolicPathwayActivity,
            "MetabolicPathwayActivity",
            requires=self.p_expr_impute,
            # NOTE(review): only this analysis sets an explicit order;
            # presumably it affects report ordering, confirm
            order=-1,
            envs={
                "ncores": self.opts.ncores,
                "gmtfile": self.opts.gmtfile,
                "grouping": self.opts.grouping,
                "grouping_prefix": self.opts.grouping_prefix,
                "subsetting": self.opts.subsetting,
                "subsetting_prefix": self.opts.subsetting_prefix,
            },
        )

    @ProcGroup.add_proc
    def p_pathway_heterogeneity(self) -> Type[Proc]:
        """Build MetabolicPathwayHeterogeneity process

        Returns:
            A `MetabolicPathwayHeterogeneity` process that requires the
            imputation step and receives the group's shared options as envs.
        """
        return Proc.from_proc(
            MetabolicPathwayHeterogeneity,
            "MetabolicPathwayHeterogeneity",
            requires=self.p_expr_impute,
            envs={
                "ncores": self.opts.ncores,
                "gmtfile": self.opts.gmtfile,
                "grouping": self.opts.grouping,
                "grouping_prefix": self.opts.grouping_prefix,
                "subsetting": self.opts.subsetting,
                "subsetting_prefix": self.opts.subsetting_prefix,
            },
        )

    @ProcGroup.add_proc
    def p_features(self) -> Type[Proc]:
        """Build MetabolicFeatures process

        Returns:
            A `MetabolicFeatures` process that requires the imputation step
            and receives the group's shared options as envs.
        """
        return Proc.from_proc(
            MetabolicFeatures,
            "MetabolicFeatures",
            requires=self.p_expr_impute,
            envs={
                "ncores": self.opts.ncores,
                "gmtfile": self.opts.gmtfile,
                "grouping": self.opts.grouping,
                "grouping_prefix": self.opts.grouping_prefix,
                "subsetting": self.opts.subsetting,
                "subsetting_prefix": self.opts.subsetting_prefix,
            },
        )

    @ProcGroup.add_proc
    def p_features_intra_subset(self) -> Type[Proc]:
        """Build MetabolicFeaturesIntraSubset process

        Returns:
            A `MetabolicFeaturesIntraSubset` process that requires the
            imputation step and receives the group's shared options,
            including `subsetting_comparison`, as envs.

        Raises:
            ValueError: If `subsetting_comparison` is given without
                `subsetting`.
        """
        # Comparisons between subsets are meaningless without subsets
        if self.opts.subsetting_comparison and not self.opts.subsetting:
            raise ValueError(
                "Cannot use `subsetting_comparison` without `subsetting`."
            )

        return Proc.from_proc(
            MetabolicFeaturesIntraSubset,
            "MetabolicFeaturesIntraSubset",
            requires=self.p_expr_impute,
            envs={
                "ncores": self.opts.ncores,
                "gmtfile": self.opts.gmtfile,
                "grouping": self.opts.grouping,
                "grouping_prefix": self.opts.grouping_prefix,
                "subsetting": self.opts.subsetting,
                "subsetting_prefix": self.opts.subsetting_prefix,
                "subsetting_comparison": self.opts.subsetting_comparison,
            },
        )


if __name__ == "__main__":
    # Running this module as a script executes the whole process group
    # as a standalone pipeline.
    pipeline = ScrnaMetabolicLandscape().as_pipen()
    pipeline.run()