# Source code of biopipen.ns.delim

"""Tools to deal with csv/tsv files"""
from ..core.config import config
from ..core.proc import Proc


class RowsBinder(Proc):
    """Bind rows of input files

    Input:
        infiles: The input files to bind.
            The input files should have the same number of columns,
            and same delimiter.

    Output:
        outfile: The output file with rows bound

    Envs:
        sep: The separator of the input files
        header (flag): Whether the input files have header
        filenames: Whether to add filename as the last column.
            Either a string of an R function that starts with `function` or
            a list of names (or string separated by comma) to
            add for each input file.
            The R function takes the path of the input file as the only
            argument and should return a string. The string will be added as
            the last column of the output file.
        filenames_col: The column name for the `filenames` column
    """
    input = "infiles:files"
    # The output is named after the FIRST input file only:
    # `<stem of first input>_rbound<extension of first input>`
    output = (
        "outfile:file:"
        "{{in.infiles | first | stem}}_rbound{{in.infiles | first | ext}}"
    )
    # Default values for the options documented in the `Envs` section above
    envs = {
        "sep": "\t",
        "header": True,
        "filenames": None,
        "filenames_col": "Filename",
    }
    # Interpreter is read from the shared configuration rather than hard-coded
    lang = config.lang.rscript
    script = "file://../scripts/delim/RowsBinder.R"


class SampleInfo(Proc):
    """List sample information and perform statistics

    Input:
        infile: The input file to list sample information
            The input file should be a csv/tsv file with header

    Output:
        outfile: The output file with sample information, with mutated columns
            if `envs.save_mutated` is True.
            The basename of the output file will be the same as the input file.
            The file name of each plot will be slugified from the case name.
            Each plot has 3 formats: pdf, png and code.zip, which contains the
            data and R code to reproduce the plot.

    Envs:
        sep: The separator of the input file.
        mutaters (type=json): A dict of mutaters to mutate the data frame.
            The key is the column name and the value is the R expression
            to mutate the column. The dict will be transformed to a list in R
            and passed to `dplyr::mutate`.
            You may also use `paired()` to identify paired samples. The function
            takes the following arguments:
            * `df`: The data frame. Use `.` if the function is called in
                a dplyr pipe.
            * `id_col`: The column name in `df` for the ids to be returned in
                the final output.
            * `compare_col`: The column name in `df` to compare the values for
                each id in `id_col`.
            * `idents`: The values in `compare_col` to compare. It could be
                either an integer or a vector. If it is an integer,
                the number of values in `compare_col` must be the same as
                the integer for the `id` to be regarded as paired. If it is
                a vector, the values in `compare_col` must be the same
                as the values in `idents` for the `id` to be regarded as paired.
            * `uniq`: Whether to return unique ids or not. Default is `TRUE`.
                If `FALSE`, you can mutate the meta data frame with the
                returned ids. Non-paired ids will be `NA`.
        save_mutated (flag): Whether to save the mutated columns.
        exclude_cols (auto): The columns to exclude in the table in the report.
            Could be a list or a string separated by comma.
        defaults (ns): The default parameters for `envs.stats`.
            - plot_type: The type of the plot.
                See the supported plot types here:
                <https://pwwang.github.io/plotthis/reference/index.html>
                The plot_type should be lower case and the plot function used in
                `plotthis` should be used. The mapping from plot_type to the
                plot function is like `bar -> BarPlot`, `box -> BoxPlot`, etc.
            - more_formats (list): The additional formats to save the plot.
                By default, the plot will be saved in png, which is also used to
                display in the report. You can add more formats to save the plot.
                For example, `more_formats = ["pdf", "svg"]`.
            - save_code (flag): Whether to save the R code to reproduce the plot.
                The data used to plot will also be saved.
            - subset: An expression to subset the data frame before plotting.
                The expression should be a string of R expression that will be passed
                to `dplyr::filter`. For example, `subset = "Sample == 'A'"`.
            - section: The section name in the report.
                In case you want to group the plots in the report.
            - devpars (ns): The device parameters for the plot.
                - width (type=int): The width of the plot.
                - height (type=int): The height of the plot.
                - res (type=int): The resolution of the plot.
            - descr: The description of the plot, shown in the report.
            - <more>: You can add more parameters to the defaults.
                These parameters will be expanded to the `envs.stats` for each case,
                and passed to individual plot functions.
        stats (type=json): The statistics to perform.
            The keys are the case names and the values are the parameters
            inherited from `envs.defaults`.
    """
    input = "infile:file"
    # The output keeps the basename of the input file
    output = "outfile:file:{{in.infile | basename}}"
    # Default values for the options documented in the `Envs` section above
    envs = {
        "sep": "\t",
        "mutaters": {},
        "save_mutated": False,
        "exclude_cols": None,
        # Per-case fallbacks; see `defaults` in the docstring
        "defaults": {
            "plot_type": "bar",
            "more_formats": [],
            "save_code": False,
            "subset": None,
            "section": None,
            "descr": None,
            "devpars": {"width": None, "height": None, "res": 100},
        },
        # No cases by default
        "stats": {},
    }
    # Interpreter is read from the shared configuration rather than hard-coded
    lang = config.lang.rscript
    script = "file://../scripts/delim/SampleInfo.R"
    # `report` option points at the reports/common.svelte template
    plugin_opts = {"report": "file://../reports/common.svelte"}