# Source code: biopipen.ns.protein

"""Protein-related processes."""
from ..core.proc import Proc
from ..core.config import config


class Prodigy(Proc):
    """Prediction of binding affinity of protein-protein complexes based on
    intermolecular contacts using Prodigy.

    See <https://rascar.science.uu.nl/prodigy/> and
    <https://github.com/haddocking/prodigy>.

    `prodigy-prot` must be installed under the given python of `proc.lang`.

    Input:
        infile: The structure file in PDB or mmCIF format.

    Output:
        outfile: The output file generated by Prodigy.
            Which of the generated files this points to is controlled by
            `envs.outtype`.
        outdir: The output directory containing all output files.

    Envs:
        distance_cutoff (type=float): The distance cutoff to calculate intermolecular
            contacts.
        acc_threshold (type=float): The accessibility threshold for BSA analysis.
        temperature (type=float): The temperature (C) for Kd prediction.
        contact_list (flag): Whether to generate contact list.
        pymol_selection (flag): Whether to output a script to highlight the
            interface residues in PyMOL.
        selection (list): The selection of the chains to analyze.
            `['A', 'B']` will analyze chains A and B.
            `['A,B', 'C']` will analyze chain A and C; and B and C.
            `['A', 'B', 'C']` will analyze all combinations of A, B, and C.
        outtype (choice): Set the format of the output file (`out.outfile`).
            All three files will be generated. This option only determines which
            is assigned to `out.outfile`.
            - raw: The raw output file from prodigy.
            - json: The output file in JSON format.
            - tsv: The output file in TSV format.
    """
    input = "infile:file"
    # Both outputs live under `<stem>_prodigy/`. The extension of `outfile`
    # follows `envs.outtype`, except that `raw` is mapped to `.out`.
    output = [
        "outfile:file:{{in.infile | stem}}_prodigy/"
        "{{in.infile | stem}}.{{envs.outtype if envs.outtype != 'raw' else 'out'}}",
        "outdir:dir:{{in.infile | stem}}_prodigy",
    ]
    lang = config.lang.python
    # Default values for the options documented in the `Envs` section above.
    envs = {
        "distance_cutoff": 5.5,
        "acc_threshold": 0.05,
        "temperature": 25.0,
        "contact_list": True,
        "pymol_selection": True,
        "selection": None,
        "outtype": "json",
    }
    script = "file://../scripts/protein/Prodigy.py"


class ProdigySummary(Proc):
    """Summary of the output from `Prodigy`.

    Input:
        infiles: The output files generated by `Prodigy`.
            By default, the `_prodigy.tsv` file under each `outdir` of the
            upstream process is used.

    Output:
        outdir: The directory of summary files generated by `ProdigySummary`.

    Envs:
        group (type=auto): The group of the samples for boxplots.
            If `None`, don't do boxplots.
            It can be a dict of group names and sample names, e.g.
            `{"group1": ["sample1", "sample2"], "group2": ["sample3"]}`
            or a file containing the group information, with the first column
            being the sample names and the second column being the group names.
            The file should be tab-delimited with no header.
    """
    input = "infiles:files"
    # Build one `<outdir>/_prodigy.tsv` path per entry of the upstream
    # `outdir` column and wrap them in an outer list, so that all paths form a
    # single row for `infiles`.
    # NOTE(review): assumes the upstream channel exposes an `outdir` column
    # (as `Prodigy` does) and that its script writes `_prodigy.tsv` there --
    # confirm against scripts/protein/Prodigy.py.
    input_data = lambda ch: [[f"{odir}/_prodigy.tsv" for odir in ch.outdir]]
    output = "outdir:dir:prodigy_summary"
    lang = config.lang.rscript
    # No grouping by default, i.e. no boxplots (see `group` above).
    envs = {"group": None}
    script = "file://../scripts/protein/ProdigySummary.R"
    plugin_opts = {"report": "file://../reports/protein/ProdigySummary.svelte"}