# Source code for biopipen.ns.protein

"""Protein-related processes."""
from ..core.proc import Proc
from ..core.config import config


class Prodigy(Proc):
    """Prediction of binding affinity of protein-protein complexes based on
    intermolecular contacts using Prodigy.

    See <https://rascar.science.uu.nl/prodigy/> and
    <https://github.com/haddocking/prodigy>.

    `prodigy-prot` must be installed under the given python of `proc.lang`.

    Input:
        infile: The structure file in PDB or mmCIF format.

    Output:
        outfile: The output file generated by Prodigy.
            Which of the generated files it points to is determined by
            `envs.outtype`.
        outdir: The output directory containing all output files.

    Envs:
        distance_cutoff (type=float): The distance cutoff to calculate intermolecular
            contacts.
        acc_threshold (type=float): The accessibility threshold for BSA analysis.
        temperature (type=float): The temperature (C) for Kd prediction.
        contact_list (flag): Whether to generate contact list.
        pymol_selection (flag): Whether to output a script to highlight the
            interface residues in PyMOL.
        selection (list): The selection of the chains to analyze.
            `['A', 'B']` will analyze chains A and B.
            `['A,B', 'C']` will analyze chain A and C; and B and C.
            `['A', 'B', 'C']` will analyze all combinations of A, B, and C.
        outtype (choice): Set the format of the output file (`out.outfile`).
            All three files will be generated. This option only determines which
            is assigned to `out.outfile`.
            - raw: The raw output file from prodigy.
            - json: The output file in JSON format.
            - tsv: The output file in TSV format.
    """
    input = "infile:file"
    # `outfile` lives inside `outdir` ("<stem>_prodigy"). Its extension follows
    # `envs.outtype`, except that "raw" maps to the ".out" extension.
    output = [
        "outfile:file:{{in.infile | stem}}_prodigy/"
        "{{in.infile | stem}}.{{envs.outtype if envs.outtype != 'raw' else 'out'}}",
        "outdir:dir:{{in.infile | stem}}_prodigy",
    ]
    lang = config.lang.python
    envs = {
        "distance_cutoff": 5.5,
        "acc_threshold": 0.05,
        "temperature": 25.0,
        "contact_list": True,
        "pymol_selection": True,
        "selection": None,
        "outtype": "json",
    }
    script = "file://../scripts/protein/Prodigy.py"


class ProdigySummary(Proc):
    """Summary of the output from `Prodigy`.

    Input:
        infiles: The output json file generated by `Prodigy`.

    Output:
        outdir: The directory of summary files generated by `ProdigySummary`.

    Envs:
        group (type=auto): The group of the samples for boxplots.
            If `None`, don't do boxplots.
            It can be a dict of group names and sample names, e.g.
            `{"group1": ["sample1", "sample2"], "group2": ["sample3"]}`
            or a file containing the group information, with the first column
            being the sample names and the second column being the group names.
            The file should be tab-delimited with no header.
    """
    input = "infiles:files"
    # Builds one row (a list of paths) holding a `<outdir>/_prodigy.tsv` path
    # for each entry of the incoming channel's `outdir` column.
    # NOTE(review): the docstring describes the inputs as json files while
    # this collects `.tsv` paths -- confirm which one the script expects.
    input_data = lambda ch: [[f"{odir}/_prodigy.tsv" for odir in ch.outdir]]
    output = "outdir:dir:prodigy_summary"
    lang = config.lang.rscript
    envs = {"group": None}
    script = "file://../scripts/protein/ProdigySummary.R"
    plugin_opts = {"report": "file://../reports/protein/ProdigySummary.svelte"}


class MMCIF2PDB(Proc):
    """Convert mmCIF or PDBx file to PDB file.

    Using MAXIT or [BeEM](https://github.com/kad-ecoli/BeEM), as selected
    by `envs.tool`.

    Input:
        infile: The input mmCIF or PDBx file.

    Output:
        outfile: The output PDB file.
            For BeEM, the "outfmt" is set to 3 to always output a single
            PDB file.

    Envs:
        tool (choice): The tool to use for conversion.
            - maxit: Use MAXIT.
            - beem: Use BeEM.
        maxit: The path to the MAXIT executable.
        beem: The path to the BeEM executable.
        <more>: Other options for MAXIT/BeEM.
            For BeEM, "outfmt" will not be used as it is set to 3.
    """
    input = "infile:file"
    output = "outfile:file:{{in.infile | stem}}.pdb"
    lang = config.lang.python
    envs = {
        "tool": "maxit",
        "maxit": config.exe.maxit,
        "beem": config.exe.beem,
    }
    script = "file://../scripts/protein/MMCIF2PDB.py"


class RMSD(Proc):
    """Calculate the RMSD between two structures.

    See also https://github.com/charnley/rmsd.

    If the input is in mmCIF format, convert it to PDB first, using the tool
    given by `envs.conv_tool`.

    Input:
        infile1: The first structure file.
        infile2: The second structure file.

    Output:
        outfile: The output file containing the RMSD value.

    Envs:
        maxit: The path to the MAXIT executable.
        beem: The path to the BeEM executable.
        calculate_rmsd: The path to the calculate_rmsd executable.
        conv_tool (choice): The tool to use for conversion.
            - maxit: Use MAXIT.
            - beem: Use BeEM.
        ca_only (flag): Whether to calculate RMSD using only C-alpha atoms.
        duel (choice): How to handle the duel atoms. Default is "keep".
            - keep: Keep both atoms.
            - keep_first: Keep the first atom.
            - keep_last: Keep the last atom.
            - average: Average the coordinates.
        reorder (flag): Whether to reorder the atoms in the structures.
        <more>: Other options for calculate_rmsd.
    """
    input = "infile1:file, infile2:file"
    # The output name joins the stems of both inputs: "<stem1>-<stem2>.rmsd.txt"
    output = "outfile:file:{{in.infile1 | stem}}-{{in.infile2 | stem}}.rmsd.txt"
    lang = config.lang.python
    envs = {
        "maxit": config.exe.maxit,
        "beem": config.exe.beem,
        "calculate_rmsd": config.exe.calculate_rmsd,
        "conv_tool": "maxit",
        "ca_only": False,
        "duel": "keep",
        "reorder": True,
    }
    script = "file://../scripts/protein/RMSD.py"


class PDB2Fasta(Proc):
    """Convert PDB file to FASTA file.

    Input:
        infile: The input PDB file.

    Output:
        outfile: The output FASTA file.

    Envs:
        chains (auto): The chains to extract. Either a list of chain IDs or
            a string of chain IDs separated by commas.
            If None, extract all chains.
        wrap (type=int): The number of residues per line in the output FASTA
            file. Set to 0 to disable wrapping.
    """
    input = "infile:file"
    output = "outfile:file:{{in.infile | stem}}.fasta"
    lang = config.lang.python
    envs = {"chains": None, "wrap": 80}
    script = "file://../scripts/protein/PDB2Fasta.py"