"""Protein-related processes."""
from ..core.proc import Proc
from ..core.config import config
class Prodigy(Proc):
    """Prediction of binding affinity of protein-protein complexes based on
    intermolecular contacts using Prodigy.

    See <https://rascar.science.uu.nl/prodigy/> and
    <https://github.com/haddocking/prodigy>.

    `prodigy-prot` must be installed under the given python of `proc.lang`.

    Input:
        infile: The structure file in PDB or mmCIF format.

    Output:
        outfile: The output file generated by Prodigy.
        outdir: The output directory containing all output files.

    Envs:
        distance_cutoff (type=float): The distance cutoff to calculate intermolecular
            contacts.
        acc_threshold (type=float): The accessibility threshold for BSA analysis.
        temperature (type=float): The temperature (C) for Kd prediction.
        contact_list (flag): Whether to generate contact list.
        pymol_selection (flag): Whether to output a script to highlight the
            interface residues in PyMOL.
        selection (list): The selection of the chains to analyze.
            `['A', 'B']` will analyze chains A and B.
            `['A,B', 'C']` will analyze chain A and C; and B and C.
            `['A', 'B', 'C']` will analyze all combinations of A, B, and C.
        outtype (choice): Set the format of the output file (`out.outfile`).
            All three files will be generated. This option only determines which
            is assigned to `out.outfile`.
            - raw: The raw output file from prodigy.
            - json: The output file in JSON format.
            - tsv: The output file in TSV format.
    """

    input = "infile:file"
    # `outfile` is placed inside `outdir`; its extension follows `envs.outtype`,
    # except that `raw` maps to the `.out` extension.
    output = [
        "outfile:file:{{in.infile | stem}}_prodigy/"
        "{{in.infile | stem}}.{{envs.outtype if envs.outtype != 'raw' else 'out'}}",
        "outdir:dir:{{in.infile | stem}}_prodigy",
    ]
    lang = config.lang.python
    envs = {
        "distance_cutoff": 5.5,
        "acc_threshold": 0.05,
        "temperature": 25.0,
        "contact_list": True,
        "pymol_selection": True,
        "selection": None,
        "outtype": "json",
    }
    script = "file://../scripts/protein/Prodigy.py"
class ProdigySummary(Proc):
    """Summary of the output from `Prodigy`.

    Input:
        infiles: The output files generated by `Prodigy`.
            The default `input_data` picks the `_prodigy.tsv` file under each
            `outdir` of the upstream output.

    Output:
        outdir: The directory of summary files generated by `ProdigySummary`.

    Envs:
        group (type=auto): The group of the samples for boxplots.
            If `None`, don't do boxplots.
            It can be a dict of group names and sample names, e.g.
            `{"group1": ["sample1", "sample2"], "group2": ["sample3"]}`
            or a file containing the group information, with the first column
            being the sample names and the second column being the group names.
            The file should be tab-delimited with no header.
    """

    input = "infiles:files"
    # Builds one row holding a list of `<outdir>/_prodigy.tsv` paths, one per
    # entry of `ch.outdir`.
    # NOTE(review): presumably the `Prodigy` script writes `_prodigy.tsv` into
    # its `outdir` regardless of `envs.outtype` -- confirm against Prodigy.py.
    input_data = lambda ch: [[f"{odir}/_prodigy.tsv" for odir in ch.outdir]]
    output = "outdir:dir:prodigy_summary"
    lang = config.lang.rscript
    envs = {"group": None}
    script = "file://../scripts/protein/ProdigySummary.R"
    plugin_opts = {"report": "file://../reports/protein/ProdigySummary.svelte"}
class MMCIF2PDB(Proc):
    """Convert mmCIF or PDBx file to PDB file.

    Using MAXIT or [BeEM](https://github.com/kad-ecoli/BeEM), as selected by
    `envs.tool`.

    Input:
        infile: The input mmCIF or PDBx file.

    Output:
        outfile: The output PDB file.
            For BeEM, "outfmt" is set to 3 to always output a single PDB file.

    Envs:
        tool (choice): The tool to use for conversion.
            - maxit: Use MAXIT.
            - beem: Use BeEM.
        maxit: The path to the MAXIT executable.
        beem: The path to the BeEM executable.
        <more>: Other options for MAXIT/BeEM.
            For BeEM, "outfmt" will not be used as it is set to 3.
    """

    input = "infile:file"
    output = "outfile:file:{{in.infile | stem}}.pdb"
    lang = config.lang.python
    # Executable paths default to the project-wide configuration.
    envs = {
        "tool": "maxit",
        "maxit": config.exe.maxit,
        "beem": config.exe.beem,
    }
    script = "file://../scripts/protein/MMCIF2PDB.py"
class RMSD(Proc):
    """Calculate the RMSD between two structures.

    See also https://github.com/charnley/rmsd.

    If the input is in mmCIF format, convert it to PDB first.

    Input:
        infile1: The first structure file.
        infile2: The second structure file.

    Output:
        outfile: The output file containing the RMSD value.

    Envs:
        maxit: The path to the MAXIT executable.
        beem: The path to the BeEM executable.
        calculate_rmsd: The path to the calculate_rmsd executable.
        conv_tool (choice): The tool to use for conversion.
            - maxit: Use MAXIT.
            - beem: Use BeEM.
        ca_only (flag): Whether to calculate RMSD using only C-alpha atoms.
        duel (choice): How to handle the duel atoms. Default is "keep".
            - keep: Keep both atoms.
            - keep_first: Keep the first atom.
            - keep_last: Keep the last atom.
            - average: Average the coordinates.
        reorder (flag): Whether to reorder the atoms in the structures.
        <more>: Other options for calculate_rmsd.
    """

    input = "infile1:file, infile2:file"
    # The output name joins the stems of both inputs.
    output = "outfile:file:{{in.infile1 | stem}}-{{in.infile2 | stem}}.rmsd.txt"
    lang = config.lang.python
    # Executable paths default to the project-wide configuration.
    envs = {
        "maxit": config.exe.maxit,
        "beem": config.exe.beem,
        "calculate_rmsd": config.exe.calculate_rmsd,
        "conv_tool": "maxit",
        "ca_only": False,
        "duel": "keep",
        "reorder": True,
    }
    script = "file://../scripts/protein/RMSD.py"