"""Plotting data"""
from ..core.proc import Proc
from ..core.config import config
class VennDiagram(Proc):
    """Plot Venn diagram

    Needs `ggVennDiagram`

    Input:
        infile: The input file for data
            If `envs.intype` is `raw`, it should be a data frame with row names
            as categories and a single column of elements separated by
            comma (`,`).
            If it is `computed`, it should be a data frame with row names as
            the elements and columns as the categories. The data should be
            binary indicators (`0, 1`) indicating whether the elements are
            present in the categories.

    Output:
        outfile: The output figure file

    Envs:
        inopts: The options for `read.table()` to read `in.infile`
        intype: `raw` or `computed`. See `in.infile`
        devpars: The parameters for `png()`
        args: Additional arguments for `ggVennDiagram()`
        ggs: Additional ggplot expression to adjust the plot
    """

    input = "infile:file"
    # Output file name is templated from the input file name
    output = "outfile:file:{{in.infile | stem}}.venn.png"
    # Interpreter comes from the project configuration
    lang = config.lang.rscript
    envs = {
        "inopts": {"row.names": -1, "header": False},
        "intype": "raw",
        "devpars": {"res": 100, "width": 800, "height": 600},
        "args": {},
        "ggs": None,
    }
    script = "file://../scripts/plot/VennDiagram.R"
class Heatmap(Proc):
    r"""Plot heatmaps using `ComplexHeatmap`

    Examples:
        >>> pipen run plot Heatmap \
        >>> --in.infile data.txt \
        >>> --in.annofiles anno.txt \
        >>> --envs.args.row_names_gp 'r:fontsize5' \
        >>> --envs.args.column_names_gp 'r:fontsize5' \
        >>> --envs.args.clustering_distance_rows pearson \
        >>> --envs.args.clustering_distance_columns pearson \
        >>> --envs.args.show_row_names false \
        >>> --envs.args.row_split 3 \
        >>> --envs.devpars.width 5000 \
        >>> --envs.devpars.height 5000 \
        >>> --envs.draw.merge_legends \
        >>> --envs.args.heatmap_legend_param.title AUC \
        >>> --envs.args.row_dend_reorder \
        >>> --envs.args.column_dend_reorder \
        >>> --envs.args.top_annotation \
        >>> 'r:HeatmapAnnotation( \
        >>> Mutation = as.matrix(annos[,(length(groups)+1):ncol(annos)]) \
        >>> )' \
        >>> --envs.args.right_annotation \
        >>> 'r:rowAnnotation( \
        >>> AUC = anno_boxplot(as.matrix(data), outline = F) \
        >>> )' \
        >>> --envs.globals \
        >>> 'fontsize8 = gpar(fontsize = 12); \
        >>> fontsize5 = gpar(fontsize = 8); \
        >>> groups = c ("Group1", "Group2", "Group3")' \
        >>> --envs.seed 8525

    Input:
        infile: The data matrix file
        annofiles: The files for annotation data

    Output:
        outfile: The heatmap plot
        outdir: Other data of the heatmap
            Including RDS file of the heatmap, row clusters and col clusters.

    Envs:
        inopts: Options for `read.table()` to read `in.infile`
        anopts: Options for `read.table()` to read `in.annofiles`
        draw: Options for `ComplexHeatmap::draw()`
        args: Arguments for `ComplexHeatmap::Heatmap()`
        devpars: The parameters for device.
        seed: The seed
        globals: Some globals for the expression in `args` to be evaluated

    Requires:
        bioconductor-complexheatmap:
            - check: {{proc.lang}} <(echo "library(ComplexHeatmap)")
    """

    # NOTE: the docstring above is a raw string on purpose; in a regular
    # string literal each trailing backslash of the example would swallow the
    # following newline and collapse the command onto a single line.

    input = "infile:file, annofiles:files"
    # The plot is written inside the output directory, both named after the
    # input file with a ".heatmap" suffix appended
    output = [
        'outfile:file:{{in.infile | stem0 | append: ".heatmap"}}/'
        '{{in.infile | stem0 | append: ".heatmap"}}.png',
        'outdir:dir:{{in.infile | stem0 | append: ".heatmap"}}',
    ]
    # Interpreter comes from the project configuration
    lang = config.lang.rscript
    envs = {
        "inopts": {"header": True, "row.names": -1},
        "anopts": {"header": True, "row.names": -1},
        "draw": {},
        "devpars": {},
        "args": {"heatmap_legend_param": {}},
        "seed": None,
        "globals": "",
    }
    script = "file://../scripts/plot/Heatmap.R"
class ROC(Proc):
    """Plot ROC curve using [`plotROC`](https://cran.r-project.org/web/packages/plotROC/vignettes/examples.html).

    Input:
        infile: The input file for data, tab-separated.
            The first column should be ids of the records (this is optional if `envs.noids` is True).
            The second column should be the labels of the records (1 for positive, 0 for negative).
            If they are not binary, you can specify the positive label by `envs.pos_label`.
            From the third column, it should be the scores of the different models.

    Output:
        outfile: The output figure file

    Envs:
        noids: Set to True when the input file has no ids column
            (the first column described in `in.infile`).
        pos_label: The positive label.
        ci: Whether to use `geom_rocci()` instead of `geom_roc()`.
        devpars: The parameters for `png()`
        args: Additional arguments for `geom_roc()` or `geom_rocci()` if `envs.ci` is True.
        style_roc: Arguments for `style_roc()`
    """  # noqa: E501

    input = "infile:file"
    # Output file name is templated from the input file name
    output = "outfile:file:{{in.infile | stem}}.roc.png"
    # Interpreter comes from the project configuration
    lang = config.lang.rscript
    envs = {
        "noids": False,
        "pos_label": 1,
        "ci": False,
        "devpars": {"res": 100, "width": 750, "height": 600},
        "args": {"labels": False},
        "style_roc": {},
        # NOTE(review): `show_auc` is not described in the `Envs` section of
        # the docstring; presumably it toggles displaying the AUC on the
        # plot -- confirm against scripts/plot/ROC.R and document it there.
        "show_auc": True,
    }
    script = "file://../scripts/plot/ROC.R"
class Manhattan(Proc):
    """Plot Manhattan plot.

    Using the [`ggmanh`](https://bioconductor.org/packages/devel/bioc/vignettes/ggmanh/inst/doc/ggmanh.html) package.
    Requires `ggmanh` v1.9.6 or later.

    Input:
        infile: The input file for data
            It should contain at least three columns, the chromosome, the position
            and the p-value of the SNPs.
            Header is required.

    Output:
        outfile: The output figure file

    Envs:
        chrom_col: The column for chromosome
            An integer (1-based) or a string indicating the column name.
        pos_col: The column for position
            An integer (1-based) or a string indicating the column name.
        pval_col: The column for p-value
            An integer (1-based) or a string indicating the column name.
        label_col: The column for label.
            Once specified, the significant SNPs will be labeled on the plot.
        devpars (ns): The parameters for `png()`
            - res (type=int): The resolution
            - width (type=int): The width
            - height (type=int): The height
        title: The title of the plot
        ylabel: The y-axis label
        rescale (flag): Whether to rescale the p-values
        rescale_ratio_threshold (type=float): Threshold that triggers the rescale
        signif (auto): A single value or a list of values to indicate the significance levels
            Multiple values should be also separated by comma (`,`).
            The minimum value will be used as the cutoff to determine if the SNPs are significant.
        hicolors (auto): The colors for significant and non-significant SNPs
            If a single color is given, the non-significant SNPs will be in grey.
            Set it to None to disable the highlighting.
        thin_n (type=int): Number of max points per horizontal partitions of the plot.
            `0` or `None` to disable thinning.
        thin_bins (type=int): Number of bins to partition the data.
        zoom (auto): Chromosomes to zoom in
            Each chromosome should be separated by comma (`,`) or in a list. Single chromosome is also accepted.
            Ranges are also accepted, see `envs.chroms`.
            Each chromosome will be saved in a separate file.
        zoom_devpars (ns): The parameters for the zoomed plot
            - width (type=int): The width
            - height (type=int): The height, inherited from `devpars` by default
            - res (type=int): The resolution, inherited from `devpars` by default
        chroms (auto): The chromosomes and order to plot
            A hyphen (`-`) can be used to indicate a range.
            For example `chr1-22,chrX,chrY,chrM` will plot all autosomes, X, Y and M.
            If `auto`, only the chromosomes in the data will be plotted in the order
            they appear in the data.
        args (ns): Additional arguments for `manhattan_plot()`.
            See <https://rdrr.io/github/leejs-abv/ggmanh/man/manhattan_plot.html>.
            Note that `-` will be replaced by `.` in the argument names.
            - <more>: Additional arguments for `manhattan_plot()`
    """  # noqa: E501

    input = "infile:file"
    # Output file name is templated from the input file name
    output = "outfile:file:{{in.infile | stem0}}.manhattan.png"
    # Interpreter comes from the project configuration
    lang = config.lang.rscript
    envs = {
        "chrom_col": 1,
        "pos_col": 2,
        "pval_col": 3,
        "label_col": None,
        "devpars": {"res": 100, "width": 1000, "height": 500},
        # `None` entries are documented above as inherited from `devpars`
        "zoom_devpars": {"width": 500, "height": None, "res": None},
        "title": None,
        "ylabel": "-log10(p-value)",
        "rescale": True,
        "rescale_ratio_threshold": 5,
        "signif": [5e-8, 1e-5],
        "hicolors": None,
        "thin_n": None,
        "thin_bins": 200,
        "zoom": None,
        "chroms": "auto",
        "args": {},
    }
    script = "file://../scripts/plot/Manhattan.R"
class QQPlot(Proc):
    """Generate QQ-plot or PP-plot using qqplotr.

    See <https://cran.r-project.org/web/packages/qqplotr/vignettes/introduction.html>.

    Input:
        infile: The input file for data
            It should contain at least one column of p-values or the values to be
            plotted. Header is required.
        theorfile: The file for theoretical values (optional)
            This file should contain at least one column of theoretical values.
            The values will be passed to the functions in `envs.theor_funs` to
            calculate the theoretical quantiles.
            Header is required.

    Output:
        outfile: The output figure file

    Envs:
        val_col: The column for values to be plotted
            An integer (1-based) or a string indicating the column name.
        devpars (ns): The parameters for `png()`
            - res (type=int): The resolution
            - width (type=int): The width
            - height (type=int): The height
        xlabel: The x-axis label
        ylabel: The y-axis label
        title: The title of the plot
        trans: The transformation of the values
            You can use `-log10` to transform the values to `-log10(values)`.
            Otherwise you can use an R function directly or a custom R function.
            For example `function(x) -log10(x)`.
        kind (choice): The kind of the plot, `qq` or `pp`
            - qq: QQ-plot
            - pp: PP-plot
        theor_col: The column for theoretical values in `in.theorfile` if provided,
            otherwise in `in.infile`.
            An integer (1-based) or a string indicating the column name.
            If `distribution` of `band`, `line`, or `point` is `custom`, this column
            must be provided.
        theor_trans: The transformation of the theoretical values.
            The `theor_funs` have default functions to take the theoretical values.
            This transformation will be applied to the theoretical values before
            passing to the `theor_funs`.
        theor_funs (ns): The R functions to generate density, quantile and deviates
            of the theoretical distribution based on the theoretical values
            if `distribution` of `band`, `line`, or `point` is `custom`.
            - dcustom: The density function, used by band
            - qcustom: The quantile function, used by point
            - rcustom: The deviates function, used by line
        args (ns): The common arguments for `envs.band`, `envs.line` and `envs.point`.
            - distribution: The distribution of the theoretical quantiles
                When `custom` is used, the `envs.theor_col` should be provided and
                `values` will be added to `dparams` automatically.
            - dparams (type=json): The parameters for the distribution
            - <more>: Other shared arguments between `stat_*_band`, `stat_*_line`
                and `stat_*_point`.
        band (ns): The arguments for `stat_qq_band()` or `stat_pp_band()`.
            See <https://rdrr.io/cran/qqplotr/man/stat_qq_band.html> and
            <https://rdrr.io/cran/qqplotr/man/stat_pp_band.html>.
            Set to `None` or `band.disabled` to True to disable the band.
            - disabled (flag): Disable the band
            - distribution: The distribution of the theoretical quantiles
                When `custom` is used, the `envs.theor_col` should be provided and
                `values` will be added to `dparams` automatically.
            - dparams (type=json): The parameters for the distribution
            - <more>: Additional arguments for `stat_qq_band()` or `stat_pp_band()`
        line (ns): The arguments for `stat_qq_line()` or `stat_pp_line()`.
            See <https://rdrr.io/cran/qqplotr/man/stat_qq_line.html> and
            <https://rdrr.io/cran/qqplotr/man/stat_pp_line.html>.
            Set to `None` or `line.disabled` to True to disable the line.
            - disabled (flag): Disable the line
            - distribution: The distribution of the theoretical quantiles
                When `custom` is used, the `envs.theor_col` should be provided and
                `values` will be added to `dparams` automatically.
            - dparams (type=json): The parameters for the distribution
            - <more>: Additional arguments for `stat_qq_line()` or `stat_pp_line()`
        point (ns): The arguments for `geom_qq_point()` or `geom_pp_point()`.
            See <https://rdrr.io/cran/qqplotr/man/stat_qq_point.html> and
            <https://rdrr.io/cran/qqplotr/man/stat_pp_point.html>.
            Set to `None` or `point.disabled` to True to disable the point.
            - disabled (flag): Disable the point
            - distribution: The distribution of the theoretical quantiles
                When `custom` is used, the `envs.theor_col` should be provided and
                `values` will be added to `dparams` automatically.
            - dparams (type=json): The parameters for the distribution
            - <more>: Additional arguments for `geom_qq_point()` or `geom_pp_point()`
        ggs (list): Additional ggplot expression to adjust the plot.
    """

    input = "infile:file, theorfile:file"
    # Output file name is templated from the input file name and `envs.kind`
    output = "outfile:file:{{in.infile | stem}}.{{envs.kind}}.png"
    # Interpreter comes from the project configuration
    lang = config.lang.rscript
    envs = {
        "val_col": 1,
        "theor_col": None,
        "theor_trans": None,
        # Default R functions (as source text); each receives the theoretical
        # values through its `values` argument.
        "theor_funs": {
            "dcustom": (
                "\n"
                "function(x, values, ...) {\n"
                "density(values, from = min(values), to = max(values), "
                "n = length(x))$y\n"
                "}\n"
            ),
            "qcustom": "function(p, values, ...) {quantile(values, probs = p)}",
            "rcustom": "function(n, values, ...) { sample(values, n, replace = TRUE) }",
        },
        "args": {"distribution": "norm", "dparams": {}},
        "devpars": {"res": 100, "width": 1000, "height": 1000},
        "xlabel": "Theoretical Quantiles",
        "ylabel": "Observed Quantiles",
        "title": "QQ-plot",
        "trans": None,
        "kind": "qq",
        "band": {"disabled": False, "distribution": None, "dparams": None},
        "line": {"disabled": False, "distribution": None, "dparams": None},
        "point": {"disabled": False, "distribution": None, "dparams": None},
        "ggs": None,
    }
    script = "file://../scripts/plot/QQPlot.R"
class Scatter(Proc):
    """Generate scatter plot using ggplot2.

    [`ggpmisc`](https://cran.r-project.org/web/packages/ggpmisc/index.html) is used
    for the stats and labels.
    See also https://cran.r-project.org/web/packages/ggpmisc/vignettes/model-based-annotations.html

    Input:
        infile: The input file for data
            It should contain at least two columns for x and y values.
            Header is required.

    Output:
        outfile: The output figure file

    Envs:
        x_col: The column for x values
            An integer (1-based) or a string indicating the column name.
        y_col: The column for y values
            An integer (1-based) or a string indicating the column name.
        devpars (ns): The parameters for `png()`
            - res (type=int): The resolution
            - width (type=int): The width
            - height (type=int): The height
        args (ns): Additional arguments for `geom_point()`
            See <https://ggplot2.tidyverse.org/reference/geom_point.html>.
            - <more>: Additional arguments for `geom_point()`
        mapping: Extra mapping for all geoms, including `stats`.
            Should be `aes(color = group)` but all these are valid: `color = group` or
            `(color = group)`.
        ggs (list): Additional ggplot expression to adjust the plot.
        formula: The formula for the model
        stats (type=json): The stats to add to the plot.
            A dict with keys available stats in `ggpmisc` (without `stat_`).
            See <https://cran.r-project.org/web/packages/ggpmisc/vignettes/model-based-annotations.html#statistics>.
            The values should be the arguments for the stats.
            If you want a stat to be added multiple times, add a suffix `#x` to the key.
            For example, `poly_line#1` and `poly_line#2` will add two polynomial lines.
    """  # noqa: E501

    input = "infile:file"
    # Output file name is templated from the input file name
    output = "outfile:file:{{in.infile | stem}}.scatter.png"
    # Interpreter comes from the project configuration
    lang = config.lang.rscript
    envs = {
        "x_col": 1,
        "y_col": 2,
        "devpars": {"res": 100, "width": 1000, "height": 800},
        "args": {},
        "mapping": None,
        "ggs": [],
        "formula": "y ~ x",
        "stats": {},
    }
    script = "file://../scripts/plot/Scatter.R"