Skip to contents

Box plot or violin plot with optional jitter points, trend line, statistical test, background, line, and highlight.

Usage

BoxPlot(
  data,
  x,
  x_sep = "_",
  y = NULL,
  in_form = c("long", "wide"),
  split_by = NULL,
  split_by_sep = "_",
  sort_x = c("none", "mean_asc", "mean_desc", "mean", "median_asc", "median_desc",
    "median"),
  flip = FALSE,
  keep_empty = FALSE,
  group_by = NULL,
  group_by_sep = "_",
  group_name = NULL,
  x_text_angle = ifelse(isTRUE(flip) && isTRUE(stack), 90, 45),
  fill_mode = ifelse(!is.null(group_by), "dodge", "x"),
  fill_reverse = FALSE,
  theme = "theme_this",
  theme_args = list(),
  palette = "Paired",
  palcolor = NULL,
  alpha = 1,
  aspect.ratio = NULL,
  legend.position = "right",
  legend.direction = "vertical",
  add_point = FALSE,
  pt_color = "grey30",
  pt_size = NULL,
  pt_alpha = 1,
  jitter_width = 0.5,
  jitter_height = 0.1,
  stack = FALSE,
  y_max = NULL,
  y_min = NULL,
  add_trend = FALSE,
  trend_color = NULL,
  trend_linewidth = 1,
  trend_ptsize = 2,
  add_stat = NULL,
  stat_name = NULL,
  stat_color = "black",
  stat_size = 1,
  stat_stroke = 1,
  stat_shape = 25,
  add_bg = FALSE,
  bg_palette = "stripe",
  bg_palcolor = NULL,
  bg_alpha = 0.2,
  add_line = NULL,
  line_color = "red2",
  line_width = 0.6,
  line_type = 2,
  highlight = NULL,
  highlight_color = "red2",
  highlight_size = 1,
  highlight_alpha = 1,
  comparisons = NULL,
  ref_group = NULL,
  pairwise_method = "wilcox.test",
  multiplegroup_comparisons = FALSE,
  multiple_method = "kruskal.test",
  sig_label = c("p.signif", "p.format"),
  sig_labelsize = 3.5,
  facet_by = NULL,
  facet_scales = "fixed",
  facet_ncol = NULL,
  facet_nrow = NULL,
  facet_byrow = TRUE,
  title = NULL,
  subtitle = NULL,
  xlab = NULL,
  ylab = NULL,
  seed = 8525,
  combine = TRUE,
  nrow = NULL,
  ncol = NULL,
  byrow = TRUE,
  ...
)

ViolinPlot(
  data,
  x,
  x_sep = "_",
  y = NULL,
  in_form = c("long", "wide"),
  split_by = NULL,
  split_by_sep = "_",
  sort_x = c("none", "mean_asc", "mean_desc", "mean", "median_asc", "median_desc",
    "median"),
  flip = FALSE,
  keep_empty = FALSE,
  group_by = NULL,
  group_by_sep = "_",
  group_name = NULL,
  x_text_angle = ifelse(isTRUE(flip) && isTRUE(stack), 90, 45),
  fill_mode = ifelse(!is.null(group_by), "dodge", "x"),
  fill_reverse = FALSE,
  theme = "theme_this",
  theme_args = list(),
  palette = "Paired",
  palcolor = NULL,
  alpha = 1,
  aspect.ratio = NULL,
  legend.position = "right",
  legend.direction = "vertical",
  add_point = FALSE,
  pt_color = "grey30",
  pt_size = NULL,
  pt_alpha = 1,
  jitter_width = 0.5,
  jitter_height = 0.1,
  stack = FALSE,
  y_max = NULL,
  y_min = NULL,
  add_box = FALSE,
  box_color = "black",
  box_width = 0.1,
  box_ptsize = 2.5,
  add_trend = FALSE,
  trend_color = NULL,
  trend_linewidth = 1,
  trend_ptsize = 2,
  add_stat = NULL,
  stat_name = NULL,
  stat_color = "black",
  stat_size = 1,
  stat_stroke = 1,
  stat_shape = 25,
  add_bg = FALSE,
  bg_palette = "stripe",
  bg_palcolor = NULL,
  bg_alpha = 0.2,
  add_line = NULL,
  line_color = "red2",
  line_width = 0.6,
  line_type = 2,
  highlight = NULL,
  highlight_color = "red2",
  highlight_size = 1,
  highlight_alpha = 1,
  comparisons = NULL,
  ref_group = NULL,
  pairwise_method = "wilcox.test",
  multiplegroup_comparisons = FALSE,
  multiple_method = "kruskal.test",
  sig_label = c("p.signif", "p.format"),
  sig_labelsize = 3.5,
  facet_by = NULL,
  facet_scales = "fixed",
  facet_ncol = NULL,
  facet_nrow = NULL,
  facet_byrow = TRUE,
  title = NULL,
  subtitle = NULL,
  xlab = NULL,
  ylab = NULL,
  seed = 8525,
  combine = TRUE,
  nrow = NULL,
  ncol = NULL,
  byrow = TRUE,
  ...
)

Arguments

data

A data frame.

x

A character string specifying the column name of the data frame to plot for the x-axis.

x_sep

A character string to concatenate the columns in x, if multiple columns are provided. When in_form is "wide", x columns will not be concatenated.

y

A character string specifying the column name of the data frame to plot for the y-axis.

in_form

A character string to specify the input data type. Either "long" or "wide".

split_by

The column(s) to split data by and plot separately.

split_by_sep

The separator for multiple split_by columns. See split_by

sort_x

A character string to specify the sorting of x-axis. Either "none", "mean_asc", "mean_desc", "mean", "median_asc", "median_desc", "median".

flip

A logical value to flip the plot.

keep_empty

A logical value indicating whether to keep empty groups. If FALSE, empty groups will be removed.

group_by

Columns to group the data for plotting For those plotting functions that do not support multiple groups, They will be concatenated into one column, using group_by_sep as the separator

group_by_sep

The separator for multiple group_by columns. See group_by

group_name

A character string to name the legend of dodge.

x_text_angle

A numeric value specifying the angle of the x-axis text.

fill_mode

A character string to specify the fill mode. Either "dodge", "x", "mean", "median".

fill_reverse

A logical value to reverse the fill colors for gradient fill (mean/median).

theme

A character string or a theme class (i.e. ggplot2::theme_classic) specifying the theme to use. Default is "theme_this".

theme_args

A list of arguments to pass to the theme function.

palette

A character string specifying the palette to use.

palcolor

A character string specifying the color to use in the palette.

alpha

A numeric value specifying the transparency of the plot.

aspect.ratio

A numeric value specifying the aspect ratio of the plot.

legend.position

A character string specifying the position of the legend. if waiver(), for single groups, the legend will be "none", otherwise "right".

legend.direction

A character string specifying the direction of the legend.

add_point

A logical value to add (jitter) points to the plot.

pt_color

A character string to specify the color of the points.

pt_size

A numeric value to specify the size of the points.

pt_alpha

A numeric value to specify the transparency of the points.

jitter_width

A numeric value to specify the width of the jitter.

jitter_height

A numeric value to specify the height of the jitter.

stack

A logical value whether to stack the facetted plot by 'facet_by'.

y_max

A numeric value or a character string to specify the maximum value of the y-axis.

y_min

A numeric value or a character string to specify the minimum value of the y-axis.

add_trend

A logical value to add trend line to the plot.

trend_color

A character string to specify the color of the trend line.

trend_linewidth

A numeric value to specify the width of the trend line.

trend_ptsize

A numeric value to specify the size of the trend line points.

add_stat

A character string to add statistical test to the plot.

stat_name

A character string to specify the name of the stat legend.

stat_color

A character string to specify the color of the statistical test.

stat_size

A numeric value to specify the size of the statistical test.

stat_stroke

A numeric value to specify the stroke of the statistical test.

stat_shape

A numeric value to specify the shape of the statistical test.

add_bg

A logical value to add background to the plot.

bg_palette

A character string to specify the palette of the background.

bg_palcolor

A character vector to specify the colors of the background.

bg_alpha

A numeric value to specify the transparency of the background.

add_line

A character string to add a line to the plot.

line_color

A character string to specify the color of the line.

line_width

A numeric value to specify the size of the line.

line_type

A numeric value to specify the type of the line.

highlight

A vector of character strings to highlight the points. It should be a subset of the row names of the data. If TRUE, it will highlight all points.

highlight_color

A character string to specify the color of the highlighted points.

highlight_size

A numeric value to specify the size of the highlighted points.

highlight_alpha

A numeric value to specify the transparency of the highlighted points.

comparisons

A logical value or a list of vectors to perform pairwise comparisons.

ref_group

A character string to specify the reference group for comparisons.

pairwise_method

A character string to specify the pairwise comparison method.

multiplegroup_comparisons

A logical value to perform multiple group comparisons.

multiple_method

A character string to specify the multiple group comparison method.

sig_label

A character string to specify the label of the significance test.

sig_labelsize

A numeric value to specify the size of the significance test label.

facet_by

A character string specifying the column name of the data frame to facet the plot. Otherwise, the data will be split by split_by and generate multiple plots and combine them into one using patchwork::wrap_plots

facet_scales

Whether to scale the axes of facets. Default is "fixed" Other options are "free", "free_x", "free_y". See ggplot2::facet_wrap

facet_ncol

A numeric value specifying the number of columns in the facet. When facet_by is a single column and facet_wrap is used.

facet_nrow

A numeric value specifying the number of rows in the facet. When facet_by is a single column and facet_wrap is used.

facet_byrow

A logical value indicating whether to fill the plots by row. Default is TRUE.

title

A character string specifying the title of the plot. A function can be used to generate the title based on the default title. This is useful when split_by is used and the title needs to be dynamic.

subtitle

A character string specifying the subtitle of the plot.

xlab

A character string specifying the x-axis label.

ylab

A character string specifying the y-axis label.

seed

The random seed to use. Default is 8525.

combine

Whether to combine the plots into one when facet is FALSE. Default is TRUE.

nrow

A numeric value specifying the number of rows in the facet.

ncol

A numeric value specifying the number of columns in the facet.

byrow

A logical value indicating whether to fill the plots by row.

...

Additional arguments.

add_box

A logical value to add box plot to the plot.

box_color

A character string to specify the color of the box plot.

box_width

A numeric value to specify the width of the box plot.

box_ptsize

A numeric value to specify the size of the box plot points in the middle.

Examples

set.seed(8525)
data <- data.frame(
    x = rep(LETTERS[1:8], 40),
    y = rnorm(320),
    group1 = sample(c("g1", "g2"), 320, replace = TRUE),
    group2 = sample(c("h1", "h2", "h3", "h4"), 320, replace = TRUE)
)

BoxPlot(data, x = "x", y = "y")

BoxPlot(data,
    x = "x", y = "y",
    stack = TRUE, flip = TRUE, facet_by = "group1",
    add_bg = TRUE, bg_palette = "Paired"
)


# wide form data
data_wide <- data.frame(
    A = rnorm(100),
    B = rnorm(100),
    C = rnorm(100)
)
BoxPlot(data_wide, x = c("A", "B", "C"), in_form = "wide")

ViolinPlot(data, x = "x", y = "y")

ViolinPlot(data, x = "x", y = "y", add_box = TRUE)

ViolinPlot(data, x = "x", y = "y", add_point = TRUE)

ViolinPlot(data, x = "x", y = "y", add_trend = TRUE)

ViolinPlot(data, x = "x", y = "y", add_stat = mean)

ViolinPlot(data, x = "x", y = "y", add_bg = TRUE)

ViolinPlot(data, x = "x", y = "y", add_line = 0)

ViolinPlot(data, x = "x", y = "y", group_by = "group1")

ViolinPlot(data,
    x = "x", y = "y", group_by = "group1",
    facet_by = "group2", add_box = TRUE
)
#> Warning: Groups with fewer than two datapoints have been dropped.
#>  Set `drop = FALSE` to consider such groups for position adjustment purposes.
#> Warning: Groups with fewer than two datapoints have been dropped.
#>  Set `drop = FALSE` to consider such groups for position adjustment purposes.

ViolinPlot(data, x = "x", y = "y", add_point = TRUE, highlight = 'group1 == "g1"',
    alpha = 0.8, highlight_size = 1.5, pt_size = 1, add_box = TRUE)

ViolinPlot(data,
    x = "x", y = "y", group_by = "group1",
    comparisons = TRUE
)

ViolinPlot(data,
    x = "x", y = "y", sig_label = "p.format",
    facet_by = "group2", comparisons = list(c("A", "B"))
)

ViolinPlot(data,
    x = "x", y = "y", fill_mode = "mean",
    facet_by = "group2", palette = "Blues"
)

ViolinPlot(data,
    x = "x", y = "y", stack = TRUE,
    facet_by = "group2", add_box = TRUE, add_bg = TRUE,
    bg_palette = "Paired"
)