datar.apis.dplyr

module

datar.apis.dplyr

</>

Functions

across(_data, *args, _names, **kwargs) (T) — Apply the same transformation to multiple columns</>
add_count(_data, *args, wt, sort, name, **kwargs) (Any) — Add a count column to a data frame</>
add_tally(_data, wt, sort, name) (Any) — Add a count column to a data frame</>
all_of(_data, x) (Any) — For strict selection.</>
anti_join(x, y, by, copy, na_matches) (Any) — Anti join two data frames by matching rows.</>
any_of(_data, x, vars) (Any) — For non-strict selection: select the given columns, ignoring any that do not exist.</>
arrange(_data, *args, _by_group, **kwargs) (Any) — orders the rows of a data frame by the values of selected columns.</>
between(x, left, right, inclusive) (Any) — Check if a value is between two other values</>
bind_cols(*data, _name_repair, _copy) (Any) — Bind columns of given dataframes</>
bind_rows(*data, _id, _copy, **kwargs) (Any) — Bind rows of given dataframes</>
c_across(_data, _cols) (T) — Apply the same transformation to multiple columns rowwisely</>
case_match(_x, *args, _default, _dtypes) (T) — This function allows you to vectorise multiple switch() statements. Each case is evaluated sequentially and the first match for each element determines the corresponding value in the output vector. If no cases match, the _default is used. </>
case_when(cond, value, *more_cases) (Any) — Vectorise multiple if_else() statements.</>
coalesce(x, *replace) (Any) — Replace missing values with the first non-missing value</>
consecutive_id(x, *args) (Sequence) — Generate consecutive ids</>
contains(_data, match, ignore_case, vars) (Any) — Select columns that contain a string.</>
count(_data, *args, wt, sort, name, _drop, **kwargs) (Any) — Count the number of rows in each group</>
cross_join(x, y, copy, suffix) (T) — Cross joins match each row in x to every row in y, resulting in a data frame with nrow(x) * nrow(y) rows. </>
cumall(x) (Any) — Get cumulative bool. All cases after first False</>
cumany(x) (Any) — Get cumulative bool. All cases after first True</>
cume_dist(x, na_last) (Any) — Get the cume_dist of x</>
cummean(x, na_rm) (Any) — Cumulative mean</>
cur_column(_data, _name) (Any) — Get the current column</>
cur_data(_data) (Any) — Get the current dataframe</>
cur_data_all(_data) (Any) — Get the current data for the current group including the grouping variables </>
cur_group(_data) (Any) — Get the current group</>
cur_group_id(_data) (Any) — Get the current group id</>
cur_group_rows(_data) (Any) — Get the current group row indices</>
dense_rank(x, na_last) (Any) — Get the dense rank of x</>
desc(x) (Any) — Transform a vector into a format that will be sorted in descending order</>
distinct(_data, *args, keep_all, _preserve) (Any) — Select only distinct (unique) rows from a data frame</>
ends_with(_data, match, ignore_case, vars) (Any) — Select columns that end with a string.</>
everything(_data) (Any) — Select all variables.</>
filter_(_data, *conditions, _preserve) (Any) — Filter a data frame based on conditions</>
first(x, order_by, default) (Any) — Extract the first element of a vector</>
full_join(x, y, by, copy, suffix, keep, na_matches, multiple, unmatched, relationship) (Any) — Full join two data frames by matching rows.</>
glimpse(_data, width, formatter) (Any) — Display a summary of a data frame</>
group_by(_data, *args, _add, _drop) (Any) — Create a grouped frame</>
group_by_drop_default(_data) (Any) — Get the default value of _drop of a frame</>
group_cols(_data) (Any) — Get the group columns of a frame</>
group_data(_data) (Any) — Get the group data of a frame</>
group_indices(_data) (Any) — Get the group indices of a frame</>
group_keys(_data) (Any) — Get the group keys of a frame</>
group_map(_data, _f, *args, _keep, **kwargs) (Any) — Apply a function to each group</>
group_modify(_data, _f, *args, _keep, **kwargs) (Any) — Apply a function to each group</>
group_rows(_data) (Any) — Get the group rows of a frame</>
group_size(_data) (Any) — Get the group sizes of a frame</>
group_split(_data, *args, _keep, **kwargs) (Any) — Split a grouped frame into a list of data frames</>
group_trim(_data, _drop) (Any) — Remove empty groups</>
group_vars(_data) (Any) — Get the grouping variables of a frame</>
group_walk(_data, _f, *args, _keep, **kwargs) (Any) — Apply a function to each group</>
if_all(_data, *args, _names, **kwargs) (Any) — Apply the same predicate function to a selection of columns and combine the results: True if all elements are True. </>
if_any(_data, *args, _names, **kwargs) (Any) — Apply the same predicate function to a selection of columns and combine the results: True if any element is True. </>
if_else(condition, true, false, missing) (Any) — Where condition is TRUE, the matching value from true; where it's FALSE, the matching value from false; otherwise missing. </>
inner_join(x, y, by, copy, suffix, keep, na_matches, multiple, unmatched, relationship) (Any) — Inner join two data frames by matching rows.</>
lag(x, n, default, order_by) (Any) — Shift a vector by n positions.</>
last(x, order_by, default) (Any) — Extract the last element of a vector</>
last_col(_data, offset, vars) (Any) — Select the last column.</>
lead(x, n, default, order_by) (Any) — Shift a vector by n positions.</>
left_join(x, y, by, copy, suffix, keep, na_matches, multiple, unmatched, relationship) (Any) — Left join two data frames by matching rows.</>
matches(_data, match, ignore_case, vars) (Any) — Select columns that match a regular expression.</>
min_rank(x, na_last) (Any) — Get the min rank of x</>
mutate(_data, *args, _keep, _before, _after, **kwargs) (Any) — Add new columns to a data frame.</>
n(_data) (Any) — Get the current group size</>
n_distinct(_data, na_rm) (Any) — Count the number of distinct values</>
n_groups(_data) (int) — Get the number of groups of a frame</>
na_if(x, value) (Any) — Replace values with missing values</>
near(x, y, tol) (Any) — Check if values are approximately equal</>
nest_join(x, y, by, copy, keep, name, na_matches, unmatched) (Any) — Nest join two data frames by matching rows.</>
nth(x, n, order_by, default) (Any) — Extract the nth element of a vector</>
ntile(x, n) (Any) — A rough rank, which breaks the input vector into n buckets. The size of the buckets may differ by up to one; larger buckets have lower rank. </>
num_range(prefix, range_, width) (Any) — Matches a numerical range like x01, x02, x03.</>
order_by(order, call) (Any) — Order the data by the given order</>
percent_rank(x, na_last) (Any) — Get the percent rank of x</>
pick(_data, *args) (T) — Pick columns by name</>
pull(_data, var, name, to) (Any) — Pull a series or a dataframe from a dataframe</>
recode(_x, *args, _default, _missing, **kwargs) (Any) — Recode a vector, replacing elements in it</>
recode_factor(_x, *args, _default, _missing, _ordered, **kwargs) (Any) — Recode a factor, replacing levels in it</>
relocate(_data, *args, _before, _after, **kwargs) (Any) — change column positions</>
rename(_data, **kwargs) (Any) — Rename columns</>
rename_with(_data, _fn, *args, **kwargs) (Any) — Rename columns with a function</>
right_join(x, y, by, copy, suffix, keep, na_matches, multiple, unmatched, relationship) (Any) — Right join two data frames by matching rows.</>
row_number(x) (Any) — Get the row number of x</>
rows_append(x, y, **kwargs) (Any) — Append rows in y to x</>
rows_delete(x, y, by, unmatched, **kwargs) (Any) — Delete rows in x that match keys in y</>
rows_insert(x, y, by, conflict, **kwargs) (Any) — Insert rows from y into x</>
rows_patch(x, y, by, unmatched, **kwargs) (Any) — Patch rows in x with values from y</>
rows_update(x, y, by, unmatched, **kwargs) (Any) — Update rows in x with values from y</>
rows_upsert(x, y, by, **kwargs) (Any) — Upsert rows in x with values from y</>
rowwise(_data, *cols) (Any) — Create a rowwise frame</>
select(_data, *args, **kwargs) (Any) — Select columns from a data frame.</>
semi_join(x, y, by, copy, na_matches) (Any) — Semi join two data frames by matching rows.</>
slice_(_data, *args, _preserve) (Any) — Extract rows by their position</>
slice_head(_data, n, prop) (Any) — Extract the first rows</>
slice_max(_data, order_by, n, prop, with_ties) (Any) — Extract rows with the maximum value</>
slice_min(_data, order_by, n, prop, with_ties) (Any) — Extract rows with the minimum value</>
slice_sample(_data, n, prop, weight_by, replace) (Any) — Extract rows by sampling</>
slice_tail(_data, n, prop) (Any) — Extract the last rows</>
starts_with(_data, match, ignore_case, vars) (Any) — Select columns that start with a string.</>
summarise(_data, *args, _groups, **kwargs) (Any) — Summarise a data frame.</>
symdiff(x, y) (T) — Get the symmetric difference of two dataframes</>
tally(_data, wt, sort, name) (Any) — Count the number of rows in each group</>
transmute(_data, *args, _before, _after, **kwargs) (Any) — Add new columns to a data frame and remove existing columns, using mutate with _keep="none". </>
ungroup(_data, *cols) (Any) — Remove grouping variables</>
union_all(x, y) (Any) — Combine two data frames together.</>
where(_data, fn) (Any) — Selects the variables for which a function returns True.</>
with_groups(_data, _groups, _func, *args, **kwargs) (Any) — Modify the grouping variables for a single operation.</>
with_order(order, func, x, *args, **kwargs) (Any) — Control argument and result of a window function</>

function

`datar.apis.dplyr.pick(_data`, `*args)`

</>

Pick columns by name

The original API: https://dplyr.tidyverse.org/reference/pick.html

Parameters

_data (T) — The dataframe
*args — The columns to pick

Returns (T)

The picked dataframe

function

`datar.apis.dplyr.across(_data`, `*args`, `_names=None`, `**kwargs)`

</>

Apply the same transformation to multiple columns

The original API: https://dplyr.tidyverse.org/reference/across.html

Examples

>>> iris >> mutate(across(c(f.Sepal_Length, f.Sepal_Width), round))
    Sepal_Length  Sepal_Width  Petal_Length  Petal_Width    Species
       <float64>    <float64>     <float64>    <float64>   <object>
0            5.0          4.0           1.4          0.2     setosa
1            5.0          3.0           1.4          0.2     setosa
..           ...          ...           ...          ...        ...

>>> iris >> group_by(f.Species) >> summarise(
>>>     across(starts_with("Sepal"), mean)
>>> )
      Species  Sepal_Length  Sepal_Width
     <object>     <float64>    <float64>
0      setosa         5.006        3.428
1  versicolor         5.936        2.770
2   virginica         6.588        2.974

Parameters

_data (T) — The dataframe.
*args — If given, the first 2 elements should be the columns and the functions to apply to each of the selected columns. The rest of them will be the arguments for the functions.
_names (optional) — A glue specification that describes how to name the output columns. This can use {_col} to stand for the selected column name, and {_fn} to stand for the name of the function being applied. The default (None) is equivalent to {_col} for the single function case and {_col}_{_fn} for the case where a list is used for _fns. In such a case, {_fn} is 0-based. To use 1-based index, use {_fn1}
**kwargs — Keyword arguments for the functions
_fn_context — Defines the context to evaluate the arguments for functions if they are plain functions. Note that registered functions will use their own context

Returns (T)

A dataframe with one column for each column and each function.

function

`datar.apis.dplyr.c_across(_data`, `_cols=None)`

</>

Apply the same transformation to multiple columns rowwisely

Parameters

_data (T) — The dataframe
_cols (optional) — The columns

Returns (T)

A rowwise tibble

function

`datar.apis.dplyr.if_any(_data`, `*args`, `_names=None`, `**kwargs)` → Any

</>

Apply the same predicate function to a selection of columns and combine the results: True if any element is True.

`datar.apis.dplyr.if_all(_data`, `*args`, `_names=None`, `**kwargs)` → Any

</>

Apply the same predicate function to a selection of columns and combine the results: True if all elements are True.

`datar.apis.dplyr.symdiff(x`, `y)`

</>

Get the symmetric difference of two dataframes

It computes the symmetric difference, i.e. all rows in x that aren't in y and all rows in y that aren't in x.

The original API: https://dplyr.tidyverse.org/reference/setops.html

Parameters

x (T) — A dataframe
y (T) — A dataframe

Returns (T)

The symmetric difference of x and y

function

`datar.apis.dplyr.arrange(_data`, `*args`, `_by_group=False`, `**kwargs)`

</>

orders the rows of a data frame by the values of selected columns.

The original API: https://dplyr.tidyverse.org/reference/arrange.html

Parameters

_data — A data frame
_by_group (optional) — If TRUE, will sort first by grouping variable. Applies to grouped data frames only.
**kwargs — Name-value pairs that apply with mutate
*series — Variables, or functions of variables. Use desc() to sort a variable in descending order.

Returns (Any)

An object of the same type as _data. The output has the following properties: All rows appear in the output, but (usually) in a different place. Columns are not modified. Groups are not modified. Data frame attributes are preserved.

function

`datar.apis.dplyr.bind_rows(*data`, `_id=None`, `_copy=True`, `**kwargs)`

</>

Bind rows of given dataframes

Original APIs https://dplyr.tidyverse.org/reference/bind.html

Parameters

*data — Dataframes to combine
_id (optional) — The name of the id columns
_copy (bool, optional) — If False, do not copy data unnecessarily. Original API does not support this. This argument will be passed to pandas.concat() as the copy argument.
**kwargs — A mapping of dataframe, keys will be used as _id col.

Returns (Any)

The combined dataframe

function

`datar.apis.dplyr.bind_cols(*data`, `_name_repair='unique'`, `_copy=True)`

</>

Bind columns of given dataframes

Note that unlike dplyr, mismatched dimensions are allowed and missing rows will be filled with NAs

Parameters

*data — Dataframes to bind
_name_repair (optional) — Treatment of problematic column names:
- "minimal": No name repair or checks, beyond basic existence,
- "unique": (default value) Make sure names are unique and not empty,
- "check_unique": No name repair, but check they are unique,
- "universal": Make the names unique and syntactic,
- a function: apply custom name repair
_copy (bool, optional) — If False, do not copy data unnecessarily. Original API does not support this. This argument will be passed to pandas.concat() as the copy argument.

Returns (Any)

The combined dataframe

function

`datar.apis.dplyr.cur_column(_data`, `_name)`

</>

Get the current column

Parameters

_data — The dataframe
_name — The column name

Returns (Any)

The current column

function

`datar.apis.dplyr.cur_data(_data)`

</>

Get the current dataframe

Parameters

_data — The dataframe

Returns (Any)

The current dataframe

function

`datar.apis.dplyr.n(_data)`

</>

Get the current group size

Parameters

_data — The dataframe

Returns (Any)

The number of rows

function

`datar.apis.dplyr.cur_data_all(_data)`

</>

Get the current data for the current group including the grouping variables

Parameters

_data — The dataframe

Returns (Any)

The current dataframe

function

`datar.apis.dplyr.cur_group(_data)`

</>

Get the current group

Parameters

_data — The dataframe

Returns (Any)

The current group

function

`datar.apis.dplyr.cur_group_id(_data)`

</>

Get the current group id

Parameters

_data — The dataframe

Returns (Any)

The current group id

function

`datar.apis.dplyr.cur_group_rows(_data)`

</>

Get the current group row indices

Parameters

_data — The dataframe

Returns (Any)

The current group rows

function

`datar.apis.dplyr.count(_data`, `*args`, `wt=None`, `sort=False`, `name=None`, `_drop=None`, `**kwargs)`

</>

Count the number of rows in each group

Original API: https://dplyr.tidyverse.org/reference/count.html

Parameters

_data — A data frame
*args — Variables, or functions of variables. Use desc() to sort a variable in descending order.
wt (optional) — A variable or function of variables to weight by.
sort (optional) — If TRUE, the result will be sorted by the count.
name (optional) — The name of the count column.
_drop (optional) — If False, keep grouping variables even if they are not used. Original API does not support this.
**kwargs — Name-value pairs that apply with mutate

Returns (Any)

A data frame with the same number of rows as the number of groups. The output has the following properties: All rows appear in the output, but (usually) in a different place. Columns are not modified. Groups are not modified. Data frame attributes are preserved.

function

`datar.apis.dplyr.tally(_data`, `wt=None`, `sort=False`, `name=None)`

</>

Count the number of rows in each group

Original API: https://dplyr.tidyverse.org/reference/count.html

Parameters

_data — A data frame
wt (optional) — A variable or function of variables to weight by.
sort (optional) — If TRUE, the result will be sorted by the count.
name (optional) — The name of the count column.

Returns (Any)

A data frame with the same number of rows as the number of groups. The output has the following properties: All rows appear in the output, but (usually) in a different place. Columns are not modified. Groups are not modified. Data frame attributes are preserved.

function

`datar.apis.dplyr.add_count(_data`, `*args`, `wt=None`, `sort=False`, `name='n'`, `**kwargs)`

</>

Add a count column to a data frame

Original API: https://dplyr.tidyverse.org/reference/count.html

Parameters

_data — A data frame
*args — Variables, or functions of variables. Use desc() to sort a variable in descending order.
wt (optional) — A variable or function of variables to weight by.
sort (optional) — If TRUE, the result will be sorted by the count.
name (optional) — The name of the count column.
**kwargs — Name-value pairs that apply with mutate

Returns (Any)

A data frame with the same number of rows as the number of groups. The output has the following properties: All rows appear in the output, but (usually) in a different place. Columns are not modified. Groups are not modified. Data frame attributes are preserved.

function

`datar.apis.dplyr.add_tally(_data`, `wt=None`, `sort=False`, `name='n')`

</>

Add a count column to a data frame

Original API: https://dplyr.tidyverse.org/reference/count.html

Parameters

_data — A data frame
wt (optional) — A variable or function of variables to weight by.
sort (optional) — If TRUE, the result will be sorted by the count.
name (optional) — The name of the count column.

Returns (Any)

A data frame with the same number of rows as the number of groups. The output has the following properties: All rows appear in the output, but (usually) in a different place. Columns are not modified. Groups are not modified. Data frame attributes are preserved.

function

`datar.apis.dplyr.desc(x)`

</>

Transform a vector into a format that will be sorted in descending order

This is useful within arrange().

The original API: https://dplyr.tidyverse.org/reference/desc.html

Parameters

x — vector to transform

Returns (Any)

The descending order of x

function

`datar.apis.dplyr.filter_(_data`, `*conditions`, `_preserve=False)`

</>

Filter a data frame based on conditions

The original API: https://dplyr.tidyverse.org/reference/filter.html

Parameters

_data — A data frame
*conditions — Conditions to filter by.
_preserve (bool, optional) — If True, keep grouping variables even if they are not used.

Returns (Any)

The subset dataframe

function

`datar.apis.dplyr.distinct(_data`, `*args`, `keep_all=False`, `_preserve=False)`

</>

Select only distinct (unique) rows from a data frame

The original API: https://dplyr.tidyverse.org/reference/distinct.html

Parameters

_data — A data frame
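*args — Variables to use when determining uniqueness. If omitted, all variables are used.
keep_all (bool, optional) — If True, keep all variables in _data. If a combination of *args is not distinct, the first row of values is kept.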
_preserve (bool, optional) — If True, keep grouping variables even if they are not used.

Returns (Any)

The subset dataframe

function

`datar.apis.dplyr.n_distinct(_data`, `na_rm=True)`

</>

Count the number of distinct values

The original API: https://dplyr.tidyverse.org/reference/distinct.html

Parameters

_data — A data frame
na_rm (bool, optional) — If True, remove missing values before counting.

Returns (Any)

The number of distinct values

function

`datar.apis.dplyr.glimpse(_data`, `width=None`, `formatter=None)` → Any

</>

Display a summary of a data frame

The original API: https://dplyr.tidyverse.org/reference/glimpse.html

Parameters

_data — A data frame
width (int, optional) — Width of output, defaults to the width of the console.
formatter (optional) — A single-dispatch function to format a single element.

function

`datar.apis.dplyr.slice_(_data`, `*args`, `_preserve=False)`

</>

Extract rows by their position

The original API: https://dplyr.tidyverse.org/reference/slice.html

Parameters

_data — A data frame
*args — Positions to extract.
_preserve (bool, optional) — If True, keep grouping variables even if they are not used.

Returns (Any)

The subset dataframe

function

`datar.apis.dplyr.slice_head(_data`, `n=None`, `prop=None)`

</>

Extract the first rows

The original API: https://dplyr.tidyverse.org/reference/slice.html

Parameters

_data — A data frame
n (int, optional) — Number of rows to extract.
prop (float, optional) — Proportion of rows to extract.

Returns (Any)

The subset dataframe

function

`datar.apis.dplyr.slice_tail(_data`, `n=None`, `prop=None)`

</>

Extract the last rows

The original API: https://dplyr.tidyverse.org/reference/slice.html

Parameters

_data — A data frame
n (int, optional) — Number of rows to extract.
prop (float, optional) — Proportion of rows to extract.

Returns (Any)

The subset dataframe

function

`datar.apis.dplyr.slice_sample(_data`, `n=1`, `prop=None`, `weight_by=None`, `replace=False)`

</>

Extract rows by sampling

The original API: https://dplyr.tidyverse.org/reference/slice.html

Parameters

_data — A data frame
n (int, optional) — Number of rows to extract.
prop (float, optional) — Proportion of rows to extract.
weight_by (optional) — A variable or function of variables to weight by.
replace (bool, optional) — If True, sample with replacement.

Returns (Any)

The subset dataframe

function

`datar.apis.dplyr.slice_min(_data`, `order_by`, `n=1`, `prop=None`, `with_ties=None)`

</>

Extract rows with the minimum value

The original API: https://dplyr.tidyverse.org/reference/slice.html

Parameters

_data — A data frame
order_by — A variable or function of variables to order by.
n (int, optional) — Number of rows to extract.
prop (float, optional) — Proportion of rows to extract.
with_ties (bool | str, optional) — If True, extract all rows with the minimum value. If "first", extract the first row with the minimum value. If "last", extract the last row with the minimum value.

Returns (Any)

The subset dataframe

function

`datar.apis.dplyr.slice_max(_data`, `order_by`, `n=1`, `prop=None`, `with_ties=None)`

</>

Extract rows with the maximum value

The original API: https://dplyr.tidyverse.org/reference/slice.html

Parameters

_data — A data frame
order_by — A variable or function of variables to order by.
n (int, optional) — Number of rows to extract.
prop (float, optional) — Proportion of rows to extract.
with_ties (bool | str, optional) — If True, extract all rows with the maximum value. If "first", extract the first row with the maximum value. If "last", extract the last row with the maximum value.

Returns (Any)

The subset dataframe

function

`datar.apis.dplyr.between(x`, `left`, `right`, `inclusive='both')`

</>

Check if a value is between two other values

The original API: https://dplyr.tidyverse.org/reference/between.html

Parameters

x — A value
left — The left bound
right — The right bound
inclusive (str, optional) — Either both, neither, left or right. Include boundaries. Whether to set each bound as closed or open.

Returns (Any)

A bool value if x is scalar, otherwise an array of boolean values. Note that it will always be False when NA appears in x, left or right.

function

`datar.apis.dplyr.cummean(x`, `na_rm=False)`

</>

Cumulative mean

The original API: https://dplyr.tidyverse.org/reference/cumall.html

Parameters

x — A numeric vector
na_rm (bool, optional) — If True, remove missing values before computing.

Returns (Any)

An array of cumulative means

function

`datar.apis.dplyr.cumall(x)`

</>

Get cumulative bool. All cases after first False

The original API: https://dplyr.tidyverse.org/reference/cumall.html

Parameters

x — A logical vector

Returns (Any)

An array of cumulative conjunctions

function

`datar.apis.dplyr.cumany(x)`

</>

Get cumulative bool. All cases after first True

The original API: https://dplyr.tidyverse.org/reference/cumall.html

Parameters

x — A logical vector

Returns (Any)

An array of cumulative disjunctions

function

`datar.apis.dplyr.coalesce(x`, `*replace)`

</>

Replace missing values with the first non-missing value

The original API: https://dplyr.tidyverse.org/reference/coalesce.html

Parameters

x — A vector
*replace — Values to replace missing values with.

Returns (Any)

An array of values

function

`datar.apis.dplyr.consecutive_id(x`, `*args)`

</>

Generate consecutive ids

The original API: https://dplyr.tidyverse.org/reference/consecutive_id.html

Parameters

x — A vector
*args — Other vectors

Returns (Sequence)

A sequence of consecutive ids

function

`datar.apis.dplyr.na_if(x`, `value)`

</>

Replace values with missing values

The original API: https://dplyr.tidyverse.org/reference/na_if.html

Parameters

x — A vector
value — Values to replace with missing values.

Returns (Any)

An array of values

function

`datar.apis.dplyr.near(x`, `y`, `tol=1e-08)`

</>

Check if values are approximately equal

The original API: https://dplyr.tidyverse.org/reference/near.html

Parameters

x — A numeric vector
y — A numeric vector
tol (float, optional) — Tolerance

Returns (Any)

An array of boolean values

function

`datar.apis.dplyr.nth(x`, `n`, `order_by=None`, `default=None)`

</>

Extract the nth element of a vector

The original API: https://dplyr.tidyverse.org/reference/nth.html

Parameters

x — A vector
n — The index of the element to extract.
order_by (optional) — A variable or function of variables to order by.
default (optional) — A default value to return if n is out of bounds.

Returns (Any)

A value

function

`datar.apis.dplyr.first(x`, `order_by=None`, `default=None)`

</>

Extract the first element of a vector

The original API: https://dplyr.tidyverse.org/reference/nth.html

Parameters

x — A vector
order_by (optional) — A variable or function of variables to order by.
default (optional) — A default value to return if x is empty.

Returns (Any)

A value

function

`datar.apis.dplyr.last(x`, `order_by=None`, `default=None)`

</>

Extract the last element of a vector

The original API: https://dplyr.tidyverse.org/reference/nth.html

Parameters

x — A vector
order_by (optional) — A variable or function of variables to order by.
default (optional) — A default value to return if x is empty.

Returns (Any)

A value

function

`datar.apis.dplyr.group_by(_data`, `*args`, `_add=False`, `_drop=None)`

</>

Create a grouped frame

The original API: https://dplyr.tidyverse.org/reference/group_by.html

Parameters

_data — A data frame
*args — A variable or function of variables to group by.
_add (bool, optional) — If True, add grouping variables to an existing group.
_drop (bool, optional) — If True, drop grouping variables from the output.

Returns (Any)

A grouped frame

function

`datar.apis.dplyr.ungroup(_data`, `*cols)`

</>

Remove grouping variables

The original API: https://dplyr.tidyverse.org/reference/ungroup.html

Parameters

_data — A grouped frame
*cols (str | int) — Columns to remove grouping variables from.

Returns (Any)

A data frame

function

`datar.apis.dplyr.rowwise(_data`, `*cols)`

</>

Create a rowwise frame

The original API: https://dplyr.tidyverse.org/reference/rowwise.html

Parameters

_data — A data frame
*cols (str | int) — Columns to make rowwise.

Returns (Any)

A rowwise frame

function

`datar.apis.dplyr.group_by_drop_default(_data)`

</>

Get the default value of _drop of a frame

The original API: https://dplyr.tidyverse.org/reference/group_by.html

Parameters

_data — A data frame

Returns (Any)

A bool value

function

`datar.apis.dplyr.group_vars(_data)`

</>

Get the grouping variables of a frame

The original API: https://dplyr.tidyverse.org/reference/group_vars.html

Parameters

_data — A grouped frame

Returns (Any)

A list of grouping variables

function

`datar.apis.dplyr.group_indices(_data)`

</>

Get the group indices of a frame

The original API: https://dplyr.tidyverse.org/reference/group_indices.html

Parameters

_data — A grouped frame

Returns (Any)

A list of group indices

function

`datar.apis.dplyr.group_keys(_data)`

</>

Get the group keys of a frame

The original API: https://dplyr.tidyverse.org/reference/group_keys.html

Parameters

_data — A grouped frame

Returns (Any)

A list of group keys

function

`datar.apis.dplyr.group_size(_data)`

</>

Get the group sizes of a frame

The original API: https://dplyr.tidyverse.org/reference/group_size.html

Parameters

_data — A grouped frame

Returns (Any)

A list of group sizes

function

`datar.apis.dplyr.group_rows(_data)`

</>

Get the group rows of a frame

The original API: https://dplyr.tidyverse.org/reference/group_rows.html

Parameters

_data — A grouped frame

Returns (Any)

A list of group rows

function

`datar.apis.dplyr.group_cols(_data)`

</>

Get the group columns of a frame

The original API: https://dplyr.tidyverse.org/reference/group_cols.html

Parameters

_data — A grouped frame

Returns (Any)

A list of group columns

function

`datar.apis.dplyr.group_data(_data)`

</>

Get the group data of a frame

The original API: https://dplyr.tidyverse.org/reference/group_data.html

Parameters

_data — A grouped frame

Returns (Any)

A list of group data

function

`datar.apis.dplyr.n_groups(_data)`

</>

Get the number of groups of a frame

The original API: https://dplyr.tidyverse.org/reference/n_groups.html

Parameters

_data — A grouped frame

Returns (int)

An int value

function

`datar.apis.dplyr.group_map(_data`, `_f`, `*args`, `_keep=False`, `**kwargs)`

</>

Apply a function to each group

The original API: https://dplyr.tidyverse.org/reference/group_map.html

Parameters

_data — A grouped frame
_f — A function to apply to each group.
*args — Additional arguments to pass to _f.
_keep (bool, optional) — If True, keep the grouping variables in the output.
**kwargs — Additional keyword arguments to pass to _f.

Returns (Any)

A list of results

function

`datar.apis.dplyr.group_modify(_data`, `_f`, `*args`, `_keep=False`, `**kwargs)`

</>

Apply a function to each group

The original API: https://dplyr.tidyverse.org/reference/group_modify.html

Parameters

_data — A grouped frame
_f — A function to apply to each group.
*args — Additional arguments to pass to _f.
_keep (bool, optional) — If True, keep the grouping variables in the output.
**kwargs — Additional keyword arguments to pass to _f.

Returns (Any)

A data frame

function

`datar.apis.dplyr.group_split(_data`, `*args`, `_keep=False`, `**kwargs)`

</>

Split a grouped frame into a list of data frames

The original API: https://dplyr.tidyverse.org/reference/group_split.html

Parameters

_data — A grouped frame
*args — Additional arguments to pass to func.
_keep (bool, optional) — If True, keep the grouping variables in the output.
**kwargs — Additional keyword arguments to pass to func.

Returns (Any)

A list of data frames

function

`datar.apis.dplyr.group_trim(_data`, `_drop=None)`

</>

Remove empty groups

The original API: https://dplyr.tidyverse.org/reference/group_trim.html

Parameters

_data — A grouped frame
_drop (optional) — See group_by.

Returns (Any)

A grouped frame

function

`datar.apis.dplyr.group_walk(_data`, `_f`, `*args`, `_keep=False`, `**kwargs)`

</>

Apply a function to each group

The original API: https://dplyr.tidyverse.org/reference/group_walk.html

Parameters

_data — A grouped frame
_f — A function to apply to each group.
*args — Additional arguments to pass to func.
**kwargs — Additional keyword arguments to pass to func.

Returns (Any)

A grouped frame

function

`datar.apis.dplyr.with_groups(_data`, `_groups`, `_func`, `*args`, `**kwargs)`

</>

Modify the grouping variables for a single operation.

Parameters

_data — A data frame
_groups — Columns passed by group_by. Use None to temporarily ungroup.
_func — Function to apply to regrouped data.

Returns (Any)

The new data frame with operations applied.

function

`datar.apis.dplyr.if_else(condition`, `true`, `false`, `missing=None)`

</>

Returns the matching value from true where condition is TRUE, the matching value from false where it's FALSE, and missing otherwise.

Note that NAs will be False in condition if missing is not specified

Parameters

condition — the conditions
true — Values to use for TRUE values of condition. They must be either the same length as condition, or length 1.
false — Values to use for FALSE values of condition. They must be either the same length as condition, or length 1.
missing (optional) — If not None, will be used to replace missing values

Returns (Any)

A series with values replaced.

function

`datar.apis.dplyr.case_match(_x`, `*args`, `_default=None`, `_dtypes=None)` → T

</>

This function allows you to vectorise multiple switch() statements. Each case is evaluated sequentially and the first match for each element determines the corresponding value in the output vector. If no cases match, the _default is used.

The original API: https://dplyr.tidyverse.org/reference/case_match.html

Parameters

_x (T) — A vector
*args — A series of condition-value pairs
_default (optional) — The default value
_dtypes (optional) — The data types of the output

function

`datar.apis.dplyr.case_when(cond`, `value`, `*more_cases)`

</>

Vectorise multiple if_else() statements.

Parameters

cond — A boolean vector
value — A vector with values to replace
*more_cases — A list of tuples (cond, value)

Returns (Any)

A vector with values replaced.

function

`datar.apis.dplyr.inner_join(x`, `y`, `by=None`, `copy=False`, `suffix=('_x', '_y')`, `keep=False`, `na_matches='na'`, `multiple='all'`, `unmatched='drop'`, `relationship=None)`

</>

Inner join two data frames by matching rows.

The original API: https://dplyr.tidyverse.org/reference/join.html

Parameters

x — A data frame
y — A data frame
by (optional) — A list of column names to join by. If None, use the intersection of the columns of x and y.
copy (bool, optional) — If True, always copy the data.
suffix (Sequence, optional) — A tuple of suffixes to apply to overlapping columns.
keep (bool, optional) — If True, keep the grouping variables in the output.
na_matches (str, optional) — How should NA values be matched? "na": NA values are equal. "never": NA values are never matched.
multiple (str, optional) — How should multiple matches be handled? "all": All matches are returned. "first": The first match is returned. "last": The last match is returned. "any": Any of the matched rows in y is returned.
unmatched (str, optional) — How should unmatched keys that would result in dropped rows be handled? "drop": Drop unmatched keys. "error": Raise an error.
relationship (str, optional) — The expected relationship between the keys of x and y. None: No expected relationship. "one_to_one": Each row in x matches at most one row in y, and each row in y matches at most one row in x. "one_to_many": Each row in y matches at most one row in x. "many_to_one": Each row in x matches at most one row in y. "many_to_many": No restrictions; a row in either table may match any number of rows in the other.

Returns (Any)

A data frame

function

`datar.apis.dplyr.left_join(x`, `y`, `by=None`, `copy=False`, `suffix=('_x', '_y')`, `keep=False`, `na_matches='na'`, `multiple='all'`, `unmatched='drop'`, `relationship=None)`

</>

Left join two data frames by matching rows.

The original API: https://dplyr.tidyverse.org/reference/join.html

Parameters

x — A data frame
y — A data frame
by (optional) — A list of column names to join by. If None, use the intersection of the columns of x and y.
copy (bool, optional) — If True, always copy the data.
suffix (Sequence, optional) — A tuple of suffixes to apply to overlapping columns.
keep (bool, optional) — If True, keep the grouping variables in the output.
na_matches (str, optional) — How should NA values be matched? "na": NA values are equal. "never": NA values are never matched.
multiple (str, optional) — How should multiple matches be handled? "all": All matches are returned. "first": The first match is returned. "last": The last match is returned. "any": Any of the matched rows in y is returned.
unmatched (str, optional) — How should unmatched keys that would result in dropped rows be handled? "drop": Drop unmatched keys. "error": Raise an error.
relationship (str, optional) — The expected relationship between the keys of x and y. None: No expected relationship. "one_to_one": Each row in x matches at most one row in y, and each row in y matches at most one row in x. "one_to_many": Each row in y matches at most one row in x. "many_to_one": Each row in x matches at most one row in y. "many_to_many": No restrictions; a row in either table may match any number of rows in the other.

Returns (Any)

A data frame

function

`datar.apis.dplyr.right_join(x`, `y`, `by=None`, `copy=False`, `suffix=('_x', '_y')`, `keep=False`, `na_matches='na'`, `multiple='all'`, `unmatched='drop'`, `relationship=None)`

</>

Right join two data frames by matching rows.

The original API: https://dplyr.tidyverse.org/reference/join.html

Parameters

x — A data frame
y — A data frame
by (optional) — A list of column names to join by. If None, use the intersection of the columns of x and y.
copy (bool, optional) — If True, always copy the data.
suffix (Sequence, optional) — A tuple of suffixes to apply to overlapping columns.
keep (bool, optional) — If True, keep the grouping variables in the output.
na_matches (str, optional) — How should NA values be matched? "na": NA values are equal. "never": NA values are never matched.
multiple (str, optional) — How should multiple matches be handled? "all": All matches are returned. "first": The first match is returned. "last": The last match is returned. "any": Any of the matched rows in y is returned.
unmatched (str, optional) — How should unmatched keys that would result in dropped rows be handled? "drop": Drop unmatched keys. "error": Raise an error.
relationship (str, optional) — The expected relationship between the keys of x and y. None: No expected relationship. "one_to_one": Each row in x matches at most one row in y, and each row in y matches at most one row in x. "one_to_many": Each row in y matches at most one row in x. "many_to_one": Each row in x matches at most one row in y. "many_to_many": No restrictions; a row in either table may match any number of rows in the other.

Returns (Any)

A data frame

function

`datar.apis.dplyr.full_join(x`, `y`, `by=None`, `copy=False`, `suffix=('_x', '_y')`, `keep=False`, `na_matches='na'`, `multiple='all'`, `unmatched='drop'`, `relationship=None)`

</>

Full join two data frames by matching rows.

The original API: https://dplyr.tidyverse.org/reference/join.html

Parameters

x — A data frame
y — A data frame
by (optional) — A list of column names to join by. If None, use the intersection of the columns of x and y.
copy (bool, optional) — If True, always copy the data.
suffix (Sequence, optional) — A tuple of suffixes to apply to overlapping columns.
keep (bool, optional) — If True, keep the grouping variables in the output.
na_matches (str, optional) — How should NA values be matched? "na": NA values are equal. "never": NA values are never matched.
multiple (str, optional) — How should multiple matches be handled? "all": All matches are returned. "first": The first match is returned. "last": The last match is returned. "any": Any of the matched rows in y is returned.
unmatched (str, optional) — How should unmatched keys that would result in dropped rows be handled? "drop": Drop unmatched keys. "error": Raise an error.
relationship (str, optional) — The expected relationship between the keys of x and y. None: No expected relationship. "one_to_one": Each row in x matches at most one row in y, and each row in y matches at most one row in x. "one_to_many": Each row in y matches at most one row in x. "many_to_one": Each row in x matches at most one row in y. "many_to_many": No restrictions; a row in either table may match any number of rows in the other.

Returns (Any)

A data frame

function

`datar.apis.dplyr.semi_join(x`, `y`, `by=None`, `copy=False`, `na_matches='na')`

</>

Semi join two data frames by matching rows.

The original API: https://dplyr.tidyverse.org/reference/join.html

Parameters

x — A data frame
y — A data frame
by (optional) — A list of column names to join by. If None, use the intersection of the columns of x and y.
copy (bool, optional) — If True, always copy the data.
na_matches (str, optional) — How should NA values be matched? "na": NA values are equal. "never": NA values are never matched.

Returns (Any)

A data frame

function

`datar.apis.dplyr.anti_join(x`, `y`, `by=None`, `copy=False`, `na_matches='na')`

</>

Anti join two data frames by matching rows.

The original API: https://dplyr.tidyverse.org/reference/join.html

Parameters

x — A data frame
y — A data frame
by (optional) — A list of column names to join by. If None, use the intersection of the columns of x and y.
copy (bool, optional) — If True, always copy the data.
na_matches (str, optional) — How should NA values be matched? "na": NA values are equal. "never": NA values are never matched.

Returns (Any)

A data frame

function

`datar.apis.dplyr.nest_join(x`, `y`, `by=None`, `copy=False`, `keep=False`, `name=None`, `na_matches='na'`, `unmatched='drop')`

</>

Nest join two data frames by matching rows.

The original API: https://dplyr.tidyverse.org/reference/join.html

Parameters

x — A data frame
y — A data frame
by (optional) — A list of column names to join by. If None, use the intersection of the columns of x and y.
copy (bool, optional) — If True, always copy the data.
keep (bool, optional) — If True, keep the grouping variables in the output.
name (optional) — The name of the column to store the nested data frame.
na_matches (str, optional) — How should NA values be matched? "na": NA values are equal. "never": NA values are never matched.
unmatched (str, optional) — How should unmatched keys that would result in dropped rows be handled? "drop": Drop unmatched keys. "error": Raise an error.

Returns (Any)

A data frame

function

`datar.apis.dplyr.cross_join(x`, `y`, `copy=False`, `suffix=('_x', '_y'))`

</>

Cross joins match each row in x to every row in y, resulting in a data frame with nrow(x) * nrow(y) rows.

The original API: https://dplyr.tidyverse.org/reference/cross_join.html

Parameters

x (T) — A data frame
y (T) — A data frame
copy (bool, optional) — If True, always copy the data.
suffix (Sequence, optional) — A tuple of suffixes to apply to overlapping columns.

Returns (T)

An object of the same type as x (including the same groups).

function

`datar.apis.dplyr.lead(x`, `n=1`, `default=None`, `order_by=None)`

</>

Shift a vector by n positions.

The original API: https://dplyr.tidyverse.org/reference/lead.html

Parameters

x — A vector
n (optional) — The number of positions to shift.
default (optional) — The default value to use for positions that don't exist.
order_by (optional) — A vector of column names to order by.

Returns (Any)

A vector

function

`datar.apis.dplyr.lag(x`, `n=1`, `default=None`, `order_by=None)`

</>

Shift a vector by n positions.

The original API: https://dplyr.tidyverse.org/reference/lag.html

Parameters

x — A vector
n (optional) — The number of positions to shift.
default (optional) — The default value to use for positions that don't exist.
order_by (optional) — A vector of column names to order by.

Returns (Any)

A vector

function

`datar.apis.dplyr.mutate(_data`, `*args`, `_keep='all'`, `_before=None`, `_after=None`, `**kwargs)`

</>

Add new columns to a data frame.

The original API: https://dplyr.tidyverse.org/reference/mutate.html

Parameters

_data — A data frame
*args — and
_keep (str, optional) —
Allows you to control which columns from _data are retained in the output:
- "all", the default, retains all variables.
- "used" keeps any variables used to make new variables;
  it's useful for checking your work as it displays inputs and
  outputs side-by-side.
- "unused" keeps only existing variables not used to make new
  variables.
- "none", only keeps grouping keys (like transmute()).
_before (optional) — A list of column names to put the new columns before.
_after (optional) — A list of column names to put the new columns after.
**kwargs —
Name-value pairs. The name gives the name of the column in the output. The value can be:
- A vector of length 1, which will be recycled to the correct
  length.
- A vector the same length as the current group (or the whole
  data frame if ungrouped).
- None to remove the column

Returns (Any)

An object of the same type as _data. The output has the following properties:

- Rows are not affected.
- Existing columns will be preserved according to the _keep
argument. New columns will be placed according to the
_before and _after arguments. If _keep = "none"
(as in transmute()), the output order is determined only
by ..., not the order of existing columns.
- Columns given value None will be removed
- Groups will be recomputed if a grouping variable is mutated.
- Data frame attributes are preserved.

function

`datar.apis.dplyr.transmute(_data`, `*args`, `_before=None`, `_after=None`, `**kwargs)`

</>

Add new columns to a data frame and remove existing columns, using mutate with _keep="none".

The original API: https://dplyr.tidyverse.org/reference/mutate.html

Parameters

_data — A data frame
*args — and
_before (optional) — A list of column names to put the new columns before.
_after (optional) — A list of column names to put the new columns after.
**kwargs —
Name-value pairs. The name gives the name of the column in the output. The value can be:
- A vector of length 1, which will be recycled to the correct
  length.
- A vector the same length as the current group (or the whole
  data frame if ungrouped).
- None to remove the column

Returns (Any)

An object of the same type as _data. The output has the following properties:

- Rows are not affected.
- Existing columns will be preserved according to the _keep
argument. New columns will be placed according to the
_before and _after arguments. If _keep = "none"
(as in transmute()), the output order is determined only
by ..., not the order of existing columns.
- Columns given value None will be removed
- Groups will be recomputed if a grouping variable is mutated.
- Data frame attributes are preserved.

function

`datar.apis.dplyr.order_by(order`, `call)`

</>

Order the data by the given order

Note

This function should be called as an argument of a verb. If you want to call it regularly, try with_order()

Examples

>>> df = tibble(x=c[1:6])
>>> df >> mutate(y=order_by(c[5:], cumsum(f.x)))
>>> # df.y:
>>> # 15, 14, 12, 9, 5

Parameters

order — An iterable to control the data order
call — The call whose data is to be ordered

Returns (Any)

A Function expression for verb to evaluate.

function

`datar.apis.dplyr.with_order(order`, `func`, `x`, `*args`, `**kwargs)`

</>

Control argument and result of a window function

Examples

>>> with_order([5,4,3,2,1], cumsum, [1,2,3,4,5])
>>> # 15, 14, 12, 9, 5

Parameters

order — An iterable to order the argument and result
func — The window function
x — The first argument for the function
*args — and
**kwargs — Other arguments for the function

Returns (Any)

The ordered result or an expression if there is expression in arguments

function

`datar.apis.dplyr.pull(_data`, `var=-1`, `name=None`, `to=None)`

</>

Pull a series or a dataframe from a dataframe

Parameters

_data — The dataframe
var (str | int, optional) — The column to pull, either the name or the index
name (optional) —
The name of the pulled value
- If to is frame, or the value pulled is data frame, it will be
  the column names
- If to is series, it will be the series name. If multiple names
  are given, only the first name will be used.
- If to is series, but value pulled is a data frame, then a
  dictionary of series with the series names as keys or given name
  as keys.
to (optional) —
Type of data to return. Only works when pulling a for name a$b
- series: Return a pandas Series object
  Group information will be lost
  If pulled value is a dataframe, it will return a dict of series,
  with the series names or the name provided.
- array: Return a numpy.ndarray object
- frame: Return a DataFrame with that column
- list: Return a python list
- dict: Return a dict with name as keys and pulled value as values
  Only a single column is allowed to pull
- If not provided: series when pulled data has only one column.
  dict if name provided and has the same length as the pulled
  single column. Otherwise frame.

Returns (Any)

The data according to to

function

`datar.apis.dplyr.row_number(x=Symbolic())`

</>

Get the row number of x

Note that this function doesn't support piping.

Parameters

x — The data to get row number. Defaults to Symbolic() so the whole data is used by default when called row_number()

Returns (Any)

The row number

function

`datar.apis.dplyr.ntile(x=Symbolic()`, `n=None)`

</>

A rough rank, which breaks the input vector into n buckets. The size of the buckets may differ by up to one, larger buckets have lower rank.

Note that this function doesn't support piping.

Parameters

x — The data to get row number. Defaults to Symbolic() so the whole data is used by default when called ntile(n=...)
n (int, optional) — The number of groups to divide the data into

Returns (Any)

The row number

function

`datar.apis.dplyr.min_rank(x=Symbolic()`, `na_last='keep')`

</>

Get the min rank of x

Note that this function doesn't support piping.

Parameters

x — The data to get row number. Defaults to Symbolic() so the whole data is used by default when called min_rank()
na_last (str, optional) —
How NA values are ranked
- "keep": NA values are ranked at the end
- "top": NA values are ranked at the top
- "bottom": NA values are ranked at the bottom

Returns (Any)

The row number

function

`datar.apis.dplyr.dense_rank(x=Symbolic()`, `na_last='keep')`

</>

Get the dense rank of x

Note that this function doesn't support piping.

Parameters

x — The data to get row number. Defaults to Symbolic() so the whole data is used by default when called dense_rank()
na_last (str, optional) —
How NA values are ranked
- "keep": NA values are ranked at the end
- "top": NA values are ranked at the top
- "bottom": NA values are ranked at the bottom

Returns (Any)

The row number

function

`datar.apis.dplyr.percent_rank(x=Symbolic()`, `na_last='keep')`

</>

Get the percent rank of x

Note that this function doesn't support piping.

Parameters

x — The data to get row number. Defaults to Symbolic() so the whole data is used by default when called percent_rank()
na_last (str, optional) —
How NA values are ranked
- "keep": NA values are ranked at the end
- "top": NA values are ranked at the top
- "bottom": NA values are ranked at the bottom

Returns (Any)

The row number

function

`datar.apis.dplyr.cume_dist(x=Symbolic()`, `na_last='keep')`

</>

Get the cume_dist of x

Note that this function doesn't support piping.

Parameters

x — The data to get row number. Defaults to Symbolic() so the whole data is used by default when called cume_dist()
na_last (str, optional) —
How NA values are ranked
- "keep": NA values are ranked at the end
- "top": NA values are ranked at the top
- "bottom": NA values are ranked at the bottom

Returns (Any)

The row number

function

`datar.apis.dplyr.recode(_x`, `*args`, `_default=None`, `_missing=None`, `**kwargs)`

</>

Recode a vector, replacing elements in it

Parameters

*args — and
_default (optional) — If supplied, all values not otherwise matched will be given this value. If not supplied and if the replacements are the same type as the original values in series, unmatched values are not changed. If not supplied and if the replacements are not compatible, unmatched values are replaced with np.nan.
_missing (optional) — If supplied, any missing values in _x will be replaced by this value.
**kwargs — replacements
_x — A vector to modify

Returns (Any)

The vector with values replaced

function

`datar.apis.dplyr.recode_factor(_x`, `*args`, `_default=None`, `_missing=None`, `_ordered=False`, `**kwargs)`

</>

Recode a factor, replacing levels in it

Parameters

*args — and
_default (optional) — If supplied, all values not otherwise matched will be given this value. If not supplied and if the replacements are the same type as the original values in series, unmatched values are not changed. If not supplied and if the replacements are not compatible, unmatched values are replaced with np.nan.
_missing (optional) — If supplied, any missing values in _x will be replaced by this value.
_ordered (bool, optional) — If True, the factor will be ordered
**kwargs — replacements
_x — A factor to modify

Returns (Any)

The factor with levels replaced

function

`datar.apis.dplyr.relocate(_data`, `*args`, `_before=None`, `_after=None`, `**kwargs)`

</>

change column positions

See original API https://dplyr.tidyverse.org/reference/relocate.html

Parameters

_data — A data frame
*args — and
_before (int | str, optional) — and
_after (int | str, optional) — Destination. Supplying neither will move columns to the left-hand side; specifying both is an error.
**kwargs — Columns to rename and move

Returns (Any)

An object of the same type as _data. The output has the following properties:

- Rows are not affected.
- The same columns appear in the output, but (usually) in a
different place.
- Data frame attributes are preserved.
- Groups are not affected

function

`datar.apis.dplyr.rename(_data`, `**kwargs)`

</>

Rename columns

See original API https://dplyr.tidyverse.org/reference/rename.html

Parameters

_data — A data frame
**kwargs — Columns to rename

Returns (Any)

The dataframe with new names

function

`datar.apis.dplyr.rename_with(_data`, `_fn`, `*args`, `**kwargs)`

</>

Rename columns with a function

See original API https://dplyr.tidyverse.org/reference/rename.html

Parameters

_data — A data frame
_fn — A function to apply to column names
*args — The columns to rename and non-keyword arguments for the _fn. If *args is not provided, all columns are assumed, and no non-keyword arguments can be passed to the function; use keyword arguments instead.
**kwargs — keyword arguments for _fn

Returns (Any)

The dataframe with new names

function

`datar.apis.dplyr.rows_insert(x`, `y`, `by=None`, `conflict='error'`, `**kwargs)`

</>

Insert rows from y into x

See original API https://dplyr.tidyverse.org/reference/rows.html

Parameters

x — A data frame
y — A data frame
by (optional) — An unnamed character vector giving the key columns. The key columns must exist in both x and y. Keys typically uniquely identify each row, but this is only enforced for the key values of y. By default, we use the first column in y, since the first column is a reasonable place to put an identifier variable.
conflict (str, optional) —
How to handle conflicts
- "error": Throw an error
- "ignore": Ignore conflicts
**kwargs — Additional arguments to pass to the backend, such as copy and in_place. Depends on the backend implementation.

Returns (Any)

A data frame with all existing rows and potentially new rows

function

`datar.apis.dplyr.rows_update(x`, `y`, `by=None`, `unmatched='error'`, `**kwargs)`

</>

Update rows in x with values from y

See original API https://dplyr.tidyverse.org/reference/rows.html

Parameters

x — A data frame
y — A data frame
by (optional) — An unnamed character vector giving the key columns. The key columns must exist in both x and y. Keys typically uniquely identify each row, but this is only enforced for the key values of y. By default, we use the first column in y, since the first column is a reasonable place to put an identifier variable.
unmatched (str, optional) — How should keys in y that are unmatched by the keys in x be handled? One of: "error", the default, will error if there are any keys in y that are unmatched by the keys in x; "ignore" will ignore rows in y with keys that are unmatched by the keys in x.
**kwargs — Additional arguments to pass to the backend, such as copy and in_place. Depends on the backend implementation.

Returns (Any)

A data frame with all existing rows and potentially new rows

function

`datar.apis.dplyr.rows_patch(x`, `y`, `by=None`, `unmatched='error'`, `**kwargs)`

</>

Patch rows in x with values from y

See original API https://dplyr.tidyverse.org/reference/rows.html

Parameters

x — A data frame
y — A data frame
by (optional) — An unnamed character vector giving the key columns. The key columns must exist in both x and y. Keys typically uniquely identify each row, but this is only enforced for the key values of y. By default, we use the first column in y, since the first column is a reasonable place to put an identifier variable.
unmatched (str, optional) — How should keys in y that are unmatched by the keys in x be handled? One of: "error", the default, will error if there are any keys in y that are unmatched by the keys in x; "ignore" will ignore rows in y with keys that are unmatched by the keys in x.
**kwargs — Additional arguments to pass to the backend, such as copy and in_place. Depends on the backend implementation.

Returns (Any)

A data frame with NA values overwritten and the number of rows preserved

function

`datar.apis.dplyr.rows_upsert(x`, `y`, `by=None`, `**kwargs)`

</>

Upsert rows in x with values from y

See original API https://dplyr.tidyverse.org/reference/rows.html

Parameters

x — A data frame
y — A data frame
by (optional) — An unnamed character vector giving the key columns. The key columns must exist in both x and y. Keys typically uniquely identify each row, but this is only enforced for the key values of y. By default, we use the first column in y, since the first column is a reasonable place to put an identifier variable.
**kwargs — Additional arguments to pass to the backend, such as copy and in_place. Depends on the backend implementation.

Returns (Any)

A data frame with rows inserted or updated, depending on whether or not the key value in y already exists in x. Key values in y must be unique.

function

`datar.apis.dplyr.rows_delete(x`, `y`, `by=None`, `unmatched='error'`, `**kwargs)`

</>

Delete rows in x that match keys in y

See original API https://dplyr.tidyverse.org/reference/rows.html

Parameters

x — A data frame
y — A data frame
by (optional) — An unnamed character vector giving the key columns. The key columns must exist in both x and y. Keys typically uniquely identify each row, but this is only enforced for the key values of y. By default, we use the first column in y, since the first column is a reasonable place to put an identifier variable.
unmatched (str, optional) — How should keys in y that are unmatched by the keys in x be handled? One of: "error", the default, will error if there are any keys in y that are unmatched by the keys in x; "ignore" will ignore rows in y with keys that are unmatched by the keys in x.
**kwargs — Additional arguments to pass to the backend, such as copy and in_place. Depends on the backend implementation.

Returns (Any)

A data frame with rows deleted

function

`datar.apis.dplyr.rows_append(x`, `y`, `**kwargs)`

</>

Append rows in y to x

See original API https://dplyr.tidyverse.org/reference/rows.html

Parameters

x — A data frame
y — A data frame
**kwargs — Additional arguments to pass to the backend, such as copy and in_place. Depends on the backend implementation.

Returns (Any)

A data frame with rows appended

function

`datar.apis.dplyr.select(_data`, `*args`, `**kwargs)`

</>

Select columns from a data frame.

See original API https://dplyr.tidyverse.org/reference/select.html

Parameters

_data — A data frame
*args — A list of columns to select
**kwargs — A list of columns to select

Returns (Any)

A data frame with only the selected columns

function

`datar.apis.dplyr.union_all(x`, `y)`

</>

Combine two data frames together.

See original API https://dplyr.tidyverse.org/reference/setops.html

Parameters

x — A data frame
y — A data frame

Returns (Any)

A data frame with rows from x and y

function

`datar.apis.dplyr.summarise(_data`, `*args`, `_groups=None`, `**kwargs)`

</>

Summarise a data frame.

See original API https://dplyr.tidyverse.org/reference/summarise.html

Parameters

_data — A data frame
*args — and
_groups (str, optional) —
Grouping structure of the result.
- "drop_last": dropping the last level of grouping.
- "drop": All levels of grouping are dropped.
- "keep": Same grouping structure as _data.
- "rowwise": Each row is its own group.
**kwargs — Name-value pairs, where value is the summarized data for each group

Returns (Any)

A data frame with the summarised columns

function

`datar.apis.dplyr.where(_data`, `fn)`

</>

Selects the variables for which a function returns True.

See original API https://dplyr.tidyverse.org/reference/filter.html

Parameters

_data — A data frame
fn (Callable) — A function that returns True or False. Currently it has to be a `register_func`/`func_factory` registered function; purrr-like formulas are not supported yet.

Returns (Any)

The matched columns

function

`datar.apis.dplyr.everything(_data)`

</>

Select all variables.

See original API https://dplyr.tidyverse.org/reference/select.html

Parameters

_data — A data frame

Returns (Any)

All columns

function

`datar.apis.dplyr.last_col(_data`, `offset=0`, `vars=None)`

</>

Select the last column.

See original API https://dplyr.tidyverse.org/reference/select.html

Parameters

_data — A data frame
offset (int, optional) — The offset of the last column
vars (optional) — A list of columns to select

Returns (Any)

The last column

function

`datar.apis.dplyr.starts_with(_data`, `match`, `ignore_case=True`, `vars=None)`

</>

Select columns that start with a string.

See original API https://dplyr.tidyverse.org/reference/select.html

Parameters

_data — A data frame
match — The string to match
ignore_case (bool, optional) — Ignore case when matching
vars (optional) — A list of columns to select

Returns (Any)

The matched columns

function

`datar.apis.dplyr.ends_with(_data`, `match`, `ignore_case=True`, `vars=None)`

</>

Select columns that end with a string.

See original API https://dplyr.tidyverse.org/reference/select.html

Parameters

_data — A data frame
match — The string to match
ignore_case (bool, optional) — Ignore case when matching
vars (optional) — A list of columns to select

Returns (Any)

The matched columns

function

`datar.apis.dplyr.contains(_data`, `match`, `ignore_case=True`, `vars=None)`

</>

Select columns that contain a string.

See original API https://dplyr.tidyverse.org/reference/select.html

Parameters

_data — A data frame
match — The string to match
ignore_case (bool, optional) — Ignore case when matching
vars (optional) — A list of columns to select

Returns (Any)

The matched columns

function

`datar.apis.dplyr.matches(_data`, `match`, `ignore_case=True`, `vars=None)`

</>

Select columns that match a regular expression.

See original API https://dplyr.tidyverse.org/reference/select.html

Parameters

_data — A data frame
match — The regular expression to match
ignore_case (bool, optional) — Ignore case when matching
vars (optional) — A list of columns to select

Returns (Any)

The matched columns

function

`datar.apis.dplyr.num_range(prefix`, `range_`, `width=None)`

</>

Matches a numerical range like x01, x02, x03.

Parameters

prefix (str) — A prefix that starts the numeric range.
range_ — A sequence of integers, like range(3) (produces 0,1,2).
width (int, optional) — Optionally, the "width" of the numeric range. For example, a width of 2 gives "01", a width of 3 gives "001", etc.
_data — The data piped in

Returns (Any)

A list of ranges with prefix.

function

`datar.apis.dplyr.all_of(_data`, `x)`

</>

For strict selection.

If any of the variables in the character vector is missing, an error is thrown.

Parameters

_data — The data piped in
x — A set of variables to match the columns

Returns (Any)

The matched column names

Raises

ColumnNotExistingError — When any of the elements in x does not exist in _data columns

function

`datar.apis.dplyr.any_of(_data`, `x`, `vars=None)`

</>

For strict selection.

If any of the variables in the character vector is missing, an error is thrown.

Parameters

_data — The data piped in
x — A set of variables to match the columns
vars (optional) — A list of columns to select

Returns (Any)

The matched column names

Raises

ColumnNotExistingError — When any of the elements in x does not exist in _data columns

datar.apis.dplyr

datar.apis.dplyr

datar.apis.dplyr.pick(_data, *args)

datar.apis.dplyr.across(_data, *args, _names=None, **kwargs)

datar.apis.dplyr.c_across(_data, _cols=None)

datar.apis.dplyr.if_any(_data, *args, _names=None, **kwargs) → Any

datar.apis.dplyr.if_all(_data, *args, _names=None, **kwargs) → Any

datar.apis.dplyr.symdiff(x, y)

datar.apis.dplyr.arrange(_data, *args, _by_group=False, **kwargs)

datar.apis.dplyr.bind_rows(*data, _id=None, _copy=True, **kwargs)

datar.apis.dplyr.bind_cols(*data, _name_repair='unique', _copy=True)

datar.apis.dplyr.cur_column(_data, _name)

datar.apis.dplyr.cur_data(_data)

datar.apis.dplyr.n(_data)

datar.apis.dplyr.cur_data_all(_data)

datar.apis.dplyr.cur_group(_data)

datar.apis.dplyr.cur_group_id(_data)

datar.apis.dplyr.cur_group_rows(_data)

datar.apis.dplyr.count(_data, *args, wt=None, sort=False, name=None, _drop=None, **kwargs)

datar.apis.dplyr.tally(_data, wt=None, sort=False, name=None)

datar.apis.dplyr.add_count(_data, *args, wt=None, sort=False, name='n', **kwargs)

datar.apis.dplyr.add_tally(_data, wt=None, sort=False, name='n')

datar.apis.dplyr.desc(x)

datar.apis.dplyr.filter_(_data, *conditions, _preserve=False)

datar.apis.dplyr.distinct(_data, *args, keep_all=False, _preserve=False)

datar.apis.dplyr.n_distinct(_data, na_rm=True)

datar.apis.dplyr.glimpse(_data, width=None, formatter=None) → Any

datar.apis.dplyr.slice_(_data, *args, _preserve=False)

datar.apis.dplyr.slice_head(_data, n=None, prop=None)

datar.apis.dplyr.slice_tail(_data, n=None, prop=None)

datar.apis.dplyr.slice_sample(_data, n=1, prop=None, weight_by=None, replace=False)

datar.apis.dplyr.slice_min(_data, order_by, n=1, prop=None, with_ties=None)

datar.apis.dplyr.slice_max(_data, order_by, n=1, prop=None, with_ties=None)

datar.apis.dplyr.between(x, left, right, inclusive='both')

datar.apis.dplyr.cummean(x, na_rm=False)

datar.apis.dplyr.cumall(x)

datar.apis.dplyr.cumany(x)

datar.apis.dplyr.coalesce(x, *replace)

datar.apis.dplyr.consecutive_id(x, *args)

datar.apis.dplyr.na_if(x, value)

datar.apis.dplyr.near(x, y, tol=1e-08)

datar.apis.dplyr.nth(x, n, order_by=None, default=None)

datar.apis.dplyr.first(x, order_by=None, default=None)

datar.apis.dplyr.last(x, order_by=None, default=None)

datar.apis.dplyr.group_by(_data, *args, _add=False, _drop=None)

datar.apis.dplyr.ungroup(_data, *cols)

datar.apis.dplyr.rowwise(_data, *cols)

datar.apis.dplyr.group_by_drop_default(_data)

datar.apis.dplyr.group_vars(_data)

datar.apis.dplyr.group_indices(_data)

datar.apis.dplyr.group_keys(_data)

datar.apis.dplyr.group_size(_data)

datar.apis.dplyr.group_rows(_data)

datar.apis.dplyr.group_cols(_data)

datar.apis.dplyr.group_data(_data)

datar.apis.dplyr.n_groups(_data)

datar.apis.dplyr.group_map(_data, _f, *args, _keep=False, **kwargs)

datar.apis.dplyr.group_modify(_data, _f, *args, _keep=False, **kwargs)

datar.apis.dplyr.group_split(_data, *args, _keep=False, **kwargs)

datar.apis.dplyr.group_trim(_data, _drop=None)

datar.apis.dplyr.group_walk(_data, _f, *args, _keep=False, **kwargs)

datar.apis.dplyr.with_groups(_data, _groups, _func, *args, **kwargs)

datar.apis.dplyr.if_else(condition, true, false, missing=None)

datar.apis.dplyr.case_match(_x, *args, _default=None, _dtypes=None) → T

datar.apis.dplyr.case_when(cond, value, *more_cases)

datar.apis.dplyr.inner_join(x, y, by=None, copy=False, suffix=('_x', '_y'), keep=False, na_matches='na', multiple='all', unmatched='drop', relationship=None)

datar.apis.dplyr.left_join(x, y, by=None, copy=False, suffix=('_x', '_y'), keep=False, na_matches='na', multiple='all', unmatched='drop', relationship=None)

datar.apis.dplyr.right_join(x, y, by=None, copy=False, suffix=('_x', '_y'), keep=False, na_matches='na', multiple='all', unmatched='drop', relationship=None)

datar.apis.dplyr.full_join(x, y, by=None, copy=False, suffix=('_x', '_y'), keep=False, na_matches='na', multiple='all', unmatched='drop', relationship=None)

datar.apis.dplyr.semi_join(x, y, by=None, copy=False, na_matches='na')

datar.apis.dplyr.anti_join(x, y, by=None, copy=False, na_matches='na')

datar.apis.dplyr.nest_join(x, y, by=None, copy=False, keep=False, name=None, na_matches='na', unmatched='drop')

datar.apis.dplyr.cross_join(x, y, copy=False, suffix=('_x', '_y'))

datar.apis.dplyr.lead(x, n=1, default=None, order_by=None)

datar.apis.dplyr.lag(x, n=1, default=None, order_by=None)

datar.apis.dplyr.mutate(_data, *args, _keep='all', _before=None, _after=None, **kwargs)

datar.apis.dplyr.transmute(_data, *args, _before=None, _after=None, **kwargs)

datar.apis.dplyr.order_by(order, call)

datar.apis.dplyr.with_order(order, func, x, *args, **kwargs)

datar.apis.dplyr.pull(_data, var=-1, name=None, to=None)

`datar.apis.dplyr.pick(_data`, `*args)`

`datar.apis.dplyr.across(_data`, `*args`, `_names=None`, `**kwargs)`

`datar.apis.dplyr.c_across(_data`, `_cols=None)`

`datar.apis.dplyr.if_any(_data`, `*args`, `_names=None`, `**kwargs)` → Any

`datar.apis.dplyr.if_all(_data`, `*args`, `_names=None`, `**kwargs)` → Any

`datar.apis.dplyr.symdiff(x`, `y)`

`datar.apis.dplyr.arrange(_data`, `*args`, `_by_group=False`, `**kwargs)`

`datar.apis.dplyr.bind_rows(*data`, `_id=None`, `_copy=True`, `**kwargs)`

`datar.apis.dplyr.bind_cols(*data`, `_name_repair='unique'`, `_copy=True)`

`datar.apis.dplyr.cur_column(_data`, `_name)`

`datar.apis.dplyr.cur_data(_data)`

`datar.apis.dplyr.n(_data)`

`datar.apis.dplyr.cur_data_all(_data)`

`datar.apis.dplyr.cur_group(_data)`

`datar.apis.dplyr.cur_group_id(_data)`

`datar.apis.dplyr.cur_group_rows(_data)`

`datar.apis.dplyr.count(_data`, `*args`, `wt=None`, `sort=False`, `name=None`, `_drop=None`, `**kwargs)`

`datar.apis.dplyr.tally(_data`, `wt=None`, `sort=False`, `name=None)`

`datar.apis.dplyr.add_count(_data`, `*args`, `wt=None`, `sort=False`, `name='n'`, `**kwargs)`

`datar.apis.dplyr.add_tally(_data`, `wt=None`, `sort=False`, `name='n')`

`datar.apis.dplyr.desc(x)`

`datar.apis.dplyr.filter_(_data`, `*conditions`, `_preserve=False)`

`datar.apis.dplyr.distinct(_data`, `*args`, `keep_all=False`, `_preserve=False)`

`datar.apis.dplyr.n_distinct(_data`, `na_rm=True)`

`datar.apis.dplyr.glimpse(_data`, `width=None`, `formatter=None)` → Any

`datar.apis.dplyr.slice_(_data`, `*args`, `_preserve=False)`

`datar.apis.dplyr.slice_head(_data`, `n=None`, `prop=None)`

`datar.apis.dplyr.slice_tail(_data`, `n=None`, `prop=None)`

`datar.apis.dplyr.slice_sample(_data`, `n=1`, `prop=None`, `weight_by=None`, `replace=False)`

`datar.apis.dplyr.slice_min(_data`, `order_by`, `n=1`, `prop=None`, `with_ties=None)`

`datar.apis.dplyr.slice_max(_data`, `order_by`, `n=1`, `prop=None`, `with_ties=None)`

`datar.apis.dplyr.between(x`, `left`, `right`, `inclusive='both')`

`datar.apis.dplyr.cummean(x`, `na_rm=False)`

`datar.apis.dplyr.cumall(x)`

`datar.apis.dplyr.cumany(x)`

`datar.apis.dplyr.coalesce(x`, `*replace)`

`datar.apis.dplyr.consecutive_id(x`, `*args)`

`datar.apis.dplyr.na_if(x`, `value)`

`datar.apis.dplyr.near(x`, `y`, `tol=1e-08)`

`datar.apis.dplyr.nth(x`, `n`, `order_by=None`, `default=None)`

`datar.apis.dplyr.first(x`, `order_by=None`, `default=None)`

`datar.apis.dplyr.last(x`, `order_by=None`, `default=None)`

`datar.apis.dplyr.group_by(_data`, `*args`, `_add=False`, `_drop=None)`

`datar.apis.dplyr.ungroup(_data`, `*cols)`

`datar.apis.dplyr.rowwise(_data`, `*cols)`

`datar.apis.dplyr.group_by_drop_default(_data)`

`datar.apis.dplyr.group_vars(_data)`

`datar.apis.dplyr.group_indices(_data)`

`datar.apis.dplyr.group_keys(_data)`

`datar.apis.dplyr.group_size(_data)`

`datar.apis.dplyr.group_rows(_data)`

`datar.apis.dplyr.group_cols(_data)`

`datar.apis.dplyr.group_data(_data)`

`datar.apis.dplyr.n_groups(_data)`

`datar.apis.dplyr.group_map(_data`, `_f`, `*args`, `_keep=False`, `**kwargs)`

`datar.apis.dplyr.group_modify(_data`, `_f`, `*args`, `_keep=False`, `**kwargs)`

`datar.apis.dplyr.group_split(_data`, `*args`, `_keep=False`, `**kwargs)`

`datar.apis.dplyr.group_trim(_data`, `_drop=None)`

`datar.apis.dplyr.group_walk(_data`, `_f`, `*args`, `_keep=False`, `**kwargs)`

`datar.apis.dplyr.with_groups(_data`, `_groups`, `_func`, `*args`, `**kwargs)`

`datar.apis.dplyr.if_else(condition`, `true`, `false`, `missing=None)`

`datar.apis.dplyr.case_match(_x`, `*args`, `_default=None`, `_dtypes=None)` → T

`datar.apis.dplyr.case_when(cond`, `value`, `*more_cases)`

`datar.apis.dplyr.inner_join(x`, `y`, `by=None`, `copy=False`, `suffix=('_x', '_y')`, `keep=False`, `na_matches='na'`, `multiple='all'`, `unmatched='drop'`, `relationship=None)`

`datar.apis.dplyr.left_join(x`, `y`, `by=None`, `copy=False`, `suffix=('_x', '_y')`, `keep=False`, `na_matches='na'`, `multiple='all'`, `unmatched='drop'`, `relationship=None)`

`datar.apis.dplyr.right_join(x`, `y`, `by=None`, `copy=False`, `suffix=('_x', '_y')`, `keep=False`, `na_matches='na'`, `multiple='all'`, `unmatched='drop'`, `relationship=None)`

`datar.apis.dplyr.full_join(x`, `y`, `by=None`, `copy=False`, `suffix=('_x', '_y')`, `keep=False`, `na_matches='na'`, `multiple='all'`, `unmatched='drop'`, `relationship=None)`

`datar.apis.dplyr.semi_join(x`, `y`, `by=None`, `copy=False`, `na_matches='na')`

`datar.apis.dplyr.anti_join(x`, `y`, `by=None`, `copy=False`, `na_matches='na')`

`datar.apis.dplyr.nest_join(x`, `y`, `by=None`, `copy=False`, `keep=False`, `name=None`, `na_matches='na'`, `unmatched='drop')`

`datar.apis.dplyr.cross_join(x`, `y`, `copy=False`, `suffix=('_x', '_y'))`

`datar.apis.dplyr.lead(x`, `n=1`, `default=None`, `order_by=None)`

`datar.apis.dplyr.lag(x`, `n=1`, `default=None`, `order_by=None)`

`datar.apis.dplyr.mutate(_data`, `*args`, `_keep='all'`, `_before=None`, `_after=None`, `**kwargs)`

`datar.apis.dplyr.transmute(_data`, `*args`, `_before=None`, `_after=None`, `**kwargs)`

`datar.apis.dplyr.order_by(order`, `call)`

`datar.apis.dplyr.with_order(order`, `func`, `x`, `*args`, `**kwargs)`

`datar.apis.dplyr.pull(_data`, `var=-1`, `name=None`, `to=None)`

`datar.apis.dplyr.row_number(html())`

`datar.apis.dplyr.ntile(html()`, `n=None)`