datar.apis.dplyr
datar.apis.dplyr
across
(
_data
,*args
,_names
,**kwargs
)
(T) — Apply the same transformation to multiple columns</>add_count
(
_data
,*args
,wt
,sort
,name
,**kwargs
)
(Any) — Add a count column to a data frame</>add_tally
(
_data
,wt
,sort
,name
)
(Any) — Add a count column to a data frame</>all_of
(
_data
,x
)
(Any) — For strict selection.</>anti_join
(
x
,y
,by
,copy
,na_matches
)
(Any) — Anti join two data frames by matching rows.</>any_of
(
_data
,x
,vars
)
(Any) — For non-strict selection: names that do not exist are ignored.</>arrange
(
_data
,*args
,_by_group
,**kwargs
)
(Any) — orders the rows of a data frame by the values of selected columns.</>between
(
x
,left
,right
,inclusive
)
(Any) — Check if a value is between two other values</>bind_cols
(
*data
,_name_repair
,_copy
)
(Any) — Bind columns of given dataframes</>bind_rows
(
*data
,_id
,_copy
,**kwargs
)
(Any) — Bind rows of given dataframes</>c_across
(
_data
,_cols
)
(T) — Apply the same transformation to multiple columns rowwisely</>case_match
(
_x
,*args
,_default
,_dtypes
)
(T) — This function allows you to vectorise multiple switch() statements. Each case is evaluated sequentially and the first match for each element determines the corresponding value in the output vector. If no cases match, the _default is used.</>case_when
(
cond
,value
,*more_cases
)
(Any) — Vectorise multiple if_else()
statements.</>coalesce
(
x
,*replace
)
(Any) — Replace missing values with the first non-missing value</>consecutive_id
(
x
,*args
)
(Sequence) — Generate consecutive ids</>contains
(
_data
,match
,ignore_case
,vars
)
(Any) — Select columns that contain a string.</>count
(
_data
,*args
,wt
,sort
,name
,_drop
,**kwargs
)
(Any) — Count the number of rows in each group</>cross_join
(
x
,y
,copy
,suffix
)
(T) — Cross joins match each row in x to every row in y, resulting in a data frame with nrow(x) * nrow(y) rows.</>cumall
(
x
)
(Any) — Get cumulative bool. All cases after first False</>cumany
(
x
)
(Any) — Get cumulative bool. All cases after first True</>cume_dist
(
x
,na_last
)
(Any) — Get the cume_dist of x</>cummean
(
x
,na_rm
)
(Any) — Cumulative mean</>cur_column
(
_data
,_name
)
(Any) — Get the current column</>cur_data
(
_data
)
(Any) — Get the current dataframe</>cur_data_all
(
_data
)
(Any) — Get the current data for the current group, including the grouping variables</>cur_group
(
_data
)
(Any) — Get the current group</>cur_group_id
(
_data
)
(Any) — Get the current group id</>cur_group_rows
(
_data
)
(Any) — Get the current group row indices</>dense_rank
(
x
,na_last
)
(Any) — Get the dense rank of x</>desc
(
x
)
(Any) — Transform a vector into a format that will be sorted in descending order</>distinct
(
_data
,*args
,keep_all
,_preserve
)
(Any) — Select only unique/distinct rows from a data frame</>ends_with
(
_data
,match
,ignore_case
,vars
)
(Any) — Select columns that end with a string.</>everything
(
_data
)
(Any) — Select all variables.</>filter_
(
_data
,*conditions
,_preserve
)
(Any) — Filter a data frame based on conditions</>first
(
x
,order_by
,default
)
(Any) — Extract the first element of a vector</>full_join
(
x
,y
,by
,copy
,suffix
,keep
,na_matches
,multiple
,unmatched
,relationship
)
(Any) — Full join two data frames by matching rows.</>glimpse
(
_data
,width
,formatter
)
(Any) — Display a summary of a data frame</>group_by
(
_data
,*args
,_add
,_drop
)
(Any) — Create a grouped frame</>group_by_drop_default
(
_data
)
(Any) — Get the default value of _drop
of a frame</>group_cols
(
_data
)
(Any) — Get the group columns of a frame</>group_data
(
_data
)
(Any) — Get the group data of a frame</>group_indices
(
_data
)
(Any) — Get the group indices of a frame</>group_keys
(
_data
)
(Any) — Get the group keys of a frame</>group_map
(
_data
,_f
,*args
,_keep
,**kwargs
)
(Any) — Apply a function to each group</>group_modify
(
_data
,_f
,*args
,_keep
,**kwargs
)
(Any) — Apply a function to each group</>group_rows
(
_data
)
(Any) — Get the group rows of a frame</>group_size
(
_data
)
(Any) — Get the group sizes of a frame</>group_split
(
_data
,*args
,_keep
,**kwargs
)
(Any) — Split a grouped frame into a list of data frames</>group_trim
(
_data
,_drop
)
(Any) — Remove empty groups</>group_vars
(
_data
)
(Any) — Get the grouping variables of a frame</>group_walk
(
_data
,_f
,*args
,_keep
,**kwargs
)
(Any) — Apply a function to each group</>if_all
(
_data
,*args
,_names
,**kwargs
)
(Any) — Apply the same predicate function to a selection of columns and combine the results: True if all elements are True.</>if_any
(
_data
,*args
,_names
,**kwargs
)
(Any) — Apply the same predicate function to a selection of columns and combine the results: True if any element is True.</>if_else
(
condition
,true
,false
,missing
)
(Any) — Where condition is TRUE, the matching value from true; where it's FALSE, the matching value from false; otherwise missing.</>inner_join
(
x
,y
,by
,copy
,suffix
,keep
,na_matches
,multiple
,unmatched
,relationship
)
(Any) — Inner join two data frames by matching rows.</>lag
(
x
,n
,default
,order_by
)
(Any) — Shift a vector by n
positions.</>last
(
x
,order_by
,default
)
(Any) — Extract the last element of a vector</>last_col
(
_data
,offset
,vars
)
(Any) — Select the last column.</>lead
(
x
,n
,default
,order_by
)
(Any) — Shift a vector by n
positions.</>left_join
(
x
,y
,by
,copy
,suffix
,keep
,na_matches
,multiple
,unmatched
,relationship
)
(Any) — Left join two data frames by matching rows.</>matches
(
_data
,match
,ignore_case
,vars
)
(Any) — Select columns that match a regular expression.</>min_rank
(
x
,na_last
)
(Any) — Get the min rank of x</>mutate
(
_data
,*args
,_keep
,_before
,_after
,**kwargs
)
(Any) — Add new columns to a data frame.</>n
(
_data
)
(Any) — Get the current group size</>n_distinct
(
_data
,na_rm
)
(Any) — Count the number of distinct values</>n_groups
(
_data
)
(int) — Get the number of groups of a frame</>na_if
(
x
,value
)
(Any) — Replace values with missing values</>near
(
x
,y
,tol
)
(Any) — Check if values are approximately equal</>nest_join
(
x
,y
,by
,copy
,keep
,name
,na_matches
,unmatched
)
(Any) — Nest join two data frames by matching rows.</>nth
(
x
,n
,order_by
,default
)
(Any) — Extract the nth element of a vector</>ntile
(
x
,n
)
(Any) — A rough rank, which breaks the input vector into n buckets. The size of the buckets may differ by up to one; larger buckets have lower rank.</>num_range
(
prefix
,range_
,width
)
(Any) — Matches a numerical range like x01, x02, x03.</>order_by
(
order
,call
)
(Any) — Order the data by the given order</>percent_rank
(
x
,na_last
)
(Any) — Get the percent rank of x</>pick
(
_data
,*args
)
(T) — Pick columns by name</>pull
(
_data
,var
,name
,to
)
(Any) — Pull a series or a dataframe from a dataframe</>recode
(
_x
,*args
,_default
,_missing
,**kwargs
)
(Any) — Recode a vector, replacing elements in it</>recode_factor
(
_x
,*args
,_default
,_missing
,_ordered
,**kwargs
)
(Any) — Recode a factor, replacing levels in it</>relocate
(
_data
,*args
,_before
,_after
,**kwargs
)
(Any) — change column positions</>rename
(
_data
,**kwargs
)
(Any) — Rename columns</>rename_with
(
_data
,_fn
,*args
,**kwargs
)
(Any) — Rename columns with a function</>right_join
(
x
,y
,by
,copy
,suffix
,keep
,na_matches
,multiple
,unmatched
,relationship
)
(Any) — Right join two data frames by matching rows.</>row_number
(
x
)
(Any) — Get the row number of x</>rows_append
(
x
,y
,**kwargs
)
(Any) — Append rows in y to x</>rows_delete
(
x
,y
,by
,unmatched
,**kwargs
)
(Any) — Delete rows in x that match keys in y</>rows_insert
(
x
,y
,by
,conflict
,**kwargs
)
(Any) — Insert rows from y into x</>rows_patch
(
x
,y
,by
,unmatched
,**kwargs
)
(Any) — Patch rows in x with values from y</>rows_update
(
x
,y
,by
,unmatched
,**kwargs
)
(Any) — Update rows in x with values from y</>rows_upsert
(
x
,y
,by
,**kwargs
)
(Any) — Upsert rows in x with values from y</>rowwise
(
_data
,*cols
)
(Any) — Create a rowwise frame</>select
(
_data
,*args
,**kwargs
)
(Any) — Select columns from a data frame.</>semi_join
(
x
,y
,by
,copy
,na_matches
)
(Any) — Semi join two data frames by matching rows.</>slice_
(
_data
,*args
,_preserve
)
(Any) — Extract rows by their position</>slice_head
(
_data
,n
,prop
)
(Any) — Extract the first rows</>slice_max
(
_data
,order_by
,n
,prop
,with_ties
)
(Any) — Extract rows with the maximum value</>slice_min
(
_data
,order_by
,n
,prop
,with_ties
)
(Any) — Extract rows with the minimum value</>slice_sample
(
_data
,n
,prop
,weight_by
,replace
)
(Any) — Extract rows by sampling</>slice_tail
(
_data
,n
,prop
)
(Any) — Extract the last rows</>starts_with
(
_data
,match
,ignore_case
,vars
)
(Any) — Select columns that start with a string.</>summarise
(
_data
,*args
,_groups
,**kwargs
)
(Any) — Summarise a data frame.</>symdiff
(
x
,y
)
(T) — Get the symmetric difference of two dataframes</>tally
(
_data
,wt
,sort
,name
)
(Any) — Count the number of rows in each group</>transmute
(
_data
,*args
,_before
,_after
,**kwargs
)
(Any) — Add new columns to a data frame and remove existing columns using mutate with _keep="none".</>ungroup
(
_data
,*cols
)
(Any) — Remove grouping variables</>union_all
(
x
,y
)
(Any) — Combine two data frames together.</>where
(
_data
,fn
)
(Any) — Selects the variables for which a function returns True.</>with_groups
(
_data
,_groups
,_func
,*args
,**kwargs
)
(Any) — Modify the grouping variables for a single operation.</>with_order
(
order
,func
,x
,*args
,**kwargs
)
(Any) — Control argument and result of a window function</>
datar.apis.dplyr.
pick
(
_data
, *args
)
datar.apis.dplyr.
across
(
_data
, *args
, _names=None
, **kwargs
)
Apply the same transformation to multiple columns
The original API: https://dplyr.tidyverse.org/reference/across.html
>>> iris >> mutate(across(c(f.Sepal_Length, f.Sepal_Width), round))
Sepal_Length Sepal_Width Petal_Length Petal_Width Species
<float64> <float64> <float64> <float64> <object>
0 5.0 4.0 1.4 0.2 setosa
1 5.0 3.0 1.4 0.2 setosa
.. ... ... ... ... ...
>>> iris >> group_by(f.Species) >> summarise(
>>> across(starts_with("Sepal"), mean)
>>> )
Species Sepal_Length Sepal_Width
<object> <float64> <float64>
0 setosa 5.006 3.428
1 versicolor 5.936 2.770
2 virginica 6.588 2.974
_data
(T) — The dataframe.
*args
— If given, the first 2 elements should be the columns and the functions to apply to each of the selected columns. The rest of them will be the arguments for the functions.
_names
(optional) — A glue specification that describes how to name the output columns. This can use {_col} to stand for the selected column name, and {_fn} to stand for the name of the function being applied. The default (None) is equivalent to {_col} for the single function case and {_col}_{_fn} for the case where a list is used for _fns. In such a case, {_fn} is 0-based. To use a 1-based index, use {_fn1}.
**kwargs
— Keyword arguments for the functions.
_fn_context
— Defines the context to evaluate the arguments for functions if they are plain functions. Note that registered functions will use their own context.
A dataframe with one column for each column and each function.
datar.apis.dplyr.
c_across
(
_data
, _cols=None
)
datar.apis.dplyr.
if_any
(
_data
, *args
, _names=None
, **kwargs
)
→ Any
Apply the same predicate function to a selection of columns and combine the results: True if any element is True.
datar.apis.dplyr.
if_all
(
_data
, *args
, _names=None
, **kwargs
)
→ Any
Apply the same predicate function to a selection of columns and combine the results: True if all elements are True.
datar.apis.dplyr.
symdiff
(
x
, y
)
Get the symmetric difference of two dataframes
It computes the symmetric difference, i.e. all rows in x that aren't in y and all rows in y that aren't in x.
The original API: https://dplyr.tidyverse.org/reference/setops.html
The symmetric difference of x and y
datar.apis.dplyr.
arrange
(
_data
, *args
, _by_group=False
, **kwargs
)
orders the rows of a data frame by the values of selected columns.
The original API: https://dplyr.tidyverse.org/reference/arrange.html
_data
— A data frame
_by_group
(optional) — If TRUE, will sort first by grouping variable. Applies to grouped data frames only.
**kwargs
— Name-value pairs that apply with mutate
*args
— Variables, or functions of variables. Use desc() to sort a variable in descending order.
An object of the same type as _data. The output has the following properties: All rows appear in the output, but (usually) in a different place. Columns are not modified. Groups are not modified. Data frame attributes are preserved.
datar.apis.dplyr.
bind_rows
(
*data
, _id=None
, _copy=True
, **kwargs
)
Bind rows of given dataframes
The original API: https://dplyr.tidyverse.org/reference/bind.html
*data
— Dataframes to combine
_id
(optional) — The name of the id column
_copy
(bool, optional) — If False, do not copy data unnecessarily. The original API does not support this. This argument will be passed to pandas.concat() as the copy argument.
**kwargs
— A mapping of dataframes; the keys will be used as the values of the _id column.
The combined dataframe
datar.apis.dplyr.
bind_cols
(
*data
, _name_repair='unique'
, _copy=True
)
Bind columns of given dataframes
Note that unlike dplyr, mismatched dimensions are allowed and missing rows will be filled with NAs.
*data
— Dataframes to bind
_name_repair
(optional) — Treatment of problematic column names:
- "minimal": No name repair or checks, beyond basic existence,
- "unique": (default value) Make sure names are unique and not empty,
- "check_unique": No name repair, but check they are unique,
- "universal": Make the names unique and syntactic,
- a function: Apply custom name repair.
_copy
(bool, optional) — If False, do not copy data unnecessarily. The original API does not support this. This argument will be passed to pandas.concat() as the copy argument.
The combined dataframe
datar.apis.dplyr.
cur_column
(
_data
, _name
)
Get the current column
_data
— The dataframe_name
— The column name
The current column
datar.apis.dplyr.
cur_data
(
_data
)
Get the current dataframe
_data
— The dataframe
The current dataframe
datar.apis.dplyr.
n
(
_data
)
Get the current group size
_data
— The dataframe
The number of rows
datar.apis.dplyr.
cur_data_all
(
_data
)
Get the current data for the current group, including the grouping variables
_data
— The dataframe
The current dataframe
datar.apis.dplyr.
cur_group
(
_data
)
Get the current group
_data
— The dataframe
The current group
datar.apis.dplyr.
cur_group_id
(
_data
)
Get the current group id
_data
— The dataframe
The current group id
datar.apis.dplyr.
cur_group_rows
(
_data
)
Get the current group row indices
_data
— The dataframe
The current group rows
datar.apis.dplyr.
count
(
_data
, *args
, wt=None
, sort=False
, name=None
, _drop=None
, **kwargs
)
Count the number of rows in each group
Original API: https://dplyr.tidyverse.org/reference/count.html
_data
— A data frame
*args
— Variables, or functions of variables. Use desc() to sort a variable in descending order.
wt
(optional) — A variable or function of variables to weight by.
sort
(optional) — If TRUE, the result will be sorted by the count.
name
(optional) — The name of the count column.
_drop
(optional) — If False, keep grouping variables even if they are not used. The original API does not support this.
**kwargs
— Name-value pairs that apply with mutate
A data frame with the same number of rows as the number of groups.The output has the following properties: All rows appear in the output, but (usually) in a different place. Columns are not modified. Groups are not modified. Data frame attributes are preserved.
datar.apis.dplyr.
tally
(
_data
, wt=None
, sort=False
, name=None
)
Count the number of rows in each group
Original API: https://dplyr.tidyverse.org/reference/count.html
_data
— A data framewt
(optional) — A variable or function of variables to weight by.sort
(optional) — If TRUE, the result will be sorted by the count.name
(optional) — The name of the count column.
A data frame with the same number of rows as the number of groups.The output has the following properties: All rows appear in the output, but (usually) in a different place. Columns are not modified. Groups are not modified. Data frame attributes are preserved.
datar.apis.dplyr.
add_count
(
_data
, *args
, wt=None
, sort=False
, name='n'
, **kwargs
)
Add a count column to a data frame
Original API: https://dplyr.tidyverse.org/reference/count.html
_data
— A data frame*args
— Variables, or functions of variables.Use desc() to sort a variable in descending order.wt
(optional) — A variable or function of variables to weight by.sort
(optional) — If TRUE, the result will be sorted by the count.name
(optional) — The name of the count column.**kwargs
— Name-value pairs that apply with mutate
A data frame with the same number of rows as the number of groups.The output has the following properties: All rows appear in the output, but (usually) in a different place. Columns are not modified. Groups are not modified. Data frame attributes are preserved.
datar.apis.dplyr.
add_tally
(
_data
, wt=None
, sort=False
, name='n'
)
Add a count column to a data frame
Original API: https://dplyr.tidyverse.org/reference/count.html
_data
— A data framewt
(optional) — A variable or function of variables to weight by.sort
(optional) — If TRUE, the result will be sorted by the count.name
(optional) — The name of the count column.
A data frame with the same number of rows as the number of groups.The output has the following properties: All rows appear in the output, but (usually) in a different place. Columns are not modified. Groups are not modified. Data frame attributes are preserved.
datar.apis.dplyr.
desc
(
x
)
Transform a vector into a format that will be sorted in descending order
This is useful within arrange().
The original API: https://dplyr.tidyverse.org/reference/desc.html
x
— vector to transform
The descending order of x
datar.apis.dplyr.
filter_
(
_data
, *conditions
, _preserve=False
)
Filter a data frame based on conditions
The original API: https://dplyr.tidyverse.org/reference/filter.html
_data
— A data frame
*conditions
— Conditions to filter by.
_preserve
(bool, optional) — If True, keep grouping variables even if they are not used.
The subset dataframe
datar.apis.dplyr.
distinct
(
_data
, *args
, keep_all=False
, _preserve=False
)
Select only unique/distinct rows from a data frame
The original API: https://dplyr.tidyverse.org/reference/distinct.html
_data
— A data frame
*args
— Variables to use when determining uniqueness.
keep_all
(bool, optional) — If True, keep all columns of the data frame, not only the ones used to determine uniqueness.
_preserve
(bool, optional) — If True, keep grouping variables even if they are not used.
The subset dataframe
datar.apis.dplyr.
n_distinct
(
_data
, na_rm=True
)
Count the number of distinct values
The original API: https://dplyr.tidyverse.org/reference/distinct.html
_data
— A data framena_rm
(bool, optional) — IfTrue
, remove missing values before counting.
The number of distinct values
datar.apis.dplyr.
glimpse
(
_data
, width=None
, formatter=None
)
→ Any
Display a summary of a data frame
The original API: https://dplyr.tidyverse.org/reference/glimpse.html
_data
— A data framewidth
(int, optional) — Width of output, defaults to the width of the console.formatter
(optional) — A single-dispatch function to format a single element.
datar.apis.dplyr.
slice_
(
_data
, *args
, _preserve=False
)
Extract rows by their position
The original API: https://dplyr.tidyverse.org/reference/slice.html
_data
— A data frame*args
— Positions to extract._preserve
(bool, optional) — IfTrue
, keep grouping variables even if they are not used.
The subset dataframe
datar.apis.dplyr.
slice_head
(
_data
, n=None
, prop=None
)
Extract the first rows
The original API: https://dplyr.tidyverse.org/reference/slice.html
_data
— A data framen
(int, optional) — Number of rows to extract.prop
(float, optional) — Proportion of rows to extract.
The subset dataframe
datar.apis.dplyr.
slice_tail
(
_data
, n=None
, prop=None
)
Extract the last rows
The original API: https://dplyr.tidyverse.org/reference/slice.html
_data
— A data framen
(int, optional) — Number of rows to extract.prop
(float, optional) — Proportion of rows to extract.
The subset dataframe
datar.apis.dplyr.
slice_sample
(
_data
, n=1
, prop=None
, weight_by=None
, replace=False
)
Extract rows by sampling
The original API: https://dplyr.tidyverse.org/reference/slice.html
_data
— A data framen
(int, optional) — Number of rows to extract.prop
(float, optional) — Proportion of rows to extract.weight_by
(optional) — A variable or function of variables to weight by.replace
(bool, optional) — IfTrue
, sample with replacement.
The subset dataframe
datar.apis.dplyr.
slice_min
(
_data
, order_by
, n=1
, prop=None
, with_ties=None
)
Extract rows with the minimum value
The original API: https://dplyr.tidyverse.org/reference/slice.html
_data
— A data frame
order_by
— A variable or function of variables to order by.
n
(int, optional) — Number of rows to extract.
prop
(float, optional) — Proportion of rows to extract.
with_ties
(bool | str, optional) — If True, extract all rows with the minimum value. If "first", extract the first row with the minimum value. If "last", extract the last row with the minimum value.
The subset dataframe
datar.apis.dplyr.
slice_max
(
_data
, order_by
, n=1
, prop=None
, with_ties=None
)
Extract rows with the maximum value
The original API: https://dplyr.tidyverse.org/reference/slice.html
_data
— A data frame
order_by
— A variable or function of variables to order by.
n
(int, optional) — Number of rows to extract.
prop
(float, optional) — Proportion of rows to extract.
with_ties
(bool | str, optional) — If True, extract all rows with the maximum value. If "first", extract the first row with the maximum value. If "last", extract the last row with the maximum value.
The subset dataframe
datar.apis.dplyr.
between
(
x
, left
, right
, inclusive='both'
)
Check if a value is between two other values
The original API: https://dplyr.tidyverse.org/reference/between.html
x
— A value
left
— The left bound
right
— The right bound
inclusive
(str, optional) — Either "both", "neither", "left" or "right". Include boundaries. Whether to set each bound as closed or open.
A bool value if x is scalar, otherwise an array of boolean values. Note that it will always be False when NA appears in x, left or right.
datar.apis.dplyr.
cummean
(
x
, na_rm=False
)
Cumulative mean
The original API: https://dplyr.tidyverse.org/reference/cumall.html
x
— A numeric vectorna_rm
(bool, optional) — IfTrue
, remove missing values before computing.
An array of cumulative means
datar.apis.dplyr.
cumall
(
x
)
Get cumulative bool. All cases after first False
The original API: https://dplyr.tidyverse.org/reference/cumall.html
x
— A logical vector
An array of cumulative conjunctions
datar.apis.dplyr.
cumany
(
x
)
Get cumulative bool. All cases after first True
The original API: https://dplyr.tidyverse.org/reference/cumall.html
x
— A logical vector
An array of cumulative disjunctions
datar.apis.dplyr.
coalesce
(
x
, *replace
)
Replace missing values with the first non-missing value
The original API: https://dplyr.tidyverse.org/reference/coalesce.html
x
— A vector*replace
— Values to replace missing values with.
An array of values
datar.apis.dplyr.
consecutive_id
(
x
, *args
)
Generate consecutive ids
The original API: https://dplyr.tidyverse.org/reference/consecutive_id.html
x
— A vector*args
— Other vectors
A sequence of consecutive ids
datar.apis.dplyr.
na_if
(
x
, value
)
Replace values with missing values
The original API: https://dplyr.tidyverse.org/reference/na_if.html
x
— A vectorvalue
— Values to replace with missing values.
An array of values
datar.apis.dplyr.
near
(
x
, y
, tol=1e-08
)
Check if values are approximately equal
The original API: https://dplyr.tidyverse.org/reference/near.html
x
— A numeric vectory
— A numeric vectortol
(float, optional) — Tolerance
An array of boolean values
datar.apis.dplyr.
nth
(
x
, n
, order_by=None
, default=None
)
Extract the nth element of a vector
The original API: https://dplyr.tidyverse.org/reference/nth.html
x
— A vector
n
— The index of the element to extract.
order_by
(optional) — A variable or function of variables to order by.
default
(optional) — A default value to return if n is out of bounds.
A value
datar.apis.dplyr.
first
(
x
, order_by=None
, default=None
)
Extract the first element of a vector
The original API: https://dplyr.tidyverse.org/reference/nth.html
x
— A vectororder_by
(optional) — A variable or function of variables to order by.default
(optional) — A default value to return ifx
is empty.
A value
datar.apis.dplyr.
last
(
x
, order_by=None
, default=None
)
Extract the last element of a vector
The original API: https://dplyr.tidyverse.org/reference/nth.html
x
— A vectororder_by
(optional) — A variable or function of variables to order by.default
(optional) — A default value to return ifx
is empty.
A value
datar.apis.dplyr.
group_by
(
_data
, *args
, _add=False
, _drop=None
)
Create a grouped frame
The original API: https://dplyr.tidyverse.org/reference/group_by.html
_data
— A data frame*args
— A variable or function of variables to group by._add
(bool, optional) — IfTrue
, add grouping variables to an existing group._drop
(bool, optional) — IfTrue
, drop grouping variables from the output.
A grouped frame
datar.apis.dplyr.
ungroup
(
_data
, *cols
)
Remove grouping variables
The original API: https://dplyr.tidyverse.org/reference/ungroup.html
_data
— A grouped frame*cols
(str | int) — Columns to remove grouping variables from.
A data frame
datar.apis.dplyr.
rowwise
(
_data
, *cols
)
Create a rowwise frame
The original API: https://dplyr.tidyverse.org/reference/rowwise.html
_data
— A data frame*cols
(str | int) — Columns to make rowwise.
A rowwise frame
datar.apis.dplyr.
group_by_drop_default
(
_data
)
Get the default value of _drop
of a frame
The original API: https://dplyr.tidyverse.org/reference/group_by.html
_data
— A data frame
A bool value
datar.apis.dplyr.
group_vars
(
_data
)
Get the grouping variables of a frame
The original API: https://dplyr.tidyverse.org/reference/group_vars.html
_data
— A grouped frame
A list of grouping variables
datar.apis.dplyr.
group_indices
(
_data
)
Get the group indices of a frame
The original API: https://dplyr.tidyverse.org/reference/group_indices.html
_data
— A grouped frame
A list of group indices
datar.apis.dplyr.
group_keys
(
_data
)
Get the group keys of a frame
The original API: https://dplyr.tidyverse.org/reference/group_keys.html
_data
— A grouped frame
A list of group keys
datar.apis.dplyr.
group_size
(
_data
)
Get the group sizes of a frame
The original API: https://dplyr.tidyverse.org/reference/group_size.html
_data
— A grouped frame
A list of group sizes
datar.apis.dplyr.
group_rows
(
_data
)
Get the group rows of a frame
The original API: https://dplyr.tidyverse.org/reference/group_rows.html
_data
— A grouped frame
A list of group rows
datar.apis.dplyr.
group_cols
(
_data
)
Get the group columns of a frame
The original API: https://dplyr.tidyverse.org/reference/group_cols.html
_data
— A grouped frame
A list of group columns
datar.apis.dplyr.
group_data
(
_data
)
Get the group data of a frame
The original API: https://dplyr.tidyverse.org/reference/group_data.html
_data
— A grouped frame
A list of group data
datar.apis.dplyr.
n_groups
(
_data
)
Get the number of groups of a frame
The original API: https://dplyr.tidyverse.org/reference/n_groups.html
_data
— A grouped frame
An int value
datar.apis.dplyr.
group_map
(
_data
, _f
, *args
, _keep=False
, **kwargs
)
Apply a function to each group
The original API: https://dplyr.tidyverse.org/reference/group_map.html
_data
— A grouped frame
_f
— A function to apply to each group.
*args
— Additional arguments to pass to _f.
_keep
(bool, optional) — If True, keep the grouping variables in the output.
**kwargs
— Additional keyword arguments to pass to _f.
A list of results
datar.apis.dplyr.
group_modify
(
_data
, _f
, *args
, _keep=False
, **kwargs
)
Apply a function to each group
The original API: https://dplyr.tidyverse.org/reference/group_modify.html
_data
— A grouped frame_f
— A function to apply to each group.*args
— Additional arguments to pass tofunc
._keep
(bool, optional) — IfTrue
, keep the grouping variables in the output.**kwargs
— Additional keyword arguments to pass tofunc
.
A data frame
datar.apis.dplyr.
group_split
(
_data
, *args
, _keep=False
, **kwargs
)
Split a grouped frame into a list of data frames
The original API: https://dplyr.tidyverse.org/reference/group_split.html
_data
— A grouped frame*args
— Additional arguments to pass tofunc
._keep
(bool, optional) — IfTrue
, keep the grouping variables in the output.**kwargs
— Additional keyword arguments to pass tofunc
.
A list of data frames
datar.apis.dplyr.
group_trim
(
_data
, _drop=None
)
Remove empty groups
The original API: https://dplyr.tidyverse.org/reference/group_trim.html
_data
— A grouped frame
_drop
(optional) — See group_by.
A grouped frame
datar.apis.dplyr.
group_walk
(
_data
, _f
, *args
, _keep=False
, **kwargs
)
Apply a function to each group
The original API: https://dplyr.tidyverse.org/reference/group_walk.html
_data
— A grouped frame_f
— A function to apply to each group.*args
— Additional arguments to pass tofunc
.**kwargs
— Additional keyword arguments to pass tofunc
.
A grouped frame
datar.apis.dplyr.
with_groups
(
_data
, _groups
, _func
, *args
, **kwargs
)
Modify the grouping variables for a single operation.
_data
— A data frame
_groups
— Columns to pass to group_by. Use None to temporarily ungroup.
_func
— Function to apply to regrouped data.
The new data frame with operations applied.
datar.apis.dplyr.
if_else
(
condition
, true
, false
, missing=None
)
Where condition is TRUE, the matching value from true; where it's FALSE, the matching value from false; otherwise missing.
Note that NAs will be False in condition if missing is not specified
condition
— The conditions
true
— Values to use for TRUE values of condition. They must be either the same length as condition, or length 1.
false
— Values to use for FALSE values of condition. They must be either the same length as condition, or length 1.
missing
(optional) — If not None, will be used to replace missing values
A series with values replaced.
datar.apis.dplyr.
case_match
(
_x
, *args
, _default=None
, _dtypes=None
)
→ T
This function allows you to vectorise multiple switch()
statements. Each case is evaluated sequentially and the first match for each element
determines the corresponding value in the output vector.
If no cases match, the _default
is used.
The original API: https://dplyr.tidyverse.org/reference/case_match.html
_x
(T) — A vector
*args
— A series of condition-value pairs
_default
(optional) — The default value
_dtypes
(optional) — The data types of the output
datar.apis.dplyr.
case_when
(
cond
, value
, *more_cases
)
Vectorise multiple if_else()
statements.
cond
— A boolean vector
value
— A vector with values to replace
*more_cases
— A list of tuples (cond, value)
A vector with values replaced.
datar.apis.dplyr.
inner_join
(
x
, y
, by=None
, copy=False
, suffix=('_x', '_y')
, keep=False
, na_matches='na'
, multiple='all'
, unmatched='drop'
, relationship=None
)
Inner join two data frames by matching rows.
The original API: https://dplyr.tidyverse.org/reference/join.html
x
— A data frame
y
— A data frame
by
(optional) — A list of column names to join by. If None, use the intersection of the columns of x and y.
copy
(bool, optional) — If True, always copy the data.
suffix
(Sequence, optional) — A tuple of suffixes to apply to overlapping columns.
keep
(bool, optional) — If True, keep the grouping variables in the output.
na_matches
(str, optional) — How should NA values be matched? "na": NA values are equal. "never": NA values are never matched.
multiple
(str, optional) — How should multiple matches be handled? "all": All matches are returned. "first": The first match is returned. "last": The last match is returned. "any": Any one of the matched rows in y is returned.
unmatched
(str, optional) — How should unmatched keys that would result in dropped rows be handled? "drop": Drop unmatched keys. "error": Raise an error.
relationship
(str, optional) — The expected relationship between the keys of x and y. None: No expected relationship. "one_to_one": Each row in x matches at most one row in y, and each row in y matches at most one row in x. "one_to_many": Each row in y matches at most one row in x. "many_to_one": Each row in x matches at most one row in y. "many_to_many": No relationship checks are performed.
A data frame
datar.apis.dplyr.
left_join
(
x
, y
, by=None
, copy=False
, suffix=('_x', '_y')
, keep=False
, na_matches='na'
, multiple='all'
, unmatched='drop'
, relationship=None
)
Left join two data frames by matching rows.
The original API: https://dplyr.tidyverse.org/reference/join.html
x
— A data frame
y
— A data frame
by
(optional) — A list of column names to join by. If None, use the intersection of the columns of x and y.
copy
(bool, optional) — If True, always copy the data.
suffix
(Sequence, optional) — A tuple of suffixes to apply to overlapping columns.
keep
(bool, optional) — If True, keep the grouping variables in the output.
na_matches
(str, optional) — How should NA values be matched? "na": NA values are equal. "never": NA values are never matched.
multiple
(str, optional) — How should multiple matches be handled? "all": All matches are returned. "first": The first match is returned. "last": The last match is returned. "any": Any of the matched rows in y is returned.
unmatched
(str, optional) — How should unmatched keys that would result in dropped rows be handled? "drop": Drop unmatched keys. "error": Raise an error.
relationship
(str, optional) — The relationship between x and y. None: No expected relationship. "one_to_one": Each row in x matches at most one row in y. "one_to_many": Each row in x matches zero or more rows in y. "many_to_one": Each row in x matches at most one row in y. "many_to_many": Each row in x matches zero or more rows in y.
A data frame
datar.apis.dplyr.
right_join
(
x
, y
, by=None
, copy=False
, suffix=('_x', '_y')
, keep=False
, na_matches='na'
, multiple='all'
, unmatched='drop'
, relationship=None
)
Right join two data frames by matching rows.
The original API: https://dplyr.tidyverse.org/reference/join.html
x
— A data frame
y
— A data frame
by
(optional) — A list of column names to join by. If None, use the intersection of the columns of x and y.
copy
(bool, optional) — If True, always copy the data.
suffix
(Sequence, optional) — A tuple of suffixes to apply to overlapping columns.
keep
(bool, optional) — If True, keep the grouping variables in the output.
na_matches
(str, optional) — How should NA values be matched? "na": NA values are equal. "never": NA values are never matched.
multiple
(str, optional) — How should multiple matches be handled? "all": All matches are returned. "first": The first match is returned. "last": The last match is returned. "any": Any of the matched rows in y is returned.
unmatched
(str, optional) — How should unmatched keys that would result in dropped rows be handled? "drop": Drop unmatched keys. "error": Raise an error.
relationship
(str, optional) — The relationship between x and y. None: No expected relationship. "one_to_one": Each row in x matches at most one row in y. "one_to_many": Each row in x matches zero or more rows in y. "many_to_one": Each row in x matches at most one row in y. "many_to_many": Each row in x matches zero or more rows in y.
A data frame
datar.apis.dplyr.
full_join
(
x
, y
, by=None
, copy=False
, suffix=('_x', '_y')
, keep=False
, na_matches='na'
, multiple='all'
, unmatched='drop'
, relationship=None
)
Full join two data frames by matching rows.
The original API: https://dplyr.tidyverse.org/reference/join.html
x
— A data frame
y
— A data frame
by
(optional) — A list of column names to join by. If None, use the intersection of the columns of x and y.
copy
(bool, optional) — If True, always copy the data.
suffix
(Sequence, optional) — A tuple of suffixes to apply to overlapping columns.
keep
(bool, optional) — If True, keep the grouping variables in the output.
na_matches
(str, optional) — How should NA values be matched? "na": NA values are equal. "never": NA values are never matched.
multiple
(str, optional) — How should multiple matches be handled? "all": All matches are returned. "first": The first match is returned. "last": The last match is returned. "any": Any of the matched rows in y is returned.
unmatched
(str, optional) — How should unmatched keys that would result in dropped rows be handled? "drop": Drop unmatched keys. "error": Raise an error.
relationship
(str, optional) — The relationship between x and y. None: No expected relationship. "one_to_one": Each row in x matches at most one row in y. "one_to_many": Each row in x matches zero or more rows in y. "many_to_one": Each row in x matches at most one row in y. "many_to_many": Each row in x matches zero or more rows in y.
A data frame
datar.apis.dplyr.
semi_join
(
x
, y
, by=None
, copy=False
, na_matches='na'
)
Semi join two data frames by matching rows.
The original API: https://dplyr.tidyverse.org/reference/join.html
x
— A data frame
y
— A data frame
by
(optional) — A list of column names to join by. If None, use the intersection of the columns of x and y.
copy
(bool, optional) — If True, always copy the data.
na_matches
(str, optional) — How should NA values be matched? "na": NA values are equal. "never": NA values are never matched.
A data frame
datar.apis.dplyr.
anti_join
(
x
, y
, by=None
, copy=False
, na_matches='na'
)
Anti join two data frames by matching rows.
The original API: https://dplyr.tidyverse.org/reference/join.html
x
— A data frame
y
— A data frame
by
(optional) — A list of column names to join by. If None, use the intersection of the columns of x and y.
copy
(bool, optional) — If True, always copy the data.
na_matches
(str, optional) — How should NA values be matched? "na": NA values are equal. "never": NA values are never matched.
A data frame
datar.apis.dplyr.
nest_join
(
x
, y
, by=None
, copy=False
, keep=False
, name=None
, na_matches='na'
, unmatched='drop'
)
Nest join two data frames by matching rows.
The original API: https://dplyr.tidyverse.org/reference/join.html
x
— A data frame
y
— A data frame
by
(optional) — A list of column names to join by. If None, use the intersection of the columns of x and y.
copy
(bool, optional) — If True, always copy the data.
keep
(bool, optional) — If True, keep the grouping variables in the output.
name
(optional) — The name of the column to store the nested data frame.
na_matches
(str, optional) — How should NA values be matched? "na": NA values are equal. "never": NA values are never matched.
unmatched
(str, optional) — How should unmatched keys that would result in dropped rows be handled? "drop": Drop unmatched keys. "error": Raise an error.
A data frame
datar.apis.dplyr.
cross_join
(
x
, y
, copy=False
, suffix=('_x', '_y')
)
Cross joins match each row in x to every row in y, resulting in a data frame with nrow(x) * nrow(y) rows.
The original API: https://dplyr.tidyverse.org/reference/cross_join.html
An object of the same type as x (including the same groups).
datar.apis.dplyr.
lead
(
x
, n=1
, default=None
, order_by=None
)
Shift a vector by n
positions.
The original API: https://dplyr.tidyverse.org/reference/lead.html
x
— A vectorn
(optional) — The number of positions to shift.default
(optional) — The default value to use for positions that don't exist.order_by
(optional) — A vector of column names to order by.
A vector
datar.apis.dplyr.
lag
(
x
, n=1
, default=None
, order_by=None
)
Shift a vector by n
positions.
The original API: https://dplyr.tidyverse.org/reference/lag.html
x
— A vectorn
(optional) — The number of positions to shift.default
(optional) — The default value to use for positions that don't exist.order_by
(optional) — A vector of column names to order by.
A vector
datar.apis.dplyr.
mutate
(
_data
, *args
, _keep='all'
, _before=None
, _after=None
, **kwargs
)
Add new columns to a data frame.
The original API: https://dplyr.tidyverse.org/reference/mutate.html
_data
— A data frame
*args
— Together with **kwargs, the name-value pairs described under **kwargs below.
_keep
(str, optional) — allows you to control which columns from _data are retained in the output:
- "all", the default, retains all variables.
- "used" keeps any variables used to make new variables; it's useful for checking your work as it displays inputs and outputs side-by-side.
- "unused" keeps only existing variables not used to make new variables.
- "none", only keeps grouping keys (like transmute()).
_before
(optional) — A list of column names to put the new columns before.
_after
(optional) — A list of column names to put the new columns after.
**kwargs
— Name-value pairs. The name gives the name of the column in the output. The value can be:
- A vector of length 1, which will be recycled to the correct length.
- A vector the same length as the current group (or the whole data frame if ungrouped).
- None to remove the column
An object of the same type as _data. The output has the following properties:
- Rows are not affected.
- Existing columns will be preserved according to the _keep argument. New columns will be placed according to the _before and _after arguments. If _keep = "none" (as in transmute()), the output order is determined only by ..., not the order of existing columns.
- Columns given value None will be removed
- Groups will be recomputed if a grouping variable is mutated.
- Data frame attributes are preserved.
datar.apis.dplyr.
transmute
(
_data
, *args
, _before=None
, _after=None
, **kwargs
)
Add new columns to a data frame and remove existing columns using mutate with _keep="none".
The original API: https://dplyr.tidyverse.org/reference/mutate.html
_data
— A data frame
*args
— Together with **kwargs, the name-value pairs described under **kwargs below.
_before
(optional) — A list of column names to put the new columns before.
_after
(optional) — A list of column names to put the new columns after.
**kwargs
— Name-value pairs. The name gives the name of the column in the output. The value can be:
- A vector of length 1, which will be recycled to the correct length.
- A vector the same length as the current group (or the whole data frame if ungrouped).
- None to remove the column
An object of the same type as _data. The output has the following properties:
- Rows are not affected.
- Existing columns will be preserved according to the _keep argument. New columns will be placed according to the _before and _after arguments. If _keep = "none" (as in transmute()), the output order is determined only by ..., not the order of existing columns.
- Columns given value None will be removed
- Groups will be recomputed if a grouping variable is mutated.
- Data frame attributes are preserved.
datar.apis.dplyr.
order_by
(
order
, call
)
Order the data by the given order
Note
This function should be called as an argument of a verb. If you want to call it regularly, try with_order()
>>> df = tibble(x=c[1:6])
>>> df >> mutate(y=order_by(c[5:], cumsum(f.x)))
>>> # df.y:
>>> # 15, 14, 12, 9, 5
order
— An iterable to control the data order
call
— The call producing the data to be ordered (e.g. cumsum(f.x) in the example above)
A Function expression for verb to evaluate.
datar.apis.dplyr.
with_order
(
order
, func
, x
, *args
, **kwargs
)
Control argument and result of a window function
>>> with_order([5,4,3,2,1], cumsum, [1,2,3,4,5])
>>> # 15, 14, 12, 9, 5
order
— An iterable to order the argument and result
func
— The window function
x
— The first argument for the function
*args
— Other positional arguments for the function
**kwargs
— Other keyword arguments for the function
The ordered result or an expression if there is expression in arguments
datar.apis.dplyr.
pull
(
_data
, var=-1
, name=None
, to=None
)
Pull a series or a dataframe from a dataframe
_data
— The dataframe
var
(str | int, optional) — The column to pull, either the name or the index
name
(optional) — The name of the pulled value
- If `to` is frame, or the value pulled is data frame, it will be the column names
- If `to` is series, it will be the series name. If multiple names are given, only the first name will be used.
- If `to` is series, but value pulled is a data frame, then a dictionary of series with the series names as keys or given `name` as keys.
to
(optional) — Type of data to return. Only works when pulling `a` for name `a$b`
- series: Return a pandas Series object. Group information will be lost. If pulled value is a dataframe, it will return a dict of series, with the series names or the `name` provided.
- array: Return a numpy.ndarray object
- frame: Return a DataFrame with that column
- list: Return a python list
- dict: Return a dict with `name` as keys and pulled value as values. Only a single column is allowed to pull
- If not provided: `series` when pulled data has only one column. `dict` if `name` provided and has the same length as the pulled single column. Otherwise `frame`.
The data according to to
datar.apis.dplyr.
row_number
(
x=Symbolic()
)
Get the row number of x
Note that this function doesn't support piping.
x
— The data to get row number. Defaults to Symbolic() so the whole data is used by default when called row_number()
The row number
datar.apis.dplyr.
ntile
(
x=Symbolic()
, n=None
)
A rough rank, which breaks the input vector into n buckets. The size of the buckets may differ by up to one, larger buckets have lower rank.
Note that this function doesn't support piping.
x
— The data to get row number. Defaults to Symbolic() so the whole data is used by default when called ntile(n=...)
n
(int, optional) — The number of groups to divide the data into
The row number
datar.apis.dplyr.
min_rank
(
x=Symbolic()
, na_last='keep'
)
Get the min rank of x
Note that this function doesn't support piping.
x
— The data to get row number. Defaults to Symbolic() so the whole data is used by default when called min_rank()
na_last
(str, optional) — How NA values are ranked
- "keep": NA values are ranked at the end
- "top": NA values are ranked at the top
- "bottom": NA values are ranked at the bottom
The row number
datar.apis.dplyr.
dense_rank
(
x=Symbolic()
, na_last='keep'
)
Get the dense rank of x
Note that this function doesn't support piping.
x
— The data to get row number. Defaults to Symbolic() so the whole data is used by default when called dense_rank()
na_last
(str, optional) — How NA values are ranked
- "keep": NA values are ranked at the end
- "top": NA values are ranked at the top
- "bottom": NA values are ranked at the bottom
The row number
datar.apis.dplyr.
percent_rank
(
x=Symbolic()
, na_last='keep'
)
Get the percent rank of x
Note that this function doesn't support piping.
x
— The data to get row number. Defaults to Symbolic() so the whole data is used by default when called percent_rank()
na_last
(str, optional) — How NA values are ranked
- "keep": NA values are ranked at the end
- "top": NA values are ranked at the top
- "bottom": NA values are ranked at the bottom
The row number
datar.apis.dplyr.
cume_dist
(
x=Symbolic()
, na_last='keep'
)
Get the cume_dist of x
Note that this function doesn't support piping.
x
— The data to get row number. Defaults to Symbolic() so the whole data is used by default when called cume_dist()
na_last
(str, optional) — How NA values are ranked
- "keep": NA values are ranked at the end
- "top": NA values are ranked at the top
- "bottom": NA values are ranked at the bottom
The row number
datar.apis.dplyr.
recode
(
_x
, *args
, _default=None
, _missing=None
, **kwargs
)
Recode a vector, replacing elements in it
*args
— Together with **kwargs, the replacements
_default
(optional) — If supplied, all values not otherwise matched will be given this value. If not supplied and if the replacements are the same type as the original values in series, unmatched values are not changed. If not supplied and if the replacements are not compatible, unmatched values are replaced with np.nan.
_missing
(optional) — If supplied, any missing values in _x will be replaced by this value.
**kwargs
— replacements
_x
— A vector to modify
The vector with values replaced
datar.apis.dplyr.
recode_factor
(
_x
, *args
, _default=None
, _missing=None
, _ordered=False
, **kwargs
)
Recode a factor, replacing levels in it
*args
— Together with **kwargs, the replacements
_default
(optional) — If supplied, all values not otherwise matched will be given this value. If not supplied and if the replacements are the same type as the original values in series, unmatched values are not changed. If not supplied and if the replacements are not compatible, unmatched values are replaced with np.nan.
_missing
(optional) — If supplied, any missing values in _x will be replaced by this value.
_ordered
(bool, optional) — If True, the factor will be ordered
**kwargs
— replacements
_x
— A factor to modify
The factor with levels replaced
datar.apis.dplyr.
relocate
(
_data
, *args
, _before=None
, _after=None
, **kwargs
)
change column positions
See original API https://dplyr.tidyverse.org/reference/relocate.html
_data
— A data frame
*args
— Together with **kwargs, the columns to rename and move
_before
(int | str, optional) — Together with _after, the destination; see _after below.
_after
(int | str, optional) — Destination. Supplying neither will move columns to the left-hand side; specifying both is an error.
**kwargs
— Columns to rename and move
An object of the same type as _data. The output has the following properties:
- Rows are not affected.
- The same columns appear in the output, but (usually) in a different place.
- Data frame attributes are preserved.
- Groups are not affected
datar.apis.dplyr.
rename
(
_data
, **kwargs
)
Rename columns
See original API https://dplyr.tidyverse.org/reference/rename.html
_data
— A data frame**kwargs
— Columns to rename
The dataframe with new names
datar.apis.dplyr.
rename_with
(
_data
, _fn
, *args
, **kwargs
)
Rename columns with a function
See original API https://dplyr.tidyverse.org/reference/rename.html
_data
— A data frame
_fn
— A function to apply to column names
*args
— the columns to rename and non-keyword arguments for the _fn. If *args is not provided, then assuming all columns, and no non-keyword arguments are allowed to pass to the function, use keyword arguments instead.
**kwargs
— keyword arguments for _fn
The dataframe with new names
datar.apis.dplyr.
rows_insert
(
x
, y
, by=None
, conflict='error'
, **kwargs
)
Insert rows from y into x
See original API https://dplyr.tidyverse.org/reference/rows.html
x
— A data frame
y
— A data frame
by
(optional) — An unnamed character vector giving the key columns. The key columns must exist in both x and y. Keys typically uniquely identify each row, but this is only enforced for the key values of y. By default, we use the first column in y, since the first column is a reasonable place to put an identifier variable.
conflict
(str, optional) — How to handle conflicts
- "error": Throw an error
- "ignore": Ignore conflicts
**kwargs
— Additional arguments to pass to the backend, such as copy and in_place. Depends on the backend implementation.
A data frame with all existing rows and potentially new rows
datar.apis.dplyr.
rows_update
(
x
, y
, by=None
, unmatched='error'
, **kwargs
)
Update rows in x with values from y
See original API https://dplyr.tidyverse.org/reference/rows.html
x
— A data frame
y
— A data frame
by
(optional) — An unnamed character vector giving the key columns. The key columns must exist in both x and y. Keys typically uniquely identify each row, but this is only enforced for the key values of y. By default, we use the first column in y, since the first column is a reasonable place to put an identifier variable.
unmatched
(str, optional) — How should keys in y that are unmatched by the keys in x be handled? One of:
- "error", the default, will error if there are any keys in y that are unmatched by the keys in x.
- "ignore" will ignore rows in y with keys that are unmatched by the keys in x.
**kwargs
— Additional arguments to pass to the backend, such as copy and in_place. Depends on the backend implementation.
A data frame with all existing rows and potentially new rows
datar.apis.dplyr.
rows_patch
(
x
, y
, by=None
, unmatched='error'
, **kwargs
)
Patch rows in x with values from y
See original API https://dplyr.tidyverse.org/reference/rows.html
x
— A data frame
y
— A data frame
by
(optional) — An unnamed character vector giving the key columns. The key columns must exist in both x and y. Keys typically uniquely identify each row, but this is only enforced for the key values of y. By default, we use the first column in y, since the first column is a reasonable place to put an identifier variable.
unmatched
(str, optional) — How should keys in y that are unmatched by the keys in x be handled? One of:
- "error", the default, will error if there are any keys in y that are unmatched by the keys in x.
- "ignore" will ignore rows in y with keys that are unmatched by the keys in x.
**kwargs
— Additional arguments to pass to the backend, such as copy and in_place. Depends on the backend implementation.
A data frame with NA values overwritten and the number of rows preserved
datar.apis.dplyr.
rows_upsert
(
x
, y
, by=None
, **kwargs
)
Upsert rows in x with values from y
See original API https://dplyr.tidyverse.org/reference/rows.html
x
— A data frame
y
— A data frame
by
(optional) — An unnamed character vector giving the key columns. The key columns must exist in both x and y. Keys typically uniquely identify each row, but this is only enforced for the key values of y. By default, we use the first column in y, since the first column is a reasonable place to put an identifier variable.
**kwargs
— Additional arguments to pass to the backend, such as copy and in_place. Depends on the backend implementation.
A data frame with rows inserted or updated depending on whether or not the key value in y already exists in x. Key values in y must be unique.
datar.apis.dplyr.
rows_delete
(
x
, y
, by=None
, unmatched='error'
, **kwargs
)
Delete rows in x that match keys in y
See original API https://dplyr.tidyverse.org/reference/rows.html
x
— A data frame
y
— A data frame
by
(optional) — An unnamed character vector giving the key columns. The key columns must exist in both x and y. Keys typically uniquely identify each row, but this is only enforced for the key values of y. By default, we use the first column in y, since the first column is a reasonable place to put an identifier variable.
unmatched
(str, optional) — How should keys in y that are unmatched by the keys in x be handled? One of:
- "error", the default, will error if there are any keys in y that are unmatched by the keys in x.
- "ignore" will ignore rows in y with keys that are unmatched by the keys in x.
**kwargs
— Additional arguments to pass to the backend, such as copy and in_place. Depends on the backend implementation.
A data frame with rows deleted
datar.apis.dplyr.
rows_append
(
x
, y
, **kwargs
)
Append rows in y to x
See original API https://dplyr.tidyverse.org/reference/rows.html
x
— A data frame
y
— A data frame
**kwargs
— Additional arguments to pass to the backend, such as copy and in_place. Depends on the backend implementation.
A data frame with rows appended
datar.apis.dplyr.
select
(
_data
, *args
, **kwargs
)
Select columns from a data frame.
See original API https://dplyr.tidyverse.org/reference/select.html
_data
— A data frame*args
— A list of columns to select**kwargs
— A list of columns to select
A data frame with only the selected columns
datar.apis.dplyr.
union_all
(
x
, y
)
Combine two data frames together.
See original API https://dplyr.tidyverse.org/reference/setops.html
x
— A data framey
— A data frame
A data frame with rows from x and y
datar.apis.dplyr.
summarise
(
_data
, *args
, _groups=None
, **kwargs
)
Summarise a data frame.
See original API https://dplyr.tidyverse.org/reference/summarise.html
_data
— A data frame
*args
— Together with **kwargs, the name-value pairs described under **kwargs below.
_groups
(str, optional) — Grouping structure of the result.
- "drop_last": dropping the last level of grouping.
- "drop": All levels of grouping are dropped.
- "keep": Same grouping structure as _data.
- "rowwise": Each row is its own group.
**kwargs
— Name-value pairs, where value is the summarized data for each group
A data frame with the summarised columns
datar.apis.dplyr.
where
(
_data
, fn
)
Selects the variables for which a function returns True.
See original API https://dplyr.tidyverse.org/reference/filter.html
_data
— A data frame
fn
(Callable) — A function that returns True or False. Currently it has to be a `register_func`/`func_factory` registered function; purrr-like formula not supported yet.
The matched columns
datar.apis.dplyr.
everything
(
_data
)
Select all variables.
See original API https://dplyr.tidyverse.org/reference/select.html
_data
— A data frame
All columns
datar.apis.dplyr.
last_col
(
_data
, offset=0
, vars=None
)
Select the last column.
See original API https://dplyr.tidyverse.org/reference/select.html
_data
— A data frameoffset
(int, optional) — The offset of the last columnvars
(optional) — A list of columns to select
The last column
datar.apis.dplyr.
starts_with
(
_data
, match
, ignore_case=True
, vars=None
)
Select columns that start with a string.
See original API https://dplyr.tidyverse.org/reference/select.html
_data
— A data framematch
— The string to matchignore_case
(bool, optional) — Ignore case when matchingvars
(optional) — A list of columns to select
The matched columns
datar.apis.dplyr.
ends_with
(
_data
, match
, ignore_case=True
, vars=None
)
Select columns that end with a string.
See original API https://dplyr.tidyverse.org/reference/select.html
_data
— A data framematch
— The string to matchignore_case
(bool, optional) — Ignore case when matchingvars
(optional) — A list of columns to select
The matched columns
datar.apis.dplyr.
contains
(
_data
, match
, ignore_case=True
, vars=None
)
Select columns that contain a string.
See original API https://dplyr.tidyverse.org/reference/select.html
_data
— A data framematch
— The string to matchignore_case
(bool, optional) — Ignore case when matchingvars
(optional) — A list of columns to select
The matched columns
datar.apis.dplyr.
matches
(
_data
, match
, ignore_case=True
, vars=None
)
Select columns that match a regular expression.
See original API https://dplyr.tidyverse.org/reference/select.html
_data
— A data framematch
— The regular expression to matchignore_case
(bool, optional) — Ignore case when matchingvars
(optional) — A list of columns to select
The matched columns
datar.apis.dplyr.
num_range
(
prefix
, range_
, width=None
)
Matches a numerical range like x01, x02, x03.
prefix
(str) — A prefix that starts the numeric range.
range_
— A sequence of integers, like range(3) (produces 0,1,2).
width
(int, optional) — Optionally, the "width" of the numeric range. For example, a range of 2 gives "01", a range of three "001", etc.
_data
— The data piped in
A list of ranges with prefix.
datar.apis.dplyr.
all_of
(
_data
, x
)
For strict selection.
If any of the variables in the character vector is missing, an error is thrown.
_data
— The data piped inx
— A set of variables to match the columns
The matched column names
ColumnNotExistingError
— When any of the elements in x does not exist in _data columns
datar.apis.dplyr.
any_of
(
_data
, x
, vars=None
)
For strict selection.
If any of the variables in the character vector is missing, an error is thrown.
_data
— The data piped inx
— A set of variables to match the columnsvars
(optional) — A list of columns to select
The matched column names
ColumnNotExistingError
— When any of the elements in x does not exist in _data columns