summarise
In [1]:
Copied!
# https://dplyr.tidyverse.org/reference/summarise.html
%run nb_helpers.py
from datar.data import mtcars, starwars
from datar.all import *
nb_header(summarise)
# https://dplyr.tidyverse.org/reference/summarise.html
%run nb_helpers.py
from datar.data import mtcars, starwars
from datar.all import *
nb_header(summarise)
Try this notebook on binder.
★ summarise¶
Summarise a data frame.¶
See original API
https://dplyr.tidyverse.org/reference/summarise.html
Args:¶
_data
: A data frame
_groups
: Grouping structure of the result.
- "drop_last": dropping the last level of grouping.
- "drop": All levels of grouping are dropped.
- "keep": Same grouping structure as _data.
- "rowwise": Each row is its own group.
*args
**kwargs
: Name-value pairs, where each value is the summarised
data for each group
Returns:¶
A data frame with the summarised columns
In [8]:
Copied!
# Collapse the whole frame into a single summary row.
(
    mtcars
    >> summarise(mean=mean(f.disp), n=n())
)
# Collapse the whole frame into a single summary row.
(
    mtcars
    >> summarise(mean=mean(f.disp), n=n())
)
Out[8]:
mean | n | |
---|---|---|
<float64> | <int64> | |
0 | 230.721875 | 32 |
In [9]:
Copied!
# Mean displacement and row count for each `cyl` group.
(
    mtcars
    >> group_by(f.cyl)
    >> summarise(mean=mean(f.disp), n=n())
)
# Mean displacement and row count for each `cyl` group.
(
    mtcars
    >> group_by(f.cyl)
    >> summarise(mean=mean(f.disp), n=n())
)
Out[9]:
cyl | mean | n | |
---|---|---|---|
<int64> | <float64> | <int64> | |
0 | 6 | 183.314286 | 7 |
1 | 4 | 105.136364 | 11 |
2 | 8 | 353.100000 | 14 |
In [2]:
Copied!
# A summary may yield several rows per group: here each `cyl` group
# produces one row per entry of `prob`.
(
    mtcars
    >> group_by(f.cyl)
    >> summarise(qs=quantile(f.disp, c(0.25, 0.75)), prob=c(0.25, 0.75))
)
# A summary may yield several rows per group: here each `cyl` group
# produces one row per entry of `prob`.
(
    mtcars
    >> group_by(f.cyl)
    >> summarise(qs=quantile(f.disp, c(0.25, 0.75)), prob=c(0.25, 0.75))
)
[2022-12-02 14:46:41][datar][ INFO] `summarise()` has grouped output by ['cyl'] (override with `_groups` argument)
Out[2]:
cyl | qs | prob | |
---|---|---|---|
<int64> | <object> | <float64> | |
0 | 6 | [160.0, 196.3] | 0.25 |
1 | 6 | [160.0, 196.3] | 0.75 |
2 | 4 | [78.85, 120.65] | 0.25 |
3 | 4 | [78.85, 120.65] | 0.75 |
4 | 8 | [301.75, 390.0] | 0.25 |
5 | 8 | [301.75, 390.0] | 0.75 |
TibbleGrouped: cyl (n=3)
In [3]:
Copied!
# Silence the `summarise()` grouping INFO message for the duration of the
# block by turning off the `dplyr_summarise_inform` option.
# The body of the `with` statement must be indented to be valid Python.
with options_context(dplyr_summarise_inform=False):
    mtcars >> \
        group_by(f.cyl) >> \
        summarise(qs=quantile(f.disp, c(0.25, 0.75)), prob=c(0.25, 0.75))
# Silence the `summarise()` grouping INFO message for the duration of the
# block by turning off the `dplyr_summarise_inform` option.
# The body of the `with` statement must be indented to be valid Python.
with options_context(dplyr_summarise_inform=False):
    mtcars >> \
        group_by(f.cyl) >> \
        summarise(qs=quantile(f.disp, c(0.25, 0.75)), prob=c(0.25, 0.75))
Out[3]:
cyl | qs | prob | |
---|---|---|---|
<int64> | <object> | <float64> | |
0 | 6 | [160.0, 196.3] | 0.25 |
1 | 6 | [160.0, 196.3] | 0.75 |
2 | 4 | [78.85, 120.65] | 0.25 |
3 | 4 | [78.85, 120.65] | 0.75 |
4 | 8 | [301.75, 390.0] | 0.25 |
5 | 8 | [301.75, 390.0] | 0.75 |
TibbleGrouped: cyl (n=3)
In [4]:
Copied!
# Group by two variables, summarise, then list the grouping variables
# that remain on the result.
(
    mtcars
    >> group_by(f.cyl, f.vs)
    >> summarise(cyl_n=n())
    >> group_vars()
)
# Group by two variables, summarise, then list the grouping variables
# that remain on the result.
(
    mtcars
    >> group_by(f.cyl, f.vs)
    >> summarise(cyl_n=n())
    >> group_vars()
)
[2022-12-02 14:46:52][datar][ INFO] `summarise()` has grouped output by ['cyl'] (override with `_groups` argument)
Out[4]:
['cyl']
In [5]:
Copied!
# NOTE(review): the recorded output shows `sd` as NaN for every group, which
# suggests `f.disp` inside `sd()` refers to the newly created `disp` (the
# per-group mean, a single value), as in dplyr's summarise -- confirm.
mtcars >> \
group_by(f.cyl) >> \
summarise(disp=mean(f.disp), sd=sd(f.disp))
# NOTE(review): the recorded output shows `sd` as NaN for every group, which
# suggests `f.disp` inside `sd()` refers to the newly created `disp` (the
# per-group mean, a single value), as in dplyr's summarise -- confirm.
mtcars >> \
group_by(f.cyl) >> \
summarise(disp=mean(f.disp), sd=sd(f.disp))
Out[5]:
cyl | disp | sd | |
---|---|---|---|
<int64> | <float64> | <float64> | |
0 | 6 | 183.314286 | NaN |
1 | 4 | 105.136364 | NaN |
2 | 8 | 353.100000 | NaN |
In [6]:
Copied!
# Create temporary variable
mtcars >> \
group_by(f.cyl) >> \
summarise(_disp_m2=mean(f.disp), disp_m2=f._disp_m2 * 2)
# Create temporary variable
mtcars >> \
group_by(f.cyl) >> \
summarise(_disp_m2=mean(f.disp), disp_m2=f._disp_m2 * 2)
Out[6]:
cyl | disp_m2 | |
---|---|---|
<int64> | <float64> | |
0 | 6 | 366.628571 |
1 | 4 | 210.272727 |
2 | 8 | 706.200000 |
In [8]:
Copied!
# Pick the column to summarise through a name held in a variable.
var = "mass"
(
    starwars
    >> summarise(avg=mean(f[var]))
)
# Pick the column to summarise through a name held in a variable.
var = "mass"
(
    starwars
    >> summarise(avg=mean(f[var]))
)
Out[8]:
avg | |
---|---|
<float64> | |
0 | 97.311864 |