summarise

In [1]:

Copied!

# https://dplyr.tidyverse.org/reference/summarise.html
%run nb_helpers.py

from datar.data import mtcars, starwars
from datar.all import *

nb_header(summarise)
# https://dplyr.tidyverse.org/reference/summarise.html
%run nb_helpers.py

from datar.data import mtcars, starwars
from datar.all import *

nb_header(summarise)

Try this notebook on binder.

★ summarise
¶

Summarise a data frame.¶

See original API
https://dplyr.tidyverse.org/reference/summarise.html

Args:¶

_data: A data frame
_groups: Grouping structure of the result.
- "drop_last": dropping the last level of grouping.

- "drop": All levels of grouping are dropped.

- "keep": Same grouping structure as _data.

- "rowwise": Each row is its own group.

*args, **kwargs: Name-value pairs, where each value is the summarised
data for each group

Returns:¶

A data frame with the summarised columns

In [8]:

Copied!

# Ungrouped input: summarise() collapses the whole frame into a single row
# holding the mean of `disp` and the row count.
mtcars >> summarise(mean=mean(f.disp), n=n())

Out[8]:

	mean	n
	<float64>	<int64>
0	230.721875	32

In [9]:

Copied!

# Grouped input: one summary row per `cyl` group.
# The chain is parenthesised so it can span lines without backslashes.
(
    mtcars
    >> group_by(f.cyl)
    >> summarise(mean=mean(f.disp), n=n())
)

Out[9]:

	cyl	mean	n
	<int64>	<float64>	<int64>
0	6	183.314286	7
1	4	105.136364	11
2	8	353.100000	14

In [2]:

Copied!

# A summary expression may yield several values per group; here each group
# contributes two rows, one per requested probability.
# NOTE(review): in the recorded output every `qs` cell holds the whole
# [q25, q75] list rather than the single quantile matching `prob` -- confirm
# whether that is the intended behaviour of the installed backend.
(
    mtcars
    >> group_by(f.cyl)
    >> summarise(qs=quantile(f.disp, c(0.25, 0.75)), prob=c(0.25, 0.75))
)

[2022-12-02 14:46:41][datar][   INFO] `summarise()` has grouped output by ['cyl'] (override with `_groups` argument)

Out[2]:

	cyl	qs	prob
	<int64>	<object>	<float64>
0	6	[160.0, 196.3]	0.25
1	6	[160.0, 196.3]	0.75
2	4	[78.85, 120.65]	0.25
3	4	[78.85, 120.65]	0.75
4	8	[301.75, 390.0]	0.25
5	8	[301.75, 390.0]	0.75

TibbleGrouped: cyl (n=3)

In [3]:

Copied!





# Temporarily switch off the "`summarise()` has grouped output by ..."
# message; the option only applies inside the `with` block.
with options_context(dplyr_summarise_inform=False):
    quantiles_by_cyl = (
        mtcars
        >> group_by(f.cyl)
        >> summarise(qs=quantile(f.disp, c(0.25, 0.75)), prob=c(0.25, 0.75))
    )

# IPython only echoes the last *top-level* expression of a cell, so an
# expression nested inside `with` is never displayed. Bind the result above
# and show it here so the cell produces its output on a fresh run.
quantiles_by_cyl

Out[3]:

	cyl	qs	prob
	<int64>	<object>	<float64>
0	6	[160.0, 196.3]	0.25
1	6	[160.0, 196.3]	0.75
2	4	[78.85, 120.65]	0.25
3	4	[78.85, 120.65]	0.75
4	8	[301.75, 390.0]	0.25
5	8	[301.75, 390.0]	0.75

TibbleGrouped: cyl (n=3)

In [4]:

Copied!





# With two grouping variables, summarise() drops only the last one by
# default, so the result is still grouped by `cyl`.
(
    mtcars
    >> group_by(f.cyl, f.vs)
    >> summarise(cyl_n=n())
    >> group_vars()
)

[2022-12-02 14:46:52][datar][   INFO] `summarise()` has grouped output by ['cyl'] (override with `_groups` argument)

Out[4]:

['cyl']

In [5]:

Copied!





# Unlike dplyr's summarise, f.disp can be reused.
# NOTE(review): the recorded output shows `sd` as NaN for every group, which
# looks like sd() received the already-summarised single-value `disp` rather
# than the original column -- this contradicts the claim above; confirm
# against the installed datar version before relying on it.
(
    mtcars
    >> group_by(f.cyl)
    >> summarise(disp=mean(f.disp), sd=sd(f.disp))
)

Out[5]:

	cyl	disp	sd
	<int64>	<float64>	<float64>
0	6	183.314286	NaN
1	4	105.136364	NaN
2	8	353.100000	NaN

In [6]:

Copied!





# Create temporary variable: `_disp_m2` (leading underscore) is available to
# later expressions in the same call but is absent from the result, which
# only contains `cyl` and `disp_m2`.
(
    mtcars
    >> group_by(f.cyl)
    >> summarise(_disp_m2=mean(f.disp), disp_m2=f._disp_m2 * 2)
)

Out[6]:

	cyl	disp_m2
	<int64>	<float64>
0	6	366.628571
1	4	210.272727
2	8	706.200000