group_by
In [1]:
Copied!
# https://dplyr.tidyverse.org/reference/group_by.html
%run nb_helpers.py
from datar.data import mtcars
from datar.all import *
nb_header(group_by, ungroup)
# https://dplyr.tidyverse.org/reference/group_by.html
%run nb_helpers.py
from datar.data import mtcars
from datar.all import *
nb_header(group_by, ungroup)
Try this notebook on binder.
★ group_by¶
Create a grouped frame¶
The original API:
https://dplyr.tidyverse.org/reference/group_by.html
Args:¶
_data
: A data frame
*args
: A variable or function of variables to group by.
_add
: If True
, add grouping variables to an existing group.
_drop
: If True
, drop grouping variables from the output.
Returns:¶
A grouped frame
★ ungroup¶
Remove grouping variables¶
The original API:
https://dplyr.tidyverse.org/reference/ungroup.html
Args:¶
_data
: A grouped frame
*cols
: Columns to remove grouping variables from.
Returns:¶
A data frame
In [2]:
Copied!
by_cyl = mtcars >> group_by(f.cyl)
by_cyl
by_cyl = mtcars >> group_by(f.cyl)
by_cyl
Out[2]:
mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
---|---|---|---|---|---|---|---|---|---|---|---|
<float64> | <int64> | <float64> | <int64> | <float64> | <float64> | <float64> | <int64> | <int64> | <int64> | <int64> | |
0 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 |
1 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 |
2 | 22.8 | 4 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 |
3 | 21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 |
4 | 18.7 | 8 | 360.0 | 175 | 3.15 | 3.440 | 17.02 | 0 | 0 | 3 | 2 |
5 | 18.1 | 6 | 225.0 | 105 | 2.76 | 3.460 | 20.22 | 1 | 0 | 3 | 1 |
6 | 14.3 | 8 | 360.0 | 245 | 3.21 | 3.570 | 15.84 | 0 | 0 | 3 | 4 |
7 | 24.4 | 4 | 146.7 | 62 | 3.69 | 3.190 | 20.00 | 1 | 0 | 4 | 2 |
8 | 22.8 | 4 | 140.8 | 95 | 3.92 | 3.150 | 22.90 | 1 | 0 | 4 | 2 |
9 | 19.2 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.30 | 1 | 0 | 4 | 4 |
10 | 17.8 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.90 | 1 | 0 | 4 | 4 |
11 | 16.4 | 8 | 275.8 | 180 | 3.07 | 4.070 | 17.40 | 0 | 0 | 3 | 3 |
12 | 17.3 | 8 | 275.8 | 180 | 3.07 | 3.730 | 17.60 | 0 | 0 | 3 | 3 |
13 | 15.2 | 8 | 275.8 | 180 | 3.07 | 3.780 | 18.00 | 0 | 0 | 3 | 3 |
14 | 10.4 | 8 | 472.0 | 205 | 2.93 | 5.250 | 17.98 | 0 | 0 | 3 | 4 |
15 | 10.4 | 8 | 460.0 | 215 | 3.00 | 5.424 | 17.82 | 0 | 0 | 3 | 4 |
16 | 14.7 | 8 | 440.0 | 230 | 3.23 | 5.345 | 17.42 | 0 | 0 | 3 | 4 |
17 | 32.4 | 4 | 78.7 | 66 | 4.08 | 2.200 | 19.47 | 1 | 1 | 4 | 1 |
18 | 30.4 | 4 | 75.7 | 52 | 4.93 | 1.615 | 18.52 | 1 | 1 | 4 | 2 |
19 | 33.9 | 4 | 71.1 | 65 | 4.22 | 1.835 | 19.90 | 1 | 1 | 4 | 1 |
20 | 21.5 | 4 | 120.1 | 97 | 3.70 | 2.465 | 20.01 | 1 | 0 | 3 | 1 |
21 | 15.5 | 8 | 318.0 | 150 | 2.76 | 3.520 | 16.87 | 0 | 0 | 3 | 2 |
22 | 15.2 | 8 | 304.0 | 150 | 3.15 | 3.435 | 17.30 | 0 | 0 | 3 | 2 |
23 | 13.3 | 8 | 350.0 | 245 | 3.73 | 3.840 | 15.41 | 0 | 0 | 3 | 4 |
24 | 19.2 | 8 | 400.0 | 175 | 3.08 | 3.845 | 17.05 | 0 | 0 | 3 | 2 |
25 | 27.3 | 4 | 79.0 | 66 | 4.08 | 1.935 | 18.90 | 1 | 1 | 4 | 1 |
26 | 26.0 | 4 | 120.3 | 91 | 4.43 | 2.140 | 16.70 | 0 | 1 | 5 | 2 |
27 | 30.4 | 4 | 95.1 | 113 | 3.77 | 1.513 | 16.90 | 1 | 1 | 5 | 2 |
28 | 15.8 | 8 | 351.0 | 264 | 4.22 | 3.170 | 14.50 | 0 | 1 | 5 | 4 |
29 | 19.7 | 6 | 145.0 | 175 | 3.62 | 2.770 | 15.50 | 0 | 1 | 5 | 6 |
30 | 15.0 | 8 | 301.0 | 335 | 3.54 | 3.570 | 14.60 | 0 | 1 | 5 | 8 |
31 | 21.4 | 4 | 121.0 | 109 | 4.11 | 2.780 | 18.60 | 1 | 1 | 4 | 2 |
TibbleGrouped: cyl (n=3)
In [3]:
Copied!
by_cyl >> group_vars()
by_cyl >> group_vars()
Out[3]:
['cyl']
In [4]:
Copied!
by_cyl >> summarise(
disp = mean(f.disp),
hp = mean(f.hp)
)
by_cyl >> summarise(
disp = mean(f.disp),
hp = mean(f.hp)
)
Out[4]:
cyl | disp | hp | |
---|---|---|---|
<int64> | <float64> | <float64> | |
0 | 6 | 183.314286 | 122.285714 |
1 | 4 | 105.136364 | 82.636364 |
2 | 8 | 353.100000 | 209.214286 |
In [5]:
Copied!
by_cyl >> summarise(
disp = f.disp.mean(),
hp = f.hp.mean()
)
by_cyl >> summarise(
disp = f.disp.mean(),
hp = f.hp.mean()
)
Out[5]:
cyl | disp | hp | |
---|---|---|---|
<int64> | <float64> | <float64> | |
0 | 6 | 183.314286 | 122.285714 |
1 | 4 | 105.136364 | 82.636364 |
2 | 8 | 353.100000 | 209.214286 |
In [6]:
Copied!
by_cyl >> filter(f.disp == max(f.disp))
by_cyl >> filter(f.disp == max(f.disp))
Out[6]:
mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
---|---|---|---|---|---|---|---|---|---|---|---|
<float64> | <int64> | <float64> | <int64> | <float64> | <float64> | <float64> | <int64> | <int64> | <int64> | <int64> | |
0 | 21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 |
1 | 24.4 | 4 | 146.7 | 62 | 3.69 | 3.190 | 20.00 | 1 | 0 | 4 | 2 |
2 | 10.4 | 8 | 472.0 | 205 | 2.93 | 5.250 | 17.98 | 0 | 0 | 3 | 4 |
TibbleGrouped: cyl (n=3)
In [7]:
Copied!
by_vs_am = mtcars >> group_by(f.vs, f.am)
by_vs = by_vs_am >> summarise(n=n())
by_vs
by_vs_am = mtcars >> group_by(f.vs, f.am)
by_vs = by_vs_am >> summarise(n=n())
by_vs
[2022-12-02 14:03:28][datar][ INFO] `summarise()` has grouped output by ['vs'] (override with `_groups` argument)
Out[7]:
vs | am | n | |
---|---|---|---|
<int64> | <int64> | <int64> | |
0 | 0 | 1 | 6 |
1 | 1 | 1 | 7 |
2 | 1 | 0 | 7 |
3 | 0 | 0 | 12 |
TibbleGrouped: vs (n=2)
In [8]:
Copied!
by_vs >> summarise(n=sum(f.n))
by_vs >> summarise(n=sum(f.n))
Out[8]:
vs | n | |
---|---|---|
<int64> | <int64> | |
0 | 0 | 18 |
1 | 1 | 14 |
In [9]:
Copied!
by_vs >> \
ungroup() >> \
summarise(n = sum(f.n))
by_vs >> \
ungroup() >> \
summarise(n = sum(f.n))
Out[9]:
n | |
---|---|
<int64> | |
0 | 32 |
In [10]:
Copied!
mtcars_vsam = mtcars >> group_by(vsam=f.vs + f.am)
mtcars_vsam
mtcars_vsam = mtcars >> group_by(vsam=f.vs + f.am)
mtcars_vsam
Out[10]:
mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | vsam | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
<float64> | <int64> | <float64> | <int64> | <float64> | <float64> | <float64> | <int64> | <int64> | <int64> | <int64> | <int64> | |
0 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 | 1 |
1 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 | 1 |
2 | 22.8 | 4 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 | 2 |
3 | 21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 | 1 |
4 | 18.7 | 8 | 360.0 | 175 | 3.15 | 3.440 | 17.02 | 0 | 0 | 3 | 2 | 0 |
5 | 18.1 | 6 | 225.0 | 105 | 2.76 | 3.460 | 20.22 | 1 | 0 | 3 | 1 | 1 |
6 | 14.3 | 8 | 360.0 | 245 | 3.21 | 3.570 | 15.84 | 0 | 0 | 3 | 4 | 0 |
7 | 24.4 | 4 | 146.7 | 62 | 3.69 | 3.190 | 20.00 | 1 | 0 | 4 | 2 | 1 |
8 | 22.8 | 4 | 140.8 | 95 | 3.92 | 3.150 | 22.90 | 1 | 0 | 4 | 2 | 1 |
9 | 19.2 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.30 | 1 | 0 | 4 | 4 | 1 |
10 | 17.8 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.90 | 1 | 0 | 4 | 4 | 1 |
11 | 16.4 | 8 | 275.8 | 180 | 3.07 | 4.070 | 17.40 | 0 | 0 | 3 | 3 | 0 |
12 | 17.3 | 8 | 275.8 | 180 | 3.07 | 3.730 | 17.60 | 0 | 0 | 3 | 3 | 0 |
13 | 15.2 | 8 | 275.8 | 180 | 3.07 | 3.780 | 18.00 | 0 | 0 | 3 | 3 | 0 |
14 | 10.4 | 8 | 472.0 | 205 | 2.93 | 5.250 | 17.98 | 0 | 0 | 3 | 4 | 0 |
15 | 10.4 | 8 | 460.0 | 215 | 3.00 | 5.424 | 17.82 | 0 | 0 | 3 | 4 | 0 |
16 | 14.7 | 8 | 440.0 | 230 | 3.23 | 5.345 | 17.42 | 0 | 0 | 3 | 4 | 0 |
17 | 32.4 | 4 | 78.7 | 66 | 4.08 | 2.200 | 19.47 | 1 | 1 | 4 | 1 | 2 |
18 | 30.4 | 4 | 75.7 | 52 | 4.93 | 1.615 | 18.52 | 1 | 1 | 4 | 2 | 2 |
19 | 33.9 | 4 | 71.1 | 65 | 4.22 | 1.835 | 19.90 | 1 | 1 | 4 | 1 | 2 |
20 | 21.5 | 4 | 120.1 | 97 | 3.70 | 2.465 | 20.01 | 1 | 0 | 3 | 1 | 1 |
21 | 15.5 | 8 | 318.0 | 150 | 2.76 | 3.520 | 16.87 | 0 | 0 | 3 | 2 | 0 |
22 | 15.2 | 8 | 304.0 | 150 | 3.15 | 3.435 | 17.30 | 0 | 0 | 3 | 2 | 0 |
23 | 13.3 | 8 | 350.0 | 245 | 3.73 | 3.840 | 15.41 | 0 | 0 | 3 | 4 | 0 |
24 | 19.2 | 8 | 400.0 | 175 | 3.08 | 3.845 | 17.05 | 0 | 0 | 3 | 2 | 0 |
25 | 27.3 | 4 | 79.0 | 66 | 4.08 | 1.935 | 18.90 | 1 | 1 | 4 | 1 | 2 |
26 | 26.0 | 4 | 120.3 | 91 | 4.43 | 2.140 | 16.70 | 0 | 1 | 5 | 2 | 1 |
27 | 30.4 | 4 | 95.1 | 113 | 3.77 | 1.513 | 16.90 | 1 | 1 | 5 | 2 | 2 |
28 | 15.8 | 8 | 351.0 | 264 | 4.22 | 3.170 | 14.50 | 0 | 1 | 5 | 4 | 1 |
29 | 19.7 | 6 | 145.0 | 175 | 3.62 | 2.770 | 15.50 | 0 | 1 | 5 | 6 | 1 |
30 | 15.0 | 8 | 301.0 | 335 | 3.54 | 3.570 | 14.60 | 0 | 1 | 5 | 8 | 1 |
31 | 21.4 | 4 | 121.0 | 109 | 4.11 | 2.780 | 18.60 | 1 | 1 | 4 | 2 | 2 |
TibbleGrouped: vsam (n=3)
In [11]:
Copied!
by_cyl >> \
group_by(f.vs, f.am) >> \
group_vars()
by_cyl >> \
group_by(f.vs, f.am) >> \
group_vars()
Out[11]:
['vs', 'am']
In [12]:
Copied!
by_cyl >> \
group_by(f.vs, f.am, _add=True) >> \
group_vars()
by_cyl >> \
group_by(f.vs, f.am, _add=True) >> \
group_vars()
Out[12]:
['cyl', 'vs', 'am']
In [ ]:
Copied!