group_by
In [1]:
Copied!
# https://dplyr.tidyverse.org/reference/group_by.html
%run nb_helpers.py
from datar.data import mtcars
from datar.all import *
nb_header(group_by, ungroup)
# https://dplyr.tidyverse.org/reference/group_by.html
%run nb_helpers.py
from datar.data import mtcars
from datar.all import *
nb_header(group_by, ungroup)
Try this notebook on binder.
★ group_by¶
Create a grouped frame¶
The original API:
https://dplyr.tidyverse.org/reference/group_by.html
Args:¶
_data: A data frame
*args: A variable or function of variables to group by.
_add: If True, add grouping variables to an existing group.
_drop: If True, drop grouping variables from the output.
Returns:¶
A grouped frame
★ ungroup¶
Remove grouping variables¶
The original API:
https://dplyr.tidyverse.org/reference/ungroup.html
Args:¶
_data: A grouped frame
*cols: Columns to remove grouping variables from.
Returns:¶
A data frame
In [2]:
Copied!
by_cyl = mtcars >> group_by(f.cyl)
by_cyl
by_cyl = mtcars >> group_by(f.cyl)
by_cyl
Out[2]:
| mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| <float64> | <int64> | <float64> | <int64> | <float64> | <float64> | <float64> | <int64> | <int64> | <int64> | <int64> | |
| 0 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 |
| 1 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 |
| 2 | 22.8 | 4 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 |
| 3 | 21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 |
| 4 | 18.7 | 8 | 360.0 | 175 | 3.15 | 3.440 | 17.02 | 0 | 0 | 3 | 2 |
| 5 | 18.1 | 6 | 225.0 | 105 | 2.76 | 3.460 | 20.22 | 1 | 0 | 3 | 1 |
| 6 | 14.3 | 8 | 360.0 | 245 | 3.21 | 3.570 | 15.84 | 0 | 0 | 3 | 4 |
| 7 | 24.4 | 4 | 146.7 | 62 | 3.69 | 3.190 | 20.00 | 1 | 0 | 4 | 2 |
| 8 | 22.8 | 4 | 140.8 | 95 | 3.92 | 3.150 | 22.90 | 1 | 0 | 4 | 2 |
| 9 | 19.2 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.30 | 1 | 0 | 4 | 4 |
| 10 | 17.8 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.90 | 1 | 0 | 4 | 4 |
| 11 | 16.4 | 8 | 275.8 | 180 | 3.07 | 4.070 | 17.40 | 0 | 0 | 3 | 3 |
| 12 | 17.3 | 8 | 275.8 | 180 | 3.07 | 3.730 | 17.60 | 0 | 0 | 3 | 3 |
| 13 | 15.2 | 8 | 275.8 | 180 | 3.07 | 3.780 | 18.00 | 0 | 0 | 3 | 3 |
| 14 | 10.4 | 8 | 472.0 | 205 | 2.93 | 5.250 | 17.98 | 0 | 0 | 3 | 4 |
| 15 | 10.4 | 8 | 460.0 | 215 | 3.00 | 5.424 | 17.82 | 0 | 0 | 3 | 4 |
| 16 | 14.7 | 8 | 440.0 | 230 | 3.23 | 5.345 | 17.42 | 0 | 0 | 3 | 4 |
| 17 | 32.4 | 4 | 78.7 | 66 | 4.08 | 2.200 | 19.47 | 1 | 1 | 4 | 1 |
| 18 | 30.4 | 4 | 75.7 | 52 | 4.93 | 1.615 | 18.52 | 1 | 1 | 4 | 2 |
| 19 | 33.9 | 4 | 71.1 | 65 | 4.22 | 1.835 | 19.90 | 1 | 1 | 4 | 1 |
| 20 | 21.5 | 4 | 120.1 | 97 | 3.70 | 2.465 | 20.01 | 1 | 0 | 3 | 1 |
| 21 | 15.5 | 8 | 318.0 | 150 | 2.76 | 3.520 | 16.87 | 0 | 0 | 3 | 2 |
| 22 | 15.2 | 8 | 304.0 | 150 | 3.15 | 3.435 | 17.30 | 0 | 0 | 3 | 2 |
| 23 | 13.3 | 8 | 350.0 | 245 | 3.73 | 3.840 | 15.41 | 0 | 0 | 3 | 4 |
| 24 | 19.2 | 8 | 400.0 | 175 | 3.08 | 3.845 | 17.05 | 0 | 0 | 3 | 2 |
| 25 | 27.3 | 4 | 79.0 | 66 | 4.08 | 1.935 | 18.90 | 1 | 1 | 4 | 1 |
| 26 | 26.0 | 4 | 120.3 | 91 | 4.43 | 2.140 | 16.70 | 0 | 1 | 5 | 2 |
| 27 | 30.4 | 4 | 95.1 | 113 | 3.77 | 1.513 | 16.90 | 1 | 1 | 5 | 2 |
| 28 | 15.8 | 8 | 351.0 | 264 | 4.22 | 3.170 | 14.50 | 0 | 1 | 5 | 4 |
| 29 | 19.7 | 6 | 145.0 | 175 | 3.62 | 2.770 | 15.50 | 0 | 1 | 5 | 6 |
| 30 | 15.0 | 8 | 301.0 | 335 | 3.54 | 3.570 | 14.60 | 0 | 1 | 5 | 8 |
| 31 | 21.4 | 4 | 121.0 | 109 | 4.11 | 2.780 | 18.60 | 1 | 1 | 4 | 2 |
TibbleGrouped: cyl (n=3)
In [3]:
Copied!
by_cyl >> group_vars()
by_cyl >> group_vars()
Out[3]:
['cyl']
In [4]:
Copied!
by_cyl >> summarise(
disp = mean(f.disp),
hp = mean(f.hp)
)
by_cyl >> summarise(
disp = mean(f.disp),
hp = mean(f.hp)
)
Out[4]:
| cyl | disp | hp | |
|---|---|---|---|
| <int64> | <float64> | <float64> | |
| 0 | 6 | 183.314286 | 122.285714 |
| 1 | 4 | 105.136364 | 82.636364 |
| 2 | 8 | 353.100000 | 209.214286 |
In [5]:
Copied!
by_cyl >> summarise(
disp = f.disp.mean(),
hp = f.hp.mean()
)
by_cyl >> summarise(
disp = f.disp.mean(),
hp = f.hp.mean()
)
Out[5]:
| cyl | disp | hp | |
|---|---|---|---|
| <int64> | <float64> | <float64> | |
| 0 | 6 | 183.314286 | 122.285714 |
| 1 | 4 | 105.136364 | 82.636364 |
| 2 | 8 | 353.100000 | 209.214286 |
In [6]:
Copied!
by_cyl >> filter(f.disp == max(f.disp))
by_cyl >> filter(f.disp == max(f.disp))
Out[6]:
| mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| <float64> | <int64> | <float64> | <int64> | <float64> | <float64> | <float64> | <int64> | <int64> | <int64> | <int64> | |
| 0 | 21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 |
| 1 | 24.4 | 4 | 146.7 | 62 | 3.69 | 3.190 | 20.00 | 1 | 0 | 4 | 2 |
| 2 | 10.4 | 8 | 472.0 | 205 | 2.93 | 5.250 | 17.98 | 0 | 0 | 3 | 4 |
TibbleGrouped: cyl (n=3)
In [7]:
Copied!
by_vs_am = mtcars >> group_by(f.vs, f.am)
by_vs = by_vs_am >> summarise(n=n())
by_vs
by_vs_am = mtcars >> group_by(f.vs, f.am)
by_vs = by_vs_am >> summarise(n=n())
by_vs
[2022-12-02 14:03:28][datar][ INFO] `summarise()` has grouped output by ['vs'] (override with `_groups` argument)
Out[7]:
| vs | am | n | |
|---|---|---|---|
| <int64> | <int64> | <int64> | |
| 0 | 0 | 1 | 6 |
| 1 | 1 | 1 | 7 |
| 2 | 1 | 0 | 7 |
| 3 | 0 | 0 | 12 |
TibbleGrouped: vs (n=2)
In [8]:
Copied!
by_vs >> summarise(n=sum(f.n))
by_vs >> summarise(n=sum(f.n))
Out[8]:
| vs | n | |
|---|---|---|
| <int64> | <int64> | |
| 0 | 0 | 18 |
| 1 | 1 | 14 |
In [9]:
Copied!
by_vs >> \
ungroup() >> \
summarise(n = sum(f.n))
by_vs >> \
ungroup() >> \
summarise(n = sum(f.n))
Out[9]:
| n | |
|---|---|
| <int64> | |
| 0 | 32 |
In [10]:
Copied!
mtcars_vsam = mtcars >> group_by(vsam=f.vs + f.am)
mtcars_vsam
mtcars_vsam = mtcars >> group_by(vsam=f.vs + f.am)
mtcars_vsam
Out[10]:
| mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | vsam | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <float64> | <int64> | <float64> | <int64> | <float64> | <float64> | <float64> | <int64> | <int64> | <int64> | <int64> | <int64> | |
| 0 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 | 1 |
| 1 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 | 1 |
| 2 | 22.8 | 4 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 | 2 |
| 3 | 21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 | 1 |
| 4 | 18.7 | 8 | 360.0 | 175 | 3.15 | 3.440 | 17.02 | 0 | 0 | 3 | 2 | 0 |
| 5 | 18.1 | 6 | 225.0 | 105 | 2.76 | 3.460 | 20.22 | 1 | 0 | 3 | 1 | 1 |
| 6 | 14.3 | 8 | 360.0 | 245 | 3.21 | 3.570 | 15.84 | 0 | 0 | 3 | 4 | 0 |
| 7 | 24.4 | 4 | 146.7 | 62 | 3.69 | 3.190 | 20.00 | 1 | 0 | 4 | 2 | 1 |
| 8 | 22.8 | 4 | 140.8 | 95 | 3.92 | 3.150 | 22.90 | 1 | 0 | 4 | 2 | 1 |
| 9 | 19.2 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.30 | 1 | 0 | 4 | 4 | 1 |
| 10 | 17.8 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.90 | 1 | 0 | 4 | 4 | 1 |
| 11 | 16.4 | 8 | 275.8 | 180 | 3.07 | 4.070 | 17.40 | 0 | 0 | 3 | 3 | 0 |
| 12 | 17.3 | 8 | 275.8 | 180 | 3.07 | 3.730 | 17.60 | 0 | 0 | 3 | 3 | 0 |
| 13 | 15.2 | 8 | 275.8 | 180 | 3.07 | 3.780 | 18.00 | 0 | 0 | 3 | 3 | 0 |
| 14 | 10.4 | 8 | 472.0 | 205 | 2.93 | 5.250 | 17.98 | 0 | 0 | 3 | 4 | 0 |
| 15 | 10.4 | 8 | 460.0 | 215 | 3.00 | 5.424 | 17.82 | 0 | 0 | 3 | 4 | 0 |
| 16 | 14.7 | 8 | 440.0 | 230 | 3.23 | 5.345 | 17.42 | 0 | 0 | 3 | 4 | 0 |
| 17 | 32.4 | 4 | 78.7 | 66 | 4.08 | 2.200 | 19.47 | 1 | 1 | 4 | 1 | 2 |
| 18 | 30.4 | 4 | 75.7 | 52 | 4.93 | 1.615 | 18.52 | 1 | 1 | 4 | 2 | 2 |
| 19 | 33.9 | 4 | 71.1 | 65 | 4.22 | 1.835 | 19.90 | 1 | 1 | 4 | 1 | 2 |
| 20 | 21.5 | 4 | 120.1 | 97 | 3.70 | 2.465 | 20.01 | 1 | 0 | 3 | 1 | 1 |
| 21 | 15.5 | 8 | 318.0 | 150 | 2.76 | 3.520 | 16.87 | 0 | 0 | 3 | 2 | 0 |
| 22 | 15.2 | 8 | 304.0 | 150 | 3.15 | 3.435 | 17.30 | 0 | 0 | 3 | 2 | 0 |
| 23 | 13.3 | 8 | 350.0 | 245 | 3.73 | 3.840 | 15.41 | 0 | 0 | 3 | 4 | 0 |
| 24 | 19.2 | 8 | 400.0 | 175 | 3.08 | 3.845 | 17.05 | 0 | 0 | 3 | 2 | 0 |
| 25 | 27.3 | 4 | 79.0 | 66 | 4.08 | 1.935 | 18.90 | 1 | 1 | 4 | 1 | 2 |
| 26 | 26.0 | 4 | 120.3 | 91 | 4.43 | 2.140 | 16.70 | 0 | 1 | 5 | 2 | 1 |
| 27 | 30.4 | 4 | 95.1 | 113 | 3.77 | 1.513 | 16.90 | 1 | 1 | 5 | 2 | 2 |
| 28 | 15.8 | 8 | 351.0 | 264 | 4.22 | 3.170 | 14.50 | 0 | 1 | 5 | 4 | 1 |
| 29 | 19.7 | 6 | 145.0 | 175 | 3.62 | 2.770 | 15.50 | 0 | 1 | 5 | 6 | 1 |
| 30 | 15.0 | 8 | 301.0 | 335 | 3.54 | 3.570 | 14.60 | 0 | 1 | 5 | 8 | 1 |
| 31 | 21.4 | 4 | 121.0 | 109 | 4.11 | 2.780 | 18.60 | 1 | 1 | 4 | 2 | 2 |
TibbleGrouped: vsam (n=3)
In [11]:
Copied!
by_cyl >> \
group_by(f.vs, f.am) >> \
group_vars()
by_cyl >> \
group_by(f.vs, f.am) >> \
group_vars()
Out[11]:
['vs', 'am']
In [12]:
Copied!
by_cyl >> \
group_by(f.vs, f.am, _add=True) >> \
group_vars()
by_cyl >> \
group_by(f.vs, f.am, _add=True) >> \
group_vars()
Out[12]:
['cyl', 'vs', 'am']
In [ ]:
Copied!