reframe
In [1]:
Copied!
# Reference: dplyr documentation for `reframe()`
# https://dplyr.tidyverse.org/reference/reframe.html
# Run the shared notebook helper script (presumably where `nb_header` is defined — TODO confirm).
%run nb_helpers.py
from datar.data import starwars
from datar.all import *
# Emit the notebook header for `reframe`.
nb_header(reframe)
# Reference: dplyr documentation for `reframe()`
# https://dplyr.tidyverse.org/reference/reframe.html
# Run the shared notebook helper script (presumably where `nb_header` is defined — TODO confirm).
%run nb_helpers.py
from datar.data import starwars
from datar.all import *
# Emit the notebook header for `reframe`.
nb_header(reframe)
In [2]:
Copied!
# Toy frame: a grouping column `g` and a string column `x`.
df = tibble(
    g=c(1, 1, 1, 2, 2, 2, 2),
    x=c("e", "a", "b", "c", "f", "d", "a"),
)
# Lookup values that `x` is intersected with.
table = c("a", "b", "d", "f")

# `reframe()` accepts functions whose result may have
# any number of rows.
df >> reframe(x=intersect(f.x, table))
# Toy frame: a grouping column `g` and a string column `x`.
df = tibble(
    g=c(1, 1, 1, 2, 2, 2, 2),
    x=c("e", "a", "b", "c", "f", "d", "a"),
)
# Lookup values that `x` is intersected with.
table = c("a", "b", "d", "f")

# `reframe()` accepts functions whose result may have
# any number of rows.
df >> reframe(x=intersect(f.x, table))
Out[2]:
x | |
---|---|
<object> | |
0 | a |
1 | b |
2 | f |
3 | d |
In [3]:
Copied!
# Group by `g`: all seven rows are kept; the result is reported as a
# `TibbleGrouped` with two groups (see the footer of the output).
df >> group_by(f.g)
# Group by `g`: all seven rows are kept; the result is reported as a
# `TibbleGrouped` with two groups (see the footer of the output).
df >> group_by(f.g)
Out[3]:
g | x | |
---|---|---|
<int64> | <object> | |
0 | 1 | e |
1 | 1 | a |
2 | 1 | b |
3 | 2 | c |
4 | 2 | f |
5 | 2 | d |
6 | 2 | a |
TibbleGrouped: g (n=2)
In [4]:
Copied!
# Even with `group_by()` upstream, the result of `reframe()` is ungrouped.
(
    df
    >> group_by(f.g)
    >> reframe(x=intersect(f.x, table))
)
# Even with `group_by()` upstream, the result of `reframe()` is ungrouped.
(
    df
    >> group_by(f.g)
    >> reframe(x=intersect(f.x, table))
)
Out[4]:
g | x | |
---|---|---|
<Int64> | <string> | |
0 | 1 | a |
1 | 1 | b |
2 | 2 | f |
3 | 2 | d |
4 | 2 | a |
In [12]:
Copied!
from pandas.core.groupby import SeriesGroupBy
from pipda import register_func


@register_func
def quantile_df(x, probs=None):
    """Return a tibble pairing quantiles of `x` with their probabilities.

    Args:
        x: Values to summarise, e.g. a plain list; a pandas
            `SeriesGroupBy` is special-cased (see below).
        probs: Probabilities to compute quantiles for. Defaults to
            `[0.25, 0.5, 0.75]` when omitted.

    Returns:
        A tibble with two columns: `val`, the quantiles of `x` computed
        with `na_rm=True`, and `quant`, the probabilities.
    """
    # Use a `None` sentinel rather than a list default: a mutable default
    # object would be shared by every call of the function.
    if probs is None:
        probs = [0.25, 0.5, 0.75]
    return tibble(
        val=quantile(x, probs, na_rm=True),
        # For grouped input the probabilities are wrapped in a one-element
        # list — presumably so the whole list is repeated for every group
        # instead of being aligned row-by-row; TODO confirm against datar.
        quant=[probs] if isinstance(x, SeriesGroupBy) else probs,
    )


x = [10, 15, 18, 12]
quantile_df(x)
from pandas.core.groupby import SeriesGroupBy
from pipda import register_func


@register_func
def quantile_df(x, probs=None):
    """Return a tibble pairing quantiles of `x` with their probabilities.

    Args:
        x: Values to summarise, e.g. a plain list; a pandas
            `SeriesGroupBy` is special-cased (see below).
        probs: Probabilities to compute quantiles for. Defaults to
            `[0.25, 0.5, 0.75]` when omitted.

    Returns:
        A tibble with two columns: `val`, the quantiles of `x` computed
        with `na_rm=True`, and `quant`, the probabilities.
    """
    # Use a `None` sentinel rather than a list default: a mutable default
    # object would be shared by every call of the function.
    if probs is None:
        probs = [0.25, 0.5, 0.75]
    return tibble(
        val=quantile(x, probs, na_rm=True),
        # For grouped input the probabilities are wrapped in a one-element
        # list — presumably so the whole list is repeated for every group
        # instead of being aligned row-by-row; TODO confirm against datar.
        quant=[probs] if isinstance(x, SeriesGroupBy) else probs,
    )


x = [10, 15, 18, 12]
quantile_df(x)
Out[12]:
val | quant | |
---|---|---|
<float64> | <float64> | |
0 | 11.50 | 0.25 |
1 | 13.50 | 0.50 |
2 | 15.75 | 0.75 |
In [13]:
Copied!
# Ungrouped: the result has one row per requested quantile of `height`.
starwars >> reframe(quantile_df(f.height))
# Ungrouped: the result has one row per requested quantile of `height`.
starwars >> reframe(quantile_df(f.height))
Out[13]:
val | quant | |
---|---|---|
<float64> | <float64> | |
0 | 167.0 | 0.25 |
1 | 180.0 | 0.50 |
2 | 191.0 | 0.75 |
In [14]:
Copied!
# Grouped by homeworld: the quantile rows are produced once per group.
(
    starwars
    >> group_by(f.homeworld)
    >> reframe(quantile_df(f.height))
)
# Grouped by homeworld: the quantile rows are produced once per group.
(
    starwars
    >> group_by(f.homeworld)
    >> reframe(quantile_df(f.height))
)
Out[14]:
homeworld | val | quant | |
---|---|---|---|
<string> | <Float64> | <Float64> | |
0 | Tatooine | 165.5 | 0.25 |
1 | Tatooine | 175.0 | 0.5 |
2 | Tatooine | 183.0 | 0.75 |
3 | Naboo | 165.0 | 0.25 |
4 | Naboo | 183.0 | 0.5 |
... | ... | ... | ... |
142 | Umbara | 178.0 | 0.5 |
143 | Umbara | 178.0 | 0.75 |
144 | Utapau | 206.0 | 0.25 |
145 | Utapau | 206.0 | 0.5 |
146 | Utapau | 206.0 | 0.75 |
147 rows × 3 columns
In [ ]:
Copied!
# Apply `quantile_df` to several columns at once; each input column
# contributes `<col>$val` and `<col>$quant` columns to the result.
# NOTE(review): the "All-NaN slice" RuntimeWarning in the output presumably
# comes from groups whose values are all missing — confirm.
(
    starwars
    >> group_by(f.homeworld)
    >> reframe(across(c(f.height, f.mass), quantile_df))
)
# Apply `quantile_df` to several columns at once; each input column
# contributes `<col>$val` and `<col>$quant` columns to the result.
# NOTE(review): the "All-NaN slice" RuntimeWarning in the output presumably
# comes from groups whose values are all missing — confirm.
(
    starwars
    >> group_by(f.homeworld)
    >> reframe(across(c(f.height, f.mass), quantile_df))
)
/home/pwwang/.cache/pypoetry/virtualenvs/datar-TA_GutPO-py3.12/lib/python3.12/site-packages/numpy/lib/_nanfunctions_impl.py:1598: RuntimeWarning: All-NaN slice encountered return _nanquantile_unchecked(
Out[ ]:
homeworld | height$val | height$quant | mass$val | mass$quant | |
---|---|---|---|---|---|
<string> | <Float64> | <Float64> | <Float64> | <Float64> | |
0 | Tatooine | 165.5 | 0.25 | 75.0 | 0.25 |
1 | Tatooine | 175.0 | 0.5 | 80.5 | 0.5 |
2 | Tatooine | 183.0 | 0.75 | 93.0 | 0.75 |
3 | Naboo | 165.0 | 0.25 | 50.25 | 0.25 |
4 | Naboo | 183.0 | 0.5 | 70.5 | 0.5 |
... | ... | ... | ... | ... | ... |
142 | Umbara | 178.0 | 0.5 | 48.0 | 0.5 |
143 | Umbara | 178.0 | 0.75 | 48.0 | 0.75 |
144 | Utapau | 206.0 | 0.25 | 80.0 | 0.25 |
145 | Utapau | 206.0 | 0.5 | 80.0 | 0.5 |
146 | Utapau | 206.0 | 0.75 | 80.0 | 0.75 |
147 rows × 5 columns