other
In [2]:
Copied!
# datar specific
import numpy
from datar import f
from datar.data import iris
from datar.base import as_date, factor, c
from datar.other import *
from datar.dplyr import mutate, group_by
from datar.tibble import tibble
%run nb_helpers.py
nb_header(
# get,
# flatten,
itemgetter,
attrgetter,
pd_str,
pd_cat,
pd_dt,
book='datar',
)
# datar specific
import numpy
from datar import f
from datar.data import iris
from datar.base import as_date, factor, c
from datar.other import *
from datar.dplyr import mutate, group_by
from datar.tibble import tibble
%run nb_helpers.py
nb_header(
# get,
# flatten,
itemgetter,
attrgetter,
pd_str,
pd_cat,
pd_dt,
book='datar',
)
Try this notebook on binder.
★ itemgetter¶
★ attrgetter¶
Attrgetter as a function for verb¶
This is helpful when we want to access an accessor
(e.g. CategoricalAccessor) from a SeriesGroupBy object
★ pd_str¶
Pandas' str accessor for a Series (x.str)¶
This is helpful when x is a SeriesGroupBy object
★ pd_cat¶
Pandas' cat accessor for a Series (x.cat)¶
This is helpful when x is a SeriesGroupBy object
★ pd_dt¶
Pandas' dt accessor for a Series (x.dt)¶
This is helpful when x is a SeriesGroupBy object
In [3]:
Copied!
# iris >> get(c[:5])
# iris >> get(c[:5])
In [4]:
Copied!
# iris >> get(cols=f.Species)
# iris >> get(cols=f.Species)
In [5]:
Copied!
# select single element
# iris >> get(1, f.Species)
# select single element
# iris >> get(1, f.Species)
In [6]:
Copied!
# get it as a single-element dataframe
# iris >> get([1], f.Species)
# get it as a single-element dataframe
# iris >> get([1], f.Species)
In [7]:
Copied!
# or
# iris >> get(1, [f.Species])
# or
# iris >> get(1, [f.Species])
In [8]:
Copied!
# Build a small two-column frame; the itemgetter example further down
# uses its x and y columns as indices.
df = tibble(x=c[1:3], y=c[3:5])
# df >> flatten()
# Build a small two-column frame; the itemgetter example further down
# uses its x and y columns as indices.
df = tibble(x=c[1:3], y=c[3:5])
# df >> flatten()
In [12]:
Copied!
# Lookup array that is indexed by the values of columns x and y.
arr = numpy.array(['a', 'b', 'c', 'd', 'e'])
# Direct subscripting with a column reference inside the verb fails:
# df >> mutate(a=arr[f.x], b=arr[f.y]) # Error
# itemgetter(arr, idx) performs the subscripting as a function call instead;
# the recorded output shows a = arr[x] and b = arr[y] for each row.
df >> mutate(a=itemgetter(arr, f.x.values), b=itemgetter(arr, f.y.values))
# Lookup array that is indexed by the values of columns x and y.
arr = numpy.array(['a', 'b', 'c', 'd', 'e'])
# Direct subscripting with a column reference inside the verb fails:
# df >> mutate(a=arr[f.x], b=arr[f.y]) # Error
# itemgetter(arr, idx) performs the subscripting as a function call instead;
# the recorded output shows a = arr[x] and b = arr[y] for each row.
df >> mutate(a=itemgetter(arr, f.x.values), b=itemgetter(arr, f.y.values))
Out[12]:
x | y | a | b | |
---|---|---|---|---|
<int64> | <int64> | <object> | <object> | |
0 | 1 | 3 | b | d |
1 | 2 | 4 | c | e |
In [13]:
Copied!
# Re-bind df to a single string column for the accessor examples.
df = tibble(x=["abc", "def"])
# attrgetter(f.x, 'str') fetches the `str` attribute of column x inside the
# verb; .upper() is then called on it (column a is upper-cased in the output).
df >> mutate(a=attrgetter(f.x, 'str').upper())
# Re-bind df to a single string column for the accessor examples.
df = tibble(x=["abc", "def"])
# attrgetter(f.x, 'str') fetches the `str` attribute of column x inside the
# verb; .upper() is then called on it (column a is upper-cased in the output).
df >> mutate(a=attrgetter(f.x, 'str').upper())
Out[13]:
x | a | |
---|---|---|
<object> | <object> | |
0 | abc | ABC |
1 | def | DEF |
In [14]:
Copied!
# Alternative spellings of the previous cell. The pd_str variant is left
# commented out, so only the `f.x.str.upper()` form is executed here.
# or
# df >> mutate(a=pd_str(f.x).upper())
# or
df >> mutate(a=f.x.str.upper())
# Alternative spellings of the previous cell. The pd_str variant is left
# commented out, so only the `f.x.str.upper()` form is executed here.
# or
# df >> mutate(a=pd_str(f.x).upper())
# or
df >> mutate(a=f.x.str.upper())
Out[14]:
x | a | |
---|---|---|
<object> | <object> | |
0 | abc | ABC |
1 | def | DEF |
In [15]:
Copied!
# but when df is grouped
# group_by(g=[1, 2]) adds a grouping column g (one row per group); the result
# is displayed as a TibbleGrouped.
gf = df >> group_by(g=[1, 2])
# pd_str(gf.x)[:2].obj
# pd_str() gives access to the str accessor of the grouped column, so it can
# be sliced: [:2] keeps the first two characters of each value.
gf >> mutate(a=pd_str(gf.x)[:2])
# but when df is grouped
# group_by(g=[1, 2]) adds a grouping column g (one row per group); the result
# is displayed as a TibbleGrouped.
gf = df >> group_by(g=[1, 2])
# pd_str(gf.x)[:2].obj
# pd_str() gives access to the str accessor of the grouped column, so it can
# be sliced: [:2] keeps the first two characters of each value.
gf >> mutate(a=pd_str(gf.x)[:2])
Out[15]:
x | g | a | |
---|---|---|---|
<object> | <int64> | <object> | |
0 | abc | 1 | ab |
1 | def | 2 | de |
TibbleGrouped: g (n=2)
In [16]:
Copied!
# Build a grouped frame with a date column: parse the strings with as_date
# (column x is shown as datetime64[ns] in the output), then group by a new
# column g.
gf = (
tibble(x=["2022-01-01", "2022-12-02"])
>> mutate(x=as_date(f.x, format="%Y-%m-%d"))
>> group_by(g=[1, 2])
)
# pd_dt() gives access to the dt accessor of the grouped column; .month
# extracts the month number of each date.
gf >> mutate(month=pd_dt(gf.x).month)
# Build a grouped frame with a date column: parse the strings with as_date
# (column x is shown as datetime64[ns] in the output), then group by a new
# column g.
gf = (
tibble(x=["2022-01-01", "2022-12-02"])
>> mutate(x=as_date(f.x, format="%Y-%m-%d"))
>> group_by(g=[1, 2])
)
# pd_dt() gives access to the dt accessor of the grouped column; .month
# extracts the month number of each date.
gf >> mutate(month=pd_dt(gf.x).month)
Out[16]:
x | g | month | |
---|---|---|---|
<datetime64[ns]> | <int64> | <int64> | |
0 | 2022-01-01 | 1 | 1 |
1 | 2022-12-02 | 2 | 12 |
TibbleGrouped: g (n=2)
In [17]:
Copied!
# Build a grouped frame with a categorical column: values 1 and 2 drawn from
# the declared levels [1, 2, 3], then group by a new column g.
gf = (
tibble(x=factor([1, 2], levels=[1, 2, 3]))
>> group_by(g=[1, 2])
)
# pd_cat() gives access to the cat accessor of the grouped column; .codes
# yields the integer code of each value (0 and 1 in the output).
gf >> mutate(codes=pd_cat(gf.x).codes)
# Build a grouped frame with a categorical column: values 1 and 2 drawn from
# the declared levels [1, 2, 3], then group by a new column g.
gf = (
tibble(x=factor([1, 2], levels=[1, 2, 3]))
>> group_by(g=[1, 2])
)
# pd_cat() gives access to the cat accessor of the grouped column; .codes
# yields the integer code of each value (0 and 1 in the output).
gf >> mutate(codes=pd_cat(gf.x).codes)
Out[17]:
x | g | codes | |
---|---|---|---|
<category> | <int64> | <int8> | |
0 | 1 | 1 | 0 |
1 | 2 | 2 | 1 |
TibbleGrouped: g (n=2)