extract
In [1]:
Copied!
# https://tidyr.tidyverse.org/reference/extract.html
%run nb_helpers.py
from datar.all import *
nb_header(extract)
# https://tidyr.tidyverse.org/reference/extract.html
%run nb_helpers.py
from datar.all import *
nb_header(extract)
Try this notebook on binder.
★ extract¶
Given a regular expression with capturing groups, extract() turns each
group into a new column. If the groups don't match, or the input is NA,
the output will be NA.
See https://tidyr.tidyverse.org/reference/extract.html
Args:¶
data
: The dataframe
col
: Column name or position.
into
: Names of the new variables to create, as a string or a sequence of strings.
Use None to omit the variable in the output.
regex
: A regular expression used to extract the desired values.
There should be one group (defined by ()) for each element of into.
remove
: If True, remove the input column from the output data frame.
convert
: The type to convert all extracted columns to, or a dict specifying
the types of individual columns
Returns:¶
Dataframe with extracted columns.
In [2]:
Copied!
df = tibble(x = c(NA, "a-b", "a-d", "b-c", "d-e"))
df >> extract(f.x, "A")
df = tibble(x = c(NA, "a-b", "a-d", "b-c", "d-e"))
df >> extract(f.x, "A")
Out[2]:
A | |
---|---|
<object> | |
0 | NaN |
1 | a |
2 | a |
3 | b |
4 | d |
In [3]:
Copied!
df >> extract(f.x, c("A", "B"), r"(\w+)-(\w+)")
df >> extract(f.x, c("A", "B"), r"(\w+)-(\w+)")
Out[3]:
A | B | |
---|---|---|
<object> | <object> | |
0 | NaN | NaN |
1 | a | b |
2 | a | d |
3 | b | c |
4 | d | e |
In [4]:
Copied!
df >> extract(f.x, c("A", "B"), r"([a-d]+)-([a-d]+)")
df >> extract(f.x, c("A", "B"), r"([a-d]+)-([a-d]+)")
Out[4]:
A | B | |
---|---|---|
<object> | <object> | |
0 | NaN | NaN |
1 | a | b |
2 | a | d |
3 | b | c |
4 | NaN | NaN |
In [5]:
Copied!
# combine multiple columns
df = tibble(x='abcd')
df >> extract(f.x, ['a', 'b', 'a', 'b'], r'(.)(.)(.)(.)')
# combine multiple columns
df = tibble(x='abcd')
df >> extract(f.x, ['a', 'b', 'a', 'b'], r'(.)(.)(.)(.)')
Out[5]:
a | b | |
---|---|---|
<object> | <object> | |
0 | ac | bd |