forcats_lvl_addrm
In [1]:
Copied!
%run nb_helpers.py
from datar.all import *
nb_header(
fct_expand,
fct_explicit_na,
fct_drop,
fct_unify,
book="forcat_lvl_addrm",
)
%run nb_helpers.py
from datar.all import *
nb_header(
fct_expand,
fct_explicit_na,
fct_drop,
fct_unify,
book="forcat_lvl_addrm",
)
fct_expand¶
In [2]:
Copied!
# Draw 20 letters with replacement from the first three letters and store
# them as a factor. No seed is set, so the values differ between runs.
fct = factor(
    sample(letters[:3], 20, replace=TRUE)
)
fct
# Draw 20 letters with replacement from the first three letters and store
# them as a factor. No seed is set, so the values differ between runs.
fct = factor(
    sample(letters[:3], 20, replace=TRUE)
)
fct
Out[2]:
['c', 'c', 'b', 'c', 'a', ..., 'b', 'c', 'b', 'b', 'c'] Length: 20 Categories (3, object): ['a', 'b', 'c']
In [3]:
Copied!
# Add the levels "d", "e" and "f"; the recorded output shows the values
# unchanged and the categories growing from three to six.
fct_expand(fct, "d", "e", "f")
# Add the levels "d", "e" and "f"; the recorded output shows the values
# unchanged and the categories growing from three to six.
fct_expand(fct, "d", "e", "f")
Out[3]:
['c', 'c', 'b', 'c', 'a', ..., 'b', 'c', 'b', 'b', 'c'] Length: 20 Categories (6, object): ['a', 'b', 'c', 'd', 'e', 'f']
In [4]:
Copied!
# Same call with a sequence of levels; in the recorded output the levels
# already present (a, b, c) are not duplicated.
fct_expand(fct, letters[:6])
# Same call with a sequence of levels; in the recorded output the levels
# already present (a, b, c) are not duplicated.
fct_expand(fct, letters[:6])
Out[4]:
['c', 'c', 'b', 'c', 'a', ..., 'b', 'c', 'b', 'b', 'c'] Length: 20 Categories (6, object): ['a', 'b', 'c', 'd', 'e', 'f']
fct_explicit_na¶
In [5]:
Copied!
# Build a factor that contains three missing values, then count per level.
# The recorded output lists the missing entries under NaN.
f1 = factor(
    c("a", "a", NA, NA, "a", "b", NA, "c", "a", "c", "b")
)
fct_count(f1)
# Build a factor that contains three missing values, then count per level.
# The recorded output lists the missing entries under NaN.
f1 = factor(
    c("a", "a", NA, NA, "a", "b", NA, "c", "a", "c", "b")
)
fct_count(f1)
Out[5]:
 | f | n |
---|---|---|
 | <category> | <float64> |
0 | a | 4.0 |
1 | b | 2.0 |
2 | c | 2.0 |
3 | NaN | 3.0 |
In [6]:
Copied!
# Give the missing values an explicit level; the recorded count shows them
# under "(Missing)" instead of NaN.
f2 = fct_explicit_na(f1)
fct_count(f2)
# Give the missing values an explicit level; the recorded count shows them
# under "(Missing)" instead of NaN.
f2 = fct_explicit_na(f1)
fct_count(f2)
Out[6]:
 | f | n |
---|---|---|
 | <category> | <int64> |
0 | a | 4 |
1 | b | 2 |
2 | c | 2 |
3 | (Missing) | 3 |
fct_drop¶
In [7]:
Copied!
# Two observed values but three declared levels, so "c" is an unused level.
fct = factor(
    c("a", "b"),
    levels=c("a", "b", "c"),
)
fct
# Two observed values but three declared levels, so "c" is an unused level.
fct = factor(
    c("a", "b"),
    levels=c("a", "b", "c"),
)
fct
Out[7]:
['a', 'b'] Categories (3, object): ['a', 'b', 'c']
In [8]:
Copied!
# Drop unused levels; the recorded output shows "c" removed.
fct_drop(fct)
# Drop unused levels; the recorded output shows "c" removed.
fct_drop(fct)
Out[8]:
['a', 'b'] Categories (2, object): ['a', 'b']
In [9]:
Copied!
# Restrict dropping to "a": it is in use, so nothing is removed and the
# recorded output still has all three levels (unused "c" is kept).
fct_drop(fct, only="a")
# Restrict dropping to "a": it is in use, so nothing is removed and the
# recorded output still has all three levels (unused "c" is kept).
fct_drop(fct, only="a")
Out[9]:
['a', 'b'] Categories (3, object): ['a', 'b', 'c']
In [10]:
Copied!
# Restrict dropping to "c": it is unused, so the recorded output shows it
# removed, leaving levels "a" and "b".
fct_drop(fct, only="c")
# Restrict dropping to "c": it is unused, so the recorded output shows it
# removed, leaving levels "a" and "b".
fct_drop(fct, only="c")
Out[10]:
['a', 'b'] Categories (2, object): ['a', 'b']
fct_unify¶
In [11]:
Copied!
# Three factors with different level sets; in the recorded output each one
# comes back with the same levels ['a', 'b'].
fs = [
    factor("a"),
    factor("b"),
    factor(c("a", "b")),
]
fct_unify(fs)
# Three factors with different level sets; in the recorded output each one
# comes back with the same levels ['a', 'b'].
fs = [
    factor("a"),
    factor("b"),
    factor(c("a", "b")),
]
fct_unify(fs)
Out[11]:
[['a'] Categories (2, object): ['a', 'b'], ['b'] Categories (2, object): ['a', 'b'], ['a', 'b'] Categories (2, object): ['a', 'b']]