recode
In [1]:
Copied!
# Reference page for dplyr's recode(), which the examples below follow:
# https://dplyr.tidyverse.org/reference/recode.html
# Run the shared helper script; presumably this is where nb_header is defined (verify).
%run nb_helpers.py
# Star import: presumably the source of recode, recode_factor, c, sample, NA, factor, letters used below.
from datar.all import *
# Presumably renders the documentation header for `recode` shown under this cell.
nb_header(recode)
# Reference page for dplyr's recode(), which the examples below follow:
# https://dplyr.tidyverse.org/reference/recode.html
# Run the shared helper script; presumably this is where nb_header is defined (verify).
%run nb_helpers.py
# Star import: presumably the source of recode, recode_factor, c, sample, NA, factor, letters used below.
from datar.all import *
# Presumably renders the documentation header for `recode` shown under this cell.
nb_header(recode)
Try this notebook on binder.
★ recode¶
Recode a vector, replacing elements in it¶
Args:¶
x
: A vector to modify
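*args
: Positional replacements: either values matched by 0-based position, or a dict of old-to-new pairs
**kwargs
: Keyword replacements, written as old_value=new_value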
_default
: If supplied, all values not otherwise matched will be
given this value. If not supplied and if the replacements are
the same type as the original values in series, unmatched values
are not changed. If not supplied and if the replacements are
not compatible, unmatched values are replaced with np.nan.
_missing
: If supplied, any missing values in x will be replaced
by this value.
Returns:¶
The vector with values replaced
In [2]:
Copied!
# Draw 10 values from "a"/"b"/"c" with replacement.
# NOTE(review): no seed is set in the visible cells, so the draw presumably differs between runs.
char_vec = sample(c("a", "b", "c"), 10, replace=True)
# Keyword form: "a" becomes "Apple"; values without a replacement are kept.
recode(char_vec, a="Apple")
# Draw 10 values from "a"/"b"/"c" with replacement.
# NOTE(review): no seed is set in the visible cells, so the draw presumably differs between runs.
char_vec = sample(c("a", "b", "c"), 10, replace=True)
# Keyword form: "a" becomes "Apple"; values without a replacement are kept.
recode(char_vec, a="Apple")
Out[2]:
0 Apple 1 c 2 c 3 Apple 4 b 5 Apple 6 b 7 b 8 b 9 c dtype: object
In [3]:
Copied!
# Several keyword replacements at once; "c" has no replacement and is left unchanged.
recode(char_vec, a="Apple", b="Banana")
# Several keyword replacements at once; "c" has no replacement and is left unchanged.
recode(char_vec, a="Apple", b="Banana")
Out[3]:
0 Apple 1 c 2 c 3 Apple 4 Banana 5 Apple 6 Banana 7 Banana 8 Banana 9 c dtype: object
In [4]:
Copied!
# With _default=NA, values without a replacement ("c") become NaN instead of being kept.
recode(char_vec, a="Apple", b="Banana", _default = NA)
# With _default=NA, values without a replacement ("c") become NaN instead of being kept.
recode(char_vec, a="Apple", b="Banana", _default = NA)
Out[4]:
0 Apple 1 NaN 2 NaN 3 Apple 4 Banana 5 Apple 6 Banana 7 Banana 8 Banana 9 NaN dtype: object
In [5]:
Copied!
# Old -> new pairs kept in a dict ...
level_key = dict(a="apple", b="banana", c="carrot")
# ... and unpacked into keyword replacements.
recode(char_vec, **level_key)
# Old -> new pairs kept in a dict ...
level_key = dict(a="apple", b="banana", c="carrot")
# ... and unpacked into keyword replacements.
recode(char_vec, **level_key)
Out[5]:
0 apple 1 carrot 2 carrot 3 apple 4 banana 5 apple 6 banana 7 banana 8 banana 9 carrot dtype: object
In [6]:
Copied!
# Numeric vector 0, 1, 2, 3 followed by a missing value.
num_vec = c(range(4), NA)
# A dict passed positionally gives old -> new pairs for integer values.
# Values without a replacement come back as NaN here, and a warning is logged.
recode(num_vec, {1: 20, 3: 40})
# Numeric vector 0, 1, 2, 3 followed by a missing value.
num_vec = c(range(4), NA)
# A dict passed positionally gives old -> new pairs for integer values.
# Values without a replacement come back as NaN here, and a warning is logged.
recode(num_vec, {1: 20, 3: 40})
[2022-12-02 14:19:25][datar][WARNING] Unreplaced values treated as NA as `_x` is not compatible. Please specify replacements exhaustively or supply `_default`
Out[6]:
0 NaN 1 20 2 NaN 3 40 4 NaN dtype: object
In [7]:
Copied!
# Positional replacements are matched by 0-based position: 0 -> "a", 1 -> "b", 2 -> "c", 3 -> "d".
# The missing value stays NaN.
recode(num_vec, "a", "b", "c", "d")
# Positional replacements are matched by 0-based position: 0 -> "a", 1 -> "b", 2 -> "c", 3 -> "d".
# The missing value stays NaN.
recode(num_vec, "a", "b", "c", "d")
Out[7]:
0 a 1 b 2 c 3 d 4 NaN dtype: object
In [8]:
Copied!
# Value 4 has no positional replacement (only positions 0-3 are given), so it gets _default.
recode(c(0,4,2), "a", "b", "c", "d", _default="nothing")
# Value 4 has no positional replacement (only positions 0-3 are given), so it gets _default.
recode(c(0,4,2), "a", "b", "c", "d", _default="nothing")
Out[8]:
0 a 1 nothing 2 c dtype: object
In [9]:
Copied!
# String replacements for a numeric vector: values without a replacement become NaN, and a warning is logged.
recode(num_vec, {1: "b", 3: "d"})
# String replacements for a numeric vector: values without a replacement become NaN, and a warning is logged.
recode(num_vec, {1: "b", 3: "d"})
[2022-12-02 14:19:31][datar][WARNING] Unreplaced values treated as NA as `_x` is not compatible. Please specify replacements exhaustively or supply `_default`
Out[9]:
0 NaN 1 b 2 NaN 3 d 4 NaN dtype: object
In [10]:
Copied!
# _default fills the unmatched value 3; the missing value is still NaN.
recode(num_vec, "a", "b", "c", _default="other")
# _default fills the unmatched value 3; the missing value is still NaN.
recode(num_vec, "a", "b", "c", _default="other")
Out[10]:
0 a 1 b 2 c 3 other 4 NaN dtype: object
In [11]:
Copied!
# _missing additionally replaces the missing value.
recode(num_vec, "a", "b", "c", _default="other", _missing="missing")
# _missing additionally replaces the missing value.
recode(num_vec, "a", "b", "c", _default="other", _missing="missing")
Out[11]:
0 a 1 b 2 c 3 other 4 missing dtype: object
In [12]:
Copied!
# Build a factor; it displays as a categorical with categories a, b, c.
factor_vec = factor(c("a", "b", "c"))
factor_vec
# Build a factor; it displays as a categorical with categories a, b, c.
factor_vec = factor(c("a", "b", "c"))
factor_vec
Out[12]:
['a', 'b', 'c'] Categories (3, object): ['a', 'b', 'c']
In [13]:
Copied!
# recode() on a factor: the result is still categorical, with category "a" renamed to "Apple"
recode(factor_vec, a="Apple")
# recode() on a factor: the result is still categorical, with category "a" renamed to "Apple"
recode(factor_vec, a="Apple")
Out[13]:
0 Apple 1 b 2 c dtype: category Categories (3, object): ['Apple', 'b', 'c']
In [14]:
Copied!
# recode_factor() on the same factor gives the same categorical result here.
recode_factor(factor_vec, a="Apple")
# recode_factor() on the same factor gives the same categorical result here.
recode_factor(factor_vec, a="Apple")
Out[14]:
0 Apple 1 b 2 c dtype: category Categories (3, object): ['Apple', 'b', 'c']
In [15]:
Copied!
# The result's categories follow the order of the replacements (z, y, x).
# Value 3 has no replacement and becomes NaN, with a warning logged.
recode_factor(num_vec, {0: "z", 1: "y", 2: "x"})
# The result's categories follow the order of the replacements (z, y, x).
# Value 3 has no replacement and becomes NaN, with a warning logged.
recode_factor(num_vec, {0: "z", 1: "y", 2: "x"})
[2022-12-02 14:19:42][datar][WARNING] Unreplaced values treated as NA as `_x` is not compatible. Please specify replacements exhaustively or supply `_default`
Out[15]:
0 z 1 y 2 x 3 NaN 4 NaN dtype: category Categories (3, object): ['z', 'y', 'x']
In [16]:
Copied!
# _default fills value 3, and "D" is appended after the replacement categories.
recode_factor(num_vec, {0: "z", 1: "y", 2: "x"}, _default="D")
# _default fills value 3, and "D" is appended after the replacement categories.
recode_factor(num_vec, {0: "z", 1: "y", 2: "x"}, _default="D")
Out[16]:
0 z 1 y 2 x 3 D 4 NaN dtype: category Categories (4, object): ['z', 'y', 'x', 'D']
In [17]:
Copied!
# _missing replaces the missing value; "M" becomes the last category, after "D".
recode_factor(num_vec, {0: "z", 1: "y", 2: "x"}, _default="D", _missing="M")
# _missing replaces the missing value; "M" becomes the last category, after "D".
recode_factor(num_vec, {0: "z", 1: "y", 2: "x"}, _default="D", _missing="M")
Out[17]:
0 z 1 y 2 x 3 D 4 M dtype: category Categories (5, object): ['z', 'y', 'x', 'D', 'M']
In [18]:
Copied!
# Replaced categories come first, in the order given (z, y); the unreplaced "a" comes last.
recode_factor(list(letters[:3]), b="z", c="y")
# Replaced categories come first, in the order given (z, y); the unreplaced "a" comes last.
recode_factor(list(letters[:3]), b="z", c="y")
Out[18]:
0 a 1 z 2 y dtype: category Categories (3, object): ['z', 'y', 'a']
In [19]:
Copied!
# Old -> new pairs; their order becomes the category order of the result.
level_key = dict(a="apple", b="banana", c="carrot")
# Unpack the dict into keyword replacements; the result is categorical.
recode_factor(char_vec, **level_key)
# Old -> new pairs; their order becomes the category order of the result.
level_key = dict(a="apple", b="banana", c="carrot")
# Unpack the dict into keyword replacements; the result is categorical.
recode_factor(char_vec, **level_key)
Out[19]:
0 apple 1 carrot 2 carrot 3 apple 4 banana 5 apple 6 banana 7 banana 8 banana 9 carrot dtype: category Categories (3, object): ['apple', 'banana', 'carrot']