case_when
In [1]:
Copied!
# Python (datar) port of the dplyr case_when reference examples:
# https://dplyr.tidyverse.org/reference/case_when.html
# Run the shared helper script in this namespace (presumably where nb_header
# is defined -- confirm against nb_helpers.py).
%run nb_helpers.py
from datar.data import starwars
# Star import is expected to supply every verb/helper used below
# (tibble, mutate, case_when, pull, select, f, NA, is_na, seq, sqrt, ...).
from datar.all import *
nb_header(case_when)
# Python (datar) port of the dplyr case_when reference examples:
# https://dplyr.tidyverse.org/reference/case_when.html
# Run the shared helper script in this namespace (presumably where nb_header
# is defined -- confirm against nb_helpers.py).
%run nb_helpers.py
from datar.data import starwars
# Star import is expected to supply every verb/helper used below
# (tibble, mutate, case_when, pull, select, f, NA, is_na, seq, sqrt, ...).
from datar.all import *
nb_header(case_when)
In [2]:
Copied!
# One-column frame with x = 1..50, labelled fizz-buzz style.
# Arguments come in (condition, value) pairs; as the recorded output shows,
# the first matching pair wins, so the most specific test (% 35) goes first
# and the literal True acts as the catch-all.
df = tibble(x=range(1,51))
df >> mutate(y=case_when(
f.x % 35 == 0, "fizz buzz",
f.x % 5 == 0, "fizz",
f.x % 7 == 0, "buzz",
True, as_character(f.x)
)) >> pull(f.y, to="array")
# One-column frame with x = 1..50, labelled fizz-buzz style.
# Arguments come in (condition, value) pairs; as the recorded output shows,
# the first matching pair wins, so the most specific test (% 35) goes first
# and the literal True acts as the catch-all.
df = tibble(x=range(1,51))
df >> mutate(y=case_when(
f.x % 35 == 0, "fizz buzz",
f.x % 5 == 0, "fizz",
f.x % 7 == 0, "buzz",
True, as_character(f.x)
)) >> pull(f.y, to="array")
Out[2]:
array(['1', '2', '3', '4', 'fizz', '6', 'buzz', '8', '9', 'fizz', '11',
'12', '13', 'buzz', 'fizz', '16', '17', '18', '19', 'fizz', 'buzz',
'22', '23', '24', 'fizz', '26', '27', 'buzz', '29', 'fizz', '31',
'32', '33', '34', 'fizz buzz', '36', '37', '38', '39', 'fizz',
'41', 'buzz', '43', '44', 'fizz', '46', '47', '48', 'buzz', 'fizz'],
dtype=object)
In [3]:
Copied!
# Counter-example: with the catch-all True pair listed first, every row takes
# that branch and the later conditions never apply (the recorded output is
# just the numbers as strings).
df >> mutate(y=case_when(
True, as_character(f.x),
f.x % 5 == 0, "fizz",
f.x % 7 == 0, "buzz",
f.x % 35 == 0, "fizz buzz"
)) >> pull(f.y, to="array")
# Counter-example: with the catch-all True pair listed first, every row takes
# that branch and the later conditions never apply (the recorded output is
# just the numbers as strings).
df >> mutate(y=case_when(
True, as_character(f.x),
f.x % 5 == 0, "fizz",
f.x % 7 == 0, "buzz",
f.x % 35 == 0, "fizz buzz"
)) >> pull(f.y, to="array")
Out[3]:
array(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
'13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23',
'24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34',
'35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45',
'46', '47', '48', '49', '50'], dtype=object)
In [4]:
Copied!
# No catch-all pair: rows matching none of the conditions come out as nan in
# the recorded output. 35 is labelled "fizz" here because the % 5 test is
# listed before the % 35 test.
df >> mutate(y=case_when(
f.x % 5 == 0, "fizz",
f.x % 7 == 0, "buzz",
f.x % 35 == 0, "fizz buzz"
)) >> pull(f.y, to="array")
# No catch-all pair: rows matching none of the conditions come out as nan in
# the recorded output. 35 is labelled "fizz" here because the % 5 test is
# listed before the % 35 test.
df >> mutate(y=case_when(
f.x % 5 == 0, "fizz",
f.x % 7 == 0, "buzz",
f.x % 35 == 0, "fizz buzz"
)) >> pull(f.y, to="array")
Out[4]:
array([nan, nan, nan, nan, 'fizz', nan, 'buzz', nan, nan, 'fizz', nan,
nan, nan, 'buzz', 'fizz', nan, nan, nan, nan, 'fizz', 'buzz', nan,
nan, nan, 'fizz', nan, nan, 'buzz', nan, 'fizz', nan, nan, nan,
nan, 'fizz', nan, nan, nan, nan, 'fizz', nan, 'buzz', nan, nan,
'fizz', nan, nan, nan, 'buzz', 'fizz'], dtype=object)
In [5]:
Copied!
# In-place edit of df: set x to NA at row positions 1-3 (the values 2, 3, 4).
# Later cells that reuse df see these missing values. The recorded output
# prints the remaining numbers as '1.0', '6.0', ... (presumably because the
# column becomes float once it holds NA -- confirm).
df.iloc[[1,2,3], 0] = NA
df >> mutate(y=case_when(
f.x % 35 == 0, "fizz buzz",
f.x % 5 == 0, "fizz",
f.x % 7 == 0, "buzz",
# Missing x needs its own explicit test; it is listed before the catch-all.
is_na(f.x), "nope",
True, as_character(f.x)
)) >> pull(f.y, to="array")
# In-place edit of df: set x to NA at row positions 1-3 (the values 2, 3, 4).
# Later cells that reuse df see these missing values. The recorded output
# prints the remaining numbers as '1.0', '6.0', ... (presumably because the
# column becomes float once it holds NA -- confirm).
df.iloc[[1,2,3], 0] = NA
df >> mutate(y=case_when(
f.x % 35 == 0, "fizz buzz",
f.x % 5 == 0, "fizz",
f.x % 7 == 0, "buzz",
# Missing x needs its own explicit test; it is listed before the catch-all.
is_na(f.x), "nope",
True, as_character(f.x)
)) >> pull(f.y, to="array")
Out[5]:
array(['1.0', 'nope', 'nope', 'nope', 'fizz', '6.0', 'buzz', '8.0', '9.0',
'fizz', '11.0', '12.0', '13.0', 'buzz', 'fizz', '16.0', '17.0',
'18.0', '19.0', 'fizz', 'buzz', '22.0', '23.0', '24.0', 'fizz',
'26.0', '27.0', 'buzz', '29.0', 'fizz', '31.0', '32.0', '33.0',
'34.0', 'fizz buzz', '36.0', '37.0', '38.0', '39.0', 'fizz',
'41.0', 'buzz', '43.0', '44.0', 'fizz', '46.0', '47.0', '48.0',
'buzz', 'fizz'], dtype=object)
In [6]:
Copied!
# NA used as a replacement value: the multiple of 35 becomes a real missing
# value (bare nan in the recorded output). Rows where x itself is missing
# have no is_na test here, so they fall through to the catch-all and show up
# as the string 'nan'.
df >> mutate(y=case_when(
f.x % 35 == 0, NA,
f.x % 5 == 0, "fizz",
f.x % 7 == 0, "buzz",
True, as_character(f.x)
)) >> pull(f.y, to="array")
# NA used as a replacement value: the multiple of 35 becomes a real missing
# value (bare nan in the recorded output). Rows where x itself is missing
# have no is_na test here, so they fall through to the catch-all and show up
# as the string 'nan'.
df >> mutate(y=case_when(
f.x % 35 == 0, NA,
f.x % 5 == 0, "fizz",
f.x % 7 == 0, "buzz",
True, as_character(f.x)
)) >> pull(f.y, to="array")
Out[6]:
array(['1.0', 'nan', 'nan', 'nan', 'fizz', '6.0', 'buzz', '8.0', '9.0',
'fizz', '11.0', '12.0', '13.0', 'buzz', 'fizz', '16.0', '17.0',
'18.0', '19.0', 'fizz', 'buzz', '22.0', '23.0', '24.0', 'fizz',
'26.0', '27.0', 'buzz', '29.0', 'fizz', '31.0', '32.0', '33.0',
'34.0', nan, '36.0', '37.0', '38.0', '39.0', 'fizz', '41.0',
'buzz', '43.0', '44.0', 'fizz', '46.0', '47.0', '48.0', 'buzz',
'fizz'], dtype=object)
In [7]:
Copied!
# Numeric replacements with NA as the default: the recorded result is a float
# array in which every non-matching row is nan.
df >> mutate(y=case_when(
f.x % 35 == 0, 35,
f.x % 5 == 0, 5,
f.x % 7 == 0, 7,
True, NA)
) >> pull(f.y, to="array")
# Numeric replacements with NA as the default: the recorded result is a float
# array in which every non-matching row is nan.
df >> mutate(y=case_when(
f.x % 35 == 0, 35,
f.x % 5 == 0, 5,
f.x % 7 == 0, 7,
True, NA)
) >> pull(f.y, to="array")
Out[7]:
array([nan, nan, nan, nan, 5., nan, 7., nan, nan, 5., nan, nan, nan,
7., 5., nan, nan, nan, nan, 5., 7., nan, nan, nan, 5., nan,
nan, 7., nan, 5., nan, nan, nan, nan, 35., nan, nan, nan, nan,
5., nan, 7., nan, nan, 5., nan, nan, nan, 7., 5.])
In [8]:
Copied!
# Rebind df to x = -2, -1.5, ..., 2.
# The recorded RuntimeWarning ("invalid value encountered in sqrt") indicates
# sqrt(f.x) is computed for every row, negatives included, even though only
# rows with x >= 0 take that value; the condition selects results, it does
# not guard the computation.
df = tibble(x=seq(-2, 2.1, by=.5))
df >> mutate(y=case_when(
f.x >= 0, sqrt(f.x),
True, f.x
)) >> pull(f.y, to="array")
# Rebind df to x = -2, -1.5, ..., 2.
# The recorded RuntimeWarning ("invalid value encountered in sqrt") indicates
# sqrt(f.x) is computed for every row, negatives included, even though only
# rows with x >= 0 take that value; the condition selects results, it does
# not guard the computation.
df = tibble(x=seq(-2, 2.1, by=.5))
df >> mutate(y=case_when(
f.x >= 0, sqrt(f.x),
True, f.x
)) >> pull(f.y, to="array")
RuntimeWarning: invalid value encountered in sqrt
Out[8]:
array([-2. , -1.5 , -1. , -0.5 , 0. ,
0.70710678, 1. , 1.22474487, 1.41421356])
In [9]:
Copied!
# Keep the name..hair_color column range plus gender and species, then add a
# `type` label for each character.
# NOTE(review): the recorded output has 6 columns and no hair_color, which
# suggests the slice end was treated as exclusive by the datar version that
# produced it -- confirm against the installed version.
starwars >> \
select(f[f.name:f.hair_color], f.gender, f.species) >> \
mutate(
type = case_when(
# Parentheses are required: Python's `|` binds tighter than `>`.
(f.height > 200) | (f.mass > 200), "large",
f.species == "Droid" , "robot",
# Everything else, including rows with missing height/mass (see output).
True , "other"
)
)
# Keep the name..hair_color column range plus gender and species, then add a
# `type` label for each character.
# NOTE(review): the recorded output has 6 columns and no hair_color, which
# suggests the slice end was treated as exclusive by the datar version that
# produced it -- confirm against the installed version.
starwars >> \
select(f[f.name:f.hair_color], f.gender, f.species) >> \
mutate(
type = case_when(
# Parentheses are required: Python's `|` binds tighter than `>`.
(f.height > 200) | (f.mass > 200), "large",
f.species == "Droid" , "robot",
# Everything else, including rows with missing height/mass (see output).
True , "other"
)
)
Out[9]:
| | name | height | mass | gender | species | type |
|---|---|---|---|---|---|---|
| | <object> | <float64> | <float64> | <object> | <object> | <object> |
| 0 | Luke Skywalker | 172.0 | 77.0 | masculine | Human | other |
| 1 | C-3PO | 167.0 | 75.0 | masculine | Droid | robot |
| 2 | R2-D2 | 96.0 | 32.0 | masculine | Droid | robot |
| 3 | Darth Vader | 202.0 | 136.0 | masculine | Human | large |
| 4 | Leia Organa | 150.0 | 49.0 | feminine | Human | other |
| ... | ... | ... | ... | ... | ... | ... |
| 82 | Rey | NaN | NaN | feminine | Human | other |
| 83 | Poe Dameron | NaN | NaN | masculine | Human | other |
| 84 | BB8 | NaN | NaN | masculine | Droid | robot |
| 85 | Captain Phasma | NaN | NaN | NaN | NaN | other |
| 86 | Padmé Amidala | 165.0 | 45.0 | feminine | Human | other |
87 rows × 6 columns
In [10]:
Copied!
# Same large/robot/other classification on the full starwars frame, returning
# only the new `type` column as an array.
starwars >> \
mutate(type=case_when(
# Parentheses are required: Python's `|` binds tighter than `>`.
(f.height > 200) | (f.mass > 200), "large",
f.species == "Droid", "robot",
True, "other"
)) >> \
pull(f.type, to="array")
# Same large/robot/other classification on the full starwars frame, returning
# only the new `type` column as an array.
starwars >> \
mutate(type=case_when(
# Parentheses are required: Python's `|` binds tighter than `>`.
(f.height > 200) | (f.mass > 200), "large",
f.species == "Droid", "robot",
True, "other"
)) >> \
pull(f.type, to="array")
Out[10]:
array(['other', 'robot', 'robot', 'large', 'other', 'other', 'other',
'robot', 'other', 'other', 'other', 'other', 'large', 'other',
'other', 'large', 'other', 'other', 'other', 'other', 'other',
'robot', 'other', 'other', 'other', 'other', 'other', 'other',
'other', 'other', 'other', 'other', 'other', 'other', 'large',
'large', 'other', 'other', 'other', 'other', 'other', 'other',
'other', 'other', 'other', 'other', 'other', 'other', 'other',
'other', 'other', 'other', 'other', 'large', 'other', 'other',
'other', 'other', 'other', 'other', 'other', 'other', 'other',
'other', 'other', 'other', 'other', 'other', 'large', 'large',
'other', 'other', 'robot', 'other', 'other', 'other', 'large',
'large', 'other', 'other', 'large', 'other', 'other', 'other',
'robot', 'other', 'other'], dtype=object)
In [ ]:
Copied!