select
In [1]:
Copied!
# https://dplyr.tidyverse.org/reference/select.html
%run nb_helpers.py
from datar.data import starwars, iris
from datar.all import *
nb_header(select)
# https://dplyr.tidyverse.org/reference/select.html
%run nb_helpers.py
from datar.data import starwars, iris
from datar.all import *
nb_header(select)
In [2]:
Copied!
# Keep a single column, referenced by name through `f`.
# The result is a one-column table (87 rows x 1 column).
starwars >> select(f.height)
Out[2]:
| | height |
|---|---|
| | <float64> |
| 0 | 172.0 |
| 1 | 167.0 |
| 2 | 96.0 |
| 3 | 202.0 |
| 4 | 150.0 |
| ... | ... |
| 82 | NaN |
| 83 | NaN |
| 84 | NaN |
| 85 | NaN |
| 86 | 165.0 |
87 rows × 1 columns
In [3]:
Copied!
# The same column reference works in other verbs: here `Sepal_Length` is
# stacked into `name` / `value` columns while the remaining four columns
# are kept (150 rows x 6 columns).
iris >> pivot_longer(f['Sepal_Length'])
Out[3]:
| | Petal_Length | Petal_Width | Sepal_Width | Species | name | value |
|---|---|---|---|---|---|---|
| | <float64> | <float64> | <float64> | <object> | <object> | <float64> |
| 0 | 1.4 | 0.2 | 3.5 | setosa | Sepal_Length | 5.1 |
| 1 | 1.4 | 0.2 | 3.0 | setosa | Sepal_Length | 4.9 |
| 2 | 1.3 | 0.2 | 3.2 | setosa | Sepal_Length | 4.7 |
| 3 | 1.5 | 0.2 | 3.1 | setosa | Sepal_Length | 4.6 |
| 4 | 1.4 | 0.2 | 3.6 | setosa | Sepal_Length | 5.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 145 | 5.2 | 2.3 | 3.0 | virginica | Sepal_Length | 6.7 |
| 146 | 5.0 | 1.9 | 2.5 | virginica | Sepal_Length | 6.3 |
| 147 | 5.2 | 2.0 | 3.0 | virginica | Sepal_Length | 6.5 |
| 148 | 5.4 | 2.3 | 3.4 | virginica | Sepal_Length | 6.2 |
| 149 | 5.1 | 1.8 | 3.0 | virginica | Sepal_Length | 5.9 |
150 rows × 6 columns
In [4]:
Copied!
# Several columns at once. The output follows the order of the arguments
# (homeworld, height, mass), not the order in the source frame.
starwars >> select(f.homeworld, f.height, f.mass)
Out[4]:
| | homeworld | height | mass |
|---|---|---|---|
| | <object> | <float64> | <float64> |
| 0 | Tatooine | 172.0 | 77.0 |
| 1 | Tatooine | 167.0 | 75.0 |
| 2 | Naboo | 96.0 | 32.0 |
| 3 | Tatooine | 202.0 | 136.0 |
| 4 | Alderaan | 150.0 | 49.0 |
| ... | ... | ... | ... |
| 82 | NaN | NaN | NaN |
| 83 | NaN | NaN | NaN |
| 84 | NaN | NaN | NaN |
| 85 | NaN | NaN | NaN |
| 86 | Naboo | 165.0 | 45.0 |
87 rows × 3 columns
In [5]:
Copied!
# Group several column references with `c(...)`. Both columns are stacked,
# so the result has 2 x 150 = 300 rows, labelled by `name`.
iris >> pivot_longer(c(f['Sepal_Length'], f['Petal_Length']))
Out[5]:
| | Petal_Width | Sepal_Width | Species | name | value |
|---|---|---|---|---|---|
| | <float64> | <float64> | <object> | <object> | <float64> |
| 0 | 0.2 | 3.5 | setosa | Sepal_Length | 5.1 |
| 1 | 0.2 | 3.0 | setosa | Sepal_Length | 4.9 |
| 2 | 0.2 | 3.2 | setosa | Sepal_Length | 4.7 |
| 3 | 0.2 | 3.1 | setosa | Sepal_Length | 4.6 |
| 4 | 0.2 | 3.6 | setosa | Sepal_Length | 5.0 |
| ... | ... | ... | ... | ... | ... |
| 295 | 2.3 | 3.0 | virginica | Petal_Length | 5.2 |
| 296 | 1.9 | 2.5 | virginica | Petal_Length | 5.0 |
| 297 | 2.0 | 3.0 | virginica | Petal_Length | 5.2 |
| 298 | 2.3 | 3.4 | virginica | Petal_Length | 5.4 |
| 299 | 1.8 | 3.0 | virginica | Petal_Length | 5.1 |
300 rows × 5 columns
In [6]:
Copied!
# Range of columns via a slice on `c`.
# NOTE: in the recorded output the slice end is exclusive -- only `name` and
# `height` are returned, `mass` is not. This differs from R's `name:mass`,
# which includes both ends.
starwars >> select(c[f.name:f.mass])
Out[6]:
| | name | height |
|---|---|---|
| | <object> | <float64> |
| 0 | Luke Skywalker | 172.0 |
| 1 | C-3PO | 167.0 |
| 2 | R2-D2 | 96.0 |
| 3 | Darth Vader | 202.0 |
| 4 | Leia Organa | 150.0 |
| ... | ... | ... |
| 82 | Rey | NaN |
| 83 | Poe Dameron | NaN |
| 84 | BB8 | NaN |
| 85 | Captain Phasma | NaN |
| 86 | Padmé Amidala | 165.0 |
87 rows × 2 columns
In [7]:
Copied!
# `~` inverts a selection: keep everything the slice does NOT match.
# Because the slice covers only `name` and `height` here, the result starts
# at `mass` (9 columns).
starwars >> select(~c[f.name:f.mass])
Out[7]:
| | mass | hair_color | skin_color | eye_color | birth_year | sex | gender | homeworld | species |
|---|---|---|---|---|---|---|---|---|---|
| | <float64> | <object> | <object> | <object> | <float64> | <object> | <object> | <object> | <object> |
| 0 | 77.0 | blond | fair | blue | 19.0 | male | masculine | Tatooine | Human |
| 1 | 75.0 | NaN | gold | yellow | 112.0 | none | masculine | Tatooine | Droid |
| 2 | 32.0 | NaN | white, blue | red | 33.0 | none | masculine | Naboo | Droid |
| 3 | 136.0 | none | white | yellow | 41.9 | male | masculine | Tatooine | Human |
| 4 | 49.0 | brown | light | brown | 19.0 | female | feminine | Alderaan | Human |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 82 | NaN | brown | light | hazel | NaN | female | feminine | NaN | Human |
| 83 | NaN | brown | light | brown | NaN | male | masculine | NaN | Human |
| 84 | NaN | none | none | black | NaN | none | masculine | NaN | Droid |
| 85 | NaN | unknown | unknown | unknown | NaN | NaN | NaN | NaN | NaN |
| 86 | 45.0 | brown | light | brown | 46.0 | female | feminine | Naboo | Human |
87 rows × 9 columns
In [8]:
Copied!
# Drop an explicit list of columns: invert a `c(...)` group.
# The remaining columns keep their original order.
iris >> select(~c(f['Sepal_Length'], f['Petal_Length']))
Out[8]:
| | Sepal_Width | Petal_Width | Species |
|---|---|---|---|
| | <float64> | <float64> | <object> |
| 0 | 3.5 | 0.2 | setosa |
| 1 | 3.0 | 0.2 | setosa |
| 2 | 3.2 | 0.2 | setosa |
| 3 | 3.1 | 0.2 | setosa |
| 4 | 3.6 | 0.2 | setosa |
| ... | ... | ... | ... |
| 145 | 3.0 | 2.3 | virginica |
| 146 | 2.5 | 1.9 | virginica |
| 147 | 3.0 | 2.0 | virginica |
| 148 | 3.4 | 2.3 | virginica |
| 149 | 3.0 | 1.8 | virginica |
150 rows × 3 columns
In [9]:
Copied!
# Selection helpers can be inverted too: keep every column whose name does
# not end with "Width".
iris >> select(~ends_with("Width"))
Out[9]:
| | Sepal_Length | Petal_Length | Species |
|---|---|---|---|
| | <float64> | <float64> | <object> |
| 0 | 5.1 | 1.4 | setosa |
| 1 | 4.9 | 1.4 | setosa |
| 2 | 4.7 | 1.3 | setosa |
| 3 | 4.6 | 1.5 | setosa |
| 4 | 5.0 | 1.4 | setosa |
| ... | ... | ... | ... |
| 145 | 6.7 | 5.2 | virginica |
| 146 | 6.3 | 5.0 | virginica |
| 147 | 6.5 | 5.2 | virginica |
| 148 | 6.2 | 5.4 | virginica |
| 149 | 5.9 | 5.1 | virginica |
150 rows × 3 columns
In [10]:
Copied!
# `&` intersects two selections: columns that start with "Petal" AND end
# with "Width" -- only `Petal_Width` qualifies.
iris >> select(starts_with("Petal") & ends_with("Width"))
Out[10]:
| | Petal_Width |
|---|---|
| | <float64> |
| 0 | 0.2 |
| 1 | 0.2 |
| 2 | 0.2 |
| 3 | 0.2 |
| 4 | 0.2 |
| ... | ... |
| 145 | 2.3 |
| 146 | 1.9 |
| 147 | 2.0 |
| 148 | 2.3 |
| 149 | 1.8 |
150 rows × 1 columns
In [11]:
Copied!
# `|` unions two selections: columns that start with "Petal" OR end with
# "Width". In the output the left-hand matches come first, then
# `Sepal_Width`.
iris >> select(starts_with("Petal") | ends_with("Width"))
Out[11]:
| | Petal_Length | Petal_Width | Sepal_Width |
|---|---|---|---|
| | <float64> | <float64> | <float64> |
| 0 | 1.4 | 0.2 | 3.5 |
| 1 | 1.4 | 0.2 | 3.0 |
| 2 | 1.3 | 0.2 | 3.2 |
| 3 | 1.5 | 0.2 | 3.1 |
| 4 | 1.4 | 0.2 | 3.6 |
| ... | ... | ... | ... |
| 145 | 5.2 | 2.3 | 3.0 |
| 146 | 5.0 | 1.9 | 2.5 |
| 147 | 5.2 | 2.0 | 3.0 |
| 148 | 5.4 | 2.3 | 3.4 |
| 149 | 5.1 | 1.8 | 3.0 |
150 rows × 3 columns
In [12]:
Copied!
# Combine `&` with `~` for a set difference: columns that start with "Petal"
# but do not end with "Width" -- only `Petal_Length` remains.
iris >> select(starts_with("Petal") & ~ends_with("Width"))
Out[12]:
| | Petal_Length |
|---|---|
| | <float64> |
| 0 | 1.4 |
| 1 | 1.4 |
| 2 | 1.3 |
| 3 | 1.5 |
| 4 | 1.4 |
| ... | ... |
| 145 | 5.2 |
| 146 | 5.0 |
| 147 | 5.2 |
| 148 | 5.4 |
| 149 | 5.1 |
150 rows × 1 columns
In [13]:
Copied!
# Select the last column by position: a negative index counts from the end,
# so -1 picks `Species`.
iris >> select(-1)
Out[13]:
| | Species |
|---|---|
| | <object> |
| 0 | setosa |
| 1 | setosa |
| 2 | setosa |
| 3 | setosa |
| 4 | setosa |
| ... | ... |
| 145 | virginica |
| 146 | virginica |
| 147 | virginica |
| 148 | virginica |
| 149 | virginica |
150 rows × 1 columns
In [14]:
Copied!
# Positional slice on `c`. The recorded output contains `Sepal_Width` and
# `Petal_Length`, i.e. 0-based positions 1 and 2 with the end (3) excluded.
iris >> select(c[1:3])
Out[14]:
| | Sepal_Width | Petal_Length |
|---|---|---|
| | <float64> | <float64> |
| 0 | 3.5 | 1.4 |
| 1 | 3.0 | 1.4 |
| 2 | 3.2 | 1.3 |
| 3 | 3.1 | 1.5 |
| 4 | 3.6 | 1.4 |
| ... | ... | ... |
| 145 | 3.0 | 5.2 |
| 146 | 2.5 | 5.0 |
| 147 | 3.0 | 5.2 |
| 148 | 3.4 | 5.4 |
| 149 | 3.0 | 5.1 |
150 rows × 2 columns
In [15]:
Copied!
# Drop the last column: wrap the position in `c(...)` and invert it with `~`,
# leaving the four measurement columns.
iris >> select(~c(-1))
Out[15]:
Sepal_Length | Sepal_Width | Petal_Length | Petal_Width | |
---|---|---|---|---|
<float64> | <float64> | <float64> | <float64> | |
0 | 5.1 | 3.5 | 1.4 | 0.2 |
1 | 4.9 | 3.0 | 1.4 | 0.2 |
2 | 4.7 | 3.2 | 1.3 | 0.2 |
3 | 4.6 | 3.1 | 1.5 | 0.2 |
... | ... | ... | ... | ... |
4 | 5.0 | 3.6 | 1.4 | 0.2 |
145 | 6.7 | 3.0 | 5.2 | 2.3 |
146 | 6.3 | 2.5 | 5.0 | 1.9 |
147 | 6.5 | 3.0 | 5.2 | 2.0 |
148 | 6.2 | 3.4 | 5.4 | 2.3 |
149 | 5.9 | 3.0 | 5.1 | 1.8 |
150 rows × 4 columns