select
In [1]:
Copied!
# https://dplyr.tidyverse.org/reference/select.html
%run nb_helpers.py
from datar.data import starwars, iris
from datar.all import *
nb_header(select)
# https://dplyr.tidyverse.org/reference/select.html
%run nb_helpers.py
from datar.data import starwars, iris
from datar.all import *
nb_header(select)
In [2]:
Copied!
# Keep a single column: the result contains only `height`.
starwars >> select(f.height)
Out[2]:
| | height |
|---|---|
| | <float64> |
| 0 | 172.0 |
| 1 | 167.0 |
| 2 | 96.0 |
| 3 | 202.0 |
| 4 | 150.0 |
| ... | ... |
| 82 | NaN |
| 83 | NaN |
| 84 | NaN |
| 85 | NaN |
| 86 | 165.0 |
87 rows × 1 columns
In [3]:
Copied!
# The same column reference works in other verbs: pivot `Sepal_Length` into
# `name`/`value` columns while the remaining columns are kept as they are.
iris >> pivot_longer(f['Sepal_Length'])
Out[3]:
| | Petal_Length | Petal_Width | Sepal_Width | Species | name | value |
|---|---|---|---|---|---|---|
| | <float64> | <float64> | <float64> | <object> | <object> | <float64> |
| 0 | 1.4 | 0.2 | 3.5 | setosa | Sepal_Length | 5.1 |
| 1 | 1.4 | 0.2 | 3.0 | setosa | Sepal_Length | 4.9 |
| 2 | 1.3 | 0.2 | 3.2 | setosa | Sepal_Length | 4.7 |
| 3 | 1.5 | 0.2 | 3.1 | setosa | Sepal_Length | 4.6 |
| 4 | 1.4 | 0.2 | 3.6 | setosa | Sepal_Length | 5.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 145 | 5.2 | 2.3 | 3.0 | virginica | Sepal_Length | 6.7 |
| 146 | 5.0 | 1.9 | 2.5 | virginica | Sepal_Length | 6.3 |
| 147 | 5.2 | 2.0 | 3.0 | virginica | Sepal_Length | 6.5 |
| 148 | 5.4 | 2.3 | 3.4 | virginica | Sepal_Length | 6.2 |
| 149 | 5.1 | 1.8 | 3.0 | virginica | Sepal_Length | 5.9 |
150 rows × 6 columns
In [4]:
Copied!
# Keep several columns; they come back in the order they are listed here.
starwars >> select(f.homeworld, f.height, f.mass)
Out[4]:
| | homeworld | height | mass |
|---|---|---|---|
| | <object> | <float64> | <float64> |
| 0 | Tatooine | 172.0 | 77.0 |
| 1 | Tatooine | 167.0 | 75.0 |
| 2 | Naboo | 96.0 | 32.0 |
| 3 | Tatooine | 202.0 | 136.0 |
| 4 | Alderaan | 150.0 | 49.0 |
| ... | ... | ... | ... |
| 82 | NaN | NaN | NaN |
| 83 | NaN | NaN | NaN |
| 84 | NaN | NaN | NaN |
| 85 | NaN | NaN | NaN |
| 86 | Naboo | 165.0 | 45.0 |
87 rows × 3 columns
In [5]:
Copied!
# Pivot two columns at once by grouping them with c(); every input row yields
# one output row per pivoted column (150 rows become 300).
iris >> pivot_longer(c(f['Sepal_Length'], f['Petal_Length']))
Out[5]:
| | Petal_Width | Sepal_Width | Species | name | value |
|---|---|---|---|---|---|
| | <float64> | <float64> | <object> | <object> | <float64> |
| 0 | 0.2 | 3.5 | setosa | Sepal_Length | 5.1 |
| 1 | 0.2 | 3.0 | setosa | Sepal_Length | 4.9 |
| 2 | 0.2 | 3.2 | setosa | Sepal_Length | 4.7 |
| 3 | 0.2 | 3.1 | setosa | Sepal_Length | 4.6 |
| 4 | 0.2 | 3.6 | setosa | Sepal_Length | 5.0 |
| ... | ... | ... | ... | ... | ... |
| 295 | 2.3 | 3.0 | virginica | Petal_Length | 5.2 |
| 296 | 1.9 | 2.5 | virginica | Petal_Length | 5.0 |
| 297 | 2.0 | 3.0 | virginica | Petal_Length | 5.2 |
| 298 | 2.3 | 3.4 | virginica | Petal_Length | 5.4 |
| 299 | 1.8 | 3.0 | virginica | Petal_Length | 5.1 |
300 rows × 5 columns
In [6]:
Copied!
# Select a range of columns with a slice. In the recorded output the range
# covers `name` and `height` only: the stop column `mass` is excluded, like a
# Python slice.
starwars >> select(c[f.name:f.mass])
Out[6]:
| | name | height |
|---|---|---|
| | <object> | <float64> |
| 0 | Luke Skywalker | 172.0 |
| 1 | C-3PO | 167.0 |
| 2 | R2-D2 | 96.0 |
| 3 | Darth Vader | 202.0 |
| 4 | Leia Organa | 150.0 |
| ... | ... | ... |
| 82 | Rey | NaN |
| 83 | Poe Dameron | NaN |
| 84 | BB8 | NaN |
| 85 | Captain Phasma | NaN |
| 86 | Padmé Amidala | 165.0 |
87 rows × 2 columns
In [7]:
Copied!
# `~` inverts a selection: keep every column outside the name-to-mass slice.
# `mass` itself is kept because the slice stops before it.
starwars >> select(~c[f.name:f.mass])
Out[7]:
| | mass | hair_color | skin_color | eye_color | birth_year | sex | gender | homeworld | species |
|---|---|---|---|---|---|---|---|---|---|
| | <float64> | <object> | <object> | <object> | <float64> | <object> | <object> | <object> | <object> |
| 0 | 77.0 | blond | fair | blue | 19.0 | male | masculine | Tatooine | Human |
| 1 | 75.0 | NaN | gold | yellow | 112.0 | none | masculine | Tatooine | Droid |
| 2 | 32.0 | NaN | white, blue | red | 33.0 | none | masculine | Naboo | Droid |
| 3 | 136.0 | none | white | yellow | 41.9 | male | masculine | Tatooine | Human |
| 4 | 49.0 | brown | light | brown | 19.0 | female | feminine | Alderaan | Human |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 82 | NaN | brown | light | hazel | NaN | female | feminine | NaN | Human |
| 83 | NaN | brown | light | brown | NaN | male | masculine | NaN | Human |
| 84 | NaN | none | none | black | NaN | none | masculine | NaN | Droid |
| 85 | NaN | unknown | unknown | unknown | NaN | NaN | NaN | NaN | NaN |
| 86 | 45.0 | brown | light | brown | 46.0 | female | feminine | Naboo | Human |
87 rows × 9 columns
In [8]:
Copied!
# Drop an explicit set of columns by inverting a c() collection.
iris >> select(~c(f['Sepal_Length'], f['Petal_Length']))
Out[8]:
| | Sepal_Width | Petal_Width | Species |
|---|---|---|---|
| | <float64> | <float64> | <object> |
| 0 | 3.5 | 0.2 | setosa |
| 1 | 3.0 | 0.2 | setosa |
| 2 | 3.2 | 0.2 | setosa |
| 3 | 3.1 | 0.2 | setosa |
| 4 | 3.6 | 0.2 | setosa |
| ... | ... | ... | ... |
| 145 | 3.0 | 2.3 | virginica |
| 146 | 2.5 | 1.9 | virginica |
| 147 | 3.0 | 2.0 | virginica |
| 148 | 3.4 | 2.3 | virginica |
| 149 | 3.0 | 1.8 | virginica |
150 rows × 3 columns
In [9]:
Copied!
# Drop every column whose name ends with "Width".
iris >> select(~ends_with("Width"))
Out[9]:
| | Sepal_Length | Petal_Length | Species |
|---|---|---|---|
| | <float64> | <float64> | <object> |
| 0 | 5.1 | 1.4 | setosa |
| 1 | 4.9 | 1.4 | setosa |
| 2 | 4.7 | 1.3 | setosa |
| 3 | 4.6 | 1.5 | setosa |
| 4 | 5.0 | 1.4 | setosa |
| ... | ... | ... | ... |
| 145 | 6.7 | 5.2 | virginica |
| 146 | 6.3 | 5.0 | virginica |
| 147 | 6.5 | 5.2 | virginica |
| 148 | 6.2 | 5.4 | virginica |
| 149 | 5.9 | 5.1 | virginica |
150 rows × 3 columns
In [10]:
Copied!
# `&` intersects two selections: only columns that both start with "Petal"
# and end with "Width" are kept.
iris >> select(starts_with("Petal") & ends_with("Width"))
Out[10]:
| | Petal_Width |
|---|---|
| | <float64> |
| 0 | 0.2 |
| 1 | 0.2 |
| 2 | 0.2 |
| 3 | 0.2 |
| 4 | 0.2 |
| ... | ... |
| 145 | 2.3 |
| 146 | 1.9 |
| 147 | 2.0 |
| 148 | 2.3 |
| 149 | 1.8 |
150 rows × 1 columns
In [11]:
Copied!
# `|` unions two selections: columns that start with "Petal" or end with
# "Width" are kept.
iris >> select(starts_with("Petal") | ends_with("Width"))
Out[11]:
| | Petal_Length | Petal_Width | Sepal_Width |
|---|---|---|---|
| | <float64> | <float64> | <float64> |
| 0 | 1.4 | 0.2 | 3.5 |
| 1 | 1.4 | 0.2 | 3.0 |
| 2 | 1.3 | 0.2 | 3.2 |
| 3 | 1.5 | 0.2 | 3.1 |
| 4 | 1.4 | 0.2 | 3.6 |
| ... | ... | ... | ... |
| 145 | 5.2 | 2.3 | 3.0 |
| 146 | 5.0 | 1.9 | 2.5 |
| 147 | 5.2 | 2.0 | 3.0 |
| 148 | 5.4 | 2.3 | 3.4 |
| 149 | 5.1 | 1.8 | 3.0 |
150 rows × 3 columns
In [12]:
Copied!
# Combine `&` with `~` for a set difference: columns that start with "Petal"
# but do not end with "Width".
iris >> select(starts_with("Petal") & ~ends_with("Width"))
Out[12]:
| | Petal_Length |
|---|---|
| | <float64> |
| 0 | 1.4 |
| 1 | 1.4 |
| 2 | 1.3 |
| 3 | 1.5 |
| 4 | 1.4 |
| ... | ... |
| 145 | 5.2 |
| 146 | 5.0 |
| 147 | 5.2 |
| 148 | 5.4 |
| 149 | 5.1 |
150 rows × 1 columns
In [13]:
Copied!
# Select the last column: a negative integer counts columns from the end.
iris >> select(-1)
Out[13]:
| | Species |
|---|---|
| | <object> |
| 0 | setosa |
| 1 | setosa |
| 2 | setosa |
| 3 | setosa |
| 4 | setosa |
| ... | ... |
| 145 | virginica |
| 146 | virginica |
| 147 | virginica |
| 148 | virginica |
| 149 | virginica |
150 rows × 1 columns
In [14]:
Copied!
# Select columns by position. Positions are 0-based and the stop is excluded,
# so 1:3 picks the second and third columns (Sepal_Width, Petal_Length).
iris >> select(c[1:3])
Out[14]:
| | Sepal_Width | Petal_Length |
|---|---|---|
| | <float64> | <float64> |
| 0 | 3.5 | 1.4 |
| 1 | 3.0 | 1.4 |
| 2 | 3.2 | 1.3 |
| 3 | 3.1 | 1.5 |
| 4 | 3.6 | 1.4 |
| ... | ... | ... |
| 145 | 3.0 | 5.2 |
| 146 | 2.5 | 5.0 |
| 147 | 3.0 | 5.2 |
| 148 | 3.4 | 5.4 |
| 149 | 3.0 | 5.1 |
150 rows × 2 columns
In [15]:
Copied!
# Drop the last column: wrap the negative position in c() so `~` can invert it.
iris >> select(~c(-1))
Out[15]:
| | Sepal_Length | Sepal_Width | Petal_Length | Petal_Width |
|---|---|---|---|---|
| | <float64> | <float64> | <float64> | <float64> |
| 0 | 5.1 | 3.5 | 1.4 | 0.2 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 |
| ... | ... | ... | ... | ... |
| 145 | 6.7 | 3.0 | 5.2 | 2.3 |
| 146 | 6.3 | 2.5 | 5.0 | 1.9 |
| 147 | 6.5 | 3.0 | 5.2 | 2.0 |
| 148 | 6.2 | 3.4 | 5.4 | 2.3 |
| 149 | 5.9 | 3.0 | 5.1 | 1.8 |
150 rows × 4 columns