count
# https://dplyr.tidyverse.org/reference/count.html
%run nb_helpers.py
from datar.data import starwars
from datar.all import *
nb_header(count, tally, add_count, add_tally)
★ count¶
Count the number of rows in each group¶
Original API:
https://dplyr.tidyverse.org/reference/count.html
Args:¶
_data: A data frame
*args: Variables, or functions of variables.
Use desc() to sort a variable in descending order.
wt: A variable or function of variables to weight by.
sort: If True, the result will be sorted by the count, with the largest groups first.
name: The name of the count column.
_drop: If False, keep grouping variables even if they are not used.
Original API does not support this.
**kwargs: Name-value pairs that are evaluated as in mutate() to create new variables to count by, e.g. count(birth_decade=round(f.birth_year, -1)).
Returns:¶
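A data frame with the same number of rows as the number of groups.
The output has the following properties:
Each output row represents one group (one unique combination of the counted variables), not an individual input row.
The columns are the counted (and grouping) variables followed by the count column (`n` unless `name` is given).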
Groups are not modified.
Data frame attributes are preserved.
★ tally¶
Count the number of rows in each group¶
Original API:
https://dplyr.tidyverse.org/reference/count.html
Args:¶
_data: A data frame
wt: A variable or function of variables to weight by.
sort: If True, the result will be sorted by the count.
name: The name of the count column.
Returns:¶
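A data frame with the same number of rows as the number of groups (a single row if the data is not grouped).
The output has the following properties:
Each output row represents one group, not an individual input row.
The columns are the grouping variables (if any) followed by the count column (`n` unless `name` is given).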
Groups are not modified.
Data frame attributes are preserved.
★ add_count¶
Add a count column to a data frame¶
Original API:
https://dplyr.tidyverse.org/reference/count.html
Args:¶
_data: A data frame
*args: Variables, or functions of variables.
Use desc() to sort a variable in descending order.
wt: A variable or function of variables to weight by.
sort: If True, the result will be sorted by the count.
name: The name of the count column.
**kwargs: Name-value pairs that are evaluated as in mutate() to create new variables to count by.
Returns:¶
A data frame with the same number of rows as the input.
The output has the following properties:
All rows appear in the output; each row carries the count of the group it belongs to.
Existing columns are not modified; a count column (`n` unless `name` is given) is added.
Groups are not modified.
Data frame attributes are preserved.
★ add_tally¶
Add a count column to a data frame¶
Original API:
https://dplyr.tidyverse.org/reference/count.html
Args:¶
_data: A data frame
wt: A variable or function of variables to weight by.
sort: If True, the result will be sorted by the count.
name: The name of the count column.
Returns:¶
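A data frame with the same number of rows as the input.
The output has the following properties:
All rows appear in the output; each row carries the count of its group (the overall total if the data is not grouped).
Existing columns are not modified; a count column (`n` unless `name` is given) is added.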
Groups are not modified.
Data frame attributes are preserved.
# Count how many rows of `starwars` fall into each species; the counts are
# returned in a column named `n`, one row per species.
starwars >> count(f.species)
| | species | n |
|---|---|---|
| | <object> | <int64> |
| 0 | Human | 35 |
| 1 | Droid | 6 |
| 2 | Wookiee | 2 |
| 3 | Rodian | 1 |
| 4 | Hutt | 1 |
| 5 | Yoda's species | 1 |
| 6 | Trandoshan | 1 |
| 7 | Mon Calamari | 1 |
| 8 | Ewok | 1 |
| 9 | Sullustan | 1 |
| 10 | Neimodian | 1 |
| 11 | Gungan | 3 |
| 12 | NaN | 4 |
| 13 | Toydarian | 1 |
| 14 | Dug | 1 |
| 15 | Zabrak | 2 |
| 16 | Twi'lek | 2 |
| 17 | Vulptereen | 1 |
| 18 | Xexto | 1 |
| 19 | Toong | 1 |
| 20 | Cerean | 1 |
| 21 | Nautolan | 1 |
| 22 | Tholothian | 1 |
| 23 | Iktotchi | 1 |
| 24 | Quermian | 1 |
| 25 | Kel Dor | 1 |
| 26 | Chagrian | 1 |
| 27 | Geonosian | 1 |
| 28 | Mirialan | 2 |
| 29 | Clawdite | 1 |
| 30 | Besalisk | 1 |
| 31 | Kaminoan | 2 |
| 32 | Aleena | 1 |
| 33 | Skakoan | 1 |
| 34 | Muun | 1 |
| 35 | Togruta | 1 |
| 36 | Kaleesh | 1 |
| 37 | Pau'an | 1 |
# Same count per species, but sort=True orders the result by `n` so that the
# largest groups come first.
starwars >> count(f.species, sort=True)
| | species | n |
|---|---|---|
| | <object> | <int64> |
| 0 | Human | 35 |
| 1 | Droid | 6 |
| 2 | NaN | 4 |
| 3 | Gungan | 3 |
| 4 | Twi'lek | 2 |
| 5 | Zabrak | 2 |
| 6 | Kaminoan | 2 |
| 7 | Mirialan | 2 |
| 8 | Wookiee | 2 |
| 9 | Besalisk | 1 |
| 10 | Clawdite | 1 |
| 11 | Iktotchi | 1 |
| 12 | Skakoan | 1 |
| 13 | Muun | 1 |
| 14 | Geonosian | 1 |
| 15 | Chagrian | 1 |
| 16 | Togruta | 1 |
| 17 | Kel Dor | 1 |
| 18 | Quermian | 1 |
| 19 | Aleena | 1 |
| 20 | Tholothian | 1 |
| 21 | Xexto | 1 |
| 22 | Cerean | 1 |
| 23 | Toong | 1 |
| 24 | Kaleesh | 1 |
| 25 | Vulptereen | 1 |
| 26 | Dug | 1 |
| 27 | Toydarian | 1 |
| 28 | Neimodian | 1 |
| 29 | Sullustan | 1 |
| 30 | Ewok | 1 |
| 31 | Mon Calamari | 1 |
| 32 | Trandoshan | 1 |
| 33 | Yoda's species | 1 |
| 34 | Hutt | 1 |
| 35 | Rodian | 1 |
| 36 | Nautolan | 1 |
| 37 | Pau'an | 1 |
TibbleGrouped: species (n=38)
# Count by the combination of two variables: one output row per distinct
# (sex, gender) pair, largest groups first.
starwars >> count(f.sex, f.gender, sort=True)
| | sex | gender | n |
|---|---|---|---|
| | <object> | <object> | <int64> |
| 0 | male | masculine | 60 |
| 1 | female | feminine | 16 |
| 2 | none | masculine | 5 |
| 3 | NaN | NaN | 4 |
| 4 | hermaphroditic | masculine | 1 |
| 5 | none | feminine | 1 |
TibbleGrouped: sex, gender (n=6)
# Count by a variable computed on the fly: birth_year rounded to the nearest
# ten (round(..., -1)), exposed in the result as `birth_decade`.
# Rows with a missing birth_year end up in their own NaN group.
starwars >> count(birth_decade=round(f.birth_year, -1))
| | birth_decade | n |
|---|---|---|
| | <float64> | <int64> |
| 0 | 20.0 | 6 |
| 1 | 110.0 | 1 |
| 2 | 30.0 | 4 |
| 3 | 40.0 | 6 |
| 4 | 50.0 | 8 |
| 5 | NaN | 44 |
| 6 | 60.0 | 4 |
| 7 | 200.0 | 1 |
| 8 | 600.0 | 1 |
| 9 | 900.0 | 1 |
| 10 | 80.0 | 2 |
| 11 | 10.0 | 1 |
| 12 | 90.0 | 3 |
| 13 | 70.0 | 4 |
| 14 | 100.0 | 1 |
# Small example frame defined row by row: the first line of tribble() names
# the columns (name, gender, runs), each following line is one record.
df = tribble(
f.name, f.gender, f.runs,
"Max", "male", 10,
"Sandra", "female", 1,
"Susan", "female", 4
)
# Without `wt`, count() counts rows: one row for male, two for female.
df >> count(f.gender)
| | gender | n |
|---|---|---|
| | <object> | <int64> |
| 0 | male | 1 |
| 1 | female | 2 |
# With wt=f.runs, count() sums `runs` within each gender instead of counting
# rows: 10 for male, 1 + 4 = 5 for female.
df >> count(f.gender, wt=f.runs)
| | gender | n |
|---|---|---|
| | <object> | <int64> |
| 0 | male | 10 |
| 1 | female | 5 |
# tally() on an ungrouped frame returns a single row holding the total
# number of rows.
starwars >> tally()
| | n |
|---|---|
| | <int64> |
| 0 | 87 |
# tally() on a grouped frame counts the rows of each group: one output row
# per species, with the count in `n`.
starwars >> group_by(f.species) >> tally()
| | species | n |
|---|---|---|
| | <object> | <int64> |
| 0 | Human | 35 |
| 1 | Droid | 6 |
| 2 | Wookiee | 2 |
| 3 | Rodian | 1 |
| 4 | Hutt | 1 |
| 5 | Yoda's species | 1 |
| 6 | Trandoshan | 1 |
| 7 | Mon Calamari | 1 |
| 8 | Ewok | 1 |
| 9 | Sullustan | 1 |
| 10 | Neimodian | 1 |
| 11 | Gungan | 3 |
| 12 | NaN | 4 |
| 13 | Toydarian | 1 |
| 14 | Dug | 1 |
| 15 | Zabrak | 2 |
| 16 | Twi'lek | 2 |
| 17 | Vulptereen | 1 |
| 18 | Xexto | 1 |
| 19 | Toong | 1 |
| 20 | Cerean | 1 |
| 21 | Nautolan | 1 |
| 22 | Tholothian | 1 |
| 23 | Iktotchi | 1 |
| 24 | Quermian | 1 |
| 25 | Kel Dor | 1 |
| 26 | Chagrian | 1 |
| 27 | Geonosian | 1 |
| 28 | Mirialan | 2 |
| 29 | Clawdite | 1 |
| 30 | Besalisk | 1 |
| 31 | Kaminoan | 2 |
| 32 | Aleena | 1 |
| 33 | Skakoan | 1 |
| 34 | Muun | 1 |
| 35 | Togruta | 1 |
| 36 | Kaleesh | 1 |
| 37 | Pau'an | 1 |
# add_count() keeps every original row and appends column `n` holding the
# weighted total (sum of `runs`) of the gender group each row belongs to.
df >> add_count(f.gender, wt=f.runs)
| | name | gender | runs | n |
|---|---|---|---|---|
| | <object> | <object> | <int64> | <int64> |
| 0 | Max | male | 10 | 10 |
| 1 | Sandra | female | 1 | 5 |
| 2 | Susan | female | 4 | 5 |
TibbleGrouped: gender (n=2)
# Display df again: the add_count() call above returned a new frame, so df
# itself still has only its three original columns (no `n`).
df
| | name | gender | runs |
|---|---|---|---|
| | <object> | <object> | <int64> |
| 0 | Max | male | 10 |
| 1 | Sandra | female | 1 |
| 2 | Susan | female | 4 |
# add_tally() on an ungrouped frame appends the overall weighted total
# (10 + 1 + 4 = 15) as column `n` on every row.
df >> add_tally(wt=f.runs)
| | name | gender | runs | n |
|---|---|---|---|---|
| | <object> | <object> | <int64> | <int64> |
| 0 | Max | male | 10 | 15 |
| 1 | Sandra | female | 1 | 15 |
| 2 | Susan | female | 4 | 15 |