ranking
# https://dplyr.tidyverse.org/reference/ranking.html
%run nb_helpers.py
import numpy
from datar.data import mtcars
from datar.all import *
nb_header(row_number, min_rank, dense_rank, percent_rank, cume_dist, ntile)
★ row_number¶
★ min_rank¶
Get the min rank of x¶
Note that this function doesn't support piping.
Args:¶
x: The data to compute the min rank for
Defaults to Symbolic() so the whole data is used by default
when called min_rank()
na_last: How NA values are ranked
- "keep": NA values are not ranked; their rank stays NA
- "top": NA values are ranked at the top
- "bottom": NA values are ranked at the bottom
Returns:¶
The min rank of each element of x
★ dense_rank¶
Get the dense rank of x¶
Note that this function doesn't support piping.
Args:¶
x: The data to compute the dense rank for
Defaults to Symbolic() so the whole data is used by default
when called dense_rank()
na_last: How NA values are ranked
- "keep": NA values are not ranked; their rank stays NA
- "top": NA values are ranked at the top
- "bottom": NA values are ranked at the bottom
Returns:¶
The dense rank of each element of x
★ percent_rank¶
Get the percent rank of x¶
Note that this function doesn't support piping.
Args:¶
x: The data to compute the percent rank for
Defaults to Symbolic() so the whole data is used by default
when called percent_rank()
na_last: How NA values are ranked
- "keep": NA values are not ranked; their rank stays NA
- "top": NA values are ranked at the top
- "bottom": NA values are ranked at the bottom
Returns:¶
The percent rank of each element of x
★ cume_dist¶
Get the cume_dist of x¶
Note that this function doesn't support piping.
Args:¶
x: The data to compute the cumulative distribution for
Defaults to Symbolic() so the whole data is used by default
when called cume_dist()
na_last: How NA values are ranked
- "keep": NA values are not ranked; their rank stays NA
- "top": NA values are ranked at the top
- "bottom": NA values are ranked at the bottom
Returns:¶
The cumulative distribution value (cume_dist) of each element of x
★ ntile¶
A rough rank, which breaks the input vector into n buckets.¶
The size of the buckets may differ by up to one; larger buckets
have lower rank.
Note that this function doesn't support piping.
Args:¶
x: The data to divide into buckets
Defaults to Symbolic() so the whole data is used by default
when called ntile(n=...)
n: The number of groups to divide the data into
Returns:¶
The bucket number (from 1 to n) of each element of x
df = tibble(x=c(5, 1, 3, 2, 2, numpy.nan))
df >> mutate(
row_number=row_number(),
min_rank=min_rank(f.x),
dense_rank=dense_rank(f.x),
percent_rank=percent_rank(f.x),
cume_dist=cume_dist(f.x),
ntile=ntile(f.x, n=2)
)
| x | row_number | min_rank | dense_rank | percent_rank | cume_dist | ntile | |
|---|---|---|---|---|---|---|---|
| <float64> | <float64> | <float64> | <float64> | <float64> | <float64> | <category> | |
| 0 | 5.0 | 1.0 | 5.0 | 4.0 | 1.00 | 1.0 | 2 |
| 1 | 1.0 | 2.0 | 1.0 | 1.0 | 0.00 | 0.2 | 1 |
| 2 | 3.0 | 3.0 | 4.0 | 3.0 | 0.75 | 0.8 | 1 |
| 3 | 2.0 | 4.0 | 2.0 | 2.0 | 0.25 | 0.6 | 1 |
| 4 | 2.0 | 5.0 | 2.0 | 2.0 | 0.25 | 0.6 | 1 |
| 5 | NaN | 6.0 | NaN | NaN | NaN | NaN | NaN |
tibble(x=range(8)) >> mutate(ntile=ntile(f.x, n=3))
| x | ntile | |
|---|---|---|
| <int64> | <category> | |
| 0 | 0 | 1 |
| 1 | 1 | 1 |
| 2 | 2 | 1 |
| 3 | 3 | 2 |
| 4 | 4 | 2 |
| 5 | 5 | 3 |
| 6 | 6 | 3 |
| 7 | 7 | 3 |
mtcars >> mutate(n=row_number() == 0)
| mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | n | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| <float64> | <int64> | <float64> | <int64> | <float64> | <float64> | <float64> | <int64> | <int64> | <int64> | <int64> | <bool> | |
| Mazda RX4 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 | False |
| Mazda RX4 Wag | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 | False |
| Datsun 710 | 22.8 | 4 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 | False |
| Hornet 4 Drive | 21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 | False |
| Hornet Sportabout | 18.7 | 8 | 360.0 | 175 | 3.15 | 3.440 | 17.02 | 0 | 0 | 3 | 2 | False |
| Valiant | 18.1 | 6 | 225.0 | 105 | 2.76 | 3.460 | 20.22 | 1 | 0 | 3 | 1 | False |
| Duster 360 | 14.3 | 8 | 360.0 | 245 | 3.21 | 3.570 | 15.84 | 0 | 0 | 3 | 4 | False |
| Merc 240D | 24.4 | 4 | 146.7 | 62 | 3.69 | 3.190 | 20.00 | 1 | 0 | 4 | 2 | False |
| Merc 230 | 22.8 | 4 | 140.8 | 95 | 3.92 | 3.150 | 22.90 | 1 | 0 | 4 | 2 | False |
| Merc 280 | 19.2 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.30 | 1 | 0 | 4 | 4 | False |
| Merc 280C | 17.8 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.90 | 1 | 0 | 4 | 4 | False |
| Merc 450SE | 16.4 | 8 | 275.8 | 180 | 3.07 | 4.070 | 17.40 | 0 | 0 | 3 | 3 | False |
| Merc 450SL | 17.3 | 8 | 275.8 | 180 | 3.07 | 3.730 | 17.60 | 0 | 0 | 3 | 3 | False |
| Merc 450SLC | 15.2 | 8 | 275.8 | 180 | 3.07 | 3.780 | 18.00 | 0 | 0 | 3 | 3 | False |
| Cadillac Fleetwood | 10.4 | 8 | 472.0 | 205 | 2.93 | 5.250 | 17.98 | 0 | 0 | 3 | 4 | False |
| Lincoln Continental | 10.4 | 8 | 460.0 | 215 | 3.00 | 5.424 | 17.82 | 0 | 0 | 3 | 4 | False |
| Chrysler Imperial | 14.7 | 8 | 440.0 | 230 | 3.23 | 5.345 | 17.42 | 0 | 0 | 3 | 4 | False |
| Fiat 128 | 32.4 | 4 | 78.7 | 66 | 4.08 | 2.200 | 19.47 | 1 | 1 | 4 | 1 | False |
| Honda Civic | 30.4 | 4 | 75.7 | 52 | 4.93 | 1.615 | 18.52 | 1 | 1 | 4 | 2 | False |
| Toyota Corolla | 33.9 | 4 | 71.1 | 65 | 4.22 | 1.835 | 19.90 | 1 | 1 | 4 | 1 | False |
| Toyota Corona | 21.5 | 4 | 120.1 | 97 | 3.70 | 2.465 | 20.01 | 1 | 0 | 3 | 1 | False |
| Dodge Challenger | 15.5 | 8 | 318.0 | 150 | 2.76 | 3.520 | 16.87 | 0 | 0 | 3 | 2 | False |
| AMC Javelin | 15.2 | 8 | 304.0 | 150 | 3.15 | 3.435 | 17.30 | 0 | 0 | 3 | 2 | False |
| Camaro Z28 | 13.3 | 8 | 350.0 | 245 | 3.73 | 3.840 | 15.41 | 0 | 0 | 3 | 4 | False |
| Pontiac Firebird | 19.2 | 8 | 400.0 | 175 | 3.08 | 3.845 | 17.05 | 0 | 0 | 3 | 2 | False |
| Fiat X1-9 | 27.3 | 4 | 79.0 | 66 | 4.08 | 1.935 | 18.90 | 1 | 1 | 4 | 1 | False |
| Porsche 914-2 | 26.0 | 4 | 120.3 | 91 | 4.43 | 2.140 | 16.70 | 0 | 1 | 5 | 2 | False |
| Lotus Europa | 30.4 | 4 | 95.1 | 113 | 3.77 | 1.513 | 16.90 | 1 | 1 | 5 | 2 | False |
| Ford Pantera L | 15.8 | 8 | 351.0 | 264 | 4.22 | 3.170 | 14.50 | 0 | 1 | 5 | 4 | False |
| Ferrari Dino | 19.7 | 6 | 145.0 | 175 | 3.62 | 2.770 | 15.50 | 0 | 1 | 5 | 6 | False |
| Maserati Bora | 15.0 | 8 | 301.0 | 335 | 3.54 | 3.570 | 14.60 | 0 | 1 | 5 | 8 | False |
| Volvo 142E | 21.4 | 4 | 121.0 | 109 | 4.11 | 2.780 | 18.60 | 1 | 1 | 4 | 2 | False |
mtcars >> filter(0 <= row_number() < 10)
| mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| <float64> | <int64> | <float64> | <int64> | <float64> | <float64> | <float64> | <int64> | <int64> | <int64> | <int64> | |
| Mazda RX4 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 |
| Mazda RX4 Wag | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 |
| Datsun 710 | 22.8 | 4 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 |
| Hornet 4 Drive | 21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 |
| Hornet Sportabout | 18.7 | 8 | 360.0 | 175 | 3.15 | 3.440 | 17.02 | 0 | 0 | 3 | 2 |
| Valiant | 18.1 | 6 | 225.0 | 105 | 2.76 | 3.460 | 20.22 | 1 | 0 | 3 | 1 |
| Duster 360 | 14.3 | 8 | 360.0 | 245 | 3.21 | 3.570 | 15.84 | 0 | 0 | 3 | 4 |
| Merc 240D | 24.4 | 4 | 146.7 | 62 | 3.69 | 3.190 | 20.00 | 1 | 0 | 4 | 2 |
| Merc 230 | 22.8 | 4 | 140.8 | 95 | 3.92 | 3.150 | 22.90 | 1 | 0 | 4 | 2 |