ranking
# https://dplyr.tidyverse.org/reference/ranking.html
%run nb_helpers.py
import numpy
from datar.data import mtcars
from datar.all import *
# nb_header is not defined in this file; presumably it comes from
# nb_helpers.py (loaded via %run above) and renders the documentation of each
# function passed to it — TODO confirm against nb_helpers.py.
nb_header(row_number, min_rank, dense_rank, percent_rank, cume_dist, ntile)
★ row_number¶
★ min_rank¶
Get the min rank of x¶
Note that this function doesn't support piping.
Args:¶
x
: The data to rank
Defaults to Symbolic()
so the whole data is used by default
when called min_rank()
na_last
: How NA values are ranked
- "keep": NA values are kept as NA in the result
- "top": NA values are ranked at the top
- "bottom": NA values are ranked at the bottom
Returns:¶
The min rank of each element of x
★ dense_rank¶
Get the dense rank of x¶
Note that this function doesn't support piping.
Args:¶
x
: The data to rank
Defaults to Symbolic()
so the whole data is used by default
when called dense_rank()
na_last
: How NA values are ranked
- "keep": NA values are kept as NA in the result
- "top": NA values are ranked at the top
- "bottom": NA values are ranked at the bottom
Returns:¶
The dense rank of each element of x
★ percent_rank¶
Get the percent rank of x¶
Note that this function doesn't support piping.
Args:¶
x
: The data to rank
Defaults to Symbolic()
so the whole data is used by default
when called percent_rank()
na_last
: How NA values are ranked
- "keep": NA values are kept as NA in the result
- "top": NA values are ranked at the top
- "bottom": NA values are ranked at the bottom
Returns:¶
The percent rank of each element of x
★ cume_dist¶
Get the cumulative distribution of x¶
Note that this function doesn't support piping.
Args:¶
x
: The data to rank
Defaults to Symbolic()
so the whole data is used by default
when called cume_dist()
na_last
: How NA values are ranked
- "keep": NA values are kept as NA in the result
- "top": NA values are ranked at the top
- "bottom": NA values are ranked at the bottom
Returns:¶
The cumulative distribution value of each element of x
★ ntile¶
A rough rank, which breaks the input vector into n buckets.¶
The size of the buckets may differ by up to one; larger buckets
have lower rank.
Note that this function doesn't support piping.
Args:¶
x
: The data to divide into buckets
Defaults to Symbolic()
so the whole data is used by default
when called ntile(n=...)
n
: The number of groups to divide the data into
Returns:¶
The bucket number (from 1 to n) of each element of x
# A small column containing a tie (the two 2s) and a missing value, so the
# ranking helpers can be compared side by side on the same input.
df = tibble(
    x=c(5, 1, 3, 2, 2, numpy.nan),
)

# One output column per ranking helper; the keyword order fixes the column
# order of the displayed result.
df >> mutate(
    row_number=row_number(),
    min_rank=min_rank(f.x),
    dense_rank=dense_rank(f.x),
    percent_rank=percent_rank(f.x),
    cume_dist=cume_dist(f.x),
    ntile=ntile(f.x, n=2),
)
x | row_number | min_rank | dense_rank | percent_rank | cume_dist | ntile | |
---|---|---|---|---|---|---|---|
<float64> | <float64> | <float64> | <float64> | <float64> | <float64> | <category> | |
0 | 5.0 | 1.0 | 5.0 | 4.0 | 1.00 | 1.0 | 2 |
1 | 1.0 | 2.0 | 1.0 | 1.0 | 0.00 | 0.2 | 1 |
2 | 3.0 | 3.0 | 4.0 | 3.0 | 0.75 | 0.8 | 1 |
3 | 2.0 | 4.0 | 2.0 | 2.0 | 0.25 | 0.6 | 1 |
4 | 2.0 | 5.0 | 2.0 | 2.0 | 0.25 | 0.6 | 1 |
5 | NaN | 6.0 | NaN | NaN | NaN | NaN | NaN |
# Split eight consecutive integers (0..7) into three buckets with ntile.
tibble(
    x=range(8),
) >> mutate(
    ntile=ntile(f.x, n=3),
)
x | ntile | |
---|---|---|
<int64> | <category> | |
0 | 0 | 1 |
1 | 1 | 1 |
2 | 2 | 1 |
3 | 3 | 2 |
4 | 4 | 2 |
5 | 5 | 3 |
6 | 6 | 3 |
7 | 7 | 3 |
# Flag the first row of mtcars. row_number() is 1-based here (the ranking
# example earlier in this notebook numbers its six rows 1..6), so the
# comparison must be against 1; comparing against 0 never matches and leaves
# every row False.
# NOTE: the recorded output below was produced with the old `== 0`
# comparison — re-run this cell to refresh it.
mtcars >> mutate(n=row_number() == 1)
mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | n | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
<float64> | <int64> | <float64> | <int64> | <float64> | <float64> | <float64> | <int64> | <int64> | <int64> | <int64> | <bool> | |
Mazda RX4 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 | False |
Mazda RX4 Wag | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 | False |
Datsun 710 | 22.8 | 4 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 | False |
Hornet 4 Drive | 21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 | False |
Hornet Sportabout | 18.7 | 8 | 360.0 | 175 | 3.15 | 3.440 | 17.02 | 0 | 0 | 3 | 2 | False |
Valiant | 18.1 | 6 | 225.0 | 105 | 2.76 | 3.460 | 20.22 | 1 | 0 | 3 | 1 | False |
Duster 360 | 14.3 | 8 | 360.0 | 245 | 3.21 | 3.570 | 15.84 | 0 | 0 | 3 | 4 | False |
Merc 240D | 24.4 | 4 | 146.7 | 62 | 3.69 | 3.190 | 20.00 | 1 | 0 | 4 | 2 | False |
Merc 230 | 22.8 | 4 | 140.8 | 95 | 3.92 | 3.150 | 22.90 | 1 | 0 | 4 | 2 | False |
Merc 280 | 19.2 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.30 | 1 | 0 | 4 | 4 | False |
Merc 280C | 17.8 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.90 | 1 | 0 | 4 | 4 | False |
Merc 450SE | 16.4 | 8 | 275.8 | 180 | 3.07 | 4.070 | 17.40 | 0 | 0 | 3 | 3 | False |
Merc 450SL | 17.3 | 8 | 275.8 | 180 | 3.07 | 3.730 | 17.60 | 0 | 0 | 3 | 3 | False |
Merc 450SLC | 15.2 | 8 | 275.8 | 180 | 3.07 | 3.780 | 18.00 | 0 | 0 | 3 | 3 | False |
Cadillac Fleetwood | 10.4 | 8 | 472.0 | 205 | 2.93 | 5.250 | 17.98 | 0 | 0 | 3 | 4 | False |
Lincoln Continental | 10.4 | 8 | 460.0 | 215 | 3.00 | 5.424 | 17.82 | 0 | 0 | 3 | 4 | False |
Chrysler Imperial | 14.7 | 8 | 440.0 | 230 | 3.23 | 5.345 | 17.42 | 0 | 0 | 3 | 4 | False |
Fiat 128 | 32.4 | 4 | 78.7 | 66 | 4.08 | 2.200 | 19.47 | 1 | 1 | 4 | 1 | False |
Honda Civic | 30.4 | 4 | 75.7 | 52 | 4.93 | 1.615 | 18.52 | 1 | 1 | 4 | 2 | False |
Toyota Corolla | 33.9 | 4 | 71.1 | 65 | 4.22 | 1.835 | 19.90 | 1 | 1 | 4 | 1 | False |
Toyota Corona | 21.5 | 4 | 120.1 | 97 | 3.70 | 2.465 | 20.01 | 1 | 0 | 3 | 1 | False |
Dodge Challenger | 15.5 | 8 | 318.0 | 150 | 2.76 | 3.520 | 16.87 | 0 | 0 | 3 | 2 | False |
AMC Javelin | 15.2 | 8 | 304.0 | 150 | 3.15 | 3.435 | 17.30 | 0 | 0 | 3 | 2 | False |
Camaro Z28 | 13.3 | 8 | 350.0 | 245 | 3.73 | 3.840 | 15.41 | 0 | 0 | 3 | 4 | False |
Pontiac Firebird | 19.2 | 8 | 400.0 | 175 | 3.08 | 3.845 | 17.05 | 0 | 0 | 3 | 2 | False |
Fiat X1-9 | 27.3 | 4 | 79.0 | 66 | 4.08 | 1.935 | 18.90 | 1 | 1 | 4 | 1 | False |
Porsche 914-2 | 26.0 | 4 | 120.3 | 91 | 4.43 | 2.140 | 16.70 | 0 | 1 | 5 | 2 | False |
Lotus Europa | 30.4 | 4 | 95.1 | 113 | 3.77 | 1.513 | 16.90 | 1 | 1 | 5 | 2 | False |
Ford Pantera L | 15.8 | 8 | 351.0 | 264 | 4.22 | 3.170 | 14.50 | 0 | 1 | 5 | 4 | False |
Ferrari Dino | 19.7 | 6 | 145.0 | 175 | 3.62 | 2.770 | 15.50 | 0 | 1 | 5 | 6 | False |
Maserati Bora | 15.0 | 8 | 301.0 | 335 | 3.54 | 3.570 | 14.60 | 0 | 1 | 5 | 8 | False |
Volvo 142E | 21.4 | 4 | 121.0 | 109 | 4.11 | 2.780 | 18.60 | 1 | 1 | 4 | 2 | False |
# Keep the rows whose row number lies in [0, 10).
# A chained comparison (`0 <= row_number() < 10`) expands to
# `(0 <= rn) and (rn < 10)`; `and` cannot be applied element-wise to a lazy
# expression, so the lower bound was silently discarded. Spell out both
# bounds and combine them with `&` instead.
# NOTE(review): row_number() appears to be 1-based in this notebook, so this
# selects rows 1-9; use `<= 10` if the first ten rows are intended — confirm.
mtcars >> filter((0 <= row_number()) & (row_number() < 10))
mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
---|---|---|---|---|---|---|---|---|---|---|---|
<float64> | <int64> | <float64> | <int64> | <float64> | <float64> | <float64> | <int64> | <int64> | <int64> | <int64> | |
Mazda RX4 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 |
Mazda RX4 Wag | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 |
Datsun 710 | 22.8 | 4 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 |
Hornet 4 Drive | 21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 |
Hornet Sportabout | 18.7 | 8 | 360.0 | 175 | 3.15 | 3.440 | 17.02 | 0 | 0 | 3 | 2 |
Valiant | 18.1 | 6 | 225.0 | 105 | 2.76 | 3.460 | 20.22 | 1 | 0 | 3 | 1 |
Duster 360 | 14.3 | 8 | 360.0 | 245 | 3.21 | 3.570 | 15.84 | 0 | 0 | 3 | 4 |
Merc 240D | 24.4 | 4 | 146.7 | 62 | 3.69 | 3.190 | 20.00 | 1 | 0 | 4 | 2 |
Merc 230 | 22.8 | 4 | 140.8 | 95 | 3.92 | 3.150 | 22.90 | 1 | 0 | 4 | 2 |