lead-lag
In [1]:
Copied!
# Python (datar) counterpart of the dplyr lead/lag reference examples:
# https://dplyr.tidyverse.org/reference/lead-lag.html
# Run the helper script; nb_header is presumably defined there (confirm).
%run nb_helpers.py
# Star import; presumably the source of lead, lag, tibble, mutate, arrange, slice_sample and f used below.
from datar.all import *
# Emits the lead/lag API documentation rendered below this cell (inferred from the output; confirm in nb_helpers.py).
nb_header(lead, lag, book='lead-lag')
# Python (datar) counterpart of the dplyr lead/lag reference examples:
# https://dplyr.tidyverse.org/reference/lead-lag.html
# Run the helper script; nb_header is presumably defined there (confirm).
%run nb_helpers.py
# Star import; presumably the source of lead, lag, tibble, mutate, arrange, slice_sample and f used below.
from datar.all import *
# Emits the lead/lag API documentation rendered below this cell (inferred from the output; confirm in nb_helpers.py).
nb_header(lead, lag, book='lead-lag')
Try this notebook on binder.
★ lead¶
Shift a vector by n
positions so that each element takes the value found n positions after it (the "next" values).¶
The original API:
https://dplyr.tidyverse.org/reference/lead.html
Args:¶
x
: A vector
n
: The number of positions to shift.
default
: The default value to use for positions that don't exist.
order_by
: A vector (e.g. a column such as `f.year`) whose values define the ordering used when shifting `x`.
Returns:¶
A vector
★ lag¶
Shift a vector by n
positions so that each element takes the value found n positions before it (the "previous" values).¶
The original API:
https://dplyr.tidyverse.org/reference/lag.html
Args:¶
x
: A vector
n
: The number of positions to shift.
default
: The default value to use for positions that don't exist.
order_by
: A vector (e.g. a column such as `f.year`) whose values define the ordering used when shifting `x`.
Returns:¶
A vector
In [2]:
Copied!
# Sample vector reused by the following cells.
x = [1,2,3,4,5]
# Default lag: each value moves one position later; the first slot has no source value and shows NaN.
lag(x)
# Sample vector reused by the following cells.
x = [1,2,3,4,5]
# Default lag: each value moves one position later; the first slot has no source value and shows NaN.
lag(x)
Out[2]:
0 NaN 1 1.0 2 2.0 3 3.0 4 4.0 dtype: float64
In [3]:
Copied!
# Default lead: each value moves one position earlier; the last slot has no source value and shows NaN.
lead(x)
# Default lead: each value moves one position earlier; the last slot has no source value and shows NaN.
lead(x)
Out[3]:
0 2.0 1 3.0 2 4.0 3 5.0 4 NaN dtype: float64
In [4]:
Copied!
# Side-by-side view of the lagged, original and led values of x.
tibble(behind=lag(x), x=x, ahead=lead(x))
# Side-by-side view of the lagged, original and led values of x.
tibble(behind=lag(x), x=x, ahead=lead(x))
Out[4]:
behind | x | ahead | |
---|---|---|---|
<float64> | <int64> | <float64> | |
0 | NaN | 1 | 2.0 |
1 | 1.0 | 2 | 3.0 |
2 | 2.0 | 3 | 4.0 |
3 | 3.0 | 4 | 5.0 |
4 | 4.0 | 5 | NaN |
In [5]:
Copied!
# Explicit n=1 produces the same result as the default lag(x) call above.
lag(x, n=1)
# Explicit n=1 produces the same result as the default lag(x) call above.
lag(x, n=1)
Out[5]:
0 NaN 1 1.0 2 2.0 3 3.0 4 4.0 dtype: float64
In [6]:
Copied!
# n=2 shifts by two positions, leaving two NaN slots at the start.
lag(x, n=2)
# n=2 shifts by two positions, leaving two NaN slots at the start.
lag(x, n=2)
Out[6]:
0 NaN 1 NaN 2 1.0 3 2.0 4 3.0 dtype: float64
In [7]:
Copied!
# Explicit n=1 produces the same result as the default lead(x) call above.
lead(x, n=1)
# Explicit n=1 produces the same result as the default lead(x) call above.
lead(x, n=1)
Out[7]:
0 2.0 1 3.0 2 4.0 3 5.0 4 NaN dtype: float64
In [8]:
Copied!
# n=2 shifts by two positions, leaving two NaN slots at the end.
lead(x, n=2)
# n=2 shifts by two positions, leaving two NaN slots at the end.
lead(x, n=2)
Out[8]:
0 3.0 1 4.0 2 5.0 3 NaN 4 NaN dtype: float64
In [9]:
Copied!
# default=0 fills the vacated first slot with 0 instead of NaN; the output keeps an integer dtype.
lag(x, default=0)
# default=0 fills the vacated first slot with 0 instead of NaN; the output keeps an integer dtype.
lag(x, default=0)
Out[9]:
0 0 1 1 2 2 3 3 4 4 dtype: int64
In [10]:
Copied!
# default=6 fills the vacated last slot with 6 instead of NaN; the output keeps an integer dtype.
lead(x, default=6)
# default=6 fills the vacated last slot with 6 instead of NaN; the output keeps an integer dtype.
lead(x, default=6)
Out[10]:
0 2 1 3 2 4 3 5 4 6 dtype: int64
In [11]:
Copied!
# Build a six-row year/value table (value = index squared) and sample all of its rows (prop=1) to shuffle them.
# NOTE(review): no seed is passed to slice_sample, so the shuffled order presumably differs between runs — confirm.
scrambled = slice_sample(
tibble(year=[2000, 2001, 2002, 2003, 2004, 2005],
value=[a**2 for a in range(6)]),
prop=1
)
# Without order_by, lag follows the shuffled row order, so after sorting by year
# previous_year_value does not hold the prior year's value (see the output below).
scrambled >> mutate(previous_year_value = lag(f.value)) >> arrange(f.year)
# Build a six-row year/value table (value = index squared) and sample all of its rows (prop=1) to shuffle them.
# NOTE(review): no seed is passed to slice_sample, so the shuffled order presumably differs between runs — confirm.
scrambled = slice_sample(
tibble(year=[2000, 2001, 2002, 2003, 2004, 2005],
value=[a**2 for a in range(6)]),
prop=1
)
# Without order_by, lag follows the shuffled row order, so after sorting by year
# previous_year_value does not hold the prior year's value (see the output below).
scrambled >> mutate(previous_year_value = lag(f.value)) >> arrange(f.year)
Out[11]:
year | value | previous_year_value | |
---|---|---|---|
<int64> | <int64> | <float64> | |
0 | 2000 | 0 | 1.0 |
1 | 2001 | 1 | 9.0 |
2 | 2002 | 4 | NaN |
3 | 2003 | 9 | 4.0 |
4 | 2004 | 16 | 0.0 |
5 | 2005 | 25 | 16.0 |
In [12]:
Copied!
# Counterpart of dplyr's lag(value, order_by = year): pass the year column as order_by
# so the lag is meant to follow year order rather than row order.
# NOTE(review): the recorded output below does not show each year's previous value
# (e.g. 2000 -> 0.0, 2001 -> NaN, 2003 -> 1.0); verify order_by handling / index alignment.
scrambled >> mutate(previous_year_value = lag(f.value, order_by=f.year)) >> arrange(f.year)
# Counterpart of dplyr's lag(value, order_by = year): pass the year column as order_by
# so the lag is meant to follow year order rather than row order.
# NOTE(review): the recorded output below does not show each year's previous value
# (e.g. 2000 -> 0.0, 2001 -> NaN, 2003 -> 1.0); verify order_by handling / index alignment.
scrambled >> mutate(previous_year_value = lag(f.value, order_by=f.year)) >> arrange(f.year)
Out[12]:
year | value | previous_year_value | |
---|---|---|---|
<int64> | <int64> | <float64> | |
0 | 2000 | 0 | 0.0 |
1 | 2001 | 1 | NaN |
2 | 2002 | 4 | 4.0 |
3 | 2003 | 9 | 1.0 |
4 | 2004 | 16 | 9.0 |
5 | 2005 | 25 | 16.0 |
In [ ]:
Copied!