mutate-joins
# https://dplyr.tidyverse.org/reference/mutate-joins.html
%run nb_helpers.py
from datar.data import band_members, band_instruments, band_instruments2
from datar.all import *
nb_header(inner_join, left_join, right_join, full_join, book='mutate-joins')
★ inner_join¶
Inner join two data frames by matching rows.¶
The original API:
https://dplyr.tidyverse.org/reference/join.html
Args:¶
x
: A data frame
y
: A data frame
by
: A list of column names to join by.
If None, use the intersection of the columns of x and y.
copy
: If True, always copy the data.
suffix
: A tuple of suffixes to apply to overlapping columns.
keep
: If True, keep the grouping variables in the output.
Returns:¶
A data frame
★ left_join¶
Left join two data frames by matching rows.¶
The original API:
https://dplyr.tidyverse.org/reference/join.html
Args:¶
x
: A data frame
y
: A data frame
by
: A list of column names to join by.
If None, use the intersection of the columns of x and y.
copy
: If True, always copy the data.
suffix
: A tuple of suffixes to apply to overlapping columns.
keep
: If True, keep the grouping variables in the output.
Returns:¶
A data frame
★ right_join¶
Right join two data frames by matching rows.¶
The original API:
https://dplyr.tidyverse.org/reference/join.html
Args:¶
x
: A data frame
y
: A data frame
by
: A list of column names to join by.
If None, use the intersection of the columns of x and y.
copy
: If True, always copy the data.
suffix
: A tuple of suffixes to apply to overlapping columns.
keep
: If True, keep the grouping variables in the output.
Returns:¶
A data frame
★ full_join¶
Full join two data frames by matching rows.¶
The original API:
https://dplyr.tidyverse.org/reference/join.html
Args:¶
x
: A data frame
y
: A data frame
by
: A list of column names to join by.
If None, use the intersection of the columns of x and y.
copy
: If True, always copy the data.
suffix
: A tuple of suffixes to apply to overlapping columns.
keep
: If True, keep the grouping variables in the output.
Returns:¶
A data frame
band_members >> inner_join(band_instruments)
name | band | plays | |
---|---|---|---|
<object> | <object> | <object> | |
0 | John | Beatles | guitar |
1 | Paul | Beatles | bass |
band_members >> left_join(band_instruments)
name | band | plays | |
---|---|---|---|
<object> | <object> | <object> | |
0 | Mick | Stones | NaN |
1 | John | Beatles | guitar |
2 | Paul | Beatles | bass |
band_members >> right_join(band_instruments)
name | band | plays | |
---|---|---|---|
<object> | <object> | <object> | |
0 | John | Beatles | guitar |
1 | Paul | Beatles | bass |
2 | Keith | NaN | guitar |
band_members >> full_join(band_instruments)
name | band | plays | |
---|---|---|---|
<object> | <object> | <object> | |
0 | Mick | Stones | NaN |
1 | John | Beatles | guitar |
2 | Paul | Beatles | bass |
3 | Keith | NaN | guitar |
band_members >> inner_join(band_instruments, by=f.name)
name | band | plays | |
---|---|---|---|
<object> | <object> | <object> | |
0 | John | Beatles | guitar |
1 | Paul | Beatles | bass |
band_members >> full_join(band_instruments2, by={'name': 'artist'})
name | band | plays | |
---|---|---|---|
<object> | <object> | <object> | |
0 | Mick | Stones | NaN |
1 | John | Beatles | guitar |
2 | Paul | Beatles | bass |
3 | NaN | NaN | guitar |
band_members >> full_join(band_instruments2, by={'name': 'artist'}, keep=True)
name | band | artist | plays | |
---|---|---|---|---|
<object> | <object> | <object> | <object> | |
0 | Mick | Stones | NaN | NaN |
1 | John | Beatles | John | guitar |
2 | Paul | Beatles | Paul | bass |
3 | NaN | NaN | Keith | guitar |
df1 = tibble(x=[1,2,3])
df2 = tibble(x=[1,1,2], y=["first", "second", "third"])
df1 >> left_join(df2)
x | y | |
---|---|---|
<int64> | <object> | |
0 | 1 | first |
1 | 1 | second |
2 | 2 | third |
3 | 3 | NaN |
df1 = tibble(x=[1, NA], y=2)
df2 = tibble(x=[1, NA], z=3)
left_join(df1, df2) # na_matches not supported yet
x | y | z | |
---|---|---|---|
<float64> | <int64> | <int64> | |
0 | 1.0 | 2 | 3 |
1 | NaN | 2 | 3 |