mutate-joins
# https://dplyr.tidyverse.org/reference/mutate-joins.html
%run nb_helpers.py
from datar.data import band_members, band_instruments, band_instruments2
from datar.all import *
nb_header(inner_join, left_join, right_join, full_join, book='mutate-joins')
★ inner_join¶
Inner join two data frames by matching rows.¶
The original API:
https://dplyr.tidyverse.org/reference/join.html
Args:¶
  x: A data frame
  y: A data frame
  by: A list of column names to join by.
    If None, use the intersection of the columns of x and y.
  copy: If True, always copy the data.
  suffix: A tuple of suffixes to apply to overlapping columns.
  keep: If True, keep the grouping variables in the output.
Returns:¶
A data frame
★ left_join¶
Left join two data frames by matching rows.¶
The original API:
https://dplyr.tidyverse.org/reference/join.html
Args:¶
  x: A data frame
  y: A data frame
  by: A list of column names to join by.
    If None, use the intersection of the columns of x and y.
  copy: If True, always copy the data.
  suffix: A tuple of suffixes to apply to overlapping columns.
  keep: If True, keep the grouping variables in the output.
Returns:¶
A data frame
★ right_join¶
Right join two data frames by matching rows.¶
The original API:
https://dplyr.tidyverse.org/reference/join.html
Args:¶
  x: A data frame
  y: A data frame
  by: A list of column names to join by.
    If None, use the intersection of the columns of x and y.
  copy: If True, always copy the data.
  suffix: A tuple of suffixes to apply to overlapping columns.
  keep: If True, keep the grouping variables in the output.
Returns:¶
A data frame
★ full_join¶
Full join two data frames by matching rows.¶
The original API:
https://dplyr.tidyverse.org/reference/join.html
Args:¶
  x: A data frame
  y: A data frame
  by: A list of column names to join by.
    If None, use the intersection of the columns of x and y.
  copy: If True, always copy the data.
  suffix: A tuple of suffixes to apply to overlapping columns.
  keep: If True, keep the grouping variables in the output.
Returns:¶
A data frame
band_members >> inner_join(band_instruments)
| name | band | plays | |
|---|---|---|---|
| <object> | <object> | <object> | |
| 0 | John | Beatles | guitar | 
| 1 | Paul | Beatles | bass | 
band_members >> left_join(band_instruments)
| name | band | plays | |
|---|---|---|---|
| <object> | <object> | <object> | |
| 0 | Mick | Stones | NaN | 
| 1 | John | Beatles | guitar | 
| 2 | Paul | Beatles | bass | 
band_members >> right_join(band_instruments)
| name | band | plays | |
|---|---|---|---|
| <object> | <object> | <object> | |
| 0 | John | Beatles | guitar | 
| 1 | Paul | Beatles | bass | 
| 2 | Keith | NaN | guitar | 
band_members >> full_join(band_instruments)
| name | band | plays | |
|---|---|---|---|
| <object> | <object> | <object> | |
| 0 | Mick | Stones | NaN | 
| 1 | John | Beatles | guitar | 
| 2 | Paul | Beatles | bass | 
| 3 | Keith | NaN | guitar | 
band_members >> inner_join(band_instruments, by=f.name)
| name | band | plays | |
|---|---|---|---|
| <object> | <object> | <object> | |
| 0 | John | Beatles | guitar | 
| 1 | Paul | Beatles | bass | 
band_members >> full_join(band_instruments2, by={'name': 'artist'})
| name | band | plays | |
|---|---|---|---|
| <object> | <object> | <object> | |
| 0 | Mick | Stones | NaN | 
| 1 | John | Beatles | guitar | 
| 2 | Paul | Beatles | bass | 
| 3 | NaN | NaN | guitar | 
band_members >> full_join(band_instruments2, by={'name': 'artist'}, keep=True)
| name | band | artist | plays | |
|---|---|---|---|---|
| <object> | <object> | <object> | <object> | |
| 0 | Mick | Stones | NaN | NaN | 
| 1 | John | Beatles | John | guitar | 
| 2 | Paul | Beatles | Paul | bass | 
| 3 | NaN | NaN | Keith | guitar | 
df1 = tibble(x=[1,2,3])
df2 = tibble(x=[1,1,2], y=["first", "second", "third"])
df1 >> left_join(df2)
| x | y | |
|---|---|---|
| <int64> | <object> | |
| 0 | 1 | first | 
| 1 | 1 | second | 
| 2 | 2 | third | 
| 3 | 3 | NaN | 
df1 = tibble(x=[1, NA], y=2)
df2 = tibble(x=[1, NA], z=3)
left_join(df1, df2) # na_matches not supported yet
| x | y | z | |
|---|---|---|---|
| <float64> | <int64> | <int64> | |
| 0 | 1.0 | 2 | 3 | 
| 1 | NaN | 2 | 3 |