tibble

In [2]:

Copied!





# https://tibble.tidyverse.org/reference/tibble.html
# https://tibble.tidyverse.org/reference/tribble.html
%run nb_helpers.py

from datar import f
from datar.tibble import tibble, tibble_row, tribble
from datar.base import diag, runif
from datar.dplyr import mutate

nb_header(tibble, tibble_row, tribble)
# https://tibble.tidyverse.org/reference/tibble.html
# https://tibble.tidyverse.org/reference/tribble.html
%run nb_helpers.py

from datar import f
from datar.tibble import tibble, tibble_row, tribble
from datar.base import diag, runif
from datar.dplyr import mutate

nb_header(tibble, tibble_row, tribble)

Try this notebook on binder.

★ tibble
¶

Constructs a data frame¶

Args:¶

*args: and
**kwargs: A set of name-value pairs.
_name_repair: treatment of problematic column names:
- "minimal": No name repair or checks, beyond basic existence,

- "unique": Make sure names are unique and not empty,

- "check_unique": (default value), no name repair, but check they are unique,

- "universal": Make the names unique and syntactic

- a function: apply custom name repair

_rows: Number of rows of a 0-col dataframe when args and kwargs are
not provided. When args or kwargs are provided, this is ignored.

_dtypes: The dtypes to convert each column to.
_drop_index: Whether to drop the index of the final data frame
_index: The new index of the output frame

Returns:¶

A constructed tibble

★ tibble_row
¶

Constructs a data frame that is guaranteed to occupy one row.¶

Scalar values will be wrapped with []

Args:¶

*args: and
**kwargs: A set of name-value pairs.
_name_repair: treatment of problematic column names:
- "minimal": No name repair or checks, beyond basic existence,

- "unique": Make sure names are unique and not empty,

- "check_unique": (default value), no name repair, but check they are unique,

- "universal": Make the names unique and syntactic

- a function: apply custom name repair

Returns:¶

A constructed dataframe

★ tribble
¶

Create dataframe using an easier to read row-by-row layout¶

Unlike the original R API, which uses a formula (~col) to indicate the column
names, we use f.col to indicate them.

Args:¶

*dummies: Arguments specifying the structure of a dataframe
Variable names should be specified with f.name

_dtypes: The dtypes to convert each column to.

Returns:¶

A dataframe

In [3]:

Copied!

# `f.a` refers to the column `a` created earlier in the same call,
# so column `b` is computed as a * 2 (0, 2, 4, 6, 8 in the output).
a = range(5)
tibble(a=a, b=f.a*2)
a = range(5)
tibble(a=a, b=f.a*2)

Out[3]:

	a	b
	<int64>	<int64>
0	0	0
1	1	2
2	2	4
3	3	6
4	4	8

In [4]:

Copied!

# The scalar `c=1` is recycled to the length of the other columns (5 rows).
tibble(a=a, b=f.a * 2, c=1)
tibble(a=a, b=f.a * 2, c=1)

Out[4]:

	a	b	c
	<int64>	<int64>	<int64>
0	0	0	1
1	1	2	1
2	2	4	1
3	3	6	1
4	4	8	1

In [5]:

Copied!

# `y` is derived from the freshly created `x` column via `f.x` (y == 2 * x).
# NOTE(review): no seed is set before `runif`, so the values shown below
# presumably change on every run — confirm and seed if reproducibility matters.
tibble(x=runif(10), y=f.x*2)
tibble(x=runif(10), y=f.x*2)

Out[5]:

	x	y
	<float64>	<float64>
0	0.639511	1.279022
1	0.573888	1.147776
2	0.123471	0.246943
3	0.807206	1.614412
4	0.159120	0.318241
5	0.893697	1.787394
6	0.897584	1.795168
7	0.159780	0.319559
8	0.919717	1.839433
9	0.304561	0.609122

In [6]:

Copied!

# Two unnamed columns built from the same value both get the name '1'; the
# default "check_unique" name repair rejects that with NameNonUniqueError.
# `try_catch` is not imported here — presumably it comes from nb_helpers.py
# and reports the exception instead of letting it propagate (verify).
x = 1
with try_catch():
    tibble(x, x)
x = 1
with try_catch():
    tibble(x, x)

[NameNonUniqueError] Names must be unique: 1

In [7]:

Copied!

# "unique" repairs the duplicated name '1' by suffixing each occurrence
# ('1__0', '1__1') and logs the renames as warnings.
tibble(x, x, _name_repair="unique")
tibble(x, x, _name_repair="unique")

[2022-12-02 14:48:49][datar][WARNING] New names:
[2022-12-02 14:48:49][datar][WARNING] * '1' -> '1__0'
[2022-12-02 14:48:49][datar][WARNING] * '1' -> '1__1'

Out[7]:

	1__0	1__1
	<int64>	<int64>
0	1	1

In [8]:

Copied!

# "minimal" performs no uniqueness check, so both columns keep the name '1'.
tibble(x, x, _name_repair="minimal") # duplicated columns allowed
tibble(x, x, _name_repair="minimal") # duplicated columns allowed

Out[8]:

	1	1
	<int64>	<int64>
0	1	1

In [9]:

Copied!

# NOTE: this rebinds `a` (previously range(5)) to the scalar 1.
# "universal" makes the names unique and syntactic: the numeric names
# '1' and '2' are rewritten to '_1' and '_2' (see the logged warnings).
a = 1
tibble(a * 1, a * 2, _name_repair="universal")
a = 1
tibble(a * 1, a * 2, _name_repair="universal")

[2022-12-02 14:49:00][datar][WARNING] New names:
[2022-12-02 14:49:00][datar][WARNING] * '1' -> '_1'
[2022-12-02 14:49:00][datar][WARNING] * '2' -> '_2'

Out[9]:

	_1	_2
	<int64>	<int64>
0	1	2

In [10]:

Copied!





from typing import Iterable
# annotate the parameter as an iterable to signal that the function
# receives all the names at once, not just a single name
def make_unique(names: Iterable[str]):
    """Return ``names`` as a list with duplicates made unique by a suffix.

    The first occurrence of a name is kept unchanged. Each later occurrence
    becomes ``f'{name}_{k}'``, where ``k`` counts up from the last suffix used
    for that name until the candidate does not collide with any name already
    emitted. So ``['a', 'a', 'a']`` gives ``['a', 'a_1', 'a_2']``.

    The previous implementation used ``new_names.count(name)``, which never
    sees the suffixed names and therefore emitted ``'a_1'`` twice for a name
    repeated three times.

    The ``Iterable[str]`` annotation is intentional: per the comment above
    this definition, it tells the caller to pass all names at once.

    Args:
        names: The column names to repair, in order.

    Returns:
        A new list of the same length containing unique names.
    """
    taken = set()       # every name emitted so far
    last_suffix = {}    # original name -> highest suffix number tried for it
    new_names = []
    for name in names:
        suffix = last_suffix.get(name, 0)
        candidate = name
        # Keep bumping the suffix until the candidate is genuinely unused;
        # this also avoids clashing with an input that is literally 'a_1'.
        while candidate in taken:
            suffix += 1
            candidate = f'{name}_{suffix}'
        last_suffix[name] = suffix
        taken.add(candidate)
        new_names.append(candidate)
    return new_names

tibble(a, a, _name_repair=make_unique)
from typing import Iterable
# annotate the parameter as an iterable to signal that the function
# receives all the names at once, not just a single name
def make_unique(names: Iterable[str]):
    """Return ``names`` as a list with duplicates made unique by a suffix.

    The first occurrence of a name is kept unchanged. Each later occurrence
    becomes ``f'{name}_{k}'``, where ``k`` counts up from the last suffix used
    for that name until the candidate does not collide with any name already
    emitted. So ``['a', 'a', 'a']`` gives ``['a', 'a_1', 'a_2']``.

    The previous implementation used ``new_names.count(name)``, which never
    sees the suffixed names and therefore emitted ``'a_1'`` twice for a name
    repeated three times.

    The ``Iterable[str]`` annotation is intentional: per the comment above
    this definition, it tells the caller to pass all names at once.

    Args:
        names: The column names to repair, in order.

    Returns:
        A new list of the same length containing unique names.
    """
    taken = set()       # every name emitted so far
    last_suffix = {}    # original name -> highest suffix number tried for it
    new_names = []
    for name in names:
        suffix = last_suffix.get(name, 0)
        candidate = name
        # Keep bumping the suffix until the candidate is genuinely unused;
        # this also avoids clashing with an input that is literally 'a_1'.
        while candidate in taken:
            suffix += 1
            candidate = f'{name}_{suffix}'
        last_suffix[name] = suffix
        taken.add(candidate)
        new_names.append(candidate)
    return new_names

tibble(a, a, _name_repair=make_unique)

Out[10]:

	1	1_1
	<int64>	<int64>
0	1	1

In [11]:

Copied!





# if no annotation is specified,
# the function is assumed to take a single name
def fix_names(name):
    """Replace every run of whitespace in one column name with an underscore.

    The parameter is deliberately left un-annotated: per the comment above
    this definition, that is what marks the function as taking a single name.
    """
    import re
    whitespace_run = re.compile(r'\s+')
    return whitespace_run.sub('_', name)


tibble(a + 1, a + 2, _name_repair = fix_names)
# if no annotation is specified,
# the function is assumed to take a single name
def fix_names(name):
    """Replace every run of whitespace in one column name with an underscore.

    The parameter is deliberately left un-annotated: per the comment above
    this definition, that is what marks the function as taking a single name.
    """
    import re
    whitespace_run = re.compile(r'\s+')
    return whitespace_run.sub('_', name)


tibble(a + 1, a + 2, _name_repair = fix_names)

Out[11]:

	2	3
	<int64>	<int64>
0	2	3

In [12]:

Copied!

# A list passed as `_name_repair` supplies the new column names directly
# (the two columns come out as `a` and `b`).
tibble(x, x, _name_repair=["a", "b"])
tibble(x, x, _name_repair=["a", "b"])

Out[12]:

	a	b
	<int64>	<int64>
0	1	1

In [13]:

Copied!





# An unnamed tibble argument contributes its columns (`b`, `c`) directly to
# the result, and later arguments can reference them (`d = f.b`).
tibble(
  tibble(
    b = [4,5,6],
    c = [7,8,9]
  ),
  a = range(3),
  d = f.b
)
tibble(
  tibble(
    b = [4,5,6],
    c = [7,8,9]
  ),
  a = range(3),
  d = f.b
)

Out[13]:

	b	c	a	d
	<int64>	<int64>	<int64>	<int64>
0	4	7	0	4
1	5	8	1	5
2	6	9	2	6

In [14]:

Copied!





# Data frames passed as *named* arguments are kept as nested columns; the
# output labels them `name$subcolumn` (`b$0`..`b$3`, `c$x$0`, `c$x$1`).
# `s` wraps a 4x4 matrix with ones on the diagonal; `t` is built from the
# first two columns of `s`.
s = tibble(diag(1, 4))
t = tibble(s.iloc[:, :2], _name_repair=['x', 'y'])
tibble(
  a=range(4),
  b=s,
  c=t
)
s = tibble(diag(1, 4))
t = tibble(s.iloc[:, :2], _name_repair=['x', 'y'])
tibble(
  a=range(4),
  b=s,
  c=t
)

Out[14]:

	a	b$0	b$1	b$2	b$3	c$x$0	c$x$1
	<int64>	<int64>	<int64>	<int64>	<int64>	<int64>	<int64>
0	0	1	0	0	0	1	0
1	1	0	1	0	0	0	1
2	2	0	0	1	0	0	0
3	3	0	0	0	1	0	0

In [15]:

Copied!

# Columns must have the same length or length 1: pairing a 3-element `a`
# with a 4-element `b` raises ValueError ("`b` must be size [1 3], not 4.").
with try_catch():
    tibble(a=range(3), b=range(4))
with try_catch():
    tibble(a=range(3), b=range(4))

[ValueError] `b` must be size [1 3], not 4.

In [16]:

Copied!

# The un-annotated lambda receives a column name and rewrites it:
# '_dotted' becomes '.dotted' in the output.
tibble(_dotted = 3, _name_repair=lambda x: x.replace('_', '.'))
tibble(_dotted = 3, _name_repair=lambda x: x.replace('_', '.'))

Out[16]:

	.dotted
	<int64>
0	3

In [17]:

Copied!

# `f.x` resolves to the column `x` (value 1), not the Python variable `x`
# (value 3), so `y` comes out as 1.
x = 3
tibble(x=1, y=f.x)
x = 3
tibble(x=1, y=f.x)

Out[17]:

	x	y
	<int64>	<int64>
0	1	1

In [18]:

Copied!

# A bare `x` is the Python variable (3 at this point), not the column `x`,
# so `y` comes out as 3.
tibble(x=1, y=x)
tibble(x=1, y=x)

Out[18]:

	x	y
	<int64>	<int64>
0	1	3

In [19]:

Copied!





# Row-by-row layout: the leading `f.<name>` entries declare the columns
# (`colA`, `colB`); the remaining values fill the rows in order.
tribble(
  f.colA, f.colB,
  "a",    1,
  "b",    2,
  "c",    3
)
tribble(
  f.colA, f.colB,
  "a",    1,
  "b",    2,
  "c",    3
)

Out[19]:

	colA	colB
	<object>	<int64>
0	a	1
1	b	2
2	c	3

In [20]:

Copied!





# List values are stored as single cells, so column `y` has object dtype
# and each row holds a whole list.
tribble(
  f.x,  f.y,
  "a",  [1,2,3],
  "b",  [4,5,6]
)
tribble(
  f.x,  f.y,
  "a",  [1,2,3],
  "b",  [4,5,6]
)

Out[20]:

	x	y
	<object>	<object>
0	a	[1, 2, 3]
1	b	[4, 5, 6]

In [21]:

Copied!

# The nested list `[[2,3]]` yields a single row whose `b` cell holds [2, 3].
tibble_row(a=1, b=[[2,3]])
tibble_row(a=1, b=[[2,3]])

Out[21]:

	a	b
	<int64>	<object>
0	1	[2, 3]

In [22]:

Copied!

# inside a verb
# `tibble()` called inside `mutate` sees the piped frame's columns, so `f.x`
# resolves to 1; the result is shown as the nested column `y$y`.

tibble(x=1) >> mutate(y=tibble(y=f.x))
# inside a verb

tibble(x=1) >> mutate(y=tibble(y=f.x))

Out[22]:

	x	y$y
	<int64>	<int64>
0	1	1

tibble

★ tibble¶

Constructs a data frame¶

Args:¶

Returns:¶

★ tibble_row¶

Constructs a data frame that is guaranteed to occupy one row.¶

Args:¶

Returns:¶

★ tribble¶

Create dataframe using an easier to read row-by-row layout¶

Args:¶

Returns:¶

★ tibble
¶

★ tibble_row
¶

★ tribble
¶