base
In [1]:
Copied!
%run nb_helpers.py
import numpy
from datar import f
from datar.base import *
from datar.tibble import tibble
debug_kwargs = {'prefix': '\n', 'sep': f'\n{"-" * 20}\n'}
%run nb_helpers.py
import numpy
from datar import f
from datar.base import *
from datar.tibble import tibble
debug_kwargs = {'prefix': '\n', 'sep': f'\n{"-" * 20}\n'}
In [2]:
Copied!
# constants
debug(
pi,
Inf,
letters,
LETTERS,
month_abb,
month_name,
**debug_kwargs
)
# constants
debug(
pi,
Inf,
letters,
LETTERS,
month_abb,
month_name,
**debug_kwargs
)
pi
--------------------
3.141592653589793
Inf
--------------------
inf
letters
--------------------
array(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'],
dtype='<U1')
LETTERS
--------------------
array(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'],
dtype='<U1')
month_abb
--------------------
array(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep',
'Oct', 'Nov', 'Dec'], dtype='<U3')
month_name
--------------------
array(['January', 'February', 'March', 'April', 'May', 'June', 'July',
'August', 'September', 'October', 'November', 'December'],
dtype='<U9')
In [5]:
Copied!
# verbs
df = tibble(x=1, y=2, z=3)
debug(
colnames(df),
colnames(df, ['a', 'b', 'c']),
rownames(df),
set_rownames(df, ['row1']),
dim(df),
nrow(df),
ncol(df),
diag(1, 3),
diag(diag(1, 3)),
t(df),
intersect([1,2], [2,3]),
setdiff([1,2], [2,3]),
union([1,2], [2,3]),
setequal([1,2], [2,1]),
duplicated([1,1,2,2]),
duplicated([1,1,2,2], from_last=True),
**debug_kwargs
)
# verbs
df = tibble(x=1, y=2, z=3)
debug(
colnames(df),
colnames(df, ['a', 'b', 'c']),
rownames(df),
set_rownames(df, ['row1']),
dim(df),
nrow(df),
ncol(df),
diag(1, 3),
diag(diag(1, 3)),
t(df),
intersect([1,2], [2,3]),
setdiff([1,2], [2,3]),
union([1,2], [2,3]),
setequal([1,2], [2,1]),
duplicated([1,1,2,2]),
duplicated([1,1,2,2], from_last=True),
**debug_kwargs
)
colnames(df)
--------------------
array(['x', 'y', 'z'], dtype=object)
colnames(df, ['a', 'b', 'c'])
--------------------
array(['x', 'y', 'z'], dtype=object)
rownames(df)
--------------------
array([0])
set_rownames(df, ['row1'])
--------------------
x y z
<int64> <int64> <int64>
row1 1 2 3
dim(df)
--------------------
(1, 3)
nrow(df)
--------------------
1
ncol(df)
--------------------
3
diag(1, 3)
--------------------
0 1 2
<int64> <int64> <int64>
0 1 0 0
1 0 1 0
2 0 0 1
diag(diag(1, 3))
--------------------
array([1, 1, 1])
t(df)
--------------------
0
<int64>
x 1
y 2
z 3
intersect([1,2], [2,3])
--------------------
array([2])
setdiff([1,2], [2,3])
--------------------
array([1])
union([1,2], [2,3])
--------------------
array([1, 2, 3])
setequal([1,2], [2,1])
--------------------
True
duplicated([1,1,2,2])
--------------------
array([False, True, False, True])
duplicated([1,1,2,2], from_last=True)
--------------------
array([ True, False, True, False])
In [6]:
Copied!
# bessel
debug(
bessel_i([1,2,3], 1),
bessel_j([1,2,3], 1),
bessel_k([1,2,3], 1),
bessel_y([1,2,3], 1),
**debug_kwargs
)
# bessel
debug(
bessel_i([1,2,3], 1),
bessel_j([1,2,3], 1),
bessel_k([1,2,3], 1),
bessel_y([1,2,3], 1),
**debug_kwargs
)
bessel_i([1,2,3], 1) -------------------- array([0.5651591 , 1.59063685, 3.95337022]) bessel_j([1,2,3], 1) -------------------- array([0.44005059, 0.57672481, 0.33905896]) bessel_k([1,2,3], 1) -------------------- array([0.60190723, 0.13986588, 0.04015643]) bessel_y([1,2,3], 1) -------------------- array([-0.78121282, -0.10703243, 0.32467442])
In [4]:
Copied!
# casting
debug(
as_integer([1, 2.1]),
as_double(['1', 2, 'nan']),
as_numeric(['1', 2, 'nan']),
**debug_kwargs
)
# casting
debug(
as_integer([1, 2.1]),
as_double(['1', 2, 'nan']),
as_numeric(['1', 2, 'nan']),
**debug_kwargs
)
as_integer([1, 2.1]) -------------------- array([1, 2]) as_double(['1', 2, 'nan']) -------------------- array([ 1., 2., nan]) as_numeric(['1', 2, 'nan']) -------------------- array([ 1., 2., nan])
In [5]:
Copied!
# complex numbers
cm = 1 + 2j
debug(
im(cm),
re(cm),
mod(cm),
conj(cm),
is_complex(cm),
as_complex(1),
**debug_kwargs
)
# complex numbers
cm = 1 + 2j
debug(
im(cm),
re(cm),
mod(cm),
conj(cm),
is_complex(cm),
as_complex(1),
**debug_kwargs
)
im(cm) -------------------- 2.0 re(cm) -------------------- 1.0 mod(cm) -------------------- 2.23606797749979 conj(cm) -------------------- (1-2j) is_complex(cm) -------------------- True as_complex(1) -------------------- (1+0j)
In [6]:
Copied!
# cum stats
a = [1,3,2,4]
debug(
cumsum(a),
cumprod(a),
cummin(a),
cummax(a),
**debug_kwargs
)
# cum stats
a = [1,3,2,4]
debug(
cumsum(a),
cumprod(a),
cummin(a),
cummax(a),
**debug_kwargs
)
cumsum(a) -------------------- array([ 1, 4, 6, 10]) cumprod(a) -------------------- array([ 1, 3, 6, 24]) cummin(a) -------------------- array([1, 1, 1, 1]) cummax(a) -------------------- array([1, 3, 3, 4])
In [9]:
Copied!
# date
debug(
as_date(["1jan1960", "2jan1960", "31mar1960", "30jul1960"], format="%d%b%Y"),
as_date(["02/27/92", "02/27/92", "01/14/92", "02/28/92", "02/01/92"], format="%m/%d/%y"),
as_date(32768, origin="1900-01-01"),
**debug_kwargs
)
# date
debug(
as_date(["1jan1960", "2jan1960", "31mar1960", "30jul1960"], format="%d%b%Y"),
as_date(["02/27/92", "02/27/92", "01/14/92", "02/28/92", "02/01/92"], format="%m/%d/%y"),
as_date(32768, origin="1900-01-01"),
**debug_kwargs
)
as_date(["1jan1960", "2jan1960", "31mar1960", "30jul1960"], format="%d%b%Y")
--------------------
array([datetime.date(1960, 1, 1), datetime.date(1960, 1, 2),
datetime.date(1960, 3, 31), datetime.date(1960, 7, 30)],
dtype=object)
as_date(["02/27/92", "02/27/92", "01/14/92", "02/28/92", "02/01/92"], format="%m/%d/%y")
--------------------
array([datetime.date(1992, 2, 27), datetime.date(1992, 2, 27),
datetime.date(1992, 1, 14), datetime.date(1992, 2, 28),
datetime.date(1992, 2, 1)], dtype=object)
as_date(32768, origin="1900-01-01")
--------------------
datetime.date(1989, 9, 19)
In [10]:
Copied!
# factor
fct = factor([1,2,3], levels=[1,2,3,4])
debug(
levels(fct),
droplevels(fct),
is_factor(fct),
as_factor([1,2,3]),
**debug_kwargs
)
# factor
fct = factor([1,2,3], levels=[1,2,3,4])
debug(
levels(fct),
droplevels(fct),
is_factor(fct),
as_factor([1,2,3]),
**debug_kwargs
)
levels(fct) -------------------- array([1, 2, 3, 4]) droplevels(fct) -------------------- [1, 2, 3] Categories (3, int64): [1, 2, 3] is_factor(fct) -------------------- True as_factor([1,2,3]) -------------------- [1, 2, 3] Categories (3, int64): [1, 2, 3]
In [11]:
Copied!
# logical
debug(
TRUE,
FALSE,
is_true(TRUE),
not is_true([TRUE, TRUE]),
is_false(FALSE),
is_logical(TRUE),
is_logical([TRUE, FALSE]),
as_logical([0, 1]),
**debug_kwargs
)
# logical
debug(
TRUE,
FALSE,
is_true(TRUE),
not is_true([TRUE, TRUE]),
is_false(FALSE),
is_logical(TRUE),
is_logical([TRUE, FALSE]),
as_logical([0, 1]),
**debug_kwargs
)
TRUE -------------------- True FALSE -------------------- False is_true(TRUE) -------------------- True not is_true([TRUE, TRUE]) -------------------- True is_false(FALSE) -------------------- True is_logical(TRUE) -------------------- True is_logical([TRUE, FALSE]) -------------------- True as_logical([0, 1]) -------------------- array([False, True])
In [12]:
Copied!
# na
debug(
NA,
NaN,
NA is NaN,
type(NA),
is_na([NA, NaN, None]),
any_na([1,2, NA]),
numpy.array([1,2,NA]),
numpy.array(['a', 'b', NA]),
numpy.array(['a', 'b', None]),
**debug_kwargs
)
# na
debug(
NA,
NaN,
NA is NaN,
type(NA),
is_na([NA, NaN, None]),
any_na([1,2, NA]),
numpy.array([1,2,NA]),
numpy.array(['a', 'b', NA]),
numpy.array(['a', 'b', None]),
**debug_kwargs
)
NA -------------------- nan NaN -------------------- nan NA is NaN -------------------- True type(NA) -------------------- <class 'float'> is_na([NA, NaN, None]) -------------------- array([ True, True, True]) any_na([1,2, NA]) -------------------- True numpy.array([1,2,NA]) -------------------- array([ 1., 2., nan]) numpy.array(['a', 'b', NA]) -------------------- array(['a', 'b', 'nan'], dtype='<U32') numpy.array(['a', 'b', None]) -------------------- array(['a', 'b', None], dtype=object)
In [13]:
Copied!
# null
debug(
NULL,
NULL is None,
is_null(NA),
is_null(NULL),
as_null('whatever'),
**debug_kwargs
)
# null
debug(
NULL,
NULL is None,
is_null(NA),
is_null(NULL),
as_null('whatever'),
**debug_kwargs
)
NULL
--------------------
None
NULL is None
--------------------
True
is_null(NA)
--------------------
False
is_null(NULL)
--------------------
True
as_null('whatever')
--------------------
None
In [14]:
Copied!
# random
set_seed(8525)
sample(seq(1,100), 10) # always: [19, 61, 85, 93, 18, 5, 22, 7, 29, 26]
# random
set_seed(8525)
sample(seq(1,100), 10) # always: [19, 61, 85, 93, 18, 5, 22, 7, 29, 26]
Out[14]:
array([19, 61, 85, 93, 18, 5, 22, 7, 29, 26])
In [15]:
Copied!
# seq
debug(
str(c(c(1,2), seq(3,4))),
seq_len(5),
seq_along([4,2,1]),
rev(seq_len(5)),
rep([1,2], 2),
rep([1,2], each=2),
length([1,2]),
length("abcd"), # string is scalar
lengths(10),
lengths([[1], [1,2]]),
unique([3,3,2,4,4]), # order kept
**debug_kwargs
)
# seq
debug(
str(c(c(1,2), seq(3,4))),
seq_len(5),
seq_along([4,2,1]),
rev(seq_len(5)),
rep([1,2], 2),
rep([1,2], each=2),
length([1,2]),
length("abcd"), # string is scalar
lengths(10),
lengths([[1], [1,2]]),
unique([3,3,2,4,4]), # order kept
**debug_kwargs
)
str(c(c(1,2), seq(3,4)))
--------------------
'[1, 2, 3, 4]'
seq_len(5)
--------------------
array([1, 2, 3, 4, 5])
seq_along([4,2,1])
--------------------
array([1, 2, 3])
rev(seq_len(5))
--------------------
array([5, 4, 3, 2, 1])
rep([1,2], 2)
--------------------
array([1, 2, 1, 2])
rep([1,2], each=2)
--------------------
array([1, 1, 2, 2])
length([1,2])
--------------------
2
length("abcd")
--------------------
1
lengths(10)
--------------------
array([1])
lengths([[1], [1,2]])
--------------------
array([1, 2])
unique([3,3,2,4,4])
--------------------
array([3, 2, 4])
In [16]:
Copied!
# special
debug(
beta([1,2,3], 1),
lbeta([1,2,3], 1),
gamma([1,2,3]),
lgamma([1,2,3]),
digamma([1,2,3]),
trigamma([1,2,3]),
psigamma([1,2,3], 1),
choose([4,6], 2),
lchoose([4,6], 2),
factorial([1,5]),
lfactorial([1,5]),
**debug_kwargs
)
# special
debug(
beta([1,2,3], 1),
lbeta([1,2,3], 1),
gamma([1,2,3]),
lgamma([1,2,3]),
digamma([1,2,3]),
trigamma([1,2,3]),
psigamma([1,2,3], 1),
choose([4,6], 2),
lchoose([4,6], 2),
factorial([1,5]),
lfactorial([1,5]),
**debug_kwargs
)
beta([1,2,3], 1) -------------------- array([1. , 0.5 , 0.33333333]) lbeta([1,2,3], 1) -------------------- array([ 0. , -0.69314718, -1.09861229]) gamma([1,2,3]) -------------------- array([1., 1., 2.]) lgamma([1,2,3]) -------------------- array([0. , 0. , 0.69314718]) digamma([1,2,3]) -------------------- array([-0.57721566, 0.42278434, 0.92278434]) trigamma([1,2,3]) -------------------- array([1.64493407, 0.64493407, 0.39493407]) psigamma([1,2,3], 1) -------------------- array([1.64493407, 0.64493407, 0.39493407]) choose([4,6], 2) -------------------- array([ 6., 15.]) lchoose([4,6], 2) -------------------- array([1.79175947, 2.7080502 ]) factorial([1,5]) -------------------- array([ 1., 120.]) lfactorial([1,5]) -------------------- array([0. , 4.78749174])
In [18]:
Copied!
# string
debug(
is_character("a"),
is_character(["a", "b"]),
as_character([1,2]),
grep(".", ["ab", "c.d"]),
grep(".", ["ab", "c.d"], fixed=True),
grepl(".", ["ab", "c.d"], fixed=True),
sub(".", "x", ["ab", "c.d.e"]),
sub(".", "x", ["ab", "c.d.e"], fixed=True),
gsub(".", "x", ["ab", "c.d.e"]),
gsub(".", "x", ["ab", "c.d.e"], fixed=True),
nchar('\0'),
nchar('\0', type_='width'),
nzchar(["a", ""]),
paste("a", "b"),
paste(["a", "c"], ["b", "d"], sep="."),
paste0(["a", "c"], ["b", "d"], collapse="; "),
sprintf("%s, %d, %.3f", pi, pi, pi),
substr("abcde", 1, 3),
substring("abcde", 1),
strsplit(["a.b.c", "e.f"], ".", fixed=True),
**debug_kwargs
)
# string
debug(
is_character("a"),
is_character(["a", "b"]),
as_character([1,2]),
grep(".", ["ab", "c.d"]),
grep(".", ["ab", "c.d"], fixed=True),
grepl(".", ["ab", "c.d"], fixed=True),
sub(".", "x", ["ab", "c.d.e"]),
sub(".", "x", ["ab", "c.d.e"], fixed=True),
gsub(".", "x", ["ab", "c.d.e"]),
gsub(".", "x", ["ab", "c.d.e"], fixed=True),
nchar('\0'),
nchar('\0', type_='width'),
nzchar(["a", ""]),
paste("a", "b"),
paste(["a", "c"], ["b", "d"], sep="."),
paste0(["a", "c"], ["b", "d"], collapse="; "),
sprintf("%s, %d, %.3f", pi, pi, pi),
substr("abcde", 1, 3),
substring("abcde", 1),
strsplit(["a.b.c", "e.f"], ".", fixed=True),
**debug_kwargs
)
is_character("a")
--------------------
True
is_character(["a", "b"])
--------------------
True
as_character([1,2])
--------------------
array(['1', '2'], dtype='<U21')
grep(".", ["ab", "c.d"])
--------------------
array([0, 1])
grep(".", ["ab", "c.d"], fixed=True)
--------------------
array([1])
grepl(".", ["ab", "c.d"], fixed=True)
--------------------
array([False, True])
sub(".", "x", ["ab", "c.d.e"])
--------------------
array(['xb', 'x.d.e'], dtype='<U5')
sub(".", "x", ["ab", "c.d.e"], fixed=True)
--------------------
array(['ab', 'cxd.e'], dtype='<U5')
gsub(".", "x", ["ab", "c.d.e"])
--------------------
array(['xx', 'xxxxx'], dtype='<U5')
gsub(".", "x", ["ab", "c.d.e"], fixed=True)
--------------------
array(['ab', 'cxdxe'], dtype='<U5')
nchar('\0')
--------------------
array(0)
nchar('\0', type_='width')
--------------------
array(0)
nzchar(["a", ""])
--------------------
array([ True, False])
paste("a", "b")
--------------------
array('a b', dtype=object)
paste(["a", "c"], ["b", "d"], sep=".")
--------------------
array(['a.b', 'c.d'], dtype=object)
paste0(["a", "c"], ["b", "d"], collapse="; ")
--------------------
'ab; cd'
sprintf("%s, %d, %.3f", pi, pi, pi)
--------------------
array('3.141592653589793, 3, 3.142', dtype='<U27')
substr("abcde", 1, 3)
--------------------
array('bc', dtype='<U2')
substring("abcde", 1)
--------------------
array('bcde', dtype='<U4')
strsplit(["a.b.c", "e.f"], ".", fixed=True)
--------------------
array([list(['a', 'b', 'c']), list(['e', 'f'])], dtype=object)
In [19]:
Copied!
# table
table(rep(['a', 'b'], each=4))
# table
table(rep(['a', 'b'], each=4))
Out[19]:
| a | b | |
|---|---|---|
| <int64> | <int64> | |
| count | 4 | 4 |
In [21]:
Copied!
# testing
debug(
is_double(numpy.array([1,2])),
is_integer(numpy.array([1,2])),
is_numeric(numpy.array([1,2])),
is_double(numpy.array([1,2], dtype=numpy.double)),
is_atomic("abc"),
is_element(1, [1,2]),
**debug_kwargs
)
# testing
debug(
is_double(numpy.array([1,2])),
is_integer(numpy.array([1,2])),
is_numeric(numpy.array([1,2])),
is_double(numpy.array([1,2], dtype=numpy.double)),
is_atomic("abc"),
is_element(1, [1,2]),
**debug_kwargs
)
is_double(numpy.array([1,2]))
--------------------
False
is_integer(numpy.array([1,2]))
--------------------
True
is_numeric(numpy.array([1,2]))
--------------------
True
is_double(numpy.array([1,2], dtype=numpy.double))
--------------------
True
is_atomic("abc")
--------------------
True
is_element(1, [1,2])
--------------------
array(True)
In [22]:
Copied!
# trig/hb
debug(
cos(.5),
sin(.5),
tan(.5),
acos(.5),
asin(.5),
atan2(.5, 1),
cospi(.5),
sinpi(.5),
tanpi(.5),
cosh(.5),
sinh(.5),
tanh(.5),
acosh(1),
asinh(.5),
atanh(.5),
**debug_kwargs
)
# trig/hb
debug(
cos(.5),
sin(.5),
tan(.5),
acos(.5),
asin(.5),
atan2(.5, 1),
cospi(.5),
sinpi(.5),
tanpi(.5),
cosh(.5),
sinh(.5),
tanh(.5),
acosh(1),
asinh(.5),
atanh(.5),
**debug_kwargs
)
cos(.5) -------------------- 0.8775825618903728 sin(.5) -------------------- 0.479425538604203 tan(.5) -------------------- 0.5463024898437905 acos(.5) -------------------- 1.0471975511965979 asin(.5) -------------------- 0.5235987755982989 atan2(.5, 1) -------------------- 0.4636476090008061 cospi(.5) -------------------- 6.123233995736766e-17 sinpi(.5) -------------------- 1.0 tanpi(.5) -------------------- 1.633123935319537e+16 cosh(.5) -------------------- 1.1276259652063807 sinh(.5) -------------------- 0.5210953054937474 tanh(.5) -------------------- 0.46211715726000974 acosh(1) -------------------- 0.0 asinh(.5) -------------------- 0.48121182505960347 atanh(.5) -------------------- 0.5493061443340548
In [23]:
Copied!
# which
debug(
which([True, False, True]),
which_max([3,2,4,1]),
which_min([3,2,4,1]),
**debug_kwargs
)
# which
debug(
which([True, False, True]),
which_max([3,2,4,1]),
which_min([3,2,4,1]),
**debug_kwargs
)
which([True, False, True]) -------------------- array([0, 2]) which_max([3,2,4,1]) -------------------- 2 which_min([3,2,4,1]) -------------------- 3