base
In [1]:
Copied!
# Notebook setup.
# NOTE(review): `debug`, used by the cells below, is not imported explicitly;
# presumably nb_helpers.py defines it -- confirm.
%run nb_helpers.py
import numpy
from datar import f
from datar.base import *
from datar.tibble import tibble
# Keyword arguments passed to every debug(...) call below: a newline prefix and
# a separator made of a newline, 20 dashes and another newline.
debug_kwargs = {'prefix': '\n', 'sep': f'\n{"-" * 20}\n'}
# Notebook setup.
# NOTE(review): `debug`, used by the cells below, is not imported explicitly;
# presumably nb_helpers.py defines it -- confirm.
%run nb_helpers.py
import numpy
from datar import f
from datar.base import *
from datar.tibble import tibble
# Keyword arguments passed to every debug(...) call below: a newline prefix and
# a separator made of a newline, 20 dashes and another newline.
debug_kwargs = {'prefix': '\n', 'sep': f'\n{"-" * 20}\n'}
In [2]:
Copied!
# constants
# Show the values of the R-style constants (presumably provided by the
# `datar.base` star import -- confirm). In the recorded output, debug labels
# each value with the source text of the expression that produced it.
debug(
pi,
Inf,
letters,
LETTERS,
month_abb,
month_name,
**debug_kwargs
)
# constants
# Show the values of the R-style constants (presumably provided by the
# `datar.base` star import -- confirm). In the recorded output, debug labels
# each value with the source text of the expression that produced it.
debug(
pi,
Inf,
letters,
LETTERS,
month_abb,
month_name,
**debug_kwargs
)
pi -------------------- 3.141592653589793 Inf -------------------- inf letters -------------------- array(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'], dtype='<U1') LETTERS -------------------- array(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'], dtype='<U1') month_abb -------------------- array(['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], dtype='<U3') month_name -------------------- array(['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'], dtype='<U9')
In [5]:
Copied!
# verbs
# Data-frame and set helpers demonstrated on a one-row, three-column tibble.
# Per the recorded output: rownames(df) is array([0]); diag(1, 3) is a 3x3
# frame with ones on the diagonal and diag(diag(1, 3)) extracts that diagonal
# as array([1, 1, 1]); t(df) turns the columns x, y, z into rows.
# NOTE(review): the recorded output of colnames(df, ['a', 'b', 'c']) still
# shows ['x', 'y', 'z'] -- confirm whether the second argument is meant to
# rename the columns.
df = tibble(x=1, y=2, z=3)
debug(
colnames(df),
colnames(df, ['a', 'b', 'c']),
rownames(df),
set_rownames(df, ['row1']),
dim(df),
nrow(df),
ncol(df),
diag(1, 3),
diag(diag(1, 3)),
t(df),
intersect([1,2], [2,3]),
setdiff([1,2], [2,3]),
union([1,2], [2,3]),
setequal([1,2], [2,1]),
duplicated([1,1,2,2]),
duplicated([1,1,2,2], from_last=True), # recorded: earlier occurrences flagged instead
**debug_kwargs
)
# verbs
# Data-frame and set helpers demonstrated on a one-row, three-column tibble.
# Per the recorded output: rownames(df) is array([0]); diag(1, 3) is a 3x3
# frame with ones on the diagonal and diag(diag(1, 3)) extracts that diagonal
# as array([1, 1, 1]); t(df) turns the columns x, y, z into rows.
# NOTE(review): the recorded output of colnames(df, ['a', 'b', 'c']) still
# shows ['x', 'y', 'z'] -- confirm whether the second argument is meant to
# rename the columns.
df = tibble(x=1, y=2, z=3)
debug(
colnames(df),
colnames(df, ['a', 'b', 'c']),
rownames(df),
set_rownames(df, ['row1']),
dim(df),
nrow(df),
ncol(df),
diag(1, 3),
diag(diag(1, 3)),
t(df),
intersect([1,2], [2,3]),
setdiff([1,2], [2,3]),
union([1,2], [2,3]),
setequal([1,2], [2,1]),
duplicated([1,1,2,2]),
duplicated([1,1,2,2], from_last=True), # recorded: earlier occurrences flagged instead
**debug_kwargs
)
colnames(df) -------------------- array(['x', 'y', 'z'], dtype=object) colnames(df, ['a', 'b', 'c']) -------------------- array(['x', 'y', 'z'], dtype=object) rownames(df) -------------------- array([0]) set_rownames(df, ['row1']) -------------------- x y z <int64> <int64> <int64> row1 1 2 3 dim(df) -------------------- (1, 3) nrow(df) -------------------- 1 ncol(df) -------------------- 3 diag(1, 3) -------------------- 0 1 2 <int64> <int64> <int64> 0 1 0 0 1 0 1 0 2 0 0 1 diag(diag(1, 3)) -------------------- array([1, 1, 1]) t(df) -------------------- 0 <int64> x 1 y 2 z 3 intersect([1,2], [2,3]) -------------------- array([2]) setdiff([1,2], [2,3]) -------------------- array([1]) union([1,2], [2,3]) -------------------- array([1, 2, 3]) setequal([1,2], [2,1]) -------------------- True duplicated([1,1,2,2]) -------------------- array([False, True, False, True]) duplicated([1,1,2,2], from_last=True) -------------------- array([ True, False, True, False])
In [6]:
Copied!
# bessel
# Evaluate the four Bessel helpers on [1, 2, 3]; the recorded output is one
# float array of length 3 per call.
# NOTE(review): the second argument (1) is presumably the order -- confirm
# against the datar documentation.
debug(
bessel_i([1,2,3], 1),
bessel_j([1,2,3], 1),
bessel_k([1,2,3], 1),
bessel_y([1,2,3], 1),
**debug_kwargs
)
# bessel
# Evaluate the four Bessel helpers on [1, 2, 3]; the recorded output is one
# float array of length 3 per call.
# NOTE(review): the second argument (1) is presumably the order -- confirm
# against the datar documentation.
debug(
bessel_i([1,2,3], 1),
bessel_j([1,2,3], 1),
bessel_k([1,2,3], 1),
bessel_y([1,2,3], 1),
**debug_kwargs
)
bessel_i([1,2,3], 1) -------------------- array([0.5651591 , 1.59063685, 3.95337022]) bessel_j([1,2,3], 1) -------------------- array([0.44005059, 0.57672481, 0.33905896]) bessel_k([1,2,3], 1) -------------------- array([0.60190723, 0.13986588, 0.04015643]) bessel_y([1,2,3], 1) -------------------- array([-0.78121282, -0.10703243, 0.32467442])
In [4]:
Copied!
# casting
# Type-conversion helpers. Per the recorded output, as_integer([1, 2.1]) gives
# array([1, 2]), and both as_double and as_numeric turn the strings '1' and
# 'nan' into the floats 1. and nan.
debug(
as_integer([1, 2.1]),
as_double(['1', 2, 'nan']),
as_numeric(['1', 2, 'nan']),
**debug_kwargs
)
# casting
# Type-conversion helpers. Per the recorded output, as_integer([1, 2.1]) gives
# array([1, 2]), and both as_double and as_numeric turn the strings '1' and
# 'nan' into the floats 1. and nan.
debug(
as_integer([1, 2.1]),
as_double(['1', 2, 'nan']),
as_numeric(['1', 2, 'nan']),
**debug_kwargs
)
as_integer([1, 2.1]) -------------------- array([1, 2]) as_double(['1', 2, 'nan']) -------------------- array([ 1., 2., nan]) as_numeric(['1', 2, 'nan']) -------------------- array([ 1., 2., nan])
In [5]:
Copied!
# complex numbers
# Helpers applied to the Python complex literal 1 + 2j. Recorded output:
# im -> 2.0, re -> 1.0, mod -> 2.23606797749979, conj -> (1-2j),
# is_complex -> True, as_complex(1) -> (1+0j).
cm = 1 + 2j
debug(
im(cm),
re(cm),
mod(cm),
conj(cm),
is_complex(cm),
as_complex(1),
**debug_kwargs
)
# complex numbers
# Helpers applied to the Python complex literal 1 + 2j. Recorded output:
# im -> 2.0, re -> 1.0, mod -> 2.23606797749979, conj -> (1-2j),
# is_complex -> True, as_complex(1) -> (1+0j).
cm = 1 + 2j
debug(
im(cm),
re(cm),
mod(cm),
conj(cm),
is_complex(cm),
as_complex(1),
**debug_kwargs
)
im(cm) -------------------- 2.0 re(cm) -------------------- 1.0 mod(cm) -------------------- 2.23606797749979 conj(cm) -------------------- (1-2j) is_complex(cm) -------------------- True as_complex(1) -------------------- (1+0j)
In [6]:
Copied!
# cum stats
# Cumulative sum, product, minimum and maximum of the same four-element list;
# the recorded output is one array of length 4 per call.
a = [1,3,2,4]
debug(
cumsum(a),
cumprod(a),
cummin(a),
cummax(a),
**debug_kwargs
)
# cum stats
# Cumulative sum, product, minimum and maximum of the same four-element list;
# the recorded output is one array of length 4 per call.
a = [1,3,2,4]
debug(
cumsum(a),
cumprod(a),
cummin(a),
cummax(a),
**debug_kwargs
)
cumsum(a) -------------------- array([ 1, 4, 6, 10]) cumprod(a) -------------------- array([ 1, 3, 6, 24]) cummin(a) -------------------- array([1, 1, 1, 1]) cummax(a) -------------------- array([1, 3, 3, 4])
In [9]:
Copied!
# date
# as_date with strptime-style `format` strings, and with a number plus an
# `origin`. Per the recorded output the string inputs become object arrays of
# datetime.date, and as_date(32768, origin="1900-01-01") is
# datetime.date(1989, 9, 19).
debug(
as_date(["1jan1960", "2jan1960", "31mar1960", "30jul1960"], format="%d%b%Y"),
as_date(["02/27/92", "02/27/92", "01/14/92", "02/28/92", "02/01/92"], format="%m/%d/%y"),
as_date(32768, origin="1900-01-01"),
**debug_kwargs
)
# date
# as_date with strptime-style `format` strings, and with a number plus an
# `origin`. Per the recorded output the string inputs become object arrays of
# datetime.date, and as_date(32768, origin="1900-01-01") is
# datetime.date(1989, 9, 19).
debug(
as_date(["1jan1960", "2jan1960", "31mar1960", "30jul1960"], format="%d%b%Y"),
as_date(["02/27/92", "02/27/92", "01/14/92", "02/28/92", "02/01/92"], format="%m/%d/%y"),
as_date(32768, origin="1900-01-01"),
**debug_kwargs
)
as_date(["1jan1960", "2jan1960", "31mar1960", "30jul1960"], format="%d%b%Y") -------------------- array([datetime.date(1960, 1, 1), datetime.date(1960, 1, 2), datetime.date(1960, 3, 31), datetime.date(1960, 7, 30)], dtype=object) as_date(["02/27/92", "02/27/92", "01/14/92", "02/28/92", "02/01/92"], format="%m/%d/%y") -------------------- array([datetime.date(1992, 2, 27), datetime.date(1992, 2, 27), datetime.date(1992, 1, 14), datetime.date(1992, 2, 28), datetime.date(1992, 2, 1)], dtype=object) as_date(32768, origin="1900-01-01") -------------------- datetime.date(1989, 9, 19)
In [10]:
Copied!
# factor
# Build a factor whose level 4 is declared but never used. Per the recorded
# output, levels(fct) lists all four levels while droplevels(fct) keeps only
# the three categories that occur.
fct = factor([1,2,3], levels=[1,2,3,4])
debug(
levels(fct),
droplevels(fct),
is_factor(fct),
as_factor([1,2,3]),
**debug_kwargs
)
# factor
# Build a factor whose level 4 is declared but never used. Per the recorded
# output, levels(fct) lists all four levels while droplevels(fct) keeps only
# the three categories that occur.
fct = factor([1,2,3], levels=[1,2,3,4])
debug(
levels(fct),
droplevels(fct),
is_factor(fct),
as_factor([1,2,3]),
**debug_kwargs
)
levels(fct) -------------------- array([1, 2, 3, 4]) droplevels(fct) -------------------- [1, 2, 3] Categories (3, int64): [1, 2, 3] is_factor(fct) -------------------- True as_factor([1,2,3]) -------------------- [1, 2, 3] Categories (3, int64): [1, 2, 3]
In [11]:
Copied!
# logical
# R-style logical constants and predicates. Per the recorded output, TRUE and
# FALSE display as Python True and False, is_logical accepts both a scalar and
# a list, and as_logical([0, 1]) gives array([False, True]).
debug(
TRUE,
FALSE,
is_true(TRUE),
not is_true([TRUE, TRUE]), # recorded as True, i.e. is_true of this list is falsy
is_false(FALSE),
is_logical(TRUE),
is_logical([TRUE, FALSE]),
as_logical([0, 1]),
**debug_kwargs
)
# logical
# R-style logical constants and predicates. Per the recorded output, TRUE and
# FALSE display as Python True and False, is_logical accepts both a scalar and
# a list, and as_logical([0, 1]) gives array([False, True]).
debug(
TRUE,
FALSE,
is_true(TRUE),
not is_true([TRUE, TRUE]), # recorded as True, i.e. is_true of this list is falsy
is_false(FALSE),
is_logical(TRUE),
is_logical([TRUE, FALSE]),
as_logical([0, 1]),
**debug_kwargs
)
TRUE -------------------- True FALSE -------------------- False is_true(TRUE) -------------------- True not is_true([TRUE, TRUE]) -------------------- True is_false(FALSE) -------------------- True is_logical(TRUE) -------------------- True is_logical([TRUE, FALSE]) -------------------- True as_logical([0, 1]) -------------------- array([False, True])
In [12]:
Copied!
# na
# Missing-value constants. Per the recorded output, NA and NaN are the same
# float object (`NA is NaN` is True, type(NA) is float) and is_na treats NA,
# NaN and None all as missing.
debug(
NA,
NaN,
NA is NaN,
type(NA),
is_na([NA, NaN, None]),
any_na([1,2, NA]),
numpy.array([1,2,NA]),
numpy.array(['a', 'b', NA]), # recorded: NA becomes the string 'nan' (dtype '<U32')
numpy.array(['a', 'b', None]), # recorded: None is kept, dtype=object
**debug_kwargs
)
# na
# Missing-value constants. Per the recorded output, NA and NaN are the same
# float object (`NA is NaN` is True, type(NA) is float) and is_na treats NA,
# NaN and None all as missing.
debug(
NA,
NaN,
NA is NaN,
type(NA),
is_na([NA, NaN, None]),
any_na([1,2, NA]),
numpy.array([1,2,NA]),
numpy.array(['a', 'b', NA]), # recorded: NA becomes the string 'nan' (dtype '<U32')
numpy.array(['a', 'b', None]), # recorded: None is kept, dtype=object
**debug_kwargs
)
NA -------------------- nan NaN -------------------- nan NA is NaN -------------------- True type(NA) -------------------- <class 'float'> is_na([NA, NaN, None]) -------------------- array([ True, True, True]) any_na([1,2, NA]) -------------------- True numpy.array([1,2,NA]) -------------------- array([ 1., 2., nan]) numpy.array(['a', 'b', NA]) -------------------- array(['a', 'b', 'nan'], dtype='<U32') numpy.array(['a', 'b', None]) -------------------- array(['a', 'b', None], dtype=object)
In [13]:
Copied!
# null
# NULL handling. Per the recorded output, NULL is Python's None, is_null(NA)
# is False (NA is not NULL), and as_null('whatever') returns None.
debug(
NULL,
NULL is None,
is_null(NA),
is_null(NULL),
as_null('whatever'),
**debug_kwargs
)
# null
# NULL handling. Per the recorded output, NULL is Python's None, is_null(NA)
# is False (NA is not NULL), and as_null('whatever') returns None.
debug(
NULL,
NULL is None,
is_null(NA),
is_null(NULL),
as_null('whatever'),
**debug_kwargs
)
NULL -------------------- None NULL is None -------------------- True is_null(NA) -------------------- False is_null(NULL) -------------------- True as_null('whatever') -------------------- None
In [14]:
Copied!
# random
# Seed first, then sample 10 values from seq(1, 100); the recorded Out[14]
# matches the values listed in the inline comment.
set_seed(8525)
sample(seq(1,100), 10) # always: [19, 61, 85, 93, 18, 5, 22, 7, 29, 26]
# random
# Seed first, then sample 10 values from seq(1, 100); the recorded Out[14]
# matches the values listed in the inline comment.
set_seed(8525)
sample(seq(1,100), 10) # always: [19, 61, 85, 93, 18, 5, 22, 7, 29, 26]
Out[14]:
array([19, 61, 85, 93, 18, 5, 22, 7, 29, 26])
In [15]:
Copied!
# seq
# Sequence helpers. Per the recorded output, c(...) flattens its arguments
# (str of the result is '[1, 2, 3, 4]'), seq_len and seq_along count from 1,
# rep([1,2], 2) repeats the whole list while each=2 repeats every element, and
# lengths(10) treats the scalar as a single item (array([1])).
debug(
str(c(c(1,2), seq(3,4))),
seq_len(5),
seq_along([4,2,1]),
rev(seq_len(5)),
rep([1,2], 2),
rep([1,2], each=2),
length([1,2]),
length("abcd"), # string is scalar
lengths(10),
lengths([[1], [1,2]]),
unique([3,3,2,4,4]), # order kept
**debug_kwargs
)
# seq
# Sequence helpers. Per the recorded output, c(...) flattens its arguments
# (str of the result is '[1, 2, 3, 4]'), seq_len and seq_along count from 1,
# rep([1,2], 2) repeats the whole list while each=2 repeats every element, and
# lengths(10) treats the scalar as a single item (array([1])).
debug(
str(c(c(1,2), seq(3,4))),
seq_len(5),
seq_along([4,2,1]),
rev(seq_len(5)),
rep([1,2], 2),
rep([1,2], each=2),
length([1,2]),
length("abcd"), # string is scalar
lengths(10),
lengths([[1], [1,2]]),
unique([3,3,2,4,4]), # order kept
**debug_kwargs
)
str(c(c(1,2), seq(3,4))) -------------------- '[1, 2, 3, 4]' seq_len(5) -------------------- array([1, 2, 3, 4, 5]) seq_along([4,2,1]) -------------------- array([1, 2, 3]) rev(seq_len(5)) -------------------- array([5, 4, 3, 2, 1]) rep([1,2], 2) -------------------- array([1, 2, 1, 2]) rep([1,2], each=2) -------------------- array([1, 1, 2, 2]) length([1,2]) -------------------- 2 length("abcd") -------------------- 1 lengths(10) -------------------- array([1]) lengths([[1], [1,2]]) -------------------- array([1, 2]) unique([3,3,2,4,4]) -------------------- array([3, 2, 4])
In [16]:
Copied!
# special
# Special mathematical functions evaluated element-wise on short lists; every
# recorded result is a float array. Note that the recorded outputs of
# trigamma([1,2,3]) and psigamma([1,2,3], 1) are identical.
debug(
beta([1,2,3], 1),
lbeta([1,2,3], 1),
gamma([1,2,3]),
lgamma([1,2,3]),
digamma([1,2,3]),
trigamma([1,2,3]),
psigamma([1,2,3], 1),
choose([4,6], 2),
lchoose([4,6], 2),
factorial([1,5]),
lfactorial([1,5]),
**debug_kwargs
)
# special
# Special mathematical functions evaluated element-wise on short lists; every
# recorded result is a float array. Note that the recorded outputs of
# trigamma([1,2,3]) and psigamma([1,2,3], 1) are identical.
debug(
beta([1,2,3], 1),
lbeta([1,2,3], 1),
gamma([1,2,3]),
lgamma([1,2,3]),
digamma([1,2,3]),
trigamma([1,2,3]),
psigamma([1,2,3], 1),
choose([4,6], 2),
lchoose([4,6], 2),
factorial([1,5]),
lfactorial([1,5]),
**debug_kwargs
)
beta([1,2,3], 1) -------------------- array([1. , 0.5 , 0.33333333]) lbeta([1,2,3], 1) -------------------- array([ 0. , -0.69314718, -1.09861229]) gamma([1,2,3]) -------------------- array([1., 1., 2.]) lgamma([1,2,3]) -------------------- array([0. , 0. , 0.69314718]) digamma([1,2,3]) -------------------- array([-0.57721566, 0.42278434, 0.92278434]) trigamma([1,2,3]) -------------------- array([1.64493407, 0.64493407, 0.39493407]) psigamma([1,2,3], 1) -------------------- array([1.64493407, 0.64493407, 0.39493407]) choose([4,6], 2) -------------------- array([ 6., 15.]) lchoose([4,6], 2) -------------------- array([1.79175947, 2.7080502 ]) factorial([1,5]) -------------------- array([ 1., 120.]) lfactorial([1,5]) -------------------- array([0. , 4.78749174])
In [18]:
Copied!
# string
# String helpers. Observations from the recorded output:
# - without fixed=True the pattern "." matches any character (grep returns
#   indices [0, 1]; gsub replaces every character); with fixed=True it matches
#   only a literal dot;
# - grep returns 0-based indices;
# - substr("abcde", 1, 3) is 'bc' and substring("abcde", 1) is 'bcde', i.e.
#   positions are 0-based and the end position is excluded;
# - paste joins with a space by default; paste0 with `collapse` returns a
#   single plain string.
debug(
is_character("a"),
is_character(["a", "b"]),
as_character([1,2]),
grep(".", ["ab", "c.d"]),
grep(".", ["ab", "c.d"], fixed=True),
grepl(".", ["ab", "c.d"], fixed=True),
sub(".", "x", ["ab", "c.d.e"]),
sub(".", "x", ["ab", "c.d.e"], fixed=True),
gsub(".", "x", ["ab", "c.d.e"]),
gsub(".", "x", ["ab", "c.d.e"], fixed=True),
nchar('\0'), # recorded as array(0)
nchar('\0', type_='width'),
nzchar(["a", ""]),
paste("a", "b"),
paste(["a", "c"], ["b", "d"], sep="."),
paste0(["a", "c"], ["b", "d"], collapse="; "),
sprintf("%s, %d, %.3f", pi, pi, pi),
substr("abcde", 1, 3),
substring("abcde", 1),
strsplit(["a.b.c", "e.f"], ".", fixed=True),
**debug_kwargs
)
# string
# String helpers. Observations from the recorded output:
# - without fixed=True the pattern "." matches any character (grep returns
#   indices [0, 1]; gsub replaces every character); with fixed=True it matches
#   only a literal dot;
# - grep returns 0-based indices;
# - substr("abcde", 1, 3) is 'bc' and substring("abcde", 1) is 'bcde', i.e.
#   positions are 0-based and the end position is excluded;
# - paste joins with a space by default; paste0 with `collapse` returns a
#   single plain string.
debug(
is_character("a"),
is_character(["a", "b"]),
as_character([1,2]),
grep(".", ["ab", "c.d"]),
grep(".", ["ab", "c.d"], fixed=True),
grepl(".", ["ab", "c.d"], fixed=True),
sub(".", "x", ["ab", "c.d.e"]),
sub(".", "x", ["ab", "c.d.e"], fixed=True),
gsub(".", "x", ["ab", "c.d.e"]),
gsub(".", "x", ["ab", "c.d.e"], fixed=True),
nchar('\0'), # recorded as array(0)
nchar('\0', type_='width'),
nzchar(["a", ""]),
paste("a", "b"),
paste(["a", "c"], ["b", "d"], sep="."),
paste0(["a", "c"], ["b", "d"], collapse="; "),
sprintf("%s, %d, %.3f", pi, pi, pi),
substr("abcde", 1, 3),
substring("abcde", 1),
strsplit(["a.b.c", "e.f"], ".", fixed=True),
**debug_kwargs
)
is_character("a") -------------------- True is_character(["a", "b"]) -------------------- True as_character([1,2]) -------------------- array(['1', '2'], dtype='<U21') grep(".", ["ab", "c.d"]) -------------------- array([0, 1]) grep(".", ["ab", "c.d"], fixed=True) -------------------- array([1]) grepl(".", ["ab", "c.d"], fixed=True) -------------------- array([False, True]) sub(".", "x", ["ab", "c.d.e"]) -------------------- array(['xb', 'x.d.e'], dtype='<U5') sub(".", "x", ["ab", "c.d.e"], fixed=True) -------------------- array(['ab', 'cxd.e'], dtype='<U5') gsub(".", "x", ["ab", "c.d.e"]) -------------------- array(['xx', 'xxxxx'], dtype='<U5') gsub(".", "x", ["ab", "c.d.e"], fixed=True) -------------------- array(['ab', 'cxdxe'], dtype='<U5') nchar('\0') -------------------- array(0) nchar('\0', type_='width') -------------------- array(0) nzchar(["a", ""]) -------------------- array([ True, False]) paste("a", "b") -------------------- array('a b', dtype=object) paste(["a", "c"], ["b", "d"], sep=".") -------------------- array(['a.b', 'c.d'], dtype=object) paste0(["a", "c"], ["b", "d"], collapse="; ") -------------------- 'ab; cd' sprintf("%s, %d, %.3f", pi, pi, pi) -------------------- array('3.141592653589793, 3, 3.142', dtype='<U27') substr("abcde", 1, 3) -------------------- array('bc', dtype='<U2') substring("abcde", 1) -------------------- array('bcde', dtype='<U4') strsplit(["a.b.c", "e.f"], ".", fixed=True) -------------------- array([list(['a', 'b', 'c']), list(['e', 'f'])], dtype=object)
In [19]:
Copied!
# table
# Count occurrences of each value: the input is 'a' and 'b' repeated four
# times each, and the recorded Out[19] shows a count of 4 for both.
table(rep(['a', 'b'], each=4))
# table
# Count occurrences of each value: the input is 'a' and 'b' repeated four
# times each, and the recorded Out[19] shows a count of 4 for both.
table(rep(['a', 'b'], each=4))
Out[19]:
| | a | b |
|---|---|---|
| | <int64> | <int64> |
| count | 4 | 4 |
In [21]:
Copied!
# testing
# Type predicates. Per the recorded output, an integer numpy array is integer
# and numeric but not double; the same values with dtype=numpy.double are
# double. is_element(1, [1,2]) is recorded as array(True), a 0-d array rather
# than a plain bool.
debug(
is_double(numpy.array([1,2])),
is_integer(numpy.array([1,2])),
is_numeric(numpy.array([1,2])),
is_double(numpy.array([1,2], dtype=numpy.double)),
is_atomic("abc"),
is_element(1, [1,2]),
**debug_kwargs
)
# testing
# Type predicates. Per the recorded output, an integer numpy array is integer
# and numeric but not double; the same values with dtype=numpy.double are
# double. is_element(1, [1,2]) is recorded as array(True), a 0-d array rather
# than a plain bool.
debug(
is_double(numpy.array([1,2])),
is_integer(numpy.array([1,2])),
is_numeric(numpy.array([1,2])),
is_double(numpy.array([1,2], dtype=numpy.double)),
is_atomic("abc"),
is_element(1, [1,2]),
**debug_kwargs
)
is_double(numpy.array([1,2])) -------------------- False is_integer(numpy.array([1,2])) -------------------- True is_numeric(numpy.array([1,2])) -------------------- True is_double(numpy.array([1,2], dtype=numpy.double)) -------------------- True is_atomic("abc") -------------------- True is_element(1, [1,2]) -------------------- array(True)
In [22]:
Copied!
# trig/hb
# Trigonometric and hyperbolic functions on scalar inputs; every recorded
# result is a plain float.
debug(
cos(.5),
sin(.5),
tan(.5),
acos(.5),
asin(.5),
atan2(.5, 1),
cospi(.5), # recorded as 6.123233995736766e-17, not exactly 0
sinpi(.5),
tanpi(.5), # recorded as 1.633123935319537e+16, a very large finite value
cosh(.5),
sinh(.5),
tanh(.5),
acosh(1),
asinh(.5),
atanh(.5),
**debug_kwargs
)
# trig/hb
# Trigonometric and hyperbolic functions on scalar inputs; every recorded
# result is a plain float.
debug(
cos(.5),
sin(.5),
tan(.5),
acos(.5),
asin(.5),
atan2(.5, 1),
cospi(.5), # recorded as 6.123233995736766e-17, not exactly 0
sinpi(.5),
tanpi(.5), # recorded as 1.633123935319537e+16, a very large finite value
cosh(.5),
sinh(.5),
tanh(.5),
acosh(1),
asinh(.5),
atanh(.5),
**debug_kwargs
)
cos(.5) -------------------- 0.8775825618903728 sin(.5) -------------------- 0.479425538604203 tan(.5) -------------------- 0.5463024898437905 acos(.5) -------------------- 1.0471975511965979 asin(.5) -------------------- 0.5235987755982989 atan2(.5, 1) -------------------- 0.4636476090008061 cospi(.5) -------------------- 6.123233995736766e-17 sinpi(.5) -------------------- 1.0 tanpi(.5) -------------------- 1.633123935319537e+16 cosh(.5) -------------------- 1.1276259652063807 sinh(.5) -------------------- 0.5210953054937474 tanh(.5) -------------------- 0.46211715726000974 acosh(1) -------------------- 0.0 asinh(.5) -------------------- 0.48121182505960347 atanh(.5) -------------------- 0.5493061443340548
In [23]:
Copied!
# which
# Index lookups. The recorded output uses 0-based positions: which(...) gives
# array([0, 2]), which_max gives 2 (the value 4) and which_min gives 3 (the
# value 1).
debug(
which([True, False, True]),
which_max([3,2,4,1]),
which_min([3,2,4,1]),
**debug_kwargs
)
# which
# Index lookups. The recorded output uses 0-based positions: which(...) gives
# array([0, 2]), which_max gives 2 (the value 4) and which_min gives 3 (the
# value 1).
debug(
which([True, False, True]),
which_max([3,2,4,1]),
which_min([3,2,4,1]),
**debug_kwargs
)
which([True, False, True]) -------------------- array([0, 2]) which_max([3,2,4,1]) -------------------- 2 which_min([3,2,4,1]) -------------------- 3