base-arithmetic
In [1]:
Copied!
%run nb_helpers.py
from datar.all import *
# Keyword options forwarded to every debug() call in this notebook: `sep` is a
# 20-dash rule on its own line (it appears between each expression and its
# value in the cell outputs); `prefix` is a single newline.
debug_kwargs = dict(prefix='\n', sep='\n' + '-' * 20 + '\n')

# Emit the documentation entries for every function demonstrated below, in the
# order listed here.
nb_header(
    # single-vector aggregates and element-wise extremes
    sum, prod, mean, median, min, max, var, pmin, pmax,
    # rounding, roots and sign
    round, sqrt, abs, sign, trunc, ceiling, floor, signif,
    # logarithms and exponentials
    log, exp, log2, log10, log1p,
    # covariance and scaling
    cov, scale,
    # column-wise / row-wise data-frame statistics
    col_sums, row_sums, col_means, row_means,
    col_sds, row_sds, col_medians, row_medians,
    # distribution summaries
    quantile, sd, weighted_mean,
)
%run nb_helpers.py
from datar.all import *
# Keyword options forwarded to every debug() call in this notebook: `sep` is a
# 20-dash rule on its own line (it appears between each expression and its
# value in the cell outputs); `prefix` is a single newline.
debug_kwargs = dict(prefix='\n', sep='\n' + '-' * 20 + '\n')

# Emit the documentation entries for every function demonstrated below, in the
# order listed here.
nb_header(
    # single-vector aggregates and element-wise extremes
    sum, prod, mean, median, min, max, var, pmin, pmax,
    # rounding, roots and sign
    round, sqrt, abs, sign, trunc, ceiling, floor, signif,
    # logarithms and exponentials
    log, exp, log2, log10, log1p,
    # covariance and scaling
    cov, scale,
    # column-wise / row-wise data-frame statistics
    col_sums, row_sums, col_means, row_means,
    col_sds, row_sds, col_medians, row_medians,
    # distribution summaries
    quantile, sd, weighted_mean,
)
Try this notebook on binder.
★ sum_¶
★ prod¶
★ mean¶
★ median¶
★ min_¶
★ max_¶
★ var¶
★ pmin¶
★ pmax¶
★ round_¶
★ sqrt¶
★ abs_¶
★ sign¶
★ trunc¶
★ ceiling¶
★ floor¶
★ signif¶
★ log¶
★ exp¶
★ log2¶
★ log10¶
★ log1p¶
★ cov¶
Compute pairwise covariance between two variables¶
Args:¶
x
: a numeric vector, matrix or data frame.
y
: None, or a vector, matrix or data frame with dimensions compatible with x. The default is equivalent to y = x.
na_rm
: If True, remove missing values before computing the covariance.
ddof
: The denominator degrees of freedom.
Returns:¶
The covariance matrix
★ scale¶
★ col_sums¶
★ row_sums¶
★ col_means¶
★ row_means¶
★ col_sds¶
★ row_sds¶
★ col_medians¶
★ row_medians¶
★ quantile¶
★ sd¶
★ weighted_mean¶
In [2]:
Copied!
# Aggregate a single vector (sum, prod, mean, median, min, max, var) and take
# element-wise minima/maxima across two vectors (pmin, pmax).
# NOTE(review): after the star import, sum/min/max presumably resolve to
# datar's versions rather than the Python builtins — confirm.
# debug() echoes each argument's source text followed by its value (see the
# cell output), so the expressions are written exactly as they should appear.
debug(
sum([1,2,4,6]),
prod([1,2,4,6]),
mean([1,2,4,6]),
median([1,2,4,6]),
min([1,2,4,6]),
max([1,2,4,6]),
var([1,2,4,6]),
pmin([1,4], [2,3]),
pmax([1,4], [2,3]),
**debug_kwargs
)
# Aggregate a single vector (sum, prod, mean, median, min, max, var) and take
# element-wise minima/maxima across two vectors (pmin, pmax).
# NOTE(review): after the star import, sum/min/max presumably resolve to
# datar's versions rather than the Python builtins — confirm.
# debug() echoes each argument's source text followed by its value (see the
# cell output), so the expressions are written exactly as they should appear.
debug(
sum([1,2,4,6]),
prod([1,2,4,6]),
mean([1,2,4,6]),
median([1,2,4,6]),
min([1,2,4,6]),
max([1,2,4,6]),
var([1,2,4,6]),
pmin([1,4], [2,3]),
pmax([1,4], [2,3]),
**debug_kwargs
)
sum([1,2,4,6]) -------------------- 13 prod([1,2,4,6]) -------------------- 48 mean([1,2,4,6]) -------------------- 3.25 median([1,2,4,6]) -------------------- 3.0 min([1,2,4,6]) -------------------- 1 max([1,2,4,6]) -------------------- 6 var([1,2,4,6]) -------------------- 4.916666666666667 pmin([1,4], [2,3]) -------------------- array([1, 3]) pmax([1,4], [2,3]) -------------------- array([2, 4])
In [3]:
Copied!
# Element-wise rounding, square root, absolute value, sign, truncation,
# ceiling and floor on short vectors; each returns an array in the output.
# signif() is called on a scalar with 3 as its second argument
# (3.14567e-10 is shown as 3.15e-10 in the output).
# debug() echoes each argument's source text followed by its value, so the
# expressions are written exactly as they should appear.
debug(
round([1.4, 1.5]),
sqrt([1.1, 2.1]),
abs([1, -1]),
sign([10, -10]),
trunc([1.1, 2.1]),
ceiling([1.1, 2.1]),
floor([1.1, 2.1]),
signif(3.14567e-10, 3),
**debug_kwargs
)
# Element-wise rounding, square root, absolute value, sign, truncation,
# ceiling and floor on short vectors; each returns an array in the output.
# signif() is called on a scalar with 3 as its second argument
# (3.14567e-10 is shown as 3.15e-10 in the output).
# debug() echoes each argument's source text followed by its value, so the
# expressions are written exactly as they should appear.
debug(
round([1.4, 1.5]),
sqrt([1.1, 2.1]),
abs([1, -1]),
sign([10, -10]),
trunc([1.1, 2.1]),
ceiling([1.1, 2.1]),
floor([1.1, 2.1]),
signif(3.14567e-10, 3),
**debug_kwargs
)
round([1.4, 1.5]) -------------------- array([1., 2.]) sqrt([1.1, 2.1]) -------------------- array([1.04880885, 1.44913767]) abs([1, -1]) -------------------- array([1, 1]) sign([10, -10]) -------------------- array([ 1, -1]) trunc([1.1, 2.1]) -------------------- array([1., 2.]) ceiling([1.1, 2.1]) -------------------- array([2., 3.]) floor([1.1, 2.1]) -------------------- array([1., 2.]) signif(3.14567e-10, 3) -------------------- array(3.15e-10)
In [4]:
Copied!
# Logarithm/exponential round trips on scalars: log(exp(2)) and
# log1p(exp(1)-1) recover 2.0 and 1.0 in the output.
# cov() of two equal-length vectors yields a single value (-1.0 here), and
# scale([1, 2, 3]) yields array([-1., 0., 1.]).
# debug() echoes each argument's source text followed by its value, so the
# expressions are written exactly as they should appear.
debug(
log(exp(2)),
exp(2),
log2(4),
log10(100),
log1p(exp(1)-1),
cov([1, 2, 3], [3, 2, 1]),
scale([1, 2, 3]),
**debug_kwargs
)
# Logarithm/exponential round trips on scalars: log(exp(2)) and
# log1p(exp(1)-1) recover 2.0 and 1.0 in the output.
# cov() of two equal-length vectors yields a single value (-1.0 here), and
# scale([1, 2, 3]) yields array([-1., 0., 1.]).
# debug() echoes each argument's source text followed by its value, so the
# expressions are written exactly as they should appear.
debug(
log(exp(2)),
exp(2),
log2(4),
log10(100),
log1p(exp(1)-1),
cov([1, 2, 3], [3, 2, 1]),
scale([1, 2, 3]),
**debug_kwargs
)
log(exp(2)) -------------------- 2.0 exp(2) -------------------- 7.38905609893065 log2(4) -------------------- 2.0 log10(100) -------------------- 2.0 log1p(exp(1)-1) -------------------- 1.0 cov([1, 2, 3], [3, 2, 1]) -------------------- -1.0 scale([1, 2, 3]) -------------------- array([-1., 0., 1.])
In [5]:
Copied!
# Column-wise and row-wise statistics (sums, means, standard deviations,
# medians) of a small random data frame.
# tribble() arranges the 15 rnorm() draws under the three column names
# v1, v2, v3, giving the 5-row frame shown in the output.
# NOTE(review): no seed is set in this cell, so the values presumably differ
# on every run — confirm whether nb_helpers.py seeds the generator.
# debug() echoes each argument's source text followed by its value, so the
# expressions are written exactly as they should appear.
df = tribble(f.v1, f.v2, f.v3, *rnorm(15))
debug(
df,
col_sums(df),
row_sums(df),
col_means(df),
row_means(df),
col_sds(df),
row_sds(df),
col_medians(df),
row_medians(df),
**debug_kwargs,
)
# Column-wise and row-wise statistics (sums, means, standard deviations,
# medians) of a small random data frame.
# tribble() arranges the 15 rnorm() draws under the three column names
# v1, v2, v3, giving the 5-row frame shown in the output.
# NOTE(review): no seed is set in this cell, so the values presumably differ
# on every run — confirm whether nb_helpers.py seeds the generator.
# debug() echoes each argument's source text followed by its value, so the
# expressions are written exactly as they should appear.
df = tribble(f.v1, f.v2, f.v3, *rnorm(15))
debug(
df,
col_sums(df),
row_sums(df),
col_means(df),
row_means(df),
col_sds(df),
row_sds(df),
col_medians(df),
row_medians(df),
**debug_kwargs,
)
df -------------------- v1 v2 v3 <float64> <float64> <float64> 0 0.067273 0.015561 -0.633373 1 -0.911285 1.526871 0.540873 2 1.140124 -0.596385 1.933209 3 -0.147867 -0.945199 0.994281 4 -2.492679 -1.536482 -0.473660 col_sums(df) -------------------- v1 -2.344435 v2 -1.535634 v3 2.361329 dtype: float64 row_sums(df) -------------------- 0 -0.550539 1 1.156458 2 2.476947 3 -0.098786 4 -4.502821 dtype: float64 col_means(df) -------------------- v1 -0.468887 v2 -0.307127 v3 0.472266 dtype: float64 row_means(df) -------------------- 0 -0.183513 1 0.385486 2 0.825649 3 -0.032929 4 -1.500940 dtype: float64 col_sds(df) -------------------- v1 1.348229 v2 1.169380 v3 1.064046 dtype: float64 row_sds(df) -------------------- 0 0.390447 1 1.226483 2 1.293786 3 0.974835 4 1.009978 dtype: float64 col_medians(df) -------------------- v1 -0.147867 v2 -0.596385 v3 0.540873 dtype: float64 row_medians(df) -------------------- 0 0.015561 1 0.540873 2 1.140124 3 -0.147867 4 -1.536482 dtype: float64
In [6]:
Copied!
# Distribution summaries of the vector 1..5.
# quantile() is shown with its default probabilities (five values in the
# output) and with explicit probabilities [0, 1] (the minimum and maximum).
# weighted_mean() is shown without weights (3.0) and with the weights
# [5, 4, 3, 2, 1] (2.333...).
# debug() echoes each argument's source text followed by its value, so the
# expressions are written exactly as they should appear.
debug(
quantile([1, 2, 3, 4, 5]),
quantile([1, 2, 3, 4, 5], [0, 1]),
sd([1, 2, 3, 4, 5]),
weighted_mean([1, 2, 3, 4, 5]),
weighted_mean([1, 2, 3, 4, 5], [5, 4, 3, 2, 1]),
**debug_kwargs,
)
# Distribution summaries of the vector 1..5.
# quantile() is shown with its default probabilities (five values in the
# output) and with explicit probabilities [0, 1] (the minimum and maximum).
# weighted_mean() is shown without weights (3.0) and with the weights
# [5, 4, 3, 2, 1] (2.333...).
# debug() echoes each argument's source text followed by its value, so the
# expressions are written exactly as they should appear.
debug(
quantile([1, 2, 3, 4, 5]),
quantile([1, 2, 3, 4, 5], [0, 1]),
sd([1, 2, 3, 4, 5]),
weighted_mean([1, 2, 3, 4, 5]),
weighted_mean([1, 2, 3, 4, 5], [5, 4, 3, 2, 1]),
**debug_kwargs,
)
quantile([1, 2, 3, 4, 5]) -------------------- array([1., 2., 3., 4., 5.]) quantile([1, 2, 3, 4, 5], [0, 1]) -------------------- array([1, 5]) sd([1, 2, 3, 4, 5]) -------------------- 1.5811388300841898 weighted_mean([1, 2, 3, 4, 5]) -------------------- 3.0 weighted_mean([1, 2, 3, 4, 5], [5, 4, 3, 2, 1]) -------------------- 2.3333333333333335