forcats_misc

In [1]:

Copied!





# Load the notebook helpers.
# NOTE(review): nb_header (used below) and try_catch (used in a later cell)
# are presumably defined by nb_helpers.py -- confirm.
%run nb_helpers.py

import numpy  # used later for numpy.isin, as a contrast to fct_match
from datar.all import *
from datar.data import gss_cat  # dataset whose `marital` column feeds the fct_match examples


# Render the reference documentation of the functions covered in this
# notebook (the rendered docs appear right after this cell).
# NOTE(review): `book` presumably selects the upstream reference page to
# link to -- confirm that "forcat_lvl_addrm" is the intended value.
nb_header(
    as_factor,
    fct_count,
    fct_match,
    fct_unique,
    lvls_reorder,
    lvls_revalue,
    lvls_expand,
    lvls_union,
    book="forcat_lvl_addrm",
)
# Load the notebook helpers.
# NOTE(review): nb_header (used below) and try_catch (used in a later cell)
# are presumably defined by nb_helpers.py -- confirm.
%run nb_helpers.py

import numpy  # used later for numpy.isin, as a contrast to fct_match
from datar.all import *
from datar.data import gss_cat  # dataset whose `marital` column feeds the fct_match examples


# Render the reference documentation of the functions covered in this
# notebook (the rendered docs appear right after this cell).
# NOTE(review): `book` presumably selects the upstream reference page to
# link to -- confirm that "forcat_lvl_addrm" is the intended value.
nb_header(
    as_factor,
    fct_count,
    fct_match,
    fct_unique,
    lvls_reorder,
    lvls_revalue,
    lvls_expand,
    lvls_union,
    book="forcat_lvl_addrm",
)

Try this notebook on binder.

★ as_factor
¶

Convert a vector to a factor vector¶

Args:¶

x: The vector to convert, e.g. a vector of strings or numbers

Returns:¶

The factor vector

★ fct_count
¶

Count entries in a factor¶

Args:¶

_f: A factor
sort: If True, sort the result so that the most common values float to
the top

prop: If True, also compute each level's proportion of the total count.

Returns:¶

A data frame with columns f and n, plus column p if prop is True

★ fct_match
¶

Test for presence of levels in a factor¶

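Do any of lvls occur in _f? Unlike numpy.isin, this validates that every entry of lvls is actually a level of _f and raises a ValueError otherwise.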

Args:¶

_f: A factor
lvls: A vector specifying levels to look for.

Returns:¶

A logical (boolean) vector

★ fct_unique
¶

Unique values of a factor¶

Args:¶

_f: A factor

Returns:¶

The factor with the unique values in _f

★ lvls_reorder
¶

Leaves values of a factor as they are, but changes the order by¶

given indices

Args:¶

f: A factor (or character vector).
idx: A 0-based integer index, with one integer for each existing level.
new_levels: A character vector of new levels.
ordered: A logical which determines the "ordered" status of the
output factor. None preserves the existing status of the factor.

Returns:¶

The factor with levels reordered

★ lvls_revalue
¶

Changes the values of existing levels; there must¶

be one new level for each old level

Args:¶

_f: A factor
new_levels: A character vector of new levels.

Returns:¶

The factor with the new levels

★ lvls_expand
¶

Expands the set of levels; the new levels must¶

include the old levels.

Args:¶

_f: A factor
new_levels: The new levels. Must include the old ones

Returns:¶

The factor with the new levels

★ lvls_union
¶

Find all levels in a list of factors¶

Args:¶

fs: A list of factors

Returns:¶

A list of all levels

as_factor¶

In [2]:

Copied!

# Convert a string vector to a factor. Per the output below, the values keep
# their input order while the levels come out sorted alphabetically.
x = c("a", "z", "g")
as_factor(x)
# Convert a string vector to a factor. Per the output below, the values keep
# their input order while the levels come out sorted alphabetically.
x = c("a", "z", "g")
as_factor(x)

Out[2]:

['a', 'z', 'g']
Categories (3, object): ['a', 'g', 'z']

In [3]:

Copied!

# Number-like strings are still strings: the levels are ordered
# lexicographically ('11' before '2.2'), not numerically.
y = c("1.1", "11", "2.2", "22")
as_factor(y)
# Number-like strings are still strings: the levels are ordered
# lexicographically ('11' before '2.2'), not numerically.
y = c("1.1", "11", "2.2", "22")
as_factor(y)

Out[3]:

['1.1', '11', '2.2', '22']
Categories (4, object): ['1.1', '11', '2.2', '22']

In [4]:

Copied!

# After converting the strings to numbers, the levels are ordered numerically
# (1.1, 2.2, 11.0, 22.0) -- compare with the string-based factor of `y`.
z = as_numeric(y)
as_factor(z)
# After converting the strings to numbers, the levels are ordered numerically
# (1.1, 2.2, 11.0, 22.0) -- compare with the string-based factor of `y`.
z = as_numeric(y)
as_factor(z)

FutureWarning: Index.ravel returning ndarray is deprecated; in a future version this will return a view on self.

Out[4]:

[1.1, 11.0, 2.2, 22.0]
Categories (4, float64): [1.1, 2.2, 11.0, 22.0]

fct_count¶

In [5]:

Copied!

# Build a random factor: shuffle the letters, then pick 1000 of them using
# Poisson(10) draws as positions. No seed is set, so the counts shown below
# change from run to run.
# NOTE(review): a draw beyond the last position of `letters` would be out of
# range -- confirm how this indexing behaves in that (rare) case.
fct = factor(sample(letters)[rpois(1000, 10)])
table(fct)
# Build a random factor: shuffle the letters, then pick 1000 of them using
# Poisson(10) draws as positions. No seed is set, so the counts shown below
# change from run to run.
# NOTE(review): a draw beyond the last position of `letters` would be out of
# range -- confirm how this indexing behaves in that (rare) case.
fct = factor(sample(letters)[rpois(1000, 10)])
table(fct)

Out[5]:

	b	c	d	e	i	k	l	m	n	o	...	q	r	s	t	u	v	w	x	y	z
	<int64>	<int64>	<int64>	<int64>	<int64>	<int64>	<int64>	<int64>	<int64>	<int64>	...	<int64>	<int64>	<int64>	<int64>	<int64>	<int64>	<int64>	<int64>	<int64>	<int64>
count	8	88	37	1	45	67	2	14	105	4	...	5	22	1	87	49	17	134	128	72	112

1 rows × 21 columns

In [6]:

Copied!

# Count the entries per level; the result is a data frame with columns f and n.
fct_count(fct)
# Count the entries per level; the result is a data frame with columns f and n.
fct_count(fct)

Out[6]:

	f	n
	<category>	<int64>
0	b	8
1	c	88
2	d	37
3	e	1
4	i	45
5	k	67
6	l	2
7	m	14
8	n	105
9	o	4
10	p	2
11	q	5
12	r	22
13	s	1
14	t	87
15	u	49
16	v	17
17	w	134
18	x	128
19	y	72
20	z	112

In [7]:

Copied!

# Same counts, with the most common levels listed first.
fct_count(fct, sort=True)
# Same counts, with the most common levels listed first.
fct_count(fct, sort=True)

Out[7]:

	f	n
	<category>	<int64>
17	w	134
18	x	128
20	z	112
8	n	105
1	c	88
14	t	87
19	y	72
5	k	67
15	u	49
4	i	45
2	d	37
12	r	22
16	v	17
7	m	14
0	b	8
11	q	5
9	o	4
6	l	2
10	p	2
3	e	1
13	s	1

In [8]:

Copied!

# Sorted counts plus a column p holding each level's share of all entries.
fct_count(fct, sort=True, prop=True)
# Sorted counts plus a column p holding each level's share of all entries.
fct_count(fct, sort=True, prop=True)

Out[8]:

	f	n	p
	<category>	<int64>	<float64>
17	w	134	0.134
18	x	128	0.128
20	z	112	0.112
8	n	105	0.105
1	c	88	0.088
14	t	87	0.087
19	y	72	0.072
5	k	67	0.067
15	u	49	0.049
4	i	45	0.045
2	d	37	0.037
12	r	22	0.022
16	v	17	0.017
7	m	14	0.014
0	b	8	0.008
11	q	5	0.005
9	o	4	0.004
6	l	2	0.002
10	p	2	0.002
3	e	1	0.001
13	s	1	0.001

fct_match¶

In [9]:

Copied!

# Flag the rows whose marital status is one of the given levels, then
# tabulate how many rows matched (True) and how many did not (False).
table(fct_match(gss_cat.marital, c("Married", "Divorced")))
# Flag the rows whose marital status is one of the given levels, then
# tabulate how many rows matched (True) and how many did not (False).
table(fct_match(gss_cat.marital, c("Married", "Divorced")))

Out[9]:

	False	True
	<int64>	<int64>
count	7983	13500

In [10]:

Copied!

# The level names are misspelled on purpose: numpy.isin does not validate
# them and silently reports no matches at all (every row is False).
table(numpy.isin(gss_cat.marital, c("Maried", "Davorced")))
# The level names are misspelled on purpose: numpy.isin does not validate
# them and silently reports no matches at all (every row is False).
table(numpy.isin(gss_cat.marital, c("Maried", "Davorced")))

Out[10]:

	False
	<int64>
count	21483

In [11]:

Copied!

# With the same (intentionally) misspelled levels, fct_match raises a
# ValueError because they are not levels of the factor.
# NOTE(review): try_catch presumably comes from nb_helpers.py; per the output
# below it reports the error type and message -- confirm.
with try_catch():
    table(fct_match(gss_cat.marital, c("Maried", "Davorced")))
# With the same (intentionally) misspelled levels, fct_match raises a
# ValueError because they are not levels of the factor.
# NOTE(review): try_catch presumably comes from nb_helpers.py; per the output
# below it reports the error type and message -- confirm.
with try_catch():
    table(fct_match(gss_cat.marital, c("Maried", "Davorced")))

[ValueError] Levels not present in factor: ['Maried' 'Davorced'].

fct_unique¶

In [12]:

Copied!

# Rebind `fct` to a fresh random factor of 100 letters (unseeded, so the
# result differs between runs). The Poisson draws are shifted by one before
# being used as positions into `letters`.
# NOTE(review): a draw of 0 becomes index -1 after the shift -- confirm that
# selecting from the end of `letters` in that case is acceptable.
fct = factor(letters[rpois(100, 10)-1])

# unique() returns a plain object array; per the output below the values are
# not sorted by level.
unique(fct)
# Rebind `fct` to a fresh random factor of 100 letters (unseeded, so the
# result differs between runs). The Poisson draws are shifted by one before
# being used as positions into `letters`.
# NOTE(review): a draw of 0 becomes index -1 after the shift -- confirm that
# selecting from the end of `letters` in that case is acceptable.
fct = factor(letters[rpois(100, 10)-1])

# unique() returns a plain object array; per the output below the values are
# not sorted by level.
unique(fct)

Out[12]:

array(['p', 'k', 'i', 'j', 'e', 'r', 'm', 'g', 'n', 'f', 'o', 'h', 'l',
       'd', 'c'], dtype=object)

In [13]:

Copied!

# fct_unique() returns a factor instead of a plain array; per the output
# below the unique values are listed in level order.
fct_unique(fct)
# fct_unique() returns a factor instead of a plain array; per the output
# below the unique values are listed in level order.
fct_unique(fct)

Out[13]:

['c', 'd', 'e', 'f', 'g', ..., 'm', 'n', 'o', 'p', 'r']
Length: 15
Categories (15, object): ['c', 'd', 'e', 'f', ..., 'n', 'o', 'p', 'r']

lvls_reorder, lvls_revalue and lvls_expand¶

In [14]:

Copied!

# Rebind `fct` to a small three-level factor used by the next cells.
fct = factor(c("a", "b", "c"))
# Reverse the level order with 0-based positions; the values themselves
# stay ['a', 'b', 'c'].
lvls_reorder(fct, [2,1,0])
# Rebind `fct` to a small three-level factor used by the next cells.
fct = factor(c("a", "b", "c"))
# Reverse the level order with 0-based positions; the values themselves
# stay ['a', 'b', 'c'].
lvls_reorder(fct, [2,1,0])

Out[14]:

['a', 'b', 'c']
Categories (3, object): ['c', 'b', 'a']

In [15]:

Copied!

# Rename every level (one new name per existing level); the values follow
# the renamed levels. The result is only displayed, `fct` is not reassigned.
lvls_revalue(fct, c("apple", "banana", "carrot"))
# Rename every level (one new name per existing level); the values follow
# the renamed levels. The result is only displayed, `fct` is not reassigned.
lvls_revalue(fct, c("apple", "banana", "carrot"))

Out[15]:

['apple', 'banana', 'carrot']
Categories (3, object): ['apple', 'banana', 'carrot']

In [16]:

Copied!

# Add the unused level "d"; the new level set includes all the old levels
# and the values are unchanged.
lvls_expand(fct, c("a", "b", "c", "d"))
# Add the unused level "d"; the new level set includes all the old levels
# and the values are unchanged.
lvls_expand(fct, c("a", "b", "c", "d"))

Out[16]:

['a', 'b', 'c']
Categories (4, object): ['a', 'b', 'c', 'd']

lvls_union¶

In [17]:

Copied!

# Collect the levels found across several factors; levels shared by more
# than one factor appear only once in the result.
fs = [factor("a"), factor("b"), factor(c("a", "b"))]
lvls_union(fs)
# Collect the levels found across several factors; levels shared by more
# than one factor appear only once in the result.
fs = [factor("a"), factor("b"), factor(c("a", "b"))]
lvls_union(fs)

Out[17]:

array(['a', 'b'], dtype=object)

forcats_misc

★ as_factor¶

Convert a vector to a factor vector¶

Args:¶

Returns:¶

★ fct_count¶

Count entries in a factor¶

Args:¶

Returns:¶

★ fct_match¶

Test for presence of levels in a factor¶

Args:¶

Returns:¶

★ fct_unique¶

Unique values of a factor¶

Args:¶

Returns:¶

★ lvls_reorder¶

Leaves values of a factor as they are, but changes the order by¶

Args:¶

Returns:¶

★ lvls_revalue¶

Changes the values of existing levels; there must¶

Args:¶

Returns:¶

★ lvls_expand¶

Expands the set of levels; the new levels must¶

Args:¶

Returns:¶

★ lvls_union¶

Find all levels in a list of factors¶

Args:¶

Returns:¶

as_factor¶

fct_count¶

fct_match¶

fct_unique¶

lvls_reorder, lvls_revalue and lvls_expand¶

lvls_union¶

★ as_factor
¶

★ fct_count
¶

★ fct_match
¶

★ fct_unique
¶

★ lvls_reorder
¶

★ lvls_revalue
¶

★ lvls_expand
¶

★ lvls_union
¶