Skip to content

datar

forcats_lvl_order

forcats_lvl_order

In [32]:

Copied!





%run nb_helpers.py

import plotnine as p9
from datar.all import *
from datar.data import gss_cat, iris, ChickWeight

nb_header(
    fct_relevel,
    fct_inorder,
    fct_infreq,
    fct_inseq,
    fct_reorder,
    fct_reorder2,
    fct_rev,
    fct_shift,
    fct_shuffle,
    first2,
    last2, 
    book="forcat_lvl_order",
)
%run nb_helpers.py

import plotnine as p9
from datar.all import *
from datar.data import gss_cat, iris, ChickWeight

nb_header(
    fct_relevel,
    fct_inorder,
    fct_infreq,
    fct_inseq,
    fct_reorder,
    fct_reorder2,
    fct_rev,
    fct_shift,
    fct_shuffle,
    first2,
    last2, 
    book="forcat_lvl_order",
)

Try this notebook on binder.

★ fct_relevel
¶

Reorder factor levels by hand¶

Args:¶

_f: A factor (categorical), or a string vector
*lvls: Either a function (then len(lvls) should be equal to 1) or
the new levels.
A function will be called with the current levels as input, and the
return value (which must be a character vector) will be used to
relevel the factor.
Any levels not mentioned will be left in their existing order,
by default after the explicitly mentioned levels.

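after: Where should the new values be placed? A 0-based position: the values
are inserted after the level at that index (use -1 to place them last).
By default they are placed first.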

Returns:¶

The factor with levels replaced

★ fct_inorder
¶

Reorder factor levels by first appearance¶

Args:¶

_f: A factor
ordered: A logical which determines the "ordered" status of the
output factor.

Returns:¶

The factor with levels reordered

★ fct_infreq
¶

Reorder factor levels by frequency¶

Args:¶

_f: A factor
ordered: A logical which determines the "ordered" status of the
output factor.

Returns:¶

The factor with levels reordered

★ fct_inseq
¶

Reorder factor levels by sequence¶

Args:¶

_f: A factor
ordered: A logical which determines the "ordered" status of the
output factor.

Returns:¶

The factor with levels reordered

★ fct_reorder
¶

Reorder factor levels by a function (default: median)¶

Args:¶

_f: A factor
_x: The data to be used to reorder the factor
_fun: A function to be used to reorder the factor
_desc: If True, the factor will be reordered in descending order
*args: Extra arguments to be passed to _fun
**kwargs: Extra keyword arguments to be passed to _fun

Returns:¶

The factor with levels reordered

★ fct_reorder2
¶

Reorder factor levels by a function (default: `last2`)¶

Args:¶

_f: A factor
_x: The data to be used to reorder the factor
_fun: A function to be used to reorder the factor
_desc: If True, the factor will be reordered in descending order
*args: Extra arguments to be passed to _fun
**kwargs: Extra keyword arguments to be passed to _fun

Returns:¶

The factor with levels reordered

★ fct_rev
¶

Reverse the order of the levels of a factor¶

Args:¶

_f: A factor

Returns:¶

The factor with levels reversed

★ fct_shift
¶

Shift the levels of a factor¶

Args:¶

_f: A factor
n: The number of levels to shift

Returns:¶

The factor with levels shifted

★ fct_shuffle
¶

Shuffle the levels of a factor¶

Args:¶

_f: A factor

Returns:¶

The factor with levels shuffled

★ first2
¶

Find the first element of `_y` ordered by `_x`¶

Args:¶

_x: The vector used to order _y
_y: The vector to get the first element of

Returns:¶

First element of _y ordered by _x

★ last2
¶

Find the last element of `_y` ordered by `_x`¶

Args:¶

_x: The vector used to order _y
_y: The vector to get the last element of

Returns:¶

Last element of _y ordered by _x

fct_relevel¶

In [2]:

Copied!

# With no levels given, the level order is left unchanged (see the output).
fct = factor(c("a", "b", "c", "d"), levels = c("b", "c", "d", "a"))
fct_relevel(fct)
# With no levels given, the level order is left unchanged (see the output).
fct = factor(c("a", "b", "c", "d"), levels = c("b", "c", "d", "a"))
fct_relevel(fct)

Out[2]:

['a', 'b', 'c', 'd']
Categories (4, object): ['b', 'c', 'd', 'a']

In [3]:

Copied!

# Move "a" to the front of the levels.
fct_relevel(fct, "a")
# Move "a" to the front of the levels.
fct_relevel(fct, "a")

Out[3]:

['a', 'b', 'c', 'd']
Categories (4, object): ['a', 'b', 'c', 'd']

In [4]:

Copied!

# Several levels can be moved to the front at once, in the order given.
fct_relevel(fct, "b", "a")
# Several levels can be moved to the front at once, in the order given.
fct_relevel(fct, "b", "a")

Out[4]:

['a', 'b', 'c', 'd']
Categories (4, object): ['b', 'a', 'c', 'd']

In [5]:

Copied!

# `after` is a 0-based position: "a" goes after the level at index 1 of the
# remaining levels (b, c, d).
fct_relevel(fct, "a", after=1)
# `after` is a 0-based position: "a" goes after the level at index 1 of the
# remaining levels (b, c, d).
fct_relevel(fct, "a", after=1)

Out[5]:

['a', 'b', 'c', 'd']
Categories (4, object): ['b', 'c', 'a', 'd']

In [6]:

Copied!

# use -1 instead of Inf
fct_relevel(fct, "a", after = None)
# use -1 instead of Inf
fct_relevel(fct, "a", after = None)

Out[6]:

['a', 'b', 'c', 'd']
Categories (4, object): ['a', 'b', 'c', 'd']

In [7]:

Copied!

# after=2 places "a" after the third remaining level, i.e. last here.
fct_relevel(fct, "a", after = 2)
# after=2 places "a" after the third remaining level, i.e. last here.
fct_relevel(fct, "a", after = 2)

Out[7]:

['a', 'b', 'c', 'd']
Categories (4, object): ['b', 'c', 'd', 'a']

In [8]:

Copied!

# A function is called with the current levels; here they are sorted.
fct_relevel(fct, sort)
# A function is called with the current levels; here they are sorted.
fct_relevel(fct, sort)

Out[8]:

['a', 'b', 'c', 'd']
Categories (4, object): ['a', 'b', 'c', 'd']

In [9]:

Copied!

# `sample` presumably permutes the levels at random; no seed is set in this
# notebook, so the output may differ between runs.
fct_relevel(fct, sample)
# `sample` presumably permutes the levels at random; no seed is set in this
# notebook, so the output may differ between runs.
fct_relevel(fct, sample)

Out[9]:

['a', 'b', 'c', 'd']
Categories (4, object): ['b', 'd', 'a', 'c']

In [10]:

Copied!

# Reverse the current levels (b, c, d, a -> a, d, c, b).
fct_relevel(fct, rev)
# Reverse the current levels (b, c, d, a -> a, d, c, b).
fct_relevel(fct, rev)

Out[10]:

['a', 'b', 'c', 'd']
Categories (4, object): ['a', 'd', 'c', 'b']

In [11]:

Copied!





# Convert both columns to factors so their levels can be inspected.
df = gss_cat[["rincome", "denom"]] >> mutate(across(everything(), as_factor)) 

# Show the levels of each column as a {column: [levels]} dict.
(
    df 
        >> summarize(across(everything(), lambda col: [levels(col).tolist()]))
        >> t()
        >> rename_with(str)
        >> pull(to="dict", name=rownames(f))
) 
# Convert both columns to factors so their levels can be inspected.
df = gss_cat[["rincome", "denom"]] >> mutate(across(everything(), as_factor)) 

# Show the levels of each column as a {column: [levels]} dict.
(
    df 
        >> summarize(across(everything(), lambda col: [levels(col).tolist()]))
        >> t()
        >> rename_with(str)
        >> pull(to="dict", name=rownames(f))
) 

Out[11]:

{'rincome': ['$1000 to 2999',
  '$10000 - 14999',
  '$15000 - 19999',
  '$20000 - 24999',
  '$25000 or more',
  '$3000 to 3999',
  '$4000 to 4999',
  '$5000 to 5999',
  '$6000 to 6999',
  '$7000 to 7999',
  '$8000 to 9999',
  "Don't know",
  'Lt $1000',
  'No answer',
  'Not applicable',
  'Refused'],
 'denom': ['Afr meth ep zion',
  'Afr meth episcopal',
  'Am bapt ch in usa',
  'Am baptist asso',
  'Am lutheran',
  'Baptist-dk which',
  "Don't know",
  'Episcopal',
  'Evangelical luth',
  'Luth ch in america',
  'Lutheran-dk which',
  'Lutheran-mo synod',
  'Methodist-dk which',
  'Nat bapt conv of am',
  'Nat bapt conv usa',
  'No answer',
  'No denomination',
  'Not applicable',
  'Other',
  'Other baptists',
  'Other lutheran',
  'Other methodist',
  'Other presbyterian',
  'Presbyterian c in us',
  'Presbyterian, merged',
  'Presbyterian-dk wh',
  'Southern baptist',
  'United methodist',
  'United pres ch in us',
  'Wi evan luth synod']}

In [12]:

Copied!





# Move "Don't know" to the end of the levels in every column (after=-1),
# then list the levels again for comparison with the previous output.
df2 = df >> mutate(across(everything(), fct_relevel, "Don't know", after=-1))
(
    df2 
        >> summarize(across(everything(), lambda col: [levels(col).tolist()]))
        >> t()
        >> rename_with(str)
        >> pull(to="dict", name=rownames(f))
) 
# Move "Don't know" to the end of the levels in every column (after=-1),
# then list the levels again for comparison with the previous output.
df2 = df >> mutate(across(everything(), fct_relevel, "Don't know", after=-1))
(
    df2 
        >> summarize(across(everything(), lambda col: [levels(col).tolist()]))
        >> t()
        >> rename_with(str)
        >> pull(to="dict", name=rownames(f))
) 

Out[12]:

{'rincome': ['$1000 to 2999',
  '$10000 - 14999',
  '$15000 - 19999',
  '$20000 - 24999',
  '$25000 or more',
  '$3000 to 3999',
  '$4000 to 4999',
  '$5000 to 5999',
  '$6000 to 6999',
  '$7000 to 7999',
  '$8000 to 9999',
  'Lt $1000',
  'No answer',
  'Not applicable',
  'Refused',
  "Don't know"],
 'denom': ['Afr meth ep zion',
  'Afr meth episcopal',
  'Am bapt ch in usa',
  'Am baptist asso',
  'Am lutheran',
  'Baptist-dk which',
  'Episcopal',
  'Evangelical luth',
  'Luth ch in america',
  'Lutheran-dk which',
  'Lutheran-mo synod',
  'Methodist-dk which',
  'Nat bapt conv of am',
  'Nat bapt conv usa',
  'No answer',
  'No denomination',
  'Not applicable',
  'Other',
  'Other baptists',
  'Other lutheran',
  'Other methodist',
  'Other presbyterian',
  'Presbyterian c in us',
  'Presbyterian, merged',
  'Presbyterian-dk wh',
  'Southern baptist',
  'United methodist',
  'United pres ch in us',
  'Wi evan luth synod',
  "Don't know"]}

In [13]:

Copied!

# An unknown level only triggers a warning; the level order stays unchanged.
fct_relevel(fct, "e")
# An unknown level only triggers a warning; the level order stays unchanged.
fct_relevel(fct, "e")

[2022-12-02 14:00:07][datar][WARNING] [fct_relevel] Unknown levels in `_f`: ['e']

Out[13]:

['a', 'b', 'c', 'd']
Categories (4, object): ['b', 'c', 'd', 'a']

fct_inorder, fct_infreq, and fct_inseq¶

In [14]:

Copied!

# Without explicit levels the factor's levels come out sorted (a, b, c),
# not in order of appearance.
fct = factor(c("b", "b", "a", "c", "c", "c"))
fct
# Without explicit levels the factor's levels come out sorted (a, b, c),
# not in order of appearance.
fct = factor(c("b", "b", "a", "c", "c", "c"))
fct

Out[14]:

['b', 'b', 'a', 'c', 'c', 'c']
Categories (3, object): ['a', 'b', 'c']

In [15]:

Copied!

# Levels ordered by first appearance in the data: b, a, c.
fct_inorder(fct)
# Levels ordered by first appearance in the data: b, a, c.
fct_inorder(fct)

Out[15]:

['b', 'b', 'a', 'c', 'c', 'c']
Categories (3, object): ['b', 'a', 'c']

In [16]:

Copied!

# Levels ordered by frequency, most frequent first: c (3), b (2), a (1).
fct_infreq(fct)
# Levels ordered by frequency, most frequent first: c (3), b (2), a (1).
fct_infreq(fct)

Out[16]:

['b', 'b', 'a', 'c', 'c', 'c']
Categories (3, object): ['c', 'b', 'a']

In [17]:

Copied!

# Levels given as 3, 2, 1 are put back into numeric sequence 1, 2, 3.
fct = factor([1,2,3], levels = [3,2,1])
fct_inseq(fct)
# Levels given as 3, 2, 1 are put back into numeric sequence 1, 2, 3.
fct = factor([1,2,3], levels = [3,2,1])
fct_inseq(fct)

Out[17]:

[1, 2, 3]
Categories (3, int64): [1, 2, 3]

fct_reorder, fct_reorder2, last2, and first2¶

In [18]:

Copied!





# Small example frame; `color` is converted to a factor.
# NOTE: this rebinds `df`, which earlier held the gss_cat factor columns.
df = tribble(
    f.color,  f.a, f.b,
    "blue",   1,   2,
    "green",  6,   2,
    "purple", 3,   3,
    "red",    2,   3,
    "yellow", 5,   1
) >> mutate(color=as_factor(f.color))

# Order the colour levels by the minimum of `a` within each level (ascending).
fct_reorder(df.color, df.a, _fun=min)
# Small example frame; `color` is converted to a factor.
# NOTE: this rebinds `df`, which earlier held the gss_cat factor columns.
df = tribble(
    f.color,  f.a, f.b,
    "blue",   1,   2,
    "green",  6,   2,
    "purple", 3,   3,
    "red",    2,   3,
    "yellow", 5,   1
) >> mutate(color=as_factor(f.color))

# Order the colour levels by the minimum of `a` within each level (ascending).
fct_reorder(df.color, df.a, _fun=min)

Out[18]:

['blue', 'green', 'purple', 'red', 'yellow']
Categories (5, object): ['blue', 'red', 'purple', 'yellow', 'green']

In [19]:

Copied!

# Uses the default _fun (last2, per the docs above). Each colour has a single
# row here, and the levels come out in descending order of `b`.
fct_reorder2(df.color, df.a, df.b)
# Uses the default _fun (last2, per the docs above). Each colour has a single
# row here, and the levels come out in descending order of `b`.
fct_reorder2(df.color, df.a, df.b)

Out[19]:

['blue', 'green', 'purple', 'red', 'yellow']
Categories (5, object): ['red', 'purple', 'green', 'blue', 'yellow']

In [20]:

Copied!

# Baseline: sepal-width boxplots with Species in its existing level order.
(
    p9.ggplot(iris)
    + p9.geom_boxplot(p9.aes(x="Species", y="Sepal_Width"))
)
# Baseline: sepal-width boxplots with Species in its existing level order.
(
    p9.ggplot(iris)
    + p9.geom_boxplot(p9.aes(x="Species", y="Sepal_Width"))
)

No description has been provided for this image

Out[20]:

<ggplot: (8749823469044)>

In [21]:

Copied!





# Same plot with Species reordered by Sepal_Width, using fct_reorder's default
# summary function (the median, per the docs above).
(
    p9.ggplot(iris >> mutate(Species=fct_reorder(f.Species, f.Sepal_Width)))
    + p9.geom_boxplot(p9.aes(x="Species", y="Sepal_Width"))
)
# Same plot with Species reordered by Sepal_Width, using fct_reorder's default
# summary function (the median, per the docs above).
(
    p9.ggplot(iris >> mutate(Species=fct_reorder(f.Species, f.Sepal_Width)))
    + p9.geom_boxplot(p9.aes(x="Species", y="Sepal_Width"))
)

No description has been provided for this image

Out[21]:

<ggplot: (8749823508375)>

In [22]:

Copied!





# As above, but with the Species levels in descending order (_desc=True).
(
    p9.ggplot(
        iris
        >> mutate(Species=fct_reorder(f.Species, f.Sepal_Width, _desc=True))
    )
    + p9.geom_boxplot(p9.aes(x="Species", y="Sepal_Width"))
)
# As above, but with the Species levels in descending order (_desc=True).
(
    p9.ggplot(
        iris
        >> mutate(Species=fct_reorder(f.Species, f.Sepal_Width, _desc=True))
    )
    + p9.geom_boxplot(p9.aes(x="Species", y="Sepal_Width"))
)

No description has been provided for this image

Out[22]:

<ggplot: (8749821226873)>

In [23]:

Copied!





# Subset of ChickWeight with the Chick levels shuffled.
# NOTE(review): presumably keeps the chicks whose id is below 10 -- confirm how
# as_integer() converts the Chick factor.
# fct_shuffle is random and no seed is set in this notebook, so the legend
# order presumably differs between runs.
chks = (
    ChickWeight 
    >> filter(as_integer(f.Chick) < 10)
    >> mutate(Chick=fct_shuffle(f.Chick))
)

# Weight over time, one coloured line per chick.
(
    p9.ggplot(chks, p9.aes("Time", "weight", colour="Chick")) 
    + p9.geom_point() 
    + p9.geom_line()
)

# Subset of ChickWeight with the Chick levels shuffled.
# NOTE(review): presumably keeps the chicks whose id is below 10 -- confirm how
# as_integer() converts the Chick factor.
# fct_shuffle is random and no seed is set in this notebook, so the legend
# order presumably differs between runs.
chks = (
    ChickWeight 
    >> filter(as_integer(f.Chick) < 10)
    >> mutate(Chick=fct_shuffle(f.Chick))
)

# Weight over time, one coloured line per chick.
(
    p9.ggplot(chks, p9.aes("Time", "weight", colour="Chick")) 
    + p9.geom_point() 
    + p9.geom_line()
)

No description has been provided for this image

Out[23]:

<ggplot: (8749821203432)>

In [24]:

Copied!





# Same plot, with the Chick levels reordered by fct_reorder2(Chick, Time, weight).
# With the default _fun (last2, per the docs above) each chick is ranked by its
# weight at the largest Time.
(
    p9.ggplot(
        chks >> mutate(Chick=fct_reorder2(f.Chick, f.Time, f.weight)),
        p9.aes("Time", "weight", colour="Chick"),
    )
    + p9.geom_point()
    + p9.geom_line()
    + p9.labs(colour="Chick")
)
# Same plot, with the Chick levels reordered by fct_reorder2(Chick, Time, weight).
# With the default _fun (last2, per the docs above) each chick is ranked by its
# weight at the largest Time.
(
    p9.ggplot(
        chks >> mutate(Chick=fct_reorder2(f.Chick, f.Time, f.weight)),
        p9.aes("Time", "weight", colour="Chick"),
    )
    + p9.geom_point()
    + p9.geom_line()
    + p9.labs(colour="Chick")
)

No description has been provided for this image

Out[24]:

<ggplot: (8749821116657)>

fct_shuffle¶

In [25]:

Copied!

# Shuffle the levels; the values themselves are unchanged (see the output).
fct = factor(c("a", "b", "c"))
fct_shuffle(fct)
# Shuffle the levels; the values themselves are unchanged (see the output).
fct = factor(c("a", "b", "c"))
fct_shuffle(fct)

Out[25]:

['a', 'b', 'c']
Categories (3, object): ['b', 'c', 'a']

In [26]:

Copied!

# A second call on the same factor gives a different level order here.
fct_shuffle(fct)
# A second call on the same factor gives a different level order here.
fct_shuffle(fct)

Out[26]:

['a', 'b', 'c']
Categories (3, object): ['c', 'b', 'a']

fct_rev¶

In [27]:

Copied!

# Reverse the level order of `fct` (a, b, c -> c, b, a).
fct_rev(fct)
# Reverse the level order of `fct` (a, b, c -> c, b, a).
fct_rev(fct)

Out[27]:

['a', 'b', 'c']
Categories (3, object): ['c', 'b', 'a']

fct_shift¶

In [28]:

Copied!





# Ordered weekday factor: three values, all seven weekdays as levels.
x = factor(
    c("Mon", "Tue", "Wed"),
    levels=c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"),
    ordered=TRUE,
)
x
# Ordered weekday factor: three values, all seven weekdays as levels.
x = factor(
    c("Mon", "Tue", "Wed"),
    levels=c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"),
    ordered=TRUE,
)
x

Out[28]:

['Mon', 'Tue', 'Wed']
Categories (7, object): ['Sun' < 'Mon' < 'Tue' < 'Wed' < 'Thu' < 'Fri' < 'Sat']

In [29]:

Copied!

# Default shift: the first level ("Sun") moves to the end.
fct_shift(x)
# Default shift: the first level ("Sun") moves to the end.
fct_shift(x)

Out[29]:

['Mon', 'Tue', 'Wed']
Categories (7, object): ['Mon' < 'Tue' < 'Wed' < 'Thu' < 'Fri' < 'Sat' < 'Sun']

In [30]:

Copied!

# Shift by two: the first two levels ("Sun", "Mon") move to the end.
fct_shift(x, 2)
# Shift by two: the first two levels ("Sun", "Mon") move to the end.
fct_shift(x, 2)

Out[30]:

['Mon', 'Tue', 'Wed']
Categories (7, object): ['Tue' < 'Wed' < 'Thu' < 'Fri' < 'Sat' < 'Sun' < 'Mon']

In [31]:

Copied!

# A negative n shifts the other way: the last level ("Sat") moves to the front.
fct_shift(x, -1)
# A negative n shifts the other way: the last level ("Sat") moves to the front.
fct_shift(x, -1)

Out[31]:

['Mon', 'Tue', 'Wed']
Categories (7, object): ['Sat' < 'Sun' < 'Mon' < 'Tue' < 'Wed' < 'Thu' < 'Fri']