forcats_lvl_order
%run nb_helpers.py
import plotnine as p9
from datar.all import *
from datar.data import gss_cat, iris, ChickWeight
nb_header(
fct_relevel,
fct_inorder,
fct_infreq,
fct_inseq,
fct_reorder,
fct_reorder2,
fct_rev,
fct_shift,
fct_shuffle,
first2,
last2,
book="forcat_lvl_order",
)
★ fct_relevel¶
Reorder factor levels by hand¶
Args:¶
_f
: A factor (categorical), or a string vector
*lvls
: Either a function (then len(lvls) should equal 1) or
the new levels.
A function will be called with the current levels as input, and the
return value (which must be a character vector) will be used to
relevel the factor.
Any levels not mentioned will be left in their existing order,
by default after the explicitly mentioned levels.
after
: Where should the new values be placed?
Returns:¶
The factor with levels replaced
★ fct_inorder¶
★ fct_infreq¶
★ fct_inseq¶
★ fct_reorder¶
Reorder factor levels by a function (default: median)¶
Args:¶
_f
: A factor
_x
: The data to be used to reorder the factor
_fun
: A function to be used to reorder the factor
_desc
: If True, the factor will be reordered in descending order
*args
: Extra arguments to be passed to _fun
**kwargs
: Extra keyword arguments to be passed to _fun
Returns:¶
The factor with levels reordered
★ fct_reorder2¶
Reorder factor levels by a function (default: last2)¶
Args:¶
_f
: A factor
_x
: The data to be used to reorder the factor
_fun
: A function to be used to reorder the factor
_desc
: If True, the factor will be reordered in descending order
*args
: Extra arguments to be passed to _fun
**kwargs
: Extra keyword arguments to be passed to _fun
Returns:¶
The factor with levels reordered
★ fct_rev¶
★ fct_shift¶
★ fct_shuffle¶
★ first2¶
★ last2¶
fct_relevel¶
fct = factor(c("a", "b", "c", "d"), levels = c("b", "c", "d", "a"))
fct_relevel(fct)
['a', 'b', 'c', 'd'] Categories (4, object): ['b', 'c', 'd', 'a']
fct_relevel(fct, "a")
['a', 'b', 'c', 'd'] Categories (4, object): ['a', 'b', 'c', 'd']
fct_relevel(fct, "b", "a")
['a', 'b', 'c', 'd'] Categories (4, object): ['b', 'a', 'c', 'd']
fct_relevel(fct, "a", after=1)
['a', 'b', 'c', 'd'] Categories (4, object): ['b', 'c', 'a', 'd']
# R's `after = Inf` is spelled `after=-1` here; `after=None` (below) moves the level to the front
fct_relevel(fct, "a", after = None)
['a', 'b', 'c', 'd'] Categories (4, object): ['a', 'b', 'c', 'd']
fct_relevel(fct, "a", after = 2)
['a', 'b', 'c', 'd'] Categories (4, object): ['b', 'c', 'd', 'a']
fct_relevel(fct, sort)
['a', 'b', 'c', 'd'] Categories (4, object): ['a', 'b', 'c', 'd']
fct_relevel(fct, sample)
['a', 'b', 'c', 'd'] Categories (4, object): ['b', 'd', 'a', 'c']
fct_relevel(fct, rev)
['a', 'b', 'c', 'd'] Categories (4, object): ['a', 'd', 'c', 'b']
df = gss_cat[["rincome", "denom"]] >> mutate(across(everything(), as_factor))
(
df
>> summarize(across(everything(), lambda col: [levels(col).tolist()]))
>> t()
>> rename_with(str)
>> pull(to="dict", name=rownames(f))
)
{'rincome': ['$1000 to 2999', '$10000 - 14999', '$15000 - 19999', '$20000 - 24999', '$25000 or more', '$3000 to 3999', '$4000 to 4999', '$5000 to 5999', '$6000 to 6999', '$7000 to 7999', '$8000 to 9999', "Don't know", 'Lt $1000', 'No answer', 'Not applicable', 'Refused'], 'denom': ['Afr meth ep zion', 'Afr meth episcopal', 'Am bapt ch in usa', 'Am baptist asso', 'Am lutheran', 'Baptist-dk which', "Don't know", 'Episcopal', 'Evangelical luth', 'Luth ch in america', 'Lutheran-dk which', 'Lutheran-mo synod', 'Methodist-dk which', 'Nat bapt conv of am', 'Nat bapt conv usa', 'No answer', 'No denomination', 'Not applicable', 'Other', 'Other baptists', 'Other lutheran', 'Other methodist', 'Other presbyterian', 'Presbyterian c in us', 'Presbyterian, merged', 'Presbyterian-dk wh', 'Southern baptist', 'United methodist', 'United pres ch in us', 'Wi evan luth synod']}
df2 = df >> mutate(across(everything(), fct_relevel, "Don't know", after=-1))
(
df2
>> summarize(across(everything(), lambda col: [levels(col).tolist()]))
>> t()
>> rename_with(str)
>> pull(to="dict", name=rownames(f))
)
{'rincome': ['$1000 to 2999', '$10000 - 14999', '$15000 - 19999', '$20000 - 24999', '$25000 or more', '$3000 to 3999', '$4000 to 4999', '$5000 to 5999', '$6000 to 6999', '$7000 to 7999', '$8000 to 9999', 'Lt $1000', 'No answer', 'Not applicable', 'Refused', "Don't know"], 'denom': ['Afr meth ep zion', 'Afr meth episcopal', 'Am bapt ch in usa', 'Am baptist asso', 'Am lutheran', 'Baptist-dk which', 'Episcopal', 'Evangelical luth', 'Luth ch in america', 'Lutheran-dk which', 'Lutheran-mo synod', 'Methodist-dk which', 'Nat bapt conv of am', 'Nat bapt conv usa', 'No answer', 'No denomination', 'Not applicable', 'Other', 'Other baptists', 'Other lutheran', 'Other methodist', 'Other presbyterian', 'Presbyterian c in us', 'Presbyterian, merged', 'Presbyterian-dk wh', 'Southern baptist', 'United methodist', 'United pres ch in us', 'Wi evan luth synod', "Don't know"]}
fct_relevel(fct, "e")
[2022-12-02 14:00:07][datar][WARNING] [fct_relevel] Unknown levels in `_f`: ['e']
['a', 'b', 'c', 'd'] Categories (4, object): ['b', 'c', 'd', 'a']
fct_inorder, fct_infreq, and fct_inseq¶
fct = factor(c("b", "b", "a", "c", "c", "c"))
fct
['b', 'b', 'a', 'c', 'c', 'c'] Categories (3, object): ['a', 'b', 'c']
fct_inorder(fct)
['b', 'b', 'a', 'c', 'c', 'c'] Categories (3, object): ['b', 'a', 'c']
fct_infreq(fct)
['b', 'b', 'a', 'c', 'c', 'c'] Categories (3, object): ['c', 'b', 'a']
fct = factor([1,2,3], levels = [3,2,1])
fct_inseq(fct)
[1, 2, 3] Categories (3, int64): [1, 2, 3]
fct_reorder, fct_reorder2, last2, and first2¶
df = tribble(
f.color, f.a, f.b,
"blue", 1, 2,
"green", 6, 2,
"purple", 3, 3,
"red", 2, 3,
"yellow", 5, 1
) >> mutate(color=as_factor(f.color))
fct_reorder(df.color, df.a, _fun=min)
['blue', 'green', 'purple', 'red', 'yellow'] Categories (5, object): ['blue', 'red', 'purple', 'yellow', 'green']
fct_reorder2(df.color, df.a, df.b)
['blue', 'green', 'purple', 'red', 'yellow'] Categories (5, object): ['red', 'purple', 'green', 'blue', 'yellow']
p9.ggplot(iris) + p9.geom_boxplot(
p9.aes(x="Species", y="Sepal_Width")
)
<ggplot: (8749823469044)>
p9.ggplot(
iris >> mutate(Species=fct_reorder(f.Species, f.Sepal_Width))
) + p9.geom_boxplot(
p9.aes(x="Species", y="Sepal_Width")
)
<ggplot: (8749823508375)>
p9.ggplot(
iris >> mutate(Species=fct_reorder(f.Species, f.Sepal_Width, _desc=True))
) + p9.geom_boxplot(
p9.aes(x="Species", y="Sepal_Width")
)
<ggplot: (8749821226873)>
chks = (
ChickWeight
>> filter(as_integer(f.Chick) < 10)
>> mutate(Chick=fct_shuffle(f.Chick))
)
(
p9.ggplot(chks, p9.aes("Time", "weight", colour="Chick"))
+ p9.geom_point()
+ p9.geom_line()
)
<ggplot: (8749821203432)>
(
p9.ggplot(
chks >> mutate(Chick=fct_reorder2(f.Chick, f.Time, f.weight)),
p9.aes("Time", "weight", colour="Chick"),
)
+ p9.geom_point()
+ p9.geom_line()
+ p9.labs(colour="Chick")
)
<ggplot: (8749821116657)>
fct_shuffle¶
fct = factor(c("a", "b", "c"))
fct_shuffle(fct)
['a', 'b', 'c'] Categories (3, object): ['b', 'c', 'a']
fct_shuffle(fct)
['a', 'b', 'c'] Categories (3, object): ['c', 'b', 'a']
fct_rev¶
fct_rev(fct)
['a', 'b', 'c'] Categories (3, object): ['c', 'b', 'a']
fct_shift¶
x = factor(
c("Mon", "Tue", "Wed"),
levels = c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"),
ordered = TRUE
)
x
['Mon', 'Tue', 'Wed'] Categories (7, object): ['Sun' < 'Mon' < 'Tue' < 'Wed' < 'Thu' < 'Fri' < 'Sat']
fct_shift(x)
['Mon', 'Tue', 'Wed'] Categories (7, object): ['Mon' < 'Tue' < 'Wed' < 'Thu' < 'Fri' < 'Sat' < 'Sun']
fct_shift(x, 2)
['Mon', 'Tue', 'Wed'] Categories (7, object): ['Tue' < 'Wed' < 'Thu' < 'Fri' < 'Sat' < 'Sun' < 'Mon']
fct_shift(x, -1)
['Mon', 'Tue', 'Wed'] Categories (7, object): ['Sat' < 'Sun' < 'Mon' < 'Tue' < 'Wed' < 'Thu' < 'Fri']