"""Provides jekyll filters
See: https://jekyllrb.com/docs/liquid/filters/
"""
import datetime
import os
import random
import re
import urllib.parse
from typing import TYPE_CHECKING, Any, Sequence
if TYPE_CHECKING:
from jinja2.environment import Environment
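# jinja2 3.0 deprecated `environmentfilter` in favor of `pass_environment`;
# fall back to the old name when running on an older jinja2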
try:
from jinja2 import pass_environment
except ImportError:
from jinja2 import environmentfilter as pass_environment
from jinja2.filters import FILTERS
from .manager import FilterManager
# Manager that the filters defined in this module are registered with
jekyll_filter_manager = FilterManager()
def _getattr(obj: Any, attr: str) -> Any:
    """Look up `attr` on an object, as an attribute first, then as an item

    Args:
        obj: The object to read from
        attr: The attribute name, also used as the key for item access

    Returns:
        `getattr(obj, attr)` when that succeeds, otherwise `obj[attr]`
    """
    try:
        value = getattr(obj, attr)
    except AttributeError:
        # No such attribute: fall back to subscripting the object
        value = obj[attr]
    return value
def _getattr_multi(obj: Any, attr: str) -> Any:
    """Resolve a dot-separated attribute path against an object

    For example: x.a.b = 1, _getattr_multi(x, "a.b") == 1

    Args:
        obj: The object to start from
        attr: The path, with levels separated by "."

    Returns:
        The value found at the end of the path. A level whose lookup fails
        with TypeError or KeyError yields None, and the remaining levels
        are then looked up on that None.
    """
    current = obj
    for segment in attr.split("."):
        try:
            current = _getattr(current, segment)
        except (TypeError, KeyError):
            current = None
    return current
def _get_global_var(env: "Environment", name: str, attr: str = None) -> Any:
    """Read a global variable of the environment, optionally one attribute

    Args:
        env: The environment whose `globals` are consulted
        name: The name of the global variable
        attr: If given, the attribute (or item key) to read from the global

    Returns:
        The global itself when `attr` is None, otherwise its `attr`

    Raises:
        ValueError: If `name` is not among the globals of `env`
    """
    if name in env.globals:
        found = env.globals[name]
        if attr is None:  # pragma: no cover
            return found
        return _getattr(found, attr)

    raise ValueError(f"Global variables has not been set: {name}")
# Expose filters that jinja2 already provides under their jekyll names
jekyll_filter_manager.register("group_by")(FILTERS["groupby"])
jekyll_filter_manager.register("to_integer")(FILTERS["int"])
# `inspect` is simply the builtin repr()
jekyll_filter_manager.register("inspect")(repr)
@jekyll_filter_manager.registerDOCS
@pass_environment
def relative_url(env, value):
    """Get relative url based on site.baseurl

    The path component of `site.baseurl` is prepended to `value`, e.g.
    with a baseurl of "/base", "/assets/a.css" becomes
    "/base/assets/a.css".

    Args:
        env: The environment; its `site` global supplies `baseurl`
        value: The url path to convert

    Returns:
        `value` prefixed with the path of `site.baseurl`, or `value`
        unchanged when that path is empty

    Raises:
        ValueError: If the `site` global has not been set
    """
    # URLs are always "/"-separated, whereas os.path.join uses the
    # separator of the host OS (a backslash on Windows)
    import posixpath

    baseurl = _get_global_var(env, "site", "baseurl")
    base_path = urllib.parse.urlparse(baseurl).path
    if not base_path:
        return value
    # A leading slash would make join() throw the base path away
    return posixpath.join(base_path, value.lstrip("/"))
@jekyll_filter_manager.registerDOCS
@pass_environment
def absolute_url(env, value):
    """Resolve a url against site.baseurl

    Args:
        env: The environment; its `site` global supplies `baseurl`
        value: The url to resolve

    Returns:
        The result of joining `site.baseurl` and `value` with
        `urllib.parse.urljoin`
    """
    site_baseurl = _get_global_var(env, "site", "baseurl")
    joined = urllib.parse.urljoin(site_baseurl, value)
    return joined
@jekyll_filter_manager.registerDOCS
@pass_environment
def date_to_xmlschema(env, value: datetime.datetime):
    """Format a date as an ISO 8601 string

    Args:
        env: The environment (not used)
        value: The date to format

    Returns:
        The result of `value.isoformat()`
    """
    xmlschema = value.isoformat()
    return xmlschema
# TODO: other date filters
@jekyll_filter_manager.registerDOCS
@pass_environment
def where_exp(env, value, item, expr):
    """Select the elements of an array for which an expression is truthy

    Args:
        env: The environment used to compile `expr`
        value: The array to filter
        item: The variable name each element is bound to inside `expr`
        expr: The expression to evaluate for each element

    Returns:
        A list of the elements for which `expr` evaluated truthy
    """
    predicate = env.compile_expression(expr)
    selected = []
    for candidate in value:
        if predicate(**{item: candidate}):
            selected.append(candidate)
    return selected
@jekyll_filter_manager.registerDOCS
def find(value, attr, query):
    """Find the first element of an array whose attribute equals a value

    Args:
        value: The array to search
        attr: The attribute name (or item key) to read from each element
        query: The value the attribute has to be equal to

    Returns:
        The first matching element, or None if there is none. Elements
        that `attr` cannot be read from are skipped.
    """
    for item in value:
        try:
            found = _getattr(item, attr)
        except (KeyError, AttributeError, TypeError):
            # TypeError is what _getattr() raises for an element that has
            # no such attribute and is not subscriptable either
            continue
        if found == query:
            return item
    return None
@jekyll_filter_manager.registerDOCS
@pass_environment
def find_exp(env, value, item, expr):
    """Find the first element of an array for which an expression is truthy

    Args:
        env: The environment used to compile `expr`
        value: The array to search
        item: The variable name each element is bound to inside `expr`
        expr: The expression to evaluate for each element

    Returns:
        The first element for which `expr` evaluated truthy, or None.
        Elements for which evaluation raises AttributeError are skipped.
    """
    predicate = env.compile_expression(expr)
    for candidate in value:
        try:
            matched = predicate(**{item: candidate})
        except AttributeError:
            matched = False
        if matched:
            return candidate
    return None
@jekyll_filter_manager.registerDOCS
@pass_environment
def group_by_expr(env, value, item, expr):
    """Group the elements of an array by the result of an expression

    Args:
        env: The environment used to compile `expr`
        value: The array to group
        item: The variable name each element is bound to inside `expr`
        expr: The expression whose result is the group of an element

    Returns:
        A list with one dict per group, in order of first appearance.
        Each dict has the keys "name" (the result of the expression) and
        "items" (the list of elements that produced it).
    """
    compiled = env.compile_expression(expr)
    groups = {}
    for itm in value:
        groups.setdefault(compiled(**{item: itm}), []).append(itm)
    # The keys have to be the literal strings, not the loop variables
    return [
        {"name": name, "items": items} for name, items in groups.items()
    ]
@jekyll_filter_manager.registerDOCS
def xml_escape(input: str) -> str:
    """Escape a value for use in XML

    Replaces `&`, `<` and `>` with the corresponding XML entities.

    Args:
        input: The value to escape. Values that are not strings are
            converted with `str()` first.

    Returns:
        The escaped string, or an empty string if `input` is None
    """
    if input is None:
        return ""

    from xml.sax.saxutils import escape

    # escape() only works on strings; template values may be numbers etc.
    return escape(input if isinstance(input, str) else str(input))
@jekyll_filter_manager.registerDOCS
def cgi_escape(input: str) -> str:
    """CGI escape a string for use in a URL

    Special characters are replaced with %XX escapes and spaces with "+".

    Args:
        input: The string to escape

    Returns:
        The escaped string
    """
    escaped = urllib.parse.quote_plus(input)
    return escaped
@jekyll_filter_manager.registerDOCS
def uri_escape(input: str) -> str:
    """Percent-encode a string while keeping URI punctuation intact

    Args:
        input: The string to escape

    Returns:
        The escaped string
    """
    # Characters that are left as they are instead of being %XX-escaped
    kept = "!*'();:@&=+$,/?#[]"
    return urllib.parse.quote(input, safe=kept)
# TODO: smartify, sassify, scssify
@jekyll_filter_manager.registerDOCS
def jsonify(input: Any) -> str:
    """Serialize the input as a json string

    Args:
        input: The Array or Hash to be converted

    Returns:
        The json string produced by `json.dumps`
    """
    from json import dumps

    return dumps(input)
@jekyll_filter_manager.registerDOCS
def array_to_sentence_string(
    array: Sequence[str],
    connector: str = "and",
) -> str:
    """Join the elements of an array into a human-readable enumeration

    Elements are separated with commas, and the connector word is put
    in front of the last one.

    Args:
        array: The Array of Strings to join.
        connector: Word used to connect the last 2 items in the array

    Returns:
        The formatted string.
    """
    count = len(array)
    if count == 0:
        return ""

    words = list(map(str, array))
    if count == 1:
        return words[0]

    *head, tail = words
    if count == 2:
        return f"{head[0]} {connector} {tail}"
    return f"{', '.join(head)}, {connector} {tail}"
@jekyll_filter_manager.register("slugify")DOCS
def jekyll_slugify(input: str, mode: str = "default") -> str:
    """Turn a string into a slug

    Note that non-ascii characters are always translated to ascii ones.

    Args:
        input: The input string
        mode: How string is slugified. "none" returns the input as it
            is, "pretty" and "raw" pass their own `regex_pattern` to
            `slugify`, anything else uses `slugify` with its defaults.

    Returns:
        The slugified string; `input` itself if it is None or the mode
        is "none"
    """
    if input is None or mode == "none":
        return input

    from slugify import slugify  # type: ignore

    if mode == "pretty":
        pattern = r"[^_.~!$&'()+,;=@\w]+"
    elif mode == "raw":
        pattern = r"\s+"
    else:
        pattern = None

    options = {} if pattern is None else {"regex_pattern": pattern}
    return slugify(input, **options)
@jekyll_filter_manager.registerDOCS
def number_of_words(input: str, mode: str = None) -> int:
    """Count the number of words in the input string.

    Args:
        input: The String on which to operate.
        mode: Passing 'cjk' as the argument will count every CJK character
            detected as one word irrespective of being separated by
            whitespace. Passing 'auto' (auto-detect) works similar to
            'cjk', but falls back to counting whitespace-separated words
            when the string has no CJK character at all. With any other
            value, whitespace-separated words are counted.

    Returns:
        The word count.
    """
    if mode not in ("cjk", "auto"):
        return len(input.split())

    # Only the CJK-aware modes need unicode script properties, so the
    # third-party module is not required for the default word count
    import regex

    cjk_charset = r"\p{Han}\p{Katakana}\p{Hiragana}\p{Hangul}"
    cjk_count = len(regex.findall(fr"[{cjk_charset}]", input))
    if mode == "auto" and cjk_count == 0:
        return len(input.split())

    # Every CJK character is a word; so is every run of the other
    # non-whitespace characters
    return cjk_count + len(regex.findall(fr"[^{cjk_charset}\s]+", input))
@jekyll_filter_manager.registerDOCS
def markdownify(value):
    """Render a markdown string

    Args:
        value: The markdown source

    Returns:
        The result of `markdown.markdown(value)`
    """
    from markdown import markdown as render  # type: ignore

    return render(value)
@jekyll_filter_manager.registerDOCS
def normalize_whitespace(value):
    """Collapse every run of whitespace into a single space

    Args:
        value: The string to normalize

    Returns:
        The string with each run of whitespace replaced by one space
    """
    whitespace_run = re.compile(r"\s+")
    return whitespace_run.sub(" ", value)
@jekyll_filter_manager.register("sort")DOCS
def jekyll_sort(
    array: Sequence,
    prop: str = None,
    none_pos: str = "first",
) -> Sequence:
    """Sort an array in a reverse way by default.

    Note that the order might be different than it with ruby. For example,
    in python `"1abc" > "1"`, but it's not the case in jekyll. Also, it's
    always in reverse order for property values.

    Args:
        array: The array. Any iterable is accepted, including one that
            can only be iterated once.
        prop: property name, may be a dot-separated path. If not given,
            the elements themselves are compared.
        none_pos: None order (first or last). Where to put the elements
            that are None (or whose property is None).

    Returns:
        The sorted array, as a list

    Raises:
        ValueError: If `array` is None or `none_pos` is neither "first"
            nor "last"
    """
    if array is None:
        raise ValueError("Cannot sort None object.")

    if none_pos not in ("first", "last"):
        raise ValueError(
            f"{none_pos!r} is not a valid none_pos order. "
            "It must be 'first' or 'last'."
        )

    # Walk the input exactly once and resolve each sort key exactly once,
    # so that generators are neither exhausted early nor passed to len()
    if prop is None:
        keyed = [(elm, elm) for elm in array]
    else:
        keyed = [(_getattr_multi(elm, prop), elm) for elm in array]

    none_arr = [elm for key, elm in keyed if key is None]
    rest = [pair for pair in keyed if pair[0] is not None]
    # Compare the keys only; equal keys keep the order of the input
    rest.sort(key=lambda pair: pair[0], reverse=True)
    sorted_arr = [elm for _, elm in rest]

    if none_pos == "first":
        return none_arr + sorted_arr
    return sorted_arr + none_arr
@jekyll_filter_manager.registerDOCS
def sample(value, n: int = 1):
    """Sample elements from array

    Args:
        value: The elements to choose from. Iterables that are not
            sequences (generators, sets, ...) are turned into a list
            first.
        n: The number of elements to pick

    Returns:
        A list of `n` elements picked without replacement

    Raises:
        ValueError: If `n` is larger than the number of elements
    """
    # random.sample() only accepts sequences
    population = value if isinstance(value, Sequence) else list(value)
    return random.sample(population, k=n)