# Source code for datar.core.names

"""Name repairing"""
import inspect
import keyword
import math
import re
from collections import Counter
from numbers import Number
from typing import Any, Callable, List, Union, Iterable, Tuple

from .utils import logger


class NameNonUniqueError(ValueError):
    """Error for names that fail the uniqueness check

    Raised by the `check_unique` repair method when a name is duplicated,
    empty, or ends with a reserved suffix. Being a `ValueError` subclass,
    it is also caught by handlers for `ValueError`.
    """


def _isnan(x: Any) -> bool:
    """Check if x is nan"""
    return isinstance(x, Number) and math.isnan(x)


def _is_scalar(x: Any) -> bool:
    """Check if x is scalar"""
    if isinstance(x, str):  # pragma: no cover
        return True
    try:
        iter(x)
    except TypeError:
        return True
    return False


def _log_changed_names(changed_names: List[Tuple[str, str]]) -> None:
    """Log the changed names"""
    if not changed_names:
        return

    logger.warning("New names:")
    for orig_name, new_name in changed_names:
        logger.warning("* %r -> %r", orig_name, new_name)


def _repair_names_minimal(names: Iterable[str]) -> List[str]:
    """Minimal repairing"""
    return ["" if name is None or _isnan(name) else str(name) for name in names]


def _repair_names_unique(
    names: Iterable[str],
    quiet: bool = False,
    sanitizer: Callable = None,
) -> List[str]:
    """Make sure names are unique

    Names are first minimally repaired, then trailing suffixes such as
    `_2`, `__3` or `__` are stripped. Every name that is empty or occurs
    more than once after that gets the suffix `__<i>`, where `<i>` is its
    0-based position in `names`.

    Args:
        names: The names to be repaired. Any iterable is accepted,
            including one-shot iterators such as generators.
        quiet: When False, the names that changed are logged as warnings.
        sanitizer: Optional function applied to each stripped name before
            duplicates are detected.

    Returns:
        The list of unique names, in the order of `names`
    """
    # `names` is walked twice (minimal repair, then the comparison loop);
    # materialize it so a one-shot iterator is not exhausted by the first
    # pass, which would make this function return an empty list.
    names = list(names)
    min_names = _repair_names_minimal(names)
    neat_names = [
        re.sub(r"(?:(?<!_)_{1,2}\d+|(?<!_)__)+$", "", name)
        for name in min_names
    ]
    if callable(sanitizer):
        neat_names = [sanitizer(name) for name in neat_names]

    # Count occurrences once instead of calling list.count() per name
    counts = Counter(neat_names)

    new_names = []
    changed_names = []
    for i, (name, neat_name) in enumerate(zip(names, neat_names)):
        if counts[neat_name] > 1 or neat_name == "":
            neat_name = f"{neat_name}__{i}"
        if neat_name != name:
            changed_names.append((name, neat_name))
        new_names.append(neat_name)
    if not quiet:
        _log_changed_names(changed_names)
    return new_names


def _repair_names_universal(
    names: Iterable[str],
    quiet: bool = False,
) -> List[str]:
    """Make sure names are safely to be used as variable or attribute

    Every non-word character is replaced with `_`, names that are Python
    keywords or start with a digit are prefixed with `_`, and the result
    is made unique with `_repair_names_unique`.

    Args:
        names: The names to be repaired. Any iterable is accepted,
            including one-shot iterators such as generators.
        quiet: When False, the names that changed are logged as warnings.

    Returns:
        The list of unique, syntactic names, in the order of `names`
    """
    # `names` is needed again below to report the changes; materialize it
    # so a one-shot iterator does not yield an empty change log.
    names = list(names)
    min_names = _repair_names_minimal(names)
    neat_names = [re.sub(r"[^\w]", "_", name) for name in min_names]
    new_names = _repair_names_unique(
        neat_names,
        # Changes are reported here against the original names instead
        quiet=True,
        sanitizer=lambda name: (
            f"_{name}"
            if keyword.iskeyword(name) or (name and name[0].isdigit())
            else name
        ),
    )
    if not quiet:
        changed_names = [
            (orig_name, new_name)
            for orig_name, new_name in zip(names, new_names)
            if orig_name != new_name
        ]
        _log_changed_names(changed_names)
    return new_names


def _repair_names_check_unique(names: Iterable[str]) -> Iterable[str]:
    """Just check the uniqueness"""
    for name in names:
        if names.count(name) > 1:
            raise NameNonUniqueError(f"Names must be unique: {name}")
        if name == "" or _isnan(name):
            raise NameNonUniqueError(f"Names can't be empty: {name}")
        if re.search(r"(?:(?<!_)_{2}\d+|(?<!_)__)+$", str(name)):
            raise NameNonUniqueError(
                f"Names can't be of the form `__` or `_j`: {name}"
            )
    return names


# Maps the name of each builtin repair method to the function that
# implements it; looked up when a string is given as the repair method.
BUILTIN_REPAIR_METHODS = {
    "minimal": _repair_names_minimal,
    "unique": _repair_names_unique,
    "universal": _repair_names_universal,
    "check_unique": _repair_names_check_unique,
}


def _first_arg_annotation_name(annotation: Any) -> Union[str, None]:
    """Get the bare name of the type annotating a function's first argument"""
    if isinstance(annotation, str):
        # Postponed annotation, e.g. "typing.Iterable[str]" -> "Iterable"
        return annotation.split("[", 1)[0].rsplit(".", 1)[-1].strip()
    # typing aliases expose `_name`; plain classes such as `str` or
    # `collections.abc.Iterable` only have `__name__`.
    return getattr(annotation, "_name", None) or getattr(
        annotation, "__name__", None
    )


def repair_names(
    names: Iterable[str],
    repair: Union[str, Callable],
) -> List[str]:
    """Repair names based on the method

    Args:
        names: The names to be repaired
        repair: The method to repair
            - `minimal`: Minimal names are never None or NA.
                When an element doesn't have a name, its minimal name
                is an empty string.
            - `unique`: Unique names are unique. A suffix is appended to
                duplicate names to make them unique.
            - `universal`: Universal names are unique and syntactic,
                meaning that you can safely use the names as variables without
                causing a syntax error (like `f.<name>`).
            - `check_unique`: No repairing, only check that the names are
                unique, non-empty and free of reserved suffixes.
            - A function, accepts either a list of names or a single name.
                Function accepts a list of names must annotate the first
                argument with `typing.Iterable` or `typing.Sequence`.
                Otherwise it is applied to each name separately.
            - A non-callable iterable of strings, which is returned as is
                to be used as the new names.

    Examples:
        >>> repair_names([None]*3, repair="minimal")
        >>> # ["", "", ""]
        >>> repair_names(["x", NA], repair="minimal")
        >>> # ["x", ""]
        >>> repair_names(["", "x", "", "y", "x", "_2", "__"], repair="unique")
        >>> # ["__0", "x__1", "__2", "y", "x__4", "__5", "__6"]
        >>> repair_names(["", "x", NA, "x"], repair="universal")
        >>> # ["__0", "x__1", "__2", "x__3"]
        >>> repair_names(["(y)", "_z", ".2fa", "False"], repair="universal")
        >>> # ["_y_", "_z", "_2fa", "_False"]

    Returns:
        The repaired names

    Raises:
        ValueError: when repair is not a string, a callable or an iterable
            of strings
        KeyError: when repair is a string that is not a builtin method
        NameNonUniqueError: when check_unique fails
    """
    if isinstance(repair, str):
        repair = BUILTIN_REPAIR_METHODS[repair]  # type: ignore
    elif not callable(repair):
        if not _is_scalar(repair) and all(
            isinstance(elem, str) for elem in repair
        ):
            # The new names were given directly
            return repair  # type: ignore
        raise ValueError("Expect a function for name repairing.")

    try:
        parameters = list(
            inspect.signature(repair).parameters.values()  # type: ignore
        )
    except (TypeError, ValueError):
        # Some builtin callables expose no signature; apply them per name
        parameters = []

    annotation = (
        parameters[0].annotation if parameters else inspect.Parameter.empty
    )
    if (
        annotation is inspect.Parameter.empty
        or _first_arg_annotation_name(annotation)
        not in ("Iterable", "Sequence")
    ):  # scalar input
        return [repair(name) for name in names]  # type: ignore

    return repair(names)  # type: ignore