# Source code for panpath.base

"""Base class for all PanPath path implementations."""

import os
import re
import sys
from pathlib import Path as PathlibPath, PurePosixPath
from typing import TYPE_CHECKING, Any, AsyncGenerator, List, Union

from panpath.registry import get_path_class

if TYPE_CHECKING:
    from panpath.clients import AsyncFileHandle


# Matches a leading "<scheme>://" prefix; group 1 captures the scheme name.
_URI_PATTERN = re.compile(r"^([a-z][a-z0-9+.-]*):\/\/", re.IGNORECASE)


def _parse_uri(path: str) -> tuple[Union[str, None], str]:
    """Split a path string into a URI scheme and the path to hand on.

    Args:
        path: A plain filesystem path or a ``scheme://...`` URI.

    Returns:
        A ``(scheme, path)`` tuple, one of:

        - ``(None, path)`` with ``path`` unchanged when there is no
          ``scheme://`` prefix.
        - ``(None, rest)`` for ``file://`` URIs, where ``rest`` is everything
          after the ``file://`` prefix.
        - ``(scheme, path)`` for any other scheme. The scheme is lower-cased
          and ``path`` is returned in full, prefix included.
    """
    found = _URI_PATTERN.match(path)
    if found is None:
        return None, path

    scheme_name = found.group(1).lower()
    if scheme_name != "file":
        return scheme_name, path

    # The match spans exactly the "file://" prefix, so the local path
    # begins where the match ends.
    return None, path[found.end():]


class PanPath(PathlibPath):
    """Universal path base class and factory.

    This class inherits from pathlib.Path and serves dual purposes:
    1. Base class for all path types in the panpath package
    2. Factory for creating appropriate path instances via __new__

    As a base class, it's inherited by:
    - LocalPath (local filesystem paths with sync and async methods)
    - CloudPath (cloud storage paths with sync and async methods)
    - All cloud-specific subclasses (GSPath, S3Path, AzurePath, etc.)

    As a factory, calling PanPath(...) returns the appropriate concrete implementation
    based on the URI scheme.

    Use `isinstance(obj, PanPath)` to check if an object is a path created by this package.

    Examples:
        >>> # Local path
        >>> path = PanPath("/local/file.txt")
        >>> isinstance(path, PanPath)
        True

        >>> # S3 path
        >>> path = PanPath("s3://bucket/key.txt")
        >>> isinstance(path, PanPath)
        True

        >>> # Async method with a_ prefix
        >>> content = await path.a_read_text()
    """

    def __new__(cls, *args: Any, **kwargs: Any) -> "PanPath":
        """Create and return the appropriate path instance.

        If called on a subclass, returns an instance of that subclass.
        If called on PanPath itself, routes to the appropriate concrete class
        based on the URI scheme of the first positional argument.

        Args:
            *args: Path segments. Only the first one is inspected for a URI
                scheme; with no arguments an empty path (``""``) is used.
            **kwargs: Forwarded to the concrete class (for local paths only on
                Python 3.12+, where ``__init__`` receives the arguments).

        Returns:
            The first argument itself when it is already a PanPath, a
            LocalPath for scheme-less or ``file://`` input, or an instance of
            the class registered for the scheme.

        Raises:
            ValueError: If no path class is registered for the URI scheme.
        """
        # Direct construction of a subclass: defer to the pathlib machinery.
        if cls is not PanPath:
            # Cloud classes flag themselves with a truthy ``_is_cloud_path``
            # and are created through the PurePosixPath constructor.
            if getattr(cls, "_is_cloud_path", False):  # pragma: no cover
                return PurePosixPath.__new__(cls, *args)
            # Everything else (e.g. LocalPath) uses pathlib.Path behavior.
            return PathlibPath.__new__(cls, *args)

        # Factory logic - only reached when called as PanPath(...) directly.
        if not args:
            args = ("",)  # Default to empty path if no args provided

        path = args[0]
        if isinstance(path, PanPath):
            # Already one of ours: hand it back unchanged.
            # NOTE(review): any further positional args are ignored here.
            return path

        # Parse URI to get the scheme (None means "local").
        scheme, clean_path = _parse_uri(str(path))

        if scheme is None:
            # Imported lazily inside the function (presumably to avoid a
            # circular import with panpath.local_path - confirm).
            from panpath.local_path import LocalPath

            # Replace the first segment with the scheme-stripped path so that
            # file:// URLs become plain local paths.
            new_args = (clean_path,) + args[1:]
            # Use PathlibPath.__new__() to properly initialize the path object
            instance = PathlibPath.__new__(LocalPath, *new_args)
            # In Python 3.10, __init__ doesn't accept arguments
            # In Python 3.12+, __init__ needs the arguments
            # NOTE(review): the returned object is a PanPath instance, so
            # Python calls __init__ on it again with the ORIGINAL args after
            # __new__ returns; LocalPath.__init__ presumably tolerates that -
            # confirm.
            if sys.version_info >= (3, 12):
                LocalPath.__init__(instance, *new_args, **kwargs)  # type: ignore[no-untyped-call]
            else:  # pragma: no cover
                LocalPath.__init__(instance)  # type: ignore[no-untyped-call]
            return instance

        # Cloud path - look up in registry and instantiate.
        # Only the registry lookup is guarded, so a KeyError raised while
        # constructing the path object is not misreported as an unknown scheme.
        try:
            path_class = get_path_class(scheme)
        except KeyError:
            raise ValueError(f"Unsupported URI scheme: {scheme!r}") from None
        return path_class(*args, **kwargs)  # type: ignore[no-any-return]

    # These methods are used for IDE type hinting and documentation generation.
    # Actual implementations are in LocalPath and CloudPath subclasses and
    # are routed via __new__.
    async def a_resolve(self) -> "PanPath":  # type: ignore[empty-body]
        """Asynchronously resolve to an absolute path (no-op for cloud paths).

        Returns:
            The resolved path; for cloud paths this is self, since cloud paths
            are already absolute.
        """

    async def a_exists(self) -> bool:  # type: ignore[empty-body]
        """Asynchronously check if the path exists.

        Returns:
            True if the path exists, False otherwise.
        """

    async def a_read_bytes(self) -> bytes:  # type: ignore[empty-body]
        """Asynchronously read the file's bytes.

        Returns:
            File content as bytes.
        """

    async def a_read_text(self, encoding: str = "utf-8") -> str:  # type: ignore[empty-body]
        """Asynchronously read the file's text content.

        Args:
            encoding: Text encoding to use (default: 'utf-8').

        Returns:
            File content as string.
        """

    async def a_write_bytes(self, data: bytes) -> Union[int, None]:
        """Asynchronously write bytes to the file.

        Args:
            data: Bytes to write to the file.

        Returns:
            Number of bytes written. For some cloud paths, may return None.
        """

    async def a_write_text(  # type: ignore[empty-body]
        self,
        data: str,
        encoding: str = "utf-8",
    ) -> int:
        """Asynchronously write text to the file.

        Args:
            data: Text to write to the file.
            encoding: Text encoding to use (default: 'utf-8').

        Returns:
            Number of characters written.
        """

    async def a_unlink(self, missing_ok: bool = False) -> None:
        """Asynchronously remove (delete) the file or empty directory.

        Args:
            missing_ok: If True, does not raise an error if the file does not exist.
        """

    async def a_iterdir(self) -> AsyncGenerator["PanPath", None]:  # type: ignore[empty-body]
        """Asynchronously iterate over directory contents.

        Yields:
            PanPath instances for each item in the directory.
        """

    async def a_is_dir(self) -> bool:  # type: ignore[empty-body]
        """Asynchronously check if the path is a directory.

        Returns:
            True if the path is a directory, False otherwise.
        """

    async def a_is_file(self) -> bool:  # type: ignore[empty-body]
        """Asynchronously check if the path is a file.

        Returns:
            True if the path is a file, False otherwise.
        """

    async def a_stat(  # type: ignore[empty-body]
        self,
        follow_symlinks: bool = True,
    ) -> os.stat_result:
        """Asynchronously get the file or directory's status information.

        Args:
            follow_symlinks: Whether symbolic links should be followed
                (presumably mirrors the ``pathlib.Path.stat`` parameter of the
                same name; exact handling is left to the concrete subclass).

        Returns:
            An object containing file status information (platform-dependent).
        """

    async def a_mkdir(
        self,
        mode: int = 0o777,
        parents: bool = False,
        exist_ok: bool = False,
    ) -> None:
        """Asynchronously create a directory at this path.

        Args:
            mode: Directory mode (permissions) to set.
            parents: If True, create parent directories as needed.
            exist_ok: If True, does not raise an error if the directory already exists.
        """

    async def a_glob(  # type: ignore[empty-body]
        self,
        pattern: str,
    ) -> AsyncGenerator["PanPath", None]:
        """Asynchronously yield paths matching a glob pattern.

        Args:
            pattern: Glob pattern to match.

        Yields:
            PanPath instances matching the pattern.
        """

    async def a_rglob(  # type: ignore[empty-body]
        self,
        pattern: str,
    ) -> AsyncGenerator["PanPath", None]:
        """Asynchronously yield paths matching a recursive glob pattern.

        Args:
            pattern: Recursive glob pattern to match.

        Yields:
            PanPath instances matching the pattern.
        """

    async def a_walk(  # type: ignore[empty-body]
        self,
    ) -> AsyncGenerator[tuple["PanPath", List[str], List[str]], None]:
        """Asynchronously walk the directory tree.

        Yields:
            Tuples of (current_path, dirnames, filenames) at each level.
        """

    async def a_touch(
        self,
        mode: int = 0o666,
        exist_ok: bool = True,
    ) -> None:
        """Asynchronously create the file if it does not exist.

        Args:
            mode: File mode (permissions) to set if creating the file.
            exist_ok: If False, raises an error if the file already exists.
        """

    async def a_rename(  # type: ignore[empty-body]
        self,
        target: Union[str, "PathlibPath"],
    ) -> "PanPath":
        """Asynchronously rename this path to the target path.

        Args:
            target: New path to rename to.

        Returns:
            The renamed PanPath instance.
        """

    async def a_replace(  # type: ignore[empty-body]
        self,
        target: Union[str, "PathlibPath"],
    ) -> "PanPath":
        """Asynchronously replace this path with the target path.

        Args:
            target: New path to replace with.

        Returns:
            The replaced PanPath instance.
        """

    async def a_rmdir(self) -> None:
        """Asynchronously remove the directory and its contents recursively."""
        # NOTE(review): the docstring says "recursively", which overlaps with
        # a_rmtree in this class; pathlib's rmdir only removes empty
        # directories. Confirm the intended contract against the subclasses.

    async def a_is_symlink(self) -> bool:  # type: ignore[empty-body]
        """Asynchronously check if the path is a symbolic link.

        For local paths, this checks if the path is a symlink.
        For cloud paths, this checks if the object has a metadata flag indicating it's a symlink.
        Note that it is not a real symlink like in local filesystems.
        But for example, gcsfuse supports symlink-like behavior via metadata.

        Returns:
            True if the path is a symlink, False otherwise.
        """

    async def a_readlink(self) -> "PanPath":  # type: ignore[empty-body]
        """Asynchronously read the target of the symbolic link.

        For local paths, this reads the symlink target.
        For cloud paths, this reads the metadata flag indicating the symlink target.

        Returns:
            The target PanPath of the symlink.
        """

    async def a_symlink_to(
        self,
        target: Union[str, "PathlibPath"],
        target_is_directory: bool = False,
    ) -> None:
        """Asynchronously create a symbolic link pointing to the target path.

        For local paths, this creates a real symlink.
        For cloud paths, this sets a metadata flag indicating the symlink target.

        Args:
            target: The target path the symlink points to.
            target_is_directory: Whether the target is a directory (ignored for cloud paths).
        """

    async def a_rmtree(self, ignore_errors: bool = False, onerror: Any = None) -> None:
        """Asynchronously remove the directory and all its contents recursively.

        Args:
            ignore_errors: If True, ignores errors during removal.
            onerror: Optional function to call on errors.
        """

    async def a_copy(  # type: ignore[empty-body]
        self,
        target: Union[str, "PathlibPath"],
    ) -> "PanPath":
        """Asynchronously copy this path to the target path.

        Args:
            target: Destination path to copy to.

        Returns:
            The copied PanPath instance.
        """

    async def a_copytree(  # type: ignore[empty-body]
        self,
        target: Union[str, "PathlibPath"],
        follow_symlinks: bool = True,
    ) -> "PanPath":
        """Asynchronously copy the directory and all its contents recursively to the target path.

        Args:
            target: Destination path to copy to.
            follow_symlinks: If True, copies the contents of symlinks.

        Returns:
            The copied PanPath instance.
        """

    def a_open(  # type: ignore[empty-body]
        self,
        mode: str = "r",
        encoding: str = "utf-8",
        **kwargs: Any,
    ) -> "AsyncFileHandle":
        """Open the file and return an async file handle.

        Note that this is declared as a regular (non-``async``) method: the
        handle is returned directly rather than from an awaited coroutine.

        Args:
            mode: Mode to open the file (e.g., 'r', 'rb', 'w', 'wb').
            encoding: Text encoding to use (default: 'utf-8').
            **kwargs: Additional arguments to pass to the underlying open method.

        Returns:
            An async file handle.
        """

    # backports
    def walk(self) -> Any:
        """Walk the directory tree.

        Yields:
            Tuples of (current_path, dirnames, filenames) at each level.
        """