"""Path validation utilities for secure file operations.
This module provides path validation functions to prevent path traversal
attacks and ensure safe file operations for save_path parameters.
Security fixes implemented as part of code review remediation (Dec 2025).
Addresses CVSS 7.5 path traversal vulnerability (VUL-001).
"""
from __future__ import annotations
from pathlib import Path
from homodyne.utils.logging import get_logger
logger = get_logger(__name__)
[docs]
class PathValidationError(ValueError):
"""Raised when path validation fails due to security concerns."""
pass
[docs]
def validate_save_path(
path: str | Path | None,
*,
allowed_extensions: tuple[str, ...] | None = None,
require_parent_exists: bool = True,
allow_absolute: bool = True,
base_dir: Path | None = None,
) -> Path | None:
"""Validate and sanitize a file save path.
Prevents path traversal attacks and ensures the path is safe for
file operations.
Parameters
----------
path : str | Path | None
Path to validate. If None, returns None.
allowed_extensions : tuple[str, ...], optional
Allowed file extensions (e.g., ('.png', '.pdf')).
If None, all extensions are allowed.
require_parent_exists : bool, default=True
If True, validates that the parent directory exists.
allow_absolute : bool, default=True
If True, absolute paths are allowed.
If False, only relative paths are allowed.
base_dir : Path, optional
Base directory for relative paths. If provided, the resolved
path must be within this directory (prevents path traversal).
Defaults to current working directory.
Returns
-------
Path | None
Validated and resolved Path object, or None if path is None.
Raises
------
PathValidationError
If path validation fails due to security concerns.
ValueError
If path has invalid extension or parent doesn't exist.
Examples
--------
>>> validate_save_path("output/results.png")
PosixPath('/current/dir/output/results.png')
>>> validate_save_path("../../../etc/passwd")
PathValidationError: Path traversal detected
>>> validate_save_path("/tmp/test.png", allow_absolute=False)
PathValidationError: Absolute paths not allowed
"""
if path is None:
return None
# Reject null bytes before Path conversion (security: prevents null-byte injection;
# Python 3.13+ may not raise ValueError for embedded nulls on all platforms)
if isinstance(path, str) and "\x00" in path:
raise PathValidationError(f"Null bytes not allowed in path: {path!r}")
# Convert to Path object
path = Path(path)
# Check for path traversal by inspecting each path component.
# Using parts instead of a raw string search avoids false positives for
# filenames like "version..2.png" which legitimately contain "..".
# Also split on backslashes to catch Windows-style traversal on POSIX systems
# (e.g., "..\\..\\etc\\passwd" which Path treats as a single component on Linux).
path_str = str(path)
# Gather components from both the POSIX parts and any backslash-delimited segments
raw_components = set(path.parts)
for segment in path_str.replace("\\", "/").split("/"):
raw_components.add(segment)
if ".." in raw_components:
raise PathValidationError(
f"Path traversal detected: path contains '..': {_sanitize_log_path(path_str)}"
)
# Check absolute path permission
if path.is_absolute() and not allow_absolute:
raise PathValidationError(
f"Absolute paths not allowed: {_sanitize_log_path(path_str)}"
)
# Resolve the path (normalize)
if base_dir is None:
base_dir = Path.cwd()
else:
base_dir = Path(base_dir).resolve()
if path.is_absolute():
resolved_path = path.resolve()
# For explicitly allowed absolute paths, skip base_dir containment check
# The ".." check above already prevents traversal attacks
else:
resolved_path = (base_dir / path).resolve()
# For relative paths, verify resolved path is within base_dir
# (prevents traversal via symlinks in relative paths)
try:
resolved_path.relative_to(base_dir)
except ValueError as e:
# Path is outside base_dir
raise PathValidationError(
f"Path resolves outside allowed directory: "
f"{_sanitize_log_path(str(resolved_path))} is not within "
f"{_sanitize_log_path(str(base_dir))}"
) from e
# Check extension
if allowed_extensions is not None:
suffix = resolved_path.suffix.lower()
if suffix not in allowed_extensions:
raise ValueError(
f"Invalid file extension '{suffix}'. "
f"Allowed: {', '.join(allowed_extensions)}"
)
# Check parent directory exists
if require_parent_exists:
parent = resolved_path.parent
if not parent.exists():
raise ValueError(
f"Parent directory does not exist: {_sanitize_log_path(str(parent))}"
)
if not parent.is_dir():
raise ValueError(
f"Parent path is not a directory: {_sanitize_log_path(str(parent))}"
)
return resolved_path
[docs]
def validate_plot_save_path(
path: str | Path | None,
*,
require_parent_exists: bool = True,
) -> Path | None:
"""Validate a save path for plot files.
Convenience wrapper for validate_save_path with plot-specific defaults.
Parameters
----------
path : str | Path | None
Path to validate.
require_parent_exists : bool, default=True
If True, validates that the parent directory exists.
Returns
-------
Path | None
Validated Path object or None.
Raises
------
PathValidationError
If path validation fails.
ValueError
If extension is not a valid image format.
Examples
--------
>>> validate_plot_save_path("results/trace_plot.png")
PosixPath('/current/dir/results/trace_plot.png')
"""
# Common plot file extensions
allowed_extensions = (
".png",
".pdf",
".svg",
".eps",
".jpg",
".jpeg",
".tiff",
".tif",
)
return validate_save_path(
path,
allowed_extensions=allowed_extensions,
require_parent_exists=require_parent_exists,
allow_absolute=True,
)
def _sanitize_log_path(path: str, max_length: int = 50) -> str:
"""Sanitize path for logging to prevent log injection.
Parameters
----------
path : str
Path string to sanitize.
max_length : int
Maximum length of returned string.
Returns
-------
str
Sanitized path safe for logging.
"""
# Remove potentially dangerous characters for log injection
sanitized = path.replace("\n", "\\n").replace("\r", "\\r").replace("\t", "\\t")
# Truncate if too long (hide potentially sensitive deep paths)
if len(sanitized) > max_length:
# Show beginning and end
return f"{sanitized[:20]}...{sanitized[-20:]}"
return sanitized
[docs]
def get_safe_output_dir(
output_dir: str | Path | None = None,
default_subdir: str = "homodyne_output",
) -> Path:
"""Get a safe output directory, creating it if necessary.
Parameters
----------
output_dir : str | Path | None
Requested output directory. If None, uses cwd/default_subdir.
default_subdir : str
Default subdirectory name if output_dir is None.
Returns
-------
Path
Validated and existing output directory.
Raises
------
PathValidationError
If the path is invalid or unsafe.
PermissionError
If directory cannot be created due to permissions.
"""
if output_dir is None:
output_dir = Path.cwd() / default_subdir
else:
output_dir = Path(output_dir)
# Validate path doesn't contain traversal (component-level check,
# matching validate_save_path to avoid false positives like "version..2")
path_str = str(output_dir)
raw_components = set(output_dir.parts)
for segment in path_str.replace("\\", "/").split("/"):
raw_components.add(segment)
if ".." in raw_components:
raise PathValidationError(
f"Path traversal detected in output directory: "
f"{_sanitize_log_path(path_str)}"
)
# Resolve and create if needed
resolved = output_dir.resolve()
if not resolved.exists():
try:
resolved.mkdir(parents=True, exist_ok=True)
logger.debug(
f"Created output directory: {_sanitize_log_path(str(resolved))}"
)
except OSError as e:
raise PermissionError(
f"Cannot create output directory: {_sanitize_log_path(str(resolved))}"
) from e
if not resolved.is_dir():
raise PathValidationError(
f"Output path exists but is not a directory: "
f"{_sanitize_log_path(str(resolved))}"
)
return resolved