Source code for homodyne.optimization.exceptions

"""Custom exceptions for NLSQ optimization.

This module defines a comprehensive exception hierarchy for handling
errors specific to NLSQ optimization, including convergence failures,
numerical instabilities, and checkpoint-related issues.

The exception hierarchy enables fine-grained error handling and recovery
strategies tailored to specific failure modes.

Exception Hierarchy:
    NLSQOptimizationError (base)
    ├── NLSQConvergenceError (convergence failures)
    ├── NLSQNumericalError (NaN/Inf issues)
    └── NLSQCheckpointError (checkpoint save/load failures)

Examples
--------
Catching specific errors for targeted recovery:

>>> try:
...     result = optimizer.fit(data, model, p0)
... except NLSQNumericalError as e:
...     # Handle NaN/Inf with learning rate reduction
...     result = optimizer.fit(data, model, p0, learning_rate=0.5*lr)
... except NLSQConvergenceError as e:
...     # Handle convergence failure with perturbation
...     p0_perturbed = p0 * (1 + 0.01 * np.random.randn(*p0.shape))
...     result = optimizer.fit(data, model, p0_perturbed)

Using base exception for generic handling:

>>> try:
...     result = optimizer.fit(data, model, p0)
... except NLSQOptimizationError as e:
...     logger.error(f"Optimization failed: {e}")
...     # Fallback to simpler strategy
...     result = use_fallback_strategy()

Notes
-----
All exceptions inherit from `NLSQOptimizationError`, enabling catch-all
error handling while also supporting fine-grained recovery strategies.

The exception messages are designed to be actionable, providing specific
guidance on how to address each type of failure.

See Also
--------
NLSQWrapper : Main optimization wrapper using these exceptions
homodyne.optimization.strategy : Strategy selection and fallback logic
"""

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import numpy as np


class NLSQOptimizationError(Exception):
    """Base exception for all NLSQ optimization errors.

    This is the base class for all NLSQ-related exceptions. Catching this
    exception will catch all optimization failures regardless of their
    specific cause.

    Attributes
    ----------
    message : str
        Detailed error message
    error_context : dict
        Additional context about the error (parameters, data
        characteristics, etc.). Always a dict; empty when no context
        was supplied.

    Examples
    --------
    >>> try:
    ...     result = optimizer.fit(data, model, p0)
    ... except NLSQOptimizationError as e:
    ...     print(f"Optimization failed: {e}")
    ...     print(f"Context: {e.error_context}")
    """

    def __init__(self, message: str, error_context: dict | None = None):
        """Initialize base optimization error.

        Parameters
        ----------
        message : str
            Detailed error message
        error_context : dict, optional
            Additional context about the error. A shallow copy is stored,
            so later changes to the caller's dict do not alter this
            exception.
        """
        super().__init__(message)
        # The class docstring advertises ``message``; expose it explicitly
        # instead of leaving callers to dig it out of ``args``.
        self.message = message
        # Copy so the exception owns its context and is not aliased to a
        # dict the caller may keep mutating.
        self.error_context = dict(error_context) if error_context else {}

    def __str__(self) -> str:
        """Return formatted error message with context.

        Returns
        -------
        str
            The plain message when there is no context, otherwise the
            message followed by ``(context: key=value, ...)``.
        """
        base_msg = super().__str__()
        if not self.error_context:
            return base_msg
        context_str = ", ".join(f"{k}={v}" for k, v in self.error_context.items())
        return f"{base_msg} (context: {context_str})"
class NLSQConvergenceError(NLSQOptimizationError):
    """Raised when NLSQ optimization fails to converge.

    This exception indicates that the optimizer could not find a
    satisfactory solution within the specified constraints (maximum
    iterations, tolerance, etc.).

    Common Causes
    -------------
    - Poor initial guess (p0 too far from optimum)
    - Overly restrictive parameter bounds
    - Insufficient maximum iterations
    - Model function incompatible with data
    - Local minimum trap

    Recovery Strategies
    -------------------
    1. Perturb initial guess: `p0 * (1 + 0.05 * np.random.randn(*p0.shape))`
    2. Relax bounds: Increase parameter search space
    3. Increase max iterations: Allow more optimization steps
    4. Try different optimization method: Switch between 'trf' and 'lm'
    5. Simplify model: Use fewer parameters

    Attributes
    ----------
    iteration_count : int or None
        Number of iterations completed before failure
    final_loss : float or None
        Final loss value at termination
    parameters : np.ndarray or None
        Parameter values at termination

    Examples
    --------
    >>> try:
    ...     result = optimizer.fit(data, model, p0, max_iter=100)
    ... except NLSQConvergenceError as e:
    ...     print(f"Failed after {e.iteration_count} iterations")
    ...     print(f"Final loss: {e.final_loss}")
    ...     # Retry with more iterations
    ...     result = optimizer.fit(data, model, p0, max_iter=500)
    """

    def __init__(
        self,
        message: str,
        iteration_count: int | None = None,
        final_loss: float | None = None,
        parameters: np.ndarray | None = None,
        error_context: dict | None = None,
    ):
        """Initialize convergence error.

        Parameters
        ----------
        message : str
            Detailed error message
        iteration_count : int, optional
            Number of iterations completed; recorded in the context as
            ``iteration_count`` when not None
        final_loss : float, optional
            Final loss value; recorded in the context as ``final_loss``
            when not None
        parameters : np.ndarray, optional
            Parameter values at termination; only their count is recorded
            in the context, as ``n_params``
        error_context : dict, optional
            Additional context. It is copied, never modified in place.
        """
        # Work on a copy: the keys added below must not leak into the
        # caller's dict.
        context = dict(error_context) if error_context else {}
        if iteration_count is not None:
            context["iteration_count"] = iteration_count
        if final_loss is not None:
            context["final_loss"] = final_loss
        if parameters is not None:
            try:
                context["n_params"] = len(parameters)
            except TypeError:
                # Objects without a length (e.g. a 0-d array or a bare
                # scalar) are counted as one parameter, so building the
                # exception never hides the real convergence failure.
                context["n_params"] = 1
        super().__init__(message, context)
        self.iteration_count = iteration_count
        self.final_loss = final_loss
        self.parameters = parameters
class NLSQNumericalError(NLSQOptimizationError):
    """Raised for NaN/Inf numerical stability issues.

    This exception indicates that the optimization encountered numerical
    instabilities such as NaN (Not a Number) or Inf (Infinity) values
    during computation.

    Common Causes
    -------------
    - Gradient overflow/underflow
    - Division by zero in model function
    - Exponential overflow in parameters
    - Ill-conditioned Jacobian matrix
    - Learning rate too large

    Detection Points
    ----------------
    1. After gradient computation: `jnp.isfinite(gradients).all()`
    2. After parameter update: `jnp.isfinite(new_params).all()`
    3. After loss calculation: `jnp.isfinite(loss_value)`

    Recovery Strategies
    -------------------
    1. Reduce learning rate: `lr = 0.5 * lr`
    2. Scale data: Normalize inputs to [0, 1] range
    3. Add numerical stability: Use log-transform for exponentials
    4. Check model function: Ensure JAX-compatible operations
    5. Adjust parameter bounds: Prevent extreme values

    Attributes
    ----------
    detection_point : str or None
        Where NaN/Inf was detected ('gradient', 'parameter', 'loss')
    invalid_values : list
        Description of invalid values found; an empty list when none
        were supplied

    Examples
    --------
    >>> try:
    ...     result = optimizer.fit(data, model, p0)
    ... except NLSQNumericalError as e:
    ...     if e.detection_point == 'gradient':
    ...         # Reduce learning rate
    ...         result = optimizer.fit(data, model, p0, learning_rate=0.01)
    ...     elif e.detection_point == 'parameter':
    ...         # Tighten bounds
    ...         bounds = (lower * 0.8, upper * 0.8)
    ...         result = optimizer.fit(data, model, p0, bounds=bounds)
    """

    def __init__(
        self,
        message: str,
        detection_point: str | None = None,
        invalid_values: list | None = None,
        error_context: dict | None = None,
    ):
        """Initialize numerical error.

        Parameters
        ----------
        message : str
            Detailed error message
        detection_point : str, optional
            Where NaN/Inf was detected; recorded in the context as
            ``detection_point`` when non-empty
        invalid_values : list, optional
            Description of invalid values; only their count is recorded
            in the context, as ``n_invalid``, when the list is non-empty
        error_context : dict, optional
            Additional context. It is copied, never modified in place.
        """
        # Work on a copy: the keys added below must not leak into the
        # caller's dict.
        context = dict(error_context) if error_context else {}
        if detection_point:
            context["detection_point"] = detection_point
        if invalid_values:
            context["n_invalid"] = len(invalid_values)
        super().__init__(message, context)
        self.detection_point = detection_point
        self.invalid_values = invalid_values or []
class NLSQCheckpointError(NLSQOptimizationError):
    """Raised for checkpoint save/load/resume failures.

    This exception indicates that the streaming optimizer encountered an
    error while saving checkpoints, loading checkpoints, or resuming from
    a checkpoint.

    Common Causes
    -------------
    - Checkpoint file corrupted
    - Insufficient disk space
    - Invalid checkpoint path
    - HDF5 file lock conflict
    - Version mismatch in checkpoint format
    - Missing checkpoint metadata

    Recovery Strategies
    -------------------
    1. Disable checkpoints: `config.enable_checkpoints = False`
    2. Change checkpoint directory: Use different storage location
    3. Clear old checkpoints: Remove corrupted checkpoint files
    4. Start fresh: `config.resume_from_checkpoint = False`
    5. Reduce checkpoint frequency: Save less often to avoid I/O issues

    Attributes
    ----------
    checkpoint_path : str or None
        Path to the checkpoint file involved
    operation : str or None
        Operation that failed ('save', 'load', 'resume', 'validate')
    io_error : Exception or None
        Original I/O exception if available

    Examples
    --------
    >>> try:
    ...     config = HybridStreamingConfig(enable_checkpoints=True)
    ...     optimizer = AdaptiveHybridStreamingOptimizer(config)
    ...     result = optimizer.fit(data, model, p0)
    ... except NLSQCheckpointError as e:
    ...     if e.operation == 'load':
    ...         # Start fresh if checkpoint is corrupted
    ...         config = HybridStreamingConfig(enable_checkpoints=False)
    ...         optimizer = AdaptiveHybridStreamingOptimizer(config)
    ...         result = optimizer.fit(data, model, p0)
    ...     elif e.operation == 'save':
    ...         # Continue without checkpoints
    ...         config = HybridStreamingConfig(enable_checkpoints=False)
    ...         optimizer = AdaptiveHybridStreamingOptimizer(config)
    ...         result = optimizer.fit(data, model, p0)
    """

    def __init__(
        self,
        message: str,
        checkpoint_path: str | None = None,
        operation: str | None = None,
        io_error: Exception | None = None,
        error_context: dict | None = None,
    ):
        """Initialize checkpoint error.

        Parameters
        ----------
        message : str
            Detailed error message
        checkpoint_path : str, optional
            Path to checkpoint file; recorded in the context as
            ``checkpoint_path`` when non-empty
        operation : str, optional
            Operation that failed; recorded in the context as
            ``operation`` when non-empty
        io_error : Exception, optional
            Original I/O exception; only its class name is recorded in
            the context, as ``io_error_type``
        error_context : dict, optional
            Additional context. It is copied, never modified in place.
        """
        # Work on a copy: the keys added below must not leak into the
        # caller's dict.
        context = dict(error_context) if error_context else {}
        if checkpoint_path:
            context["checkpoint_path"] = checkpoint_path
        if operation:
            context["operation"] = operation
        if io_error:
            context["io_error_type"] = type(io_error).__name__
        super().__init__(message, context)
        self.checkpoint_path = checkpoint_path
        self.operation = operation
        self.io_error = io_error