# Source code for photon_mosaic.preprocessing.noop

"""
No-operation preprocessing step using symlinks.

This preprocessing step creates symbolic links to the original files instead
of copying them, which is much faster and saves disk space for large files.
"""

import logging
import shutil
from pathlib import Path

logger = logging.getLogger(__name__)


def _locate_input_file(dataset_folder: Path, tiff_name: str) -> Path:
    """
    Find ``tiff_name`` inside ``dataset_folder``.

    The file is first looked up directly in ``dataset_folder``; if it is
    not there, the folder is searched recursively and the first existing
    match is returned.

    Raises
    ------
    FileNotFoundError
        If no existing file named ``tiff_name`` is found.
    """
    direct = dataset_folder / tiff_name
    if direct.exists():
        return direct

    # Skip matches that do not exist (e.g. dangling symlinks) so the caller
    # never links to, or copies from, a missing file.
    found = next(
        (p for p in dataset_folder.rglob(tiff_name) if p.exists()), None
    )
    if found is None:
        raise FileNotFoundError(
            f"Could not find {tiff_name} in {dataset_folder}"
        )
    logger.info(f"Found input file using recursive search: {found}")
    return found


def run(
    dataset_folder: Path,
    output_folder: Path,
    tiff_name: str,
    **kwargs,
):
    """
    No-operation preprocessing step using symlinks instead of copying.

    Creates a symbolic link to the original TIFF file in the output
    directory, avoiding the need to copy large files when no processing
    is required. If the symlink cannot be created, the file is copied
    instead.

    Parameters
    ----------
    dataset_folder : Path
        Path to the dataset folder containing the input TIFF files.
        Strings and other path-like objects are accepted.
    output_folder : Path
        Path to the output folder where symlinks will be created. It is
        created (including parents) if it does not exist.
    tiff_name : str
        Name of the TIFF file to symlink.
    **kwargs : dict
        Additional keyword arguments (unused).

    Raises
    ------
    FileNotFoundError
        If ``tiff_name`` cannot be found in ``dataset_folder`` and there
        is no working symlink already present at the output path.
    """
    # Accept str / os.PathLike as well as Path.
    dataset_folder = Path(dataset_folder)
    output_folder = Path(output_folder)

    output_folder.mkdir(parents=True, exist_ok=True)
    output_file = output_folder / tiff_name
    link_exists = output_file.is_symlink()

    # Locate the input BEFORE touching anything at the output path, so a
    # missing input can never cause an existing output to be deleted.
    try:
        input_file = _locate_input_file(dataset_folder, tiff_name)
    except FileNotFoundError:
        if link_exists and output_file.exists():
            # The input could not be located, but a working link is
            # already in place: keep it rather than failing.
            logger.info(f"Symlink already exists: {output_file}")
            return
        raise
    target = input_file.resolve()

    if link_exists or output_file.exists():
        # exists() follows symlinks, so it is False for a dangling link.
        if output_file.exists() and output_file.resolve() == target:
            if link_exists:
                logger.info(f"Symlink already exists: {output_file}")
            else:
                # Output path *is* the input file (e.g. output_folder is
                # dataset_folder); removing it would destroy the source.
                logger.info(
                    f"Output path is the input file itself: {output_file}"
                )
            return
        if link_exists:
            logger.warning(
                f"Symlink is broken or points elsewhere: {output_file}. "
                "Replacing."
            )
        else:
            logger.warning(
                f"File exists but is not a symlink: {output_file}. Removing."
            )
        output_file.unlink()

    try:
        output_file.symlink_to(target)
    except OSError as e:
        logger.error(f"Failed to create symlink: {e}")
        # Fallback to copying if the symlink cannot be created
        # (e.g. symlinks unsupported or not permitted).
        logger.info(
            f"Falling back to copying file: {input_file} -> {output_file}"
        )
        shutil.copy2(input_file, output_file)
    else:
        logger.info(f"Created symlink: {output_file} -> {target}")