# Source code for pyflowreg.util.io.factory

from typing import Union, List
from pathlib import Path
import warnings

import numpy as np

from pyflowreg.util.io._base import VideoReader, VideoWriter



# [docs]
def get_video_file_reader(
    input_source: Union[str, Path, np.ndarray, VideoReader, List[str]],
    buffer_size: int = 500,
    bin_size: int = 1,
    **kwargs,
) -> VideoReader:
    """
    Factory function to create the appropriate reader based on input type.
    Mirrors MATLAB get_video_file_reader functionality.

    Dispatch order:
      1. numpy array            -> ``ArrayReader``
      2. ``VideoReader``        -> returned unchanged
      3. list/tuple of paths    -> ``MULTICHANNELFileReader``
      4. anything else is treated as a filesystem path and the reader is
         chosen from the lower-cased file extension; a file with an
         unrecognised extension is probed with ``h5py`` and read as HDF5
         if the probe succeeds.

    Args:
        input_source: Path to video file, numpy array, VideoReader instance,
                     list (or tuple) of paths for multichannel, or folder
                     for images
        buffer_size: Buffer size for reading
        bin_size: Temporal binning factor
        **kwargs: Additional reader-specific arguments. Forwarded to the
                  file-based and multichannel readers; not forwarded to
                  ``ArrayReader``.

    Returns:
        Appropriate VideoReader subclass instance

    Raises:
        NotImplementedError: If ``input_source`` is a folder containing
            image files (image folder reading is not implemented yet).
        ValueError: If ``input_source`` is a folder without image files, or
            a file whose format is not supported.
        FileNotFoundError: If the path does not exist.
    """
    # Handle numpy arrays
    if isinstance(input_source, np.ndarray):
        from pyflowreg.util.io._arr import ArrayReader

        return ArrayReader(input_source, buffer_size, bin_size)

    # Handle VideoReader instances (already initialized)
    if isinstance(input_source, VideoReader):
        return input_source

    # Import readers here to avoid circular imports
    from pyflowreg.util.io.tiff import TIFFFileReader
    from pyflowreg.util.io.hdf5 import HDF5FileReader
    from pyflowreg.util.io.mat import MATFileReader
    from pyflowreg.util.io.mdf import MDFFileReader
    from pyflowreg.util.io.multifile_wrappers import MULTICHANNELFileReader

    # Handle multichannel input (sequence of files). Tuples are normalised
    # to a list so the multichannel reader always receives the same type.
    if isinstance(input_source, (list, tuple)):
        return MULTICHANNELFileReader(
            list(input_source), buffer_size, bin_size, **kwargs
        )

    # From here on, treat as file path
    file_path = input_source
    path = Path(file_path)

    # Handle folder input (image sequence) - TODO: implement IMGFileReader
    if path.is_dir():
        # Check if folder contains images
        image_exts = {".png", ".jpg", ".jpeg", ".tif", ".tiff", ".bmp"}
        has_images = any(
            f.suffix.lower() in image_exts for f in path.iterdir() if f.is_file()
        )
        if has_images:
            # TODO: Implement IMGFileReader for image folders
            raise NotImplementedError(
                "Image folder reading not yet implemented. Use TIFF stacks instead."
            )
        raise ValueError(f"Folder {file_path} does not contain images")

    # Handle file input
    if not path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    ext = path.suffix.lower()

    readers = {
        ".tif": TIFFFileReader,
        ".tiff": TIFFFileReader,
        ".h5": HDF5FileReader,
        ".hdf5": HDF5FileReader,
        ".hdf": HDF5FileReader,
        ".mat": MATFileReader,
        ".mdf": MDFFileReader,
    }

    reader_class = readers.get(ext)
    if reader_class is not None:
        return reader_class(str(file_path), buffer_size, bin_size, **kwargs)

    # Unknown extension: check whether the file is HDF5 anyway. Only the
    # probe lives inside the ``try`` so that errors raised while actually
    # constructing the reader (bad kwargs, missing dataset, ...) propagate
    # unchanged instead of being reported as an unsupported format.
    try:
        import h5py

        with h5py.File(str(file_path), "r"):
            pass
    except Exception as e:
        # Not an HDF5 file (or h5py unavailable) - warn before raising error
        warnings.warn(
            f"File format detection failed: could not open as HDF5: {e}",
            stacklevel=2,
        )
        # Try video formats as last resort
        # TODO: Implement AVIFileReader for video files
        raise ValueError(f"Unsupported file format: {ext}") from e

    return HDF5FileReader(str(file_path), buffer_size, bin_size, **kwargs)




# [docs]
def get_video_file_writer(file_path: str, output_format: str, **kwargs) -> VideoWriter:
    """
    Build the writer that corresponds to an output format name.
    Counterpart of the MATLAB get_video_file_writer function.

    Args:
        file_path: Output file path, handed to the file-based writers
                   (not used for 'ARRAY' and 'NULL').
        output_format: Format name, compared exactly (case-sensitive).
                       File-based: 'TIFF', 'SUITE2P_TIFF', 'MAT', 'HDF5',
                       'MULTIFILE_TIFF', 'MULTIFILE_MAT', 'MULTIFILE_HDF5',
                       'CAIMAN_HDF5'.
                       Special:
                       - 'ARRAY': ArrayWriter for in-memory accumulation
                       - 'NULL': NullVideoWriter that discards all frames
                         (useful when only callbacks are needed)
        **kwargs: Additional writer-specific arguments, forwarded to the
                  file-based writers only.

    Returns:
        Appropriate VideoWriter subclass instance

    Raises:
        NotImplementedError: For 'BEGONIA', which is not implemented yet.
        ValueError: For any other unrecognised format name.
    """
    # Writers are imported inside the function to avoid circular imports
    from pyflowreg.util.io.tiff import TIFFFileWriter
    from pyflowreg.util.io.hdf5 import HDF5FileWriter
    from pyflowreg.util.io.mat import MATFileWriter
    from pyflowreg.util.io.multifile_wrappers import MULTIFILEFileWriter

    # In-memory accumulation: no file path involved
    if output_format == "ARRAY":
        from pyflowreg.util.io._arr import ArrayWriter

        return ArrayWriter()

    # Sink that drops every frame without storing it
    if output_format == "NULL":
        from pyflowreg.util.io._null import NullVideoWriter

        return NullVideoWriter()

    # Routing table for file-backed formats (matches the MATLAB switch):
    # (format name, writer class, extra positional args, preset keywords).
    # Kept as an ordered tuple and compared with ``==`` so the lookup
    # behaves exactly like a sequential if/elif chain.
    file_backed = (
        ("TIFF", TIFFFileWriter, (), {}),
        # TODO: Add suite2p-specific formatting
        ("SUITE2P_TIFF", TIFFFileWriter, (), {"format": "suite2p"}),
        ("MAT", MATFileWriter, (), {}),
        ("HDF5", HDF5FileWriter, (), {}),
        ("MULTIFILE_TIFF", MULTIFILEFileWriter, ("TIFF",), {}),
        ("MULTIFILE_MAT", MULTIFILEFileWriter, ("MAT",), {}),
        ("MULTIFILE_HDF5", MULTIFILEFileWriter, ("HDF5",), {}),
        # Multifile HDF5 with /mov dataset for CaImAn compatibility
        ("CAIMAN_HDF5", MULTIFILEFileWriter, ("HDF5",), {"dataset_names": "/mov"}),
    )
    for format_name, writer_cls, extra_args, preset in file_backed:
        if output_format == format_name:
            return writer_cls(file_path, *extra_args, **preset, **kwargs)

    if output_format == "BEGONIA":
        # TODO: Implement TSERIESH5_file_writer
        raise NotImplementedError("BEGONIA format not yet implemented")

    raise ValueError(f"Unsupported output format: {output_format}")




# [docs]
def main():
    """Smoke-test the multifile writer and the multichannel/subset readers.

    Writes a random uint8 array of shape (20, 64, 64, 2) in two batches
    through ``MULTIFILEFileWriter`` into a temporary directory, reads the two
    resulting per-channel files back with ``MULTICHANNELFileReader``, and
    selects five frames with ``SUBSETFileReader``. Shapes and frame contents
    are checked with assertions; progress is printed to stdout.

    Raises:
        AssertionError: If an expected file is missing or a shape/content
            check fails.
    """
    import tempfile

    # Absolute import so this also works when the module is imported as part
    # of the package, not only when it is run from its own directory.
    from pyflowreg.util.io.multifile_wrappers import (
        MULTICHANNELFileReader,
        SUBSETFileReader,
        MULTIFILEFileWriter,
    )

    # Create test data
    test_frames = np.random.randint(0, 255, (20, 64, 64, 2), dtype=np.uint8)

    # Test MULTIFILE writer
    print("Testing MULTIFILE writer...")
    with tempfile.TemporaryDirectory() as tmpdir:
        multifile_path = Path(tmpdir) / "test_multi"

        with MULTIFILEFileWriter(str(multifile_path), "TIFF") as writer:
            writer.write_frames(test_frames[:10])
            writer.write_frames(test_frames[10:])

        # Check files were created
        ch1_file = multifile_path / "compensated_ch1.TIFF"
        ch2_file = multifile_path / "compensated_ch2.TIFF"

        assert ch1_file.exists(), "Channel 1 file not created"
        assert ch2_file.exists(), "Channel 2 file not created"
        print("✓ MULTIFILE writer test passed")

        # Test MULTICHANNEL reader
        print("\nTesting MULTICHANNEL reader...")
        reader = MULTICHANNELFileReader([str(ch1_file), str(ch2_file)])
        try:
            print(f"Shape: {reader.shape}")
            print(f"Channels: {reader.n_channels}")

            # Read all frames
            all_frames = reader[:]
            assert all_frames.shape == (
                20,
                64,
                64,
                2,
            ), f"Shape mismatch: {all_frames.shape}"
            print("✓ MULTICHANNEL reader test passed")

            # Test SUBSET reader
            print("\nTesting SUBSET reader...")
            subset_indices = [0, 5, 10, 15, 19]
            subset_reader = SUBSETFileReader(reader, subset_indices)

            print(f"Subset shape: {subset_reader.shape}")
            assert subset_reader.frame_count == 5, "Subset frame count incorrect"

            subset_frames = subset_reader[:]
            assert subset_frames.shape == (
                5,
                64,
                64,
                2,
            ), f"Subset shape mismatch: {subset_frames.shape}"

            # Verify correct frames were selected
            for i, orig_idx in enumerate(subset_indices):
                np.testing.assert_array_equal(
                    subset_frames[i],
                    all_frames[orig_idx],
                    err_msg=f"Frame {i} (original {orig_idx}) mismatch",
                )

            print("✓ SUBSET reader test passed")
        finally:
            # Always close the reader, even when a check fails, so it is not
            # left open while the temporary directory is being removed.
            reader.close()

    print("\n✓ All wrapper tests passed!")



# Run the wrapper smoke test when this file is executed as a script.
if __name__ == "__main__":
    main()