Project-OSmOSE · Gautzilla · Sep 23, 2025 · Sep 24, 2025 · Sep 24, 2025 · Sep 24, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -29,6 +29,11 @@ classifiers = [
 [project.scripts]
 OSEkit = "OSmOSE:main"
 
+[project.optional-dependencies]
+mseed = [
+    "obspy",
+]
+
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"

diff --git a/src/osekit/audio_backend/audio_backend.py b/src/osekit/audio_backend/audio_backend.py
@@ -0,0 +1,61 @@
+from os import PathLike
+from typing import Protocol
+
+import numpy as np
+
+
+class AudioBackend(Protocol):
+    """Structural interface of the objects that read audio files.
+
+    A backend reports the metadata of a file, reads a range of its frames
+    and releases whatever file it keeps opened.
+    The signatures mirror those of ``SoundFileBackend`` and ``MSeedBackend``.
+    """
+
+    def info(self, path: PathLike | str) -> tuple[int, int, int]:
+        """Return the sample rate, number of frames and channels of the audio file.
+
+        Parameters
+        ----------
+        path: PathLike | str
+            Path to the audio file.
+
+        Returns
+        -------
+        tuple[int,int,int]:
+            Sample rate, number of frames and channels of the audio file.
+
+        """
+        ...
+
+    def read(
+        self,
+        path: PathLike | str,
+        start: int = 0,
+        stop: int | None = None,
+    ) -> np.ndarray:
+        """Read the content of an audio file.
+
+        Whether a file is kept opened between two calls is up to the backend.
+
+        Parameters
+        ----------
+        path: PathLike | str
+            Path to the audio file.
+        start: int
+            First frame to read.
+        stop: int | None
+            Frame after the last frame to read.
+            ``None`` is meant to read up to the end of the file.
+
+        Returns
+        -------
+        np.ndarray:
+            A (channel * frames) array containing the audio data.
+
+        """
+        ...
+
+    def close(self) -> None:
+        """Close the currently opened file, if the backend keeps one opened."""
+        ...
diff --git a/src/osekit/core_api/audio_file_manager.py → ...sekit/audio_backend/audio_file_manager.py b/src/osekit/core_api/audio_file_manager.py → ...sekit/audio_backend/audio_file_manager.py
@@ -5,9 +5,11 @@
 
 from __future__ import annotations
 
+from pathlib import Path
 from typing import TYPE_CHECKING
 
-import soundfile as sf
+from osekit.audio_backend.mseed_backend import MSeedBackend
+from osekit.audio_backend.soundfile_backend import SoundFileBackend
 
 if TYPE_CHECKING:
     from os import PathLike
@@ -20,25 +22,40 @@ class AudioFileManager:
 
     def __init__(self) -> None:
         """Initialize an audio file manager."""
-        self.opened_file = None
+        self._soundfile = SoundFileBackend()  # used for every non-MSEED file
+        self._mseed: MSeedBackend | None = None  # created on first MSEED file
 
     def close(self) -> None:
         """Close the currently opened file."""
-        if self.opened_file is None:
-            return
-        self.opened_file.close()
-        self.opened_file = None
-
-    def _open(self, path: PathLike | str) -> None:
-        self.opened_file = sf.SoundFile(path, "r")
-
-    def _switch(self, path: PathLike | str) -> None:
-        if self.opened_file is None:
-            self._open(path)
-        if self.opened_file.name == str(path):
-            return
-        self.close()
-        self._open(path)
+        for backend in (self._soundfile, self._mseed):
+            if backend is not None:
+                backend.close()
+
+    def _backend(self, path: PathLike | str) -> SoundFileBackend | MSeedBackend:
+        """Return the backend in charge of the file, based on its extension."""
+        if Path(path).suffix.lower() != ".mseed":
+            return self._soundfile
+
+        # The MSEED backend is only created once a MSEED file is requested.
+        if self._mseed is None:
+            self._mseed = MSeedBackend()
+        return self._mseed
+
+    def info(self, path: PathLike | str) -> tuple[int, int, int]:
+        """Return the sample rate, number of frames and channels of the audio file.
+
+        Parameters
+        ----------
+        path: PathLike | str
+            Path to the audio file. Its extension selects the backend.
+
+        Returns
+        -------
+        tuple[int,int,int]:
+            Sample rate, number of frames and channels of the audio file.
+
+        """
+        return self._backend(path).info(path)
 
     def read(
         self,
@@ -57,7 +74,7 @@ def read(
             Path to the audio file.
         start: int
             First frame to read.
-        stop: int
+        stop: int | None
             Frame after the last frame to read.
 
         Returns
@@ -66,42 +83,19 @@ def read(
             A (channel * frames) array containing the audio data.
 
         """
-        self._switch(path)
         _, frames, _ = self.info(path)
         if stop is None:
             stop = frames
 
+        # Bounds are checked once here, whatever backend ends up reading.
         if not 0 <= start < frames:
-            raise ValueError(
-                "Start should be between 0 and the last frame of the audio file.",
-            )
+            msg = "Start should be between 0 and the last frame of the audio file."
+            raise ValueError(msg)
         if not 0 <= stop <= frames:
-            raise ValueError(
-                "Stop should be between 0 and the last frame of the audio file.",
-            )
+            msg = "Stop should be between 0 and the last frame of the audio file."
+            raise ValueError(msg)
         if start > stop:
-            raise ValueError("Start should be inferior to Stop.")
-
-        self.opened_file.seek(start)
-        return self.opened_file.read(stop - start)
-
-    def info(self, path: PathLike | str) -> tuple[int, int, int]:
-        """Return the sample rate, number of frames and channels of the audio file.
-
-        Parameters
-        ----------
-        path: PathLike | str
-            Path to the audio file.
+            msg = "Start should be inferior to Stop."
+            raise ValueError(msg)
 
-        Returns
-        -------
-        tuple[int,int,int]:
-            Sample rate, number of frames and channels of the audio file.
-
-        """
-        self._switch(path)
-        return (
-            self.opened_file.samplerate,
-            self.opened_file.frames,
-            self.opened_file.channels,
-        )
+        return self._backend(path).read(path=path, start=start, stop=stop)
diff --git a/src/osekit/audio_backend/mseed_backend.py b/src/osekit/audio_backend/mseed_backend.py
@@ -0,0 +1,100 @@
+from os import PathLike
+
+import numpy as np
+
+
+class MSeedBackend:
+    """Audio backend reading MSEED files with the optional obspy package."""
+
+    def __init__(self) -> None:
+        """Check that obspy can be imported.
+
+        Raises
+        ------
+        ImportError
+            If the optional dependency obspy cannot be imported.
+
+        """
+        try:
+            import obspy  # noqa: F401, PLC0415
+        except ImportError as e:
+            # Parenthesized on purpose: adjacent literals on separate statements
+            # are not concatenated, which would drop the install instructions.
+            msg = (
+                "MSEED support requires the optional dependency 'obspy'. "
+                "Install with: pip install osekit[mseed]. "
+                "If you're on windows and don't use conda, may the force be with you."
+            )
+            raise ImportError(msg) from e
+
+    def close(self) -> None:
+        """Close the currently opened file. No use in MSEED files."""
+
+    def info(self, path: PathLike | str) -> tuple[int, int, int]:
+        """Return the sample rate, number of frames and channels of the MSEED file.
+
+        Parameters
+        ----------
+        path: PathLike | str
+            Path to the audio file.
+
+        Returns
+        -------
+        tuple[int,int,int]:
+            Sample rate, number of frames and channels of the MSEED file.
+            The frames of all traces are summed and the channel count is always 1.
+
+        Raises
+        ------
+        ValueError
+            If the traces of the file do not share a single sampling rate.
+
+        """
+        import obspy  # type: ignore[import-not-found]  # noqa: PLC0415
+
+        metadata = obspy.read(pathname_or_url=path, headonly=True)
+        sample_rate = {trace.meta.sampling_rate for trace in metadata.traces}
+        if len(sample_rate) != 1:
+            msg = "Inconsistent sampling rates in MSEED file."
+            raise ValueError(msg)
+
+        frames = sum(trace.meta.npts for trace in metadata.traces)
+        return (
+            int(sample_rate.pop()),
+            frames,
+            1,
+        )
+
+    def read(
+        self,
+        path: PathLike | str,
+        start: int = 0,
+        stop: int | None = None,
+    ) -> np.ndarray:
+        """Read the content of a MSEED file.
+
+        Parameters
+        ----------
+        path: PathLike | str
+            Path to the audio file.
+        start: int
+            First frame to read.
+        stop: int | None
+            Frame after the last frame to read.
+            None reads up to the last frame.
+
+        Returns
+        -------
+        np.ndarray:
+            A (channel * frames) array containing the MSEED data.
+
+        """
+        import obspy  # type: ignore[import-not-found]  # noqa: PLC0415
+
+        file_content = obspy.read(path)
+        # NOTE(review): traces are merged here but not in info(); confirm that
+        # both frame counts agree for files with gaps or overlaps.
+        file_content.merge(method=1, fill_value=0)
+
+        data = np.concatenate([trace.data for trace in file_content])
+        return data[start:stop]
diff --git a/src/osekit/audio_backend/soundfile_backend.py b/src/osekit/audio_backend/soundfile_backend.py
@@ -0,0 +1,92 @@
+from os import PathLike
+
+import numpy as np
+import soundfile as sf
+
+
+class SoundFileBackend:
+    """Audio backend reading audio files through soundfile.
+
+    At most one file is kept opened: requesting another path closes the
+    current file and opens the requested one.
+    """
+
+    def __init__(self) -> None:
+        """Instantiate a SoundFileBackend."""
+        self._file: sf.SoundFile | None = None
+
+    def close(self) -> None:
+        """Close the currently opened file."""
+        self._close()
+
+    def info(self, path: PathLike | str) -> tuple[int, int, int]:
+        """Return the sample rate, number of frames and channels of the audio file.
+
+        Parameters
+        ----------
+        path: PathLike | str
+            Path to the audio file.
+
+        Returns
+        -------
+        tuple[int,int,int]:
+            Sample rate, number of frames and channels of the audio file.
+
+        """
+        self._switch(path)
+        return (
+            self._file.samplerate,
+            self._file.frames,
+            self._file.channels,
+        )
+
+    def read(
+        self,
+        path: PathLike | str,
+        start: int = 0,
+        stop: int | None = None,
+    ) -> np.ndarray:
+        """Read the content of an audio file.
+
+        If the audio file is not the current opened file,
+        the current opened file is switched.
+
+        Parameters
+        ----------
+        path: PathLike | str
+            Path to the audio file.
+        start: int
+            First frame to read.
+        stop: int | None
+            Frame after the last frame to read.
+            None reads up to the last frame.
+
+        Returns
+        -------
+        np.ndarray:
+            A (channel * frames) array containing the audio data.
+
+        """
+        self._switch(path)
+        self._file.seek(start)
+        # A negative frame count makes soundfile read the rest of the file,
+        # which is what the default stop=None asks for.
+        frames = -1 if stop is None else stop - start
+        return self._file.read(frames)
+
+    def _close(self) -> None:
+        """Close and forget the opened file, if there is one."""
+        if self._file is None:
+            return
+        self._file.close()
+        self._file = None
+
+    def _open(self, path: PathLike | str) -> None:
+        """Open the file in read mode and keep its handle."""
+        self._file = sf.SoundFile(path, "r")
+
+    def _switch(self, path: PathLike | str) -> None:
+        """Make sure the opened file is the one located at path."""
+        if self._file is None or self._file.name != str(path):
+            self._close()
+            self._open(path)
diff --git a/src/osekit/core_api/__init__.py b/src/osekit/core_api/__init__.py
@@ -1,3 +1,3 @@
-from osekit.core_api.audio_file_manager import AudioFileManager
+from osekit.audio_backend.audio_file_manager import AudioFileManager
 
 audio_file_manager = AudioFileManager()
diff --git a/src/osekit/core_api/audio_dataset.py b/src/osekit/core_api/audio_dataset.py
@@ -244,7 +244,7 @@ def from_folder(  # noqa: PLR0913
         kwargs.update(
             {
                 "file_class": AudioFile,
-                "supported_file_extensions": [".wav", ".flac", ".mp3"],
+                "supported_file_extensions": [".wav", ".flac", ".mseed", ".mp3"],
             },
         )
         base_dataset = BaseDataset.from_folder(

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -14,11 +14,11 @@
 from pandas import Timestamp
 
 from osekit import config
+from osekit.audio_backend.soundfile_backend import SoundFileBackend
 from osekit.config import (
     TIMESTAMP_FORMAT_EXPORTED_FILES_LOCALIZED,
     TIMESTAMP_FORMAT_EXPORTED_FILES_UNLOCALIZED,
 )
-from osekit.core_api import AudioFileManager
 from osekit.core_api.audio_data import AudioData
 from osekit.core_api.audio_file import AudioFile
 from osekit.core_api.base_dataset import BaseDataset
@@ -159,13 +159,13 @@ def patch_afm_open(monkeypatch: pytest.MonkeyPatch) -> list[Path]:
-    """Mock the AudioFileManager._open method in order to track the file openings."""
+    """Mock the SoundFileBackend._open method in order to track the file openings."""
 
     opened_files = []
-    open_func = AudioFileManager._open
+    open_func = SoundFileBackend._open
 
-    def mock_open(self: AudioFileManager, path: Path) -> None:
+    def mock_open(self: SoundFileBackend, path: Path) -> None:
         opened_files.append(path)
         open_func(self, path)
 
-    monkeypatch.setattr(AudioFileManager, "_open", mock_open)
+    monkeypatch.setattr(SoundFileBackend, "_open", mock_open)
     return opened_files