add paddle audio dataset && backend (#45939)

* add audio feature dataset * fix coding style * fix coding style2 * rm librosa * rm voxceleb * rm librosa in test * add scipy fftpack * add functional * fix setup * fix setup2 * rm colorlog * refactor dataset __init__.py * fix converage * fix librosa import error * fix windows test * fix windows ci * rm datasets * fix setup * remove testdata * add librosa in requirement * add librosa in requirement2 * change librosa to 0.8.1 * update ci docker * fix ci error * fix ci error2 * fix ci coverage * fix converage * fix coverage * rm audio_base in test, notest,test=coverage * fix copyright * rm backend * add datast in __init__ * rm compliance&&add function test * fix setup * fix windows * fix windows2 * fix test timeout * add backend & datasets * fix bugs * fix ci time issue * add dataset test * rm test_audio_feature * avoid windows isssue, tmp * note windows isssue * skip windows issue * refactor dataset test * add dataset.py * fix dtype in layers.mfcc * fix ci-static-check * fix dtype in layers.mfcc && fix ci-static-check * add relative accuracy * modity API.spec * skip cuda11.2 test * skip cuda11.2 test2 * skip cuda11.2 * change dataset name * fix format * update api.spec * update api.spec2 * fix coverage * add dataset test * rm download load dict * rm download load dict in init * update api.spec3 * fix dataset coverage * fix coverage * fix coverage2 * restore api.spec * restore api.spec2 * fix api-spec 3 * fix api-spec 4 * fix api.spec * fix api.spec6 * refactor init_backend * fix typo * change paddleaudio backend set * fix get_current_audio_backend() * fix format * fix format2 * remove format in parameters * fix format2 * add warning massage in wave_backend && remove redundant audio util * rm audio util in print_signatures * fix format3 * add tess dataset license * format warning * add more info in warning msg * add paddleaudio version check * replace dataset esc50 with tess * add tess dataset && rm numpy transform in dataset.py * fix set audio backend bug * fix 
equal error * fix format && coverage error * add api example * fix format * fix error * fix typo * add noqa in __init__ * fix backend doc example error * rm seed in dataset * update bakcend example * fix typo * fix typo * fix example err * fix typo * fix ci dataset test * fix example fil * try to fix ci * clean dataset doc * change get_current_audio_backend to get_current_backend * creplace paddle.audio.backends.info with paddle.audio.info, same with load, save * fix ci error * repalce api in test_audio_backend * fix save&&set_backend exmaple
PaddlePaddle · Oct 20, 2022 · ec5b27f · ec5b27f
1 parent 5a2e517
commit ec5b27f
Show file tree

Hide file tree

Showing 14 changed files with 1,363 additions and 12 deletions.
diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
@@ -21,3 +21,13 @@ paddle.audio.functional.functional.mel_frequencies (ArgSpec(args=['n_mels', 'f_m
 paddle.audio.functional.functional.mel_to_hz (ArgSpec(args=['mel', 'htk'], varargs=None, varkw=None, defaults=(False,), kwonlyargs=[], kwonlydefaults=None, annotations={'return': typing.Union[float, paddle.Tensor], 'mel': typing.Union[float, paddle.Tensor], 'htk': <class 'bool'>}), ('document', 'e93b432d382f98c60d7c7599489e7072'))
 paddle.audio.functional.functional.power_to_db (ArgSpec(args=['spect', 'ref_value', 'amin', 'top_db'], varargs=None, varkw=None, defaults=(1.0, 1e-10, 80.0), kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'paddle.Tensor'>, 'spect': <class 'paddle.Tensor'>, 'ref_value': <class 'float'>, 'amin': <class 'float'>, 'top_db': typing.Union[float, NoneType]}), ('document', '28bbb1973e8399e856bfaea0415cecb9'))
 paddle.audio.functional.window.get_window (ArgSpec(args=['window', 'win_length', 'fftbins', 'dtype'], varargs=None, varkw=None, defaults=(True, 'float64'), kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'paddle.Tensor'>, 'window': typing.Union[str, typing.Tuple[str, float]], 'win_length': <class 'int'>, 'fftbins': <class 'bool'>, 'dtype': <class 'str'>}), ('document', '2418d63da10c0cd5da9ecf0a88ddf783'))
+paddle.audio.backends (ArgSpec(), ('document', 'd41d8cd98f00b204e9800998ecf8427e'))
+paddle.audio.backends.init_backend.get_current_audio_backend (ArgSpec(args=[], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'str'>}), ('document', '3ff9fd62e8be1f3dc7e34afaf50e1645'))
+paddle.audio.backends.init_backend.list_available_backends (ArgSpec(args=[], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': typing.List[str]}), ('document', '8eba49f1b69f7ec7fa139a0714a2724e'))
+paddle.audio.backends.init_backend.set_backend (ArgSpec(args=['backend_name'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'backend_name': <class 'str'>}), ('document', '9680247dd97274d345dee415e2787527'))
+paddle.audio.backends.wave_backend.info (ArgSpec(args=['filepath', 'format'], varargs=None, varkw=None, defaults=(None,), kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'paddle.audio.backends.backend.AudioInfo'>, 'filepath': <class 'str'>, 'format': typing.Union[str, NoneType]}), ('document', 'e0ffd3accd942a9b0a4c08463a9f60f6'))
+paddle.audio.backends.wave_backend.load (ArgSpec(args=['filepath', 'frame_offset', 'num_frames', 'normalize', 'channels_first', 'format'], varargs=None, varkw=None, defaults=(0, -1, True, True, None), kwonlyargs=[], kwonlydefaults=None, annotations={'return': typing.Tuple[paddle.Tensor, int], 'filepath': typing.Union[str, pathlib.Path], 'frame_offset': <class 'int'>, 'num_frames': <class 'int'>, 'normalize': <class 'bool'>, 'channels_first': <class 'bool'>, 'format': typing.Union[str, NoneType]}), ('document', '4de50575ca516b4b7c7c82c7fdec808f'))
+paddle.audio.backends.wave_backend.save (ArgSpec(args=['filepath', 'src', 'sample_rate', 'channels_first', 'compression', 'format', 'encoding', 'bits_per_sample'], varargs=None, varkw=None, defaults=(True, None, None, None, None), kwonlyargs=[], kwonlydefaults=None, annotations={'filepath': <class 'str'>, 'src': <class 'paddle.Tensor'>, 'sample_rate': <class 'int'>, 'channels_first': <class 'bool'>, 'compression': typing.Union[float, NoneType], 'format': typing.Union[str, NoneType], 'encoding': typing.Union[str, NoneType], 'bits_per_sample': typing.Union[int, NoneType]}), ('document', '4c85cfcd29a0dcdfc32e74db8c0c3961'))
+paddle.audio.datasets (ArgSpec(), ('document', 'd41d8cd98f00b204e9800998ecf8427e'))
+paddle.audio.datasets.TESS (ArgSpec(), ('document', '3605f3aa2191ede7ddbe594cd27bb067'))
+paddle.audio.datasets.TESS.meta_info (ArgSpec(), ('document', '60d548a6f71629c3b69bcda3a30d4819'))
diff --git a/python/paddle/audio/__init__.py b/python/paddle/audio/__init__.py
@@ -14,5 +14,11 @@
 
 from . import features
 from . import functional
+from . import datasets
+from . import backends
 
-__all__ = ["functional", "features"]
+from .backends.backend import info, load, save
+
+__all__ = [
+    "functional", "features", "datasets", "backends", "load", "info", "save"
+]
diff --git a/python/paddle/audio/backends/__init__.py b/python/paddle/audio/backends/__init__.py
@@ -0,0 +1,25 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from . import init_backend
+from .init_backend import get_current_backend  # noqa: F401
+from .init_backend import list_available_backends  # noqa: F401
+from .init_backend import set_backend
+
+# Install the built-in wave_backend implementations of save/load/info on the
+# `backend` module at import time, so a default backend is in place before
+# set_backend() is ever called.
+init_backend._init_set_audio_backend()
+
+# Names exported by `from paddle.audio.backends import *`.
+__all__ = [
+    'get_current_backend',
+    'list_available_backends',
+    'set_backend',
+]
diff --git a/python/paddle/audio/backends/backend.py b/python/paddle/audio/backends/backend.py
@@ -0,0 +1,146 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License
+
+import paddle
+
+from pathlib import Path
+from typing import Optional, Tuple, Union
+
+
+class AudioInfo:
+    """Audio metadata, the return type of a backend's ``info`` function.
+
+    Attributes:
+        sample_rate: sample rate of the audio.
+        num_samples: number of samples in the audio.
+        num_channels: number of channels in the audio.
+        bits_per_sample: number of bits used to store one sample.
+        encoding: name of the sample encoding.
+    """
+
+    def __init__(self, sample_rate: int, num_samples: int, num_channels: int,
+                 bits_per_sample: int, encoding: str):
+        self.sample_rate = sample_rate
+        self.num_samples = num_samples
+        self.num_channels = num_channels
+        self.bits_per_sample = bits_per_sample
+        self.encoding = encoding
+
+    def __repr__(self) -> str:
+        # Show every field so that printing the result of `info()` is useful
+        # instead of displaying only the object address.
+        return ("{}(sample_rate={!r}, num_samples={!r}, num_channels={!r}, "
+                "bits_per_sample={!r}, encoding={!r})").format(
+                    type(self).__name__, self.sample_rate, self.num_samples,
+                    self.num_channels, self.bits_per_sample, self.encoding)
+
+
+def info(filepath: str) -> AudioInfo:
+    """Get signal information of input audio file.
+
+    This definition is a placeholder that carries the API documentation. The
+    backends package replaces the ``info`` attribute of this module with the
+    implementation of the active backend; calling the placeholder itself
+    always raises.
+
+    Args:
+       filepath: path of the audio file to inspect.
+
+    Returns:
+        AudioInfo: info of the given audio.
+
+    Raises:
+        NotImplementedError: if no backend implementation has been installed
+            over this placeholder.
+
+    Examples:
+        .. code-block:: python
+
+            import os
+            import paddle
+
+            sample_rate = 16000
+            wav_duration = 0.5
+            num_channels = 1
+            num_frames = sample_rate * wav_duration
+            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
+            waveform = wav_data.tile([num_channels, 1])
+            base_dir = os.getcwd()
+            filepath = os.path.join(base_dir, "test.wav")
+
+            paddle.audio.save(filepath, waveform, sample_rate)
+            wav_info = paddle.audio.info(filepath)
+    """
+    # Documentation-only stub: reached only when no backend was installed.
+    raise NotImplementedError("please set audio backend")
+
+
+def load(filepath: Union[str, Path],
+         frame_offset: int = 0,
+         num_frames: int = -1,
+         normalize: bool = True,
+         channels_first: bool = True) -> Tuple[paddle.Tensor, int]:
+    """Load audio data from a file.
+
+    Reading starts at ``frame_offset`` and covers ``num_frames`` frames.
+
+    This definition is a placeholder that carries the API documentation. The
+    backends package replaces the ``load`` attribute of this module with the
+    implementation of the active backend; calling the placeholder itself
+    always raises.
+
+    Args:
+        filepath: path of the audio file to load.
+        frame_offset: from 0 to total frames,
+        num_frames: from -1 (means total frames) or number frames which want to read,
+        normalize:
+            if True: return audio which norm to (-1, 1), dtype=float32
+            if False: return audio with raw data, dtype=int16
+
+        channels_first:
+            if True: return audio with shape (channels, time)
+
+    Returns:
+        Tuple[paddle.Tensor, int]: (audio_content, sample rate)
+
+    Raises:
+        NotImplementedError: if no backend implementation has been installed
+            over this placeholder.
+
+    Examples:
+        .. code-block:: python
+
+            import os
+            import paddle
+
+            sample_rate = 16000
+            wav_duration = 0.5
+            num_channels = 1
+            num_frames = sample_rate * wav_duration
+            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
+            waveform = wav_data.tile([num_channels, 1])
+            base_dir = os.getcwd()
+            filepath = os.path.join(base_dir, "test.wav")
+
+            paddle.audio.save(filepath, waveform, sample_rate)
+            wav_data_read, sr = paddle.audio.load(filepath)
+    """
+    # Documentation-only stub: reached only when no backend was installed.
+    raise NotImplementedError("please set audio backend")
+
+
+def save(
+    filepath: str,
+    src: paddle.Tensor,
+    sample_rate: int,
+    channels_first: bool = True,
+    encoding: Optional[str] = None,
+    bits_per_sample: Optional[int] = 16,
+):
+    """
+    Save audio tensor to file.
+
+    This definition is a placeholder that carries the API documentation. The
+    backends package replaces the ``save`` attribute of this module with the
+    implementation of the active backend; calling the placeholder itself
+    always raises.
+
+    Args:
+        filepath: saved path
+        src: the audio tensor
+        sample_rate: the number of samples of audio per second.
+        channels_first: layout of the ``src`` tensor
+            if True, means input tensor is (channels, time)
+            if False, means input tensor is (time, channels)
+        encoding: encoding format, wave_backend only support PCM16 now.
+        bits_per_sample: bits per sample, wave_backend only support 16 bits now.
+
+    Returns:
+        None
+
+    Raises:
+        NotImplementedError: if no backend implementation has been installed
+            over this placeholder.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            sample_rate = 16000
+            wav_duration = 0.5
+            num_channels = 1
+            num_frames = sample_rate * wav_duration
+            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
+            waveform = wav_data.tile([num_channels, 1])
+            filepath = "./test.wav"
+
+            paddle.audio.save(filepath, waveform, sample_rate)
+    """
+    # Documentation-only stub: reached only when no backend was installed.
+    raise NotImplementedError("please set audio backend")
diff --git a/python/paddle/audio/backends/init_backend.py b/python/paddle/audio/backends/init_backend.py
@@ -0,0 +1,185 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import warnings
+from . import wave_backend
+from . import backend
+from typing import List
+
+import paddle
+
+
+def _check_version(version: str) -> bool:
+    """Check that a paddleaudio version string is at least 1.0.2.
+
+    Only the leading decimal digits of the first three dot-separated fields
+    are compared. Missing fields count as 0 ("1.1" is read as 1.1.0) and any
+    non-numeric suffix is ignored ("1.0.2rc1" is read as 1.0.2), so unusual
+    version strings yield a verdict instead of raising IndexError/ValueError.
+
+    Args:
+        version: the version string, e.g. ``paddleaudio.__version__``.
+
+    Returns:
+        bool: True if the parsed version is >= 1.0.2, otherwise False.
+    """
+    minimum = (1, 0, 2)
+    numbers = []
+    for field in version.strip().split('.')[:len(minimum)]:
+        digits = ''
+        for char in field.strip():
+            # Explicit ASCII check: str.isdigit() also accepts characters
+            # such as superscripts that int() cannot convert.
+            if char not in '0123456789':
+                break
+            digits += char
+        numbers.append(int(digits) if digits else 0)
+    # Pad short versions such as "1.1" so the tuple comparison is well defined.
+    numbers.extend([0] * (len(minimum) - len(numbers)))
+    return tuple(numbers) >= minimum
+
+
+def list_available_backends() -> List[str]:
+    """ List the audio backends that can be passed to ``set_backend``.
+
+    The result holds the backends reported by paddleaudio (when it can be
+    imported) followed by the built-in "wave_backend", which is always
+    present. A warning is issued when paddleaudio cannot be imported.
+
+    Returns:
+        List[str]: The list of available backends.
+
+    Raises:
+        ImportError: if paddleaudio is installed but older than 1.0.2.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            sample_rate = 16000
+            wav_duration = 0.5
+            num_channels = 1
+            num_frames = sample_rate * wav_duration
+            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
+            waveform = wav_data.tile([num_channels, 1])
+            wav_path = "./test.wav"
+
+            current_backend = paddle.audio.backends.get_current_backend()
+            print(current_backend) # wave_backend, the default backend.
+            backends = paddle.audio.backends.list_available_backends()
+            # default backends is ['wave_backend']
+            # with paddleaudio >= 1.0.2 installed, its backends
+            # (e.g. 'soundfile') are listed before 'wave_backend'
+            if 'soundfile' in backends:
+                paddle.audio.backends.set_backend('soundfile')
+
+            paddle.audio.save(wav_path, waveform, sample_rate)
+
+    """
+    try:
+        import paddleaudio
+    except ImportError:
+        paddleaudio = None
+        package = "paddleaudio"
+        # The requirement is quoted because an unquoted `>=` would be parsed
+        # by the shell as an output redirection.
+        warn_msg = (
+            "Failed importing {}. \n"
+            "only wave_backend(only can deal with PCM16 WAV) supported.\n"
+            "if want soundfile_backend(more audio type supported),\n"
+            "please manually installed "
+            "(usually with `pip install \"{}>=1.0.2\"`). ").format(
+                package, package)
+        warnings.warn(warn_msg)
+
+    backends = []
+    if paddleaudio is not None:
+        version = paddleaudio.__version__
+        if not _check_version(version):
+            err_msg = (
+                "the version of paddleaudio installed is {},\n"
+                "please ensure the paddleaudio >= 1.0.2.").format(version)
+            raise ImportError(err_msg)
+        # Copy so that appending below never mutates a list owned by
+        # paddleaudio.
+        backends = list(paddleaudio.backends.list_audio_backends())
+    backends.append("wave_backend")
+    return backends
+
+
+def get_current_backend() -> str:
+    """ Report which audio backend currently serves ``paddle.audio.load``.
+
+    Returns:
+        str: the backend name reported by paddleaudio when paddleaudio has
+        been imported and ``paddle.audio.load`` is paddleaudio's ``load``;
+        otherwise "wave_backend".
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            sample_rate = 16000
+            wav_duration = 0.5
+            num_channels = 1
+            num_frames = sample_rate * wav_duration
+            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
+            waveform = wav_data.tile([num_channels, 1])
+            wav_path = "./test.wav"
+
+            current_backend = paddle.audio.backends.get_current_backend()
+            print(current_backend) # wave_backend, the default backend.
+            backends = paddle.audio.backends.list_available_backends()
+            # default backends is ['wave_backend']
+            # 'soundfile' is also listed if paddleaudio >= 1.0.2 is installed
+
+            if 'soundfile' in backends:
+                paddle.audio.backends.set_backend('soundfile')
+
+            paddle.audio.save(wav_path, waveform, sample_rate)
+
+    """
+    default_name = "wave_backend"
+
+    # paddleaudio is only consulted when something has already imported it.
+    if "paddleaudio" not in sys.modules:
+        return default_name
+
+    import paddleaudio
+    paddleaudio_name = paddleaudio.backends.get_audio_backend()
+    # paddleaudio is the active backend only if set_backend() has bound its
+    # load function onto paddle.audio.
+    if paddle.audio.load == paddleaudio.load:
+        return paddleaudio_name
+    return default_name
+
+
+def set_backend(backend_name: str):
+    """Select the backend that serves ``paddle.audio.save/load/info``.
+
+    The ``save``, ``load`` and ``info`` functions of the chosen backend are
+    bound onto both the ``backend`` module and ``paddle.audio``.
+
+    Args:
+        backend_name (str): one of the names returned by
+            ``list_available_backends()``. "wave_backend" is the default;
+            other names (e.g. "soundfile") are provided by paddleaudio.
+
+    Returns:
+        None
+
+    Raises:
+        NotImplementedError: if ``backend_name`` is not an available backend.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            sample_rate = 16000
+            wav_duration = 0.5
+            num_channels = 1
+            num_frames = sample_rate * wav_duration
+            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
+            waveform = wav_data.tile([num_channels, 1])
+            wav_path = "./test.wav"
+
+            current_backend = paddle.audio.backends.get_current_backend()
+            print(current_backend) # wave_backend, the default backend.
+            backends = paddle.audio.backends.list_available_backends()
+            # default backends is ['wave_backend']
+            # 'soundfile' is also listed if paddleaudio >= 1.0.2 is installed
+
+            if 'soundfile' in backends:
+                paddle.audio.backends.set_backend('soundfile')
+
+            paddle.audio.save(wav_path, waveform, sample_rate)
+
+    """
+    available = list_available_backends()
+    if backend_name not in available:
+        # Same exception type as before, now with an actionable message.
+        raise NotImplementedError(
+            "audio backend {!r} is not available, expected one of {}".format(
+                backend_name, available))
+
+    if backend_name == "wave_backend":
+        module = wave_backend
+    else:
+        import paddleaudio
+        paddleaudio.backends.set_audio_backend(backend_name)
+        module = paddleaudio
+
+    for func in ["save", "load", "info"]:
+        implementation = getattr(module, func)
+        setattr(backend, func, implementation)
+        setattr(paddle.audio, func, implementation)
+
+
+def _init_set_audio_backend():
+    """Bind wave_backend's save/load/info onto the ``backend`` module."""
+    for name in ("save", "load", "info"):
+        default_impl = getattr(wave_backend, name)
+        setattr(backend, name, default_impl)