Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add paddle audio dataset && backend (#45939)
* add audio feature dataset * fix coding style * fix coding style2 * rm librosa * rm voxceleb * rm librosa in test * add scipy fftpack * add functional * fix setup * fix setup2 * rm colorlog * refactor dataset __init__.py * fix coverage * fix librosa import error * fix windows test * fix windows ci * rm datasets * fix setup * remove testdata * add librosa in requirement * add librosa in requirement2 * change librosa to 0.8.1 * update ci docker * fix ci error * fix ci error2 * fix ci coverage * fix coverage * fix coverage * rm audio_base in test, notest,test=coverage * fix copyright * rm backend * add dataset in __init__ * rm compliance&&add function test * fix setup * fix windows * fix windows2 * fix test timeout * add backend & datasets * fix bugs * fix ci time issue * add dataset test * rm test_audio_feature * avoid windows issue, tmp * note windows issue * skip windows issue * refactor dataset test * add dataset.py * fix dtype in layers.mfcc * fix ci-static-check * fix dtype in layers.mfcc && fix ci-static-check * add relative accuracy * modify API.spec * skip cuda11.2 test * skip cuda11.2 test2 * skip cuda11.2 * change dataset name * fix format * update api.spec * update api.spec2 * fix coverage * add dataset test * rm download load dict * rm download load dict in init * update api.spec3 * fix dataset coverage * fix coverage * fix coverage2 * restore api.spec * restore api.spec2 * fix api-spec 3 * fix api-spec 4 * fix api.spec * fix api.spec6 * refactor init_backend * fix typo * change paddleaudio backend set * fix get_current_audio_backend() * fix format * fix format2 * remove format in parameters * fix format2 * add warning message in wave_backend && remove redundant audio util * rm audio util in print_signatures * fix format3 * add tess dataset license * format warning * add more info in warning msg * add paddleaudio version check * replace dataset esc50 with tess * add tess dataset && rm numpy transform in dataset.py * fix set audio backend bug * fix 
equal error * fix format && coverage error * add api example * fix format * fix error * fix typo * add noqa in __init__ * fix backend doc example error * rm seed in dataset * update backend example * fix typo * fix typo * fix example err * fix typo * fix ci dataset test * fix example file * try to fix ci * clean dataset doc * change get_current_audio_backend to get_current_backend * replace paddle.audio.backends.info with paddle.audio.info, same with load, save * fix ci error * replace api in test_audio_backend * fix save&&set_backend example
- Loading branch information
Showing
14 changed files
with
1,363 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
from . import init_backend | ||
from .init_backend import get_current_backend # noqa: F401 | ||
from .init_backend import list_available_backends # noqa: F401 | ||
from .init_backend import set_backend | ||
|
||
# Bind the default backend functions as soon as this package is imported.
init_backend._init_set_audio_backend()

# Names exported by a star-import of this package.
__all__ = ['get_current_backend', 'list_available_backends', 'set_backend']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License | ||
|
||
import paddle | ||
|
||
from pathlib import Path | ||
from typing import Optional, Tuple, Union | ||
|
||
|
||
class AudioInfo:
    """Plain record describing an audio signal; returned by a backend's ``info`` function.

    Attributes:
        sample_rate: value passed as ``sample_rate``.
        num_samples: value passed as ``num_samples``.
        num_channels: value passed as ``num_channels``.
        bits_per_sample: value passed as ``bits_per_sample``.
        encoding: value passed as ``encoding``.
    """

    def __init__(self, sample_rate: int, num_samples: int, num_channels: int,
                 bits_per_sample: int, encoding: str):
        # Every argument is stored verbatim; nothing is validated or converted.
        # Tuple assignment binds the attributes left to right, in signature order.
        (
            self.sample_rate,
            self.num_samples,
            self.num_channels,
            self.bits_per_sample,
            self.encoding,
        ) = (sample_rate, num_samples, num_channels, bits_per_sample, encoding)
|
||
|
||
def info(filepath: str) -> AudioInfo:
    """Get signal information of the input audio file.

    This definition only carries the API documentation: its body always
    raises ``NotImplementedError`` asking the caller to set an audio backend.

    Args:
        filepath: audio path or file object.

    Returns:
        AudioInfo: info of the given audio.

    Raises:
        NotImplementedError: always, when this placeholder itself is called.

    Example:
        .. code-block:: python

            import os
            import paddle

            sample_rate = 16000
            wav_duration = 0.5
            num_channels = 1
            num_frames = sample_rate * wav_duration
            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
            waveform = wav_data.tile([num_channels, 1])
            base_dir = os.getcwd()
            filepath = os.path.join(base_dir, "test.wav")

            paddle.audio.save(filepath, waveform, sample_rate)
            wav_info = paddle.audio.info(filepath)
    """
    # Documentation placeholder only: there is no implementation here.
    message = "please set audio backend"
    raise NotImplementedError(message)
|
||
|
||
def load(
    filepath: Union[str, Path],
    frame_offset: int = 0,
    num_frames: int = -1,
    normalize: bool = True,
    channels_first: bool = True,
) -> Tuple[paddle.Tensor, int]:
    """Load audio data from a file.

    Reading starts at ``frame_offset`` and covers ``num_frames`` frames.
    This definition only carries the API documentation: its body always
    raises ``NotImplementedError`` asking the caller to set an audio backend.

    Args:
        filepath: the audio file to read.
        frame_offset: first frame to read, from 0 to the total number of frames.
        num_frames: number of frames to read; -1 means all frames.
        normalize:
            if True, return audio normalized to (-1, 1), dtype=float32;
            if False, return the raw audio data, dtype=int16.
        channels_first:
            if True, return audio with shape (channels, time).

    Returns:
        Tuple[paddle.Tensor, int]: (audio_content, sample rate)

    Raises:
        NotImplementedError: always, when this placeholder itself is called.

    Examples:
        .. code-block:: python

            import os
            import paddle

            sample_rate = 16000
            wav_duration = 0.5
            num_channels = 1
            num_frames = sample_rate * wav_duration
            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
            waveform = wav_data.tile([num_channels, 1])
            base_dir = os.getcwd()
            filepath = os.path.join(base_dir, "test.wav")

            paddle.audio.save(filepath, waveform, sample_rate)
            wav_data_read, sr = paddle.audio.load(filepath)
    """
    # Documentation placeholder only: there is no implementation here.
    message = "please set audio backend"
    raise NotImplementedError(message)
|
||
|
||
def save(
    filepath: str,
    src: paddle.Tensor,
    sample_rate: int,
    channels_first: bool = True,
    encoding: Optional[str] = None,
    bits_per_sample: Optional[int] = 16,
):
    """Save an audio tensor to a file.

    This definition only carries the API documentation: its body always
    raises ``NotImplementedError`` asking the caller to set an audio backend.

    Args:
        filepath: saved path.
        src: the audio tensor.
        sample_rate: the number of samples of audio per second.
        channels_first: layout of ``src``;
            if True, the input tensor is (channels, time);
            if False, the input tensor is (time, channels).
        encoding: encoding format, wave_backend only supports PCM16 now.
        bits_per_sample: bits per sample, wave_backend only supports 16 bits now.

    Returns:
        None

    Raises:
        NotImplementedError: always, when this placeholder itself is called.

    Examples:
        .. code-block:: python

            import paddle

            sample_rate = 16000
            wav_duration = 0.5
            num_channels = 1
            num_frames = sample_rate * wav_duration
            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
            waveform = wav_data.tile([num_channels, 1])
            filepath = "./test.wav"

            paddle.audio.save(filepath, waveform, sample_rate)
    """
    # Documentation placeholder only: there is no implementation here.
    message = "please set audio backend"
    raise NotImplementedError(message)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import sys | ||
import warnings | ||
from . import wave_backend | ||
from . import backend | ||
from typing import List | ||
|
||
import paddle | ||
|
||
|
||
def _check_version(version: str) -> bool: | ||
# require paddleaudio >= 1.0.2 | ||
ver_arr = version.split('.') | ||
v0 = int(ver_arr[0]) | ||
v1 = int(ver_arr[1]) | ||
v2 = int(ver_arr[2]) | ||
if v0 < 1: | ||
return False | ||
if v0 == 1 and v1 == 0 and v2 <= 1: | ||
return False | ||
return True | ||
|
||
|
||
def list_available_backends() -> List[str]:
    """List available backends: the backends in paddleaudio plus the default backend.

    If paddleaudio cannot be imported, a warning is emitted and only the
    default ``"wave_backend"`` is reported. If paddleaudio is importable,
    its version is checked first and ``"wave_backend"`` is appended after
    the backends that paddleaudio reports.

    Returns:
        List[str]: The list of available backends.

    Raises:
        ImportError: if paddleaudio is installed but older than 1.0.2.

    Examples:
        .. code-block:: python

            import paddle

            sample_rate = 16000
            wav_duration = 0.5
            num_channels = 1
            num_frames = sample_rate * wav_duration
            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
            waveform = wav_data.tile([num_channels, 1])
            wav_path = "./test.wav"

            current_backend = paddle.audio.backends.get_current_backend()
            print(current_backend) # wave_backend, the default backend.
            backends = paddle.audio.backends.list_available_backends()
            # default backends is ['wave_backend']
            # backends also contains the paddleaudio backends (e.g. 'soundfile'),
            # if have installed paddleaudio >= 1.0.2
            if 'soundfile' in backends:
                paddle.audio.backends.set_backend('soundfile')

            paddle.audio.save(wav_path, waveform, sample_rate)
    """
    try:
        import paddleaudio
    except ImportError:
        package = "paddleaudio"
        # The requirement is quoted so a shell does not treat ">=" as a redirect.
        warn_msg = (
            "Failed importing {}. \n"
            "only wave_backend(only can deal with PCM16 WAV) supported.\n"
            "if want soundfile_backend(more audio type supported),\n"
            "please manually installed (usually with `pip install \"{}>=1.0.2\"`). "
        ).format(package, package)
        warnings.warn(warn_msg)
        # Without paddleaudio only the built-in backend exists.
        return ["wave_backend"]

    version = paddleaudio.__version__
    if not _check_version(version):
        err_msg = (
            "the version of paddleaudio installed is {},\n"
            "please ensure the paddleaudio >= 1.0.2.").format(version)
        raise ImportError(err_msg)

    # Copy before appending so the list owned by paddleaudio is never mutated.
    backends = list(paddleaudio.backends.list_audio_backends())
    backends.append("wave_backend")
    return backends
|
||
|
||
def get_current_backend() -> str:
    """Get the name of the current audio backend.

    paddleaudio is consulted only when it is already present in
    ``sys.modules``; its backend name is returned only if
    ``paddle.audio.load`` currently compares equal to ``paddleaudio.load``.
    In every other case the default name is returned.

    Returns:
        str: The name of the current backend,
        the wave_backend or backend imported from paddleaudio

    Examples:
        .. code-block:: python

            import paddle

            sample_rate = 16000
            wav_duration = 0.5
            num_channels = 1
            num_frames = sample_rate * wav_duration
            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
            waveform = wav_data.tile([num_channels, 1])
            wav_path = "./test.wav"

            current_backend = paddle.audio.backends.get_current_backend()
            print(current_backend) # wave_backend, the default backend.
            backends = paddle.audio.backends.list_available_backends()
            # default backends is ['wave_backend']
            # backends is ['wave_backend', 'soundfile'], if have installed paddleaudio >= 1.0.2
            if 'soundfile' in backends:
                paddle.audio.backends.set_backend('soundfile')

            paddle.audio.save(wav_path, waveform, sample_rate)
    """
    default_name = "wave_backend"

    # Never trigger a fresh import of paddleaudio from here.
    if "paddleaudio" not in sys.modules:
        return default_name

    import paddleaudio
    paddleaudio_name = paddleaudio.backends.get_audio_backend()
    # paddleaudio counts as active only while its load function is the one bound on paddle.audio.
    uses_paddleaudio = paddle.audio.load == paddleaudio.load
    return paddleaudio_name if uses_paddleaudio else default_name
|
||
|
||
def set_backend(backend_name: str):
    """Set the audio backend to one of the names returned by list_available_backends.

    After validation, the ``save``, ``load`` and ``info`` functions of the
    selected implementation are bound onto both the ``backend`` module and
    ``paddle.audio``.

    Args:
        backend_name (str): one of the names returned by list_available_backends.
            "wave_backend" is the default. "soundfile" imported from paddleaudio.

    Returns:
        None

    Raises:
        NotImplementedError: if ``backend_name`` is not an available backend.

    Examples:
        .. code-block:: python

            import paddle

            sample_rate = 16000
            wav_duration = 0.5
            num_channels = 1
            num_frames = sample_rate * wav_duration
            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
            waveform = wav_data.tile([num_channels, 1])
            wav_path = "./test.wav"

            current_backend = paddle.audio.backends.get_current_backend()
            print(current_backend) # wave_backend, the default backend.
            backends = paddle.audio.backends.list_available_backends()
            # default backends is ['wave_backend']
            # backends is ['wave_backend', 'soundfile'], if have installed paddleaudio >= 1.0.2
            if 'soundfile' in backends:
                paddle.audio.backends.set_backend('soundfile')

            paddle.audio.save(wav_path, waveform, sample_rate)
    """
    available = list_available_backends()
    if backend_name not in available:
        # Same exception type as before, now saying what went wrong.
        raise NotImplementedError(
            "audio backend {!r} is not available, expected one of {}".format(
                backend_name, available))

    if backend_name == "wave_backend":
        module = wave_backend
    else:
        # Any other accepted name is delegated to paddleaudio.
        import paddleaudio
        paddleaudio.backends.set_audio_backend(backend_name)
        module = paddleaudio

    for func in ["save", "load", "info"]:
        setattr(backend, func, getattr(module, func))
        setattr(paddle.audio, func, getattr(module, func))
|
||
|
||
def _init_set_audio_backend():
    """Bind ``save``, ``load`` and ``info`` from ``wave_backend`` onto the ``backend`` module."""
    # wave_backend is the default; only the ``backend`` module is touched here.
    for name in ("save", "load", "info"):
        implementation = getattr(wave_backend, name)
        setattr(backend, name, implementation)
Oops, something went wrong.