Skip to content

Commit

Permalink
add paddle audio dataset && backend (#45939)
Browse files Browse the repository at this point in the history
* add audio feature dataset

* fix coding style

* fix coding style2

* rm librosa

* rm voxceleb

* rm librosa in test

* add scipy fftpack

* add functional

* fix setup

* fix setup2

* rm colorlog

* refactor dataset __init__.py

* fix coverage

* fix librosa import error

* fix windows test

* fix windows ci

* rm datasets

* fix setup

* remove testdata

* add librosa in requirement

* add librosa in requirement2

* change librosa to 0.8.1

* update ci docker

* fix ci error

* fix ci error2

* fix ci coverage

* fix coverage

* fix coverage

* rm audio_base in test, notest,test=coverage

* fix copyright

* rm backend

* add dataset in __init__

* rm compliance&&add function test

* fix setup

* fix windows

* fix windows2

* fix test timeout

* add backend & datasets

* fix bugs

* fix ci time issue

* add dataset test

* rm test_audio_feature

* avoid windows issue, tmp

* note windows issue

* skip windows issue

* refactor dataset test

* add dataset.py

* fix dtype in layers.mfcc

* fix ci-static-check

* fix dtype in layers.mfcc && fix ci-static-check

* add relative accuracy

* modify API.spec

* skip cuda11.2 test

* skip cuda11.2 test2

* skip cuda11.2

* change dataset name

* fix format

* update api.spec

* update api.spec2

* fix coverage

* add dataset test

* rm download load dict

* rm download load dict in init

* update api.spec3

* fix dataset coverage

* fix coverage

* fix coverage2

* restore api.spec

* restore api.spec2

* fix api-spec 3

* fix api-spec 4

* fix api.spec

* fix api.spec6

* refactor init_backend

* fix typo

* change paddleaudio backend set

* fix get_current_audio_backend()

* fix format

* fix format2

* remove format in parameters

* fix format2

* add warning message in wave_backend && remove redundant audio util

* rm audio util in print_signatures

* fix format3

* add tess dataset license

* format warning

* add more info in warning msg

* add paddleaudio version check

* replace dataset esc50 with tess

* add tess dataset && rm numpy transform in dataset.py

* fix set audio backend bug

* fix equal error

* fix format && coverage error

* add api example

* fix format

* fix error

* fix typo

* add noqa in __init__

* fix backend doc example error

* rm seed in dataset

* update backend example

* fix typo

* fix typo

* fix example err

* fix typo

* fix ci dataset test

* fix example file

* try to fix ci

* clean dataset doc

* change get_current_audio_backend to get_current_backend

* replace paddle.audio.backends.info with paddle.audio.info, same with load, save

* fix ci error

* replace api in test_audio_backend

* fix save&&set_backend example
  • Loading branch information
SmileGoat committed Oct 20, 2022
1 parent 5a2e517 commit ec5b27f
Show file tree
Hide file tree
Showing 14 changed files with 1,363 additions and 12 deletions.
10 changes: 10 additions & 0 deletions paddle/fluid/API.spec
Expand Up @@ -21,3 +21,13 @@ paddle.audio.functional.functional.mel_frequencies (ArgSpec(args=['n_mels', 'f_m
paddle.audio.functional.functional.mel_to_hz (ArgSpec(args=['mel', 'htk'], varargs=None, varkw=None, defaults=(False,), kwonlyargs=[], kwonlydefaults=None, annotations={'return': typing.Union[float, paddle.Tensor], 'mel': typing.Union[float, paddle.Tensor], 'htk': <class 'bool'>}), ('document', 'e93b432d382f98c60d7c7599489e7072'))
paddle.audio.functional.functional.power_to_db (ArgSpec(args=['spect', 'ref_value', 'amin', 'top_db'], varargs=None, varkw=None, defaults=(1.0, 1e-10, 80.0), kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'paddle.Tensor'>, 'spect': <class 'paddle.Tensor'>, 'ref_value': <class 'float'>, 'amin': <class 'float'>, 'top_db': typing.Union[float, NoneType]}), ('document', '28bbb1973e8399e856bfaea0415cecb9'))
paddle.audio.functional.window.get_window (ArgSpec(args=['window', 'win_length', 'fftbins', 'dtype'], varargs=None, varkw=None, defaults=(True, 'float64'), kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'paddle.Tensor'>, 'window': typing.Union[str, typing.Tuple[str, float]], 'win_length': <class 'int'>, 'fftbins': <class 'bool'>, 'dtype': <class 'str'>}), ('document', '2418d63da10c0cd5da9ecf0a88ddf783'))
paddle.audio.backends (ArgSpec(), ('document', 'd41d8cd98f00b204e9800998ecf8427e'))
paddle.audio.backends.init_backend.get_current_audio_backend (ArgSpec(args=[], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'str'>}), ('document', '3ff9fd62e8be1f3dc7e34afaf50e1645'))
paddle.audio.backends.init_backend.list_available_backends (ArgSpec(args=[], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': typing.List[str]}), ('document', '8eba49f1b69f7ec7fa139a0714a2724e'))
paddle.audio.backends.init_backend.set_backend (ArgSpec(args=['backend_name'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'backend_name': <class 'str'>}), ('document', '9680247dd97274d345dee415e2787527'))
paddle.audio.backends.wave_backend.info (ArgSpec(args=['filepath', 'format'], varargs=None, varkw=None, defaults=(None,), kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'paddle.audio.backends.backend.AudioInfo'>, 'filepath': <class 'str'>, 'format': typing.Union[str, NoneType]}), ('document', 'e0ffd3accd942a9b0a4c08463a9f60f6'))
paddle.audio.backends.wave_backend.load (ArgSpec(args=['filepath', 'frame_offset', 'num_frames', 'normalize', 'channels_first', 'format'], varargs=None, varkw=None, defaults=(0, -1, True, True, None), kwonlyargs=[], kwonlydefaults=None, annotations={'return': typing.Tuple[paddle.Tensor, int], 'filepath': typing.Union[str, pathlib.Path], 'frame_offset': <class 'int'>, 'num_frames': <class 'int'>, 'normalize': <class 'bool'>, 'channels_first': <class 'bool'>, 'format': typing.Union[str, NoneType]}), ('document', '4de50575ca516b4b7c7c82c7fdec808f'))
paddle.audio.backends.wave_backend.save (ArgSpec(args=['filepath', 'src', 'sample_rate', 'channels_first', 'compression', 'format', 'encoding', 'bits_per_sample'], varargs=None, varkw=None, defaults=(True, None, None, None, None), kwonlyargs=[], kwonlydefaults=None, annotations={'filepath': <class 'str'>, 'src': <class 'paddle.Tensor'>, 'sample_rate': <class 'int'>, 'channels_first': <class 'bool'>, 'compression': typing.Union[float, NoneType], 'format': typing.Union[str, NoneType], 'encoding': typing.Union[str, NoneType], 'bits_per_sample': typing.Union[int, NoneType]}), ('document', '4c85cfcd29a0dcdfc32e74db8c0c3961'))
paddle.audio.datasets (ArgSpec(), ('document', 'd41d8cd98f00b204e9800998ecf8427e'))
paddle.audio.datasets.TESS (ArgSpec(), ('document', '3605f3aa2191ede7ddbe594cd27bb067'))
paddle.audio.datasets.TESS.meta_info (ArgSpec(), ('document', '60d548a6f71629c3b69bcda3a30d4819'))
8 changes: 7 additions & 1 deletion python/paddle/audio/__init__.py
Expand Up @@ -14,5 +14,11 @@

from . import features
from . import functional
from . import datasets
from . import backends

__all__ = ["functional", "features"]
from .backends.backend import info, load, save

__all__ = [
"functional", "features", "datasets", "backends", "load", "info", "save"
]
25 changes: 25 additions & 0 deletions python/paddle/audio/backends/__init__.py
@@ -0,0 +1,25 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import init_backend
from .init_backend import get_current_backend # noqa: F401
from .init_backend import list_available_backends # noqa: F401
from .init_backend import set_backend

# Run once when this package is imported: binds the wave_backend versions of
# save/load/info onto the ``backend`` module so a default backend is in place.
init_backend._init_set_audio_backend()

# Names exported by ``from paddle.audio.backends import *``.
__all__ = [
'get_current_backend',
'list_available_backends',
'set_backend',
]
146 changes: 146 additions & 0 deletions python/paddle/audio/backends/backend.py
@@ -0,0 +1,146 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License

import paddle

from pathlib import Path
from typing import Optional, Tuple, Union


class AudioInfo:
    """Audio info, the return type of a backend ``info`` function.

    The constructor stores every argument unchanged on the instance under
    the attribute of the same name; no validation is performed.

    Args:
        sample_rate (int): Sample rate of the audio.
        num_samples (int): Number of samples in the audio.
        num_channels (int): Number of channels in the audio.
        bits_per_sample (int): Number of bits used to store one sample.
        encoding (str): Name of the sample encoding.
    """

    def __init__(self, sample_rate: int, num_samples: int, num_channels: int,
                 bits_per_sample: int, encoding: str):
        self.sample_rate = sample_rate
        self.num_samples = num_samples
        self.num_channels = num_channels
        self.bits_per_sample = bits_per_sample
        self.encoding = encoding

    def __repr__(self) -> str:
        # Without this, printing the result of ``info()`` only shows the
        # object's address, which hides every field a user wants to see.
        return ("{}(sample_rate={!r}, num_samples={!r}, num_channels={!r}, "
                "bits_per_sample={!r}, encoding={!r})").format(
                    type(self).__name__, self.sample_rate, self.num_samples,
                    self.num_channels, self.bits_per_sample, self.encoding)


def info(filepath: str) -> AudioInfo:
    """Get the signal information of an input audio file.

    This definition is a placeholder that carries the API documentation;
    calling it as written always raises. ``init_backend`` rebinds
    ``backend.info`` to the implementation of the selected backend.

    Args:
        filepath (str): Audio path or file object.

    Returns:
        AudioInfo: Info of the given audio.

    Raises:
        NotImplementedError: Always, when this placeholder itself is called.

    Examples:
        .. code-block:: python

            import os
            import paddle

            sample_rate = 16000
            wav_duration = 0.5
            num_channels = 1
            num_frames = sample_rate * wav_duration
            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
            waveform = wav_data.tile([num_channels, 1])
            base_dir = os.getcwd()
            filepath = os.path.join(base_dir, "test.wav")

            paddle.audio.save(filepath, waveform, sample_rate)
            wav_info = paddle.audio.info(filepath)
    """
    # Documentation stub only: the real function comes from a backend.
    raise NotImplementedError("please set audio backend")


def load(
    filepath: Union[str, Path],
    frame_offset: int = 0,
    num_frames: int = -1,
    normalize: bool = True,
    channels_first: bool = True,
) -> Tuple[paddle.Tensor, int]:
    """Load audio data from a file.

    Reads the audio content starting from ``frame_offset`` and returns
    ``num_frames`` frames of it.

    This definition is a placeholder that carries the API documentation;
    calling it as written always raises. ``init_backend`` rebinds
    ``backend.load`` to the implementation of the selected backend.

    Args:
        filepath (str|Path): Path of the audio file to read.
        frame_offset (int): Start frame, from 0 to the total number of
            frames. Default: 0.
        num_frames (int): Number of frames to read; -1 means all frames.
            Default: -1.
        normalize (bool): If True, return audio normalized to (-1, 1) with
            dtype float32. If False, return the raw data with dtype int16.
            Default: True.
        channels_first (bool): If True, return audio with shape
            (channels, time). Default: True.

    Returns:
        Tuple[paddle.Tensor, int]: (audio_content, sample rate).

    Raises:
        NotImplementedError: Always, when this placeholder itself is called.

    Examples:
        .. code-block:: python

            import os
            import paddle

            sample_rate = 16000
            wav_duration = 0.5
            num_channels = 1
            num_frames = sample_rate * wav_duration
            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
            waveform = wav_data.tile([num_channels, 1])
            base_dir = os.getcwd()
            filepath = os.path.join(base_dir, "test.wav")

            paddle.audio.save(filepath, waveform, sample_rate)
            wav_data_read, sr = paddle.audio.load(filepath)
    """
    # Documentation stub only: the real function comes from a backend.
    raise NotImplementedError("please set audio backend")


def save(filepath: str,
         src: paddle.Tensor,
         sample_rate: int,
         channels_first: bool = True,
         encoding: Optional[str] = None,
         bits_per_sample: Optional[int] = 16):
    """Save an audio tensor to a file.

    This definition is a placeholder that carries the API documentation;
    calling it as written always raises. ``init_backend`` rebinds
    ``backend.save`` to the implementation of the selected backend.

    Args:
        filepath (str): Path the audio is saved to.
        src (paddle.Tensor): The audio tensor.
        sample_rate (int): The number of samples of audio per second.
        channels_first (bool): Channel layout of ``src``. If True, the input
            tensor is (channels, time); if False, it is (time, channels).
            Default: True.
        encoding (str, optional): Encoding format; wave_backend only
            supports PCM16 now. Default: None.
        bits_per_sample (int, optional): Bits per sample; wave_backend only
            supports 16 bits now. Default: 16.

    Returns:
        None

    Raises:
        NotImplementedError: Always, when this placeholder itself is called.

    Examples:
        .. code-block:: python

            import paddle

            sample_rate = 16000
            wav_duration = 0.5
            num_channels = 1
            num_frames = sample_rate * wav_duration
            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
            waveform = wav_data.tile([num_channels, 1])
            filepath = "./test.wav"

            paddle.audio.save(filepath, waveform, sample_rate)
    """
    # Documentation stub only: the real function comes from a backend.
    raise NotImplementedError("please set audio backend")
185 changes: 185 additions & 0 deletions python/paddle/audio/backends/init_backend.py
@@ -0,0 +1,185 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import warnings
from . import wave_backend
from . import backend
from typing import List

import paddle


def _check_version(version: str) -> bool:
# require paddleaudio >= 1.0.2
ver_arr = version.split('.')
v0 = int(ver_arr[0])
v1 = int(ver_arr[1])
v2 = int(ver_arr[2])
if v0 < 1:
return False
if v0 == 1 and v1 == 0 and v2 <= 1:
return False
return True


def list_available_backends() -> List[str]:
    """List available backends, the backends in paddleaudio and the default backend.

    If paddleaudio cannot be imported, a warning is issued and only the
    default ``"wave_backend"`` is reported. If it can be imported, its
    backends are listed first and ``"wave_backend"`` is appended last.

    Returns:
        List[str]: The list of available backends.

    Raises:
        ImportError: If paddleaudio is installed but older than 1.0.2.

    Examples:
        .. code-block:: python

            import paddle

            sample_rate = 16000
            wav_duration = 0.5
            num_channels = 1
            num_frames = sample_rate * wav_duration
            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
            waveform = wav_data.tile([num_channels, 1])
            wav_path = "./test.wav"

            current_backend = paddle.audio.backends.get_current_backend()
            print(current_backend) # wave_backend, the default backend.
            backends = paddle.audio.backends.list_available_backends()
            # default backends is ['wave_backend']
            # paddleaudio >= 1.0.2 contributes its own backends, e.g. 'soundfile'
            if 'soundfile' in backends:
                paddle.audio.backends.set_backend('soundfile')

            paddle.audio.save(wav_path, waveform, sample_rate)
    """
    backends = []
    try:
        import paddleaudio
    except ImportError:
        package = "paddleaudio"
        # The requirement is quoted so that a shell does not treat ">" as an
        # output redirection when the user copies the command.
        warn_msg = (
            "Failed importing {}. \n"
            "only wave_backend(only can deal with PCM16 WAV) supported.\n"
            "if want soundfile_backend(more audio type supported),\n"
            'please manually installed (usually with `pip install "{}>=1.0.2"`). '
        ).format(package, package)
        warnings.warn(warn_msg)
    else:
        # Runs only when the import succeeded, so ``paddleaudio`` is bound.
        version = paddleaudio.__version__
        if not _check_version(version):
            err_msg = (
                "the version of paddleaudio installed is {},\n"
                "please ensure the paddleaudio >= 1.0.2.").format(version)
            raise ImportError(err_msg)
        # Copy, so appending below never mutates a list owned by paddleaudio.
        backends = list(paddleaudio.backends.list_audio_backends())
    backends.append("wave_backend")
    return backends


def get_current_backend() -> str:
    """Get the name of the current audio backend.

    The paddleaudio backend name is reported only when paddleaudio has
    already been imported and ``paddle.audio.load`` compares equal to
    ``paddleaudio.load``; in every other case the answer is "wave_backend".

    Returns:
        str: The name of the current backend,
            the wave_backend or backend imported from paddleaudio.

    Examples:
        .. code-block:: python

            import paddle

            sample_rate = 16000
            wav_duration = 0.5
            num_channels = 1
            num_frames = sample_rate * wav_duration
            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
            waveform = wav_data.tile([num_channels, 1])
            wav_path = "./test.wav"

            current_backend = paddle.audio.backends.get_current_backend()
            print(current_backend) # wave_backend, the default backend.
            backends = paddle.audio.backends.list_available_backends()
            # default backends is ['wave_backend']
            # backends is ['wave_backend', 'soundfile'], if have installed paddleaudio >= 1.0.2
            if 'soundfile' in backends:
                paddle.audio.backends.set_backend('soundfile')

            paddle.audio.save(wav_path, waveform, sample_rate)
    """
    # paddleaudio was never imported, so only the default can be active.
    if "paddleaudio" not in sys.modules:
        return "wave_backend"

    import paddleaudio
    paddleaudio_backend = paddleaudio.backends.get_audio_backend()
    # paddleaudio being loaded is not enough: it counts as current only if
    # paddle.audio.load has actually been rebound to its load function.
    if paddle.audio.load == paddleaudio.load:
        return paddleaudio_backend
    return "wave_backend"


def set_backend(backend_name: str):
    """Set the audio backend to one of the names from list_available_backends().

    Rebinds ``save``, ``load`` and ``info`` on both the ``backend`` module
    and ``paddle.audio`` to the functions of the chosen backend.

    Args:
        backend_name (str): One of the names returned by
            list_available_backends(). "wave_backend" is the default.
            "soundfile" is imported from paddleaudio.

    Returns:
        None

    Raises:
        NotImplementedError: If ``backend_name`` is not an available backend.
        ImportError: Propagated from list_available_backends() when the
            installed paddleaudio is older than 1.0.2.

    Examples:
        .. code-block:: python

            import paddle

            sample_rate = 16000
            wav_duration = 0.5
            num_channels = 1
            num_frames = sample_rate * wav_duration
            wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
            waveform = wav_data.tile([num_channels, 1])
            wav_path = "./test.wav"

            current_backend = paddle.audio.backends.get_current_backend()
            print(current_backend) # wave_backend, the default backend.
            backends = paddle.audio.backends.list_available_backends()
            # default backends is ['wave_backend']
            # paddleaudio >= 1.0.2 contributes its own backends, e.g. 'soundfile'
            if 'soundfile' in backends:
                paddle.audio.backends.set_backend('soundfile')

            paddle.audio.save(wav_path, waveform, sample_rate)
    """
    available_backends = list_available_backends()
    if backend_name not in available_backends:
        # Same exception type as before, now saying what was asked for and
        # what could have been used instead.
        raise NotImplementedError(
            "audio backend {!r} is not available, expected one of {}".format(
                backend_name, available_backends))

    if backend_name == "wave_backend":
        module = wave_backend
    else:
        import paddleaudio
        paddleaudio.backends.set_audio_backend(backend_name)
        module = paddleaudio

    # Keep the documented module and the public paddle.audio namespace in
    # step, so both entry points dispatch to the same backend.
    for func in ["save", "load", "info"]:
        setattr(backend, func, getattr(module, func))
        setattr(paddle.audio, func, getattr(module, func))


def _init_set_audio_backend():
    """Install wave_backend as the default backend.

    Copies ``save``, ``load`` and ``info`` from ``wave_backend`` onto the
    ``backend`` module, replacing the documentation placeholders there.
    Unlike ``set_backend``, this does not assign onto ``paddle.audio``.
    """
    for api_name in ("save", "load", "info"):
        default_impl = getattr(wave_backend, api_name)
        setattr(backend, api_name, default_impl)

0 comments on commit ec5b27f

Please sign in to comment.