Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion nemo/collections/audio/data/audio_to_audio_lhotse.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def convert_manifest_nemo_to_lhotse(
get_full_path(audio_file=item_reference_key, manifest_file=input_manifest)
)

_as_relative(cut.reference_recording, item_target_key, enabled=not force_absolute_paths)
_as_relative(cut.reference_recording, item_reference_key, enabled=not force_absolute_paths)

if (channels := item.pop(REFERENCE_CHANNEL_SELECTOR, None)) is not None:
if cut.reference_recording.num_channels == 1:
Expand Down
62 changes: 62 additions & 0 deletions tests/collections/audio/test_audio_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import pytest
import soundfile as sf
import torch.cuda
from lhotse import CutSet
from omegaconf import OmegaConf

from nemo.collections.asr.parts.utils.manifest_utils import write_manifest
Expand All @@ -38,6 +39,14 @@


class TestAudioDatasets:
@staticmethod
def _convert_manifest_item_to_cut(test_dir, item):
    """Convert a single NeMo manifest item and return the first resulting cut.

    A manifest holding only `item` is written to `manifest.json` inside
    `test_dir`, converted with `convert_manifest_nemo_to_lhotse` into
    `manifest_cuts.jsonl` next to it, and the first cut loaded from that
    file is returned.
    """
    nemo_manifest = os.path.join(test_dir, 'manifest.json')
    lhotse_manifest = nemo_manifest.replace('.json', '_cuts.jsonl')

    write_manifest(nemo_manifest, [item])
    convert_manifest_nemo_to_lhotse(input_manifest=nemo_manifest, output_manifest=lhotse_manifest)

    converted_cuts = CutSet.from_file(lhotse_manifest)
    return next(iter(converted_cuts))

@pytest.mark.unit
@pytest.mark.parametrize('num_channels', [1, 2])
@pytest.mark.parametrize('num_targets', [1, 3])
Expand All @@ -60,6 +69,59 @@ def test_list_to_multichannel(self, num_channels, num_targets):
# Check the list is converted back to the original signal
assert (ASRAudioProcessor.list_to_multichannel(target_list) == golden_target).all()

@pytest.mark.unit
def test_convert_manifest_nemo_to_lhotse_with_reference_only(self):
    """Convert a manifest item that has input and reference audio but no target.

    Checks that the converted cut keeps the input and reference paths exactly
    as written in the manifest and carries no `target_recording` custom field.
    """
    sample_rate = 16000
    duration = 0.1
    num_samples = int(sample_rate * duration)

    input_filepath = 'input.wav'
    reference_filepath = 'reference.wav'

    with tempfile.TemporaryDirectory() as test_dir:
        # Distinct constant signals: zeros for the input, ones for the reference.
        fixtures = {
            input_filepath: np.zeros(num_samples),
            reference_filepath: np.ones(num_samples),
        }
        for filename, samples in fixtures.items():
            sf.write(os.path.join(test_dir, filename), samples, sample_rate, 'float')

        manifest_item = {
            'input_filepath': input_filepath,
            'reference_filepath': reference_filepath,
            'duration': duration,
        }
        cut = self._convert_manifest_item_to_cut(test_dir, manifest_item)

        input_source = cut.recording.sources[0].source
        reference_source = cut.reference_recording.sources[0].source
        custom_fields = cut.custom or {}

        assert input_source == input_filepath
        assert reference_source == reference_filepath
        assert 'target_recording' not in custom_fields

@pytest.mark.unit
def test_convert_manifest_nemo_to_lhotse_with_different_target_and_reference_paths(self):
    """Convert a manifest item whose target and reference point to different files.

    Checks that the converted cut records each path under its own recording,
    i.e. the reference recording is not given the target's path.
    """
    sample_rate = 16000
    duration = 0.1
    num_samples = int(sample_rate * duration)

    input_filepath = 'input.wav'
    target_filepath = 'target.wav'
    reference_filepath = 'reference.wav'

    with tempfile.TemporaryDirectory() as test_dir:
        # Three distinct constant signals: zeros, ones and minus ones.
        fixtures = {
            input_filepath: np.zeros(num_samples),
            target_filepath: np.ones(num_samples),
            reference_filepath: -np.ones(num_samples),
        }
        for filename, samples in fixtures.items():
            sf.write(os.path.join(test_dir, filename), samples, sample_rate, 'float')

        manifest_item = {
            'input_filepath': input_filepath,
            'target_filepath': target_filepath,
            'reference_filepath': reference_filepath,
            'duration': duration,
        }
        cut = self._convert_manifest_item_to_cut(test_dir, manifest_item)

        assert cut.target_recording.sources[0].source == target_filepath
        assert cut.reference_recording.sources[0].source == reference_filepath
        assert cut.target_recording.sources[0].source != cut.reference_recording.sources[0].source

@pytest.mark.unit
@pytest.mark.parametrize('num_channels', [1, 2])
def test_processor_process_audio(self, num_channels):
Expand Down
Loading