Source code for qualia_core.preprocessing.MFCC

import logging
import time

import numpy as np

from .Preprocessing import Preprocessing

logger = logging.getLogger(__name__)

class MFCC(Preprocessing):
    """Preprocessing step that replaces each set's raw signal with its MFCCs.

    The transform is a ``torchaudio.transforms.MFCC`` instance built once in
    the constructor and applied to every set of the data model in
    :meth:`__call__`.
    """

    def __init__(self,
                 sample_rate: int,
                 n_mfcc: int,
                 dct_type: int = 2,
                 norm: str = 'ortho',
                 log_mels=False,
                 melkwargs: 'dict | None' = None,
                 chunks: int = 4,
                 dims: int = 1):
        """Build the underlying torchaudio MFCC transform.

        Args:
            sample_rate: Forwarded to ``torchaudio.transforms.MFCC``.
            n_mfcc: Forwarded to ``torchaudio.transforms.MFCC``.
            dct_type: Forwarded to ``torchaudio.transforms.MFCC``.
            norm: Forwarded to ``torchaudio.transforms.MFCC``.
            log_mels: Forwarded to ``torchaudio.transforms.MFCC``.
            melkwargs: Forwarded to ``torchaudio.transforms.MFCC``. ``None``
                (the default) is treated as an empty dict.
            chunks: Number of pieces each set's data is split into (with
                ``torch.chunk``) before the transform is applied.
            dims: When equal to 2, a trailing axis of size 1 is appended to
                the result; any other value leaves the result as is.
        """
        # Imported lazily so that importing this module does not require torchaudio.
        import torchaudio

        # A fresh dict per instance: a ``{}`` default in the signature would be
        # one object shared by every instance created without ``melkwargs``.
        if melkwargs is None:
            melkwargs = {}

        self.mfcc_transform = torchaudio.transforms.MFCC(sample_rate=sample_rate,
                                                         n_mfcc=n_mfcc,
                                                         dct_type=dct_type,
                                                         norm=norm,
                                                         log_mels=log_mels,
                                                         melkwargs=melkwargs)
        self.__chunks = chunks
        self.__dims = dims

    def __call__(self, datamodel):
        """Apply the MFCC transform in place to every set of ``datamodel``.

        For each ``(name, set)`` pair yielded by iterating ``datamodel``, the
        set's ``x`` attribute is overwritten with its MFCC representation as a
        NumPy array. The elapsed time is logged at INFO level.

        Args:
            datamodel: Iterable of ``(name, set)`` pairs where each set exposes
                a NumPy-array-like ``x`` attribute whose last axis can be
                squeezed.  NOTE(review): presumably ``x`` is
                ``(samples, time, 1)`` — confirm against the data model.

        Returns:
            The same ``datamodel`` object, with each set's ``x`` replaced.
        """
        import torch

        # Monotonic clock: unaffected by system clock adjustments while measuring.
        start = time.perf_counter()

        for _name, s in datamodel:
            # 1D: drop the trailing axis before handing the signal to torchaudio.
            signal = torch.tensor(s.x.squeeze(-1))

            # Transform chunk by chunk along the first axis, then stitch the
            # results back together (presumably to bound peak memory — TODO confirm).
            chunks_mfcc = [self.mfcc_transform(chunk)
                           for chunk in torch.chunk(signal, self.__chunks)]
            features = torch.concat(chunks_mfcc).numpy()

            # Swap the two axes following the first one.
            # NOTE(review): assumes this turns (N, n_mfcc, frames) into
            # (N, frames, n_mfcc) — confirm.
            features = features.swapaxes(1, 2)

            if self.__dims == 2:
                # 2D, add channels dim
                features = np.expand_dims(features, -1)

            s.x = features

        logger.info('Elapsed: %s s', time.perf_counter() - start)
        return datamodel