Source code for pyatsyn.ats_synth

# -*- coding: utf-8 -*-

# This source code is licensed under the BSD-style license found in the
# LICENSE.rst file in the root directory of this source tree. 

# pyatsyn Copyright (c) <2023>, <Johnathan G Lyon>
# All rights reserved.

# Except where otherwise noted, ATSA and ATSH is Copyright (c) <2002-2004>
# <Oscar Pablo Di Liscia, Pete Moss, and Juan Pampin>


"""Synthesizer Methods for Rendering .ats Files to Audio

"""

from numpy import zeros, matmul, arange, cos, linspace, cumsum, sin, pi, real
from numpy.fft import fft, ifft
from numpy.random import uniform
import soundfile as sf
from math import tau
import argparse

from pyatsyn.atsa.critical_bands import ATS_CRITICAL_BAND_EDGES
from pyatsyn.atsa.utils import compute_frames
from pyatsyn.ats_io import ats_load


def _normalize_peak(signal):
    """Scale `signal` in place so its largest absolute sample becomes 1.0.

    Empty signals and all-zero signals are left untouched (there is no
    meaningful gain to apply).

    Parameters
    ----------
    signal : ndarray[float]
        1D float array, modified in place
    """
    if signal.size == 0:
        return
    gain = abs(signal).max()
    if gain != 1.0 and gain > 0.0:
        signal /= gain


def synth(ats_snd, normalize=False, compute_phase=True,
            export_file=None, sine_pct = 1.0, noise_pct = 0.0, noise_bands = None,
            normalize_sine = False, normalize_noise = False):
    """Function to synthesize audio from :obj:`~pyatsyn.ats_structure.AtsSound`

    Sine generator bank and band-limited noise synthesizer for .ats files. When
    phase information is ignored phase is linearly interpolated between consecutive
    frequencies from an initial phase of 0.0 at the first non-zero amplitude for that partial.

    The method for cubic polynomial interpolation of phase used is credited to:

        R. McAulay and T. Quatieri, "Speech analysis/Synthesis based on a
        sinusoidal representation," in IEEE Transactions on Acoustics,
        Speech, and Signal Processing, vol. 34, no. 4, pp. 744-754,
        August 1986

        `doi: 10.1109/TASSP.1986.1164910 <https://doi.org/10.1109/TASSP.1986.1164910>`_.

    Parameters
    ----------
    ats_snd : :obj:`~pyatsyn.ats_structure.AtsSound`
        the .ats file used to synthesize
    normalize : bool, optional
        normalize the final mix to ±1 before output (default: False)
    compute_phase : bool, optional
        use cubic polynomial interpolation of phase information during synthesis, if available (default: True)
    export_file : str
        audio file path to write synthesis to, or None for no file output (default: None)
    sine_pct : float
        percentage of sine components to mix into output (default: 1.0)
    noise_pct : float
        percentage of noise components to mix into output (default: 0.0)
    noise_bands : ndarray[float]
        1D array of band edges to use for noise resynthesis. Currently using other than 25 bands
        (i.e. 26 edges) is not fully supported. If None,
        :obj:`~pyatsyn.atsa.critical_bands.ATS_CRITICAL_BAND_EDGES` will be used. (default: None)
    normalize_sine : bool
        normalize the complete sine signal to ±1 before it is scaled by
        `sine_pct` and mixed (default: False)
    normalize_noise : bool
        normalize the complete noise signal to ±1 before it is scaled by
        `noise_pct` and mixed (default: False)

    Returns
    -------
    ndarray[float]
        A 1D array of amplitudes representing the synthesized sound
    """
    sample_rate = ats_snd.sampling_rate
    out_size = int(ats_snd.dur * sample_rate)
    frame_size = ats_snd.frame_size
    frames = ats_snd.frames

    synthesized = zeros(out_size, "float64")

    if sine_pct > 0.0:
        n_partials = ats_snd.partials
        freq_to_radians_per_sample = tau / sample_rate

        has_pha = compute_phase and len(ats_snd.pha) > 0

        # Constant 2x2 matrix used to solve for the quadratic (alpha) and
        # cubic (beta) phase coefficients of each frame.
        # credit: McAulay & Quatieri (1986)
        alpha_beta_coeffs = zeros([2, 2], "float64")
        alpha_beta_coeffs[0][0] = 3 / (frame_size**2)
        alpha_beta_coeffs[0][1] = -1 / frame_size
        alpha_beta_coeffs[1][0] = -2 / (frame_size**3)
        alpha_beta_coeffs[1][1] = 1 / (frame_size**2)
        alpha_beta_terms = zeros([2, 1], "float64")

        half_T = frame_size / 2

        # per-frame sample offsets and their powers, shared by every partial
        samps = arange(frame_size, dtype='int64')
        samps_squared = samps ** 2
        samps_cubed = samps ** 3

        # running phase per partial, only needed when phase data is not used
        prior_partial_phases = None
        if not has_pha:
            prior_partial_phases = zeros(n_partials, "float64")

        fil_ptr = 0
        frame_size_range = frame_size
        # NOTE(review): each pass reads column `frame_n + 1`; this relies on
        # the `fil_ptr >= out_size` break firing before the last frame index
        # is reached -- confirm against how `dur` and `frames` are related.
        for frame_n in range(frames):

            # constrain number of samples we write at tail end of sound
            if fil_ptr + frame_size_range > out_size:
                frame_size_range = out_size - fil_ptr

            for partial in range(n_partials):
                # partial is silent across this whole frame
                if ats_snd.frq[partial][frame_n] == 0.0 and ats_snd.frq[partial][frame_n + 1] == 0.0:
                    continue

                # get amp step (linear amplitude ramp across the frame)
                amp_0 = ats_snd.amp[partial][frame_n]
                amp_t = ats_snd.amp[partial][frame_n + 1]
                amp_step = (amp_t - amp_0) / frame_size
                amp_env = (samps[:frame_size_range] * amp_step) + amp_0

                # compute frequency/phase interpolation preliminaries
                w_0 = ats_snd.frq[partial][frame_n] * freq_to_radians_per_sample
                w_t = ats_snd.frq[partial][frame_n + 1] * freq_to_radians_per_sample

                # a partial being born or dying holds the one known frequency
                if w_0 == 0.0:
                    w_0 = w_t
                elif w_t == 0.0:
                    w_t = w_0

                if has_pha:
                    pha_0 = ats_snd.pha[partial][frame_n]
                    pha_t = ats_snd.pha[partial][frame_n + 1]

                    # cubic polynomial interpolation of phase
                    # credit: McAulay & Quatieri (1986)
                    # M is the integer number of extra 2*pi turns added to the
                    # target phase before solving for alpha and beta
                    M = round((((pha_0 + (w_0 * frame_size) - pha_t) + (half_T * (w_t - w_0))) / tau))
                    alpha_beta_terms[0] = pha_t - pha_0 - (w_0 * frame_size) + (tau * M)
                    alpha_beta_terms[1] = w_t - w_0
                    alpha, beta = matmul(alpha_beta_coeffs, alpha_beta_terms)
                    synthesized[fil_ptr:fil_ptr + frame_size_range] += amp_env * \
                        cos(pha_0 + (w_0 * samps[:frame_size_range]) +
                            (alpha * samps_squared[:frame_size_range]) +
                            (beta * samps_cubed[:frame_size_range]))

                else:
                    # phaseless version: integrate a linear frequency ramp and
                    # continue from the phase reached at the end of the prior frame
                    pha_0 = prior_partial_phases[partial]
                    w = cumsum(linspace(w_0, w_t, frame_size))
                    synthesized[fil_ptr:fil_ptr + frame_size_range] += amp_env * \
                        cos(w[:frame_size_range] + pha_0)
                    prior_partial_phases[partial] = pha_0 + w[-1]

            fil_ptr += frame_size

            if fil_ptr >= out_size:
                break

        # Normalize once, over the complete sine signal. Doing this per frame
        # would rescale a partially built buffer and skip the final frame.
        if normalize_sine:
            _normalize_peak(synthesized)

        synthesized *= sine_pct

    has_noi = noise_pct > 0.0 and len(ats_snd.band_energy) > 0

    if has_noi:
        # using white noise -> band-limited noise fft resynthesis method
        noise = zeros(out_size, "float64")

        # The analysis window is `sample_rate` samples long, so FFT bin k sits
        # at k Hz and band edges can be used directly as bin indices
        # (presumably the edges are expressed in Hz -- TODO confirm).
        window = sin(arange(sample_rate) * pi / sample_rate)**2 # using Hann window
        overlap = 0.5

        noise_hop = int(overlap * sample_rate)
        noise_M_over_2 = sample_rate // 2
        noise_frames = compute_frames(out_size, noise_hop)

        white_noise = uniform(-1, 1, int(noise_frames * sample_rate / overlap) + 1)
        banded_noise = zeros([len(ats_snd.bands), out_size])

        # indices for refolding a symmetric fft after clearing freq bins
        bin_indices = zeros(sample_rate, "int64")
        for i in range(noise_M_over_2):
            bin_indices[i] = i
            bin_indices[-(i + 1)] = i

        # build band-limited noise
        if noise_bands is None:
            noise_bands = ATS_CRITICAL_BAND_EDGES
        for band in ats_snd.bands:
            lo = int(noise_bands[band])
            hi = int(noise_bands[band+1])

            in_ptr = 0
            # first window is centred on sample 0, so it starts half a window early
            out_ptr = -noise_M_over_2
            for frame_n in range(noise_frames):
                time_bins = white_noise[in_ptr:in_ptr+sample_rate] * window
                freq_bins = fft(time_bins)
                # keep only the bins inside [lo, hi]
                freq_bins[:lo] = 0.0
                freq_bins[hi+1:] = 0.0
                rev_fft = real(ifft(freq_bins[bin_indices]))

                # trim whatever part of this window falls outside the output
                front_pad = 0
                back_pad = 0
                if out_ptr < 0:
                    front_pad = -out_ptr
                if out_ptr + sample_rate >= out_size:
                    back_pad = out_ptr + sample_rate - out_size

                if not front_pad and not back_pad:
                    banded_noise[band][out_ptr:out_ptr+sample_rate] += rev_fft
                else:
                    banded_noise[band][out_ptr+front_pad:out_ptr+sample_rate-back_pad] += rev_fft[front_pad:sample_rate-back_pad]

                in_ptr += noise_hop
                out_ptr += noise_hop

                if out_ptr >= out_size:
                    break

        # envelope bands
        fil_ptr = 0
        frame_size_range = frame_size
        for frame_n in range(frames):

            # constrain number of samples we write at tail end of sound
            if fil_ptr + frame_size_range > out_size:
                frame_size_range = out_size - fil_ptr
            for band in ats_snd.bands:
                if ats_snd.band_energy[band][frame_n] == 0.0 and ats_snd.band_energy[band][frame_n + 1] == 0.0:
                    continue

                # get amp step
                amp_0 = ats_snd.band_energy[band][frame_n]
                amp_t = ats_snd.band_energy[band][frame_n + 1]
                amp_step = (amp_t - amp_0) / frame_size
                noise[fil_ptr:fil_ptr + frame_size_range] += (amp_0 + (arange(frame_size_range) * amp_step)) * \
                                                                banded_noise[band][fil_ptr:fil_ptr + frame_size_range]

            fil_ptr += frame_size

            if fil_ptr >= out_size:
                break

        if normalize_noise:
            _normalize_peak(noise)

        synthesized += noise_pct * noise

    if normalize:
        _normalize_peak(synthesized)

    # export synthesized version to audio file
    if export_file is not None:
        sf.write(export_file, synthesized, ats_snd.sampling_rate)

    return synthesized

def synth_CLI():
    """Command line entry point wrapping :obj:`~pyatsyn.ats_synth.synth`

    Parses the command line, loads the requested .ats file and renders it
    to the requested audio file.

    Example
    -------
    Show the usage details with the help flag

    ::

        $ pyatsyn-synth -h

    Render an ats file to a wav file using only the sine generator bank

    ::

        $ pyatsyn-synth example.ats example.wav

    Render an ats file to a wav file using both the sine generator bank and
    band-limited noise

    ::

        $ pyatsyn-synth example.ats example.wav --noise 1.0

    """
    cli = argparse.ArgumentParser(
        description="Sine generator bank and band-limited noise synthesizer for .ats files"
    )

    # positional arguments: input .ats path, output audio path
    cli.add_argument("ats_file_in", help="the path to the .ats file to synthesize")
    cli.add_argument("audio_file_out", help="audio file path to synthesize to")

    # mixing and normalization options
    cli.add_argument("-n", "--normalize", action="store_true",
                     help="normalize sound to ±1 before output")
    cli.add_argument("--sine", type=float, default=1.0,
                     help="percentage of sine components to mix (default 1.0)")
    cli.add_argument("--noise", type=float, default=0.0,
                     help="percentage of noise components to mix (default 0.0)")
    cli.add_argument("--normalize_sine", action="store_true",
                     help="normalize sine components to ±1 before mixing")
    cli.add_argument("--normalize_noise", action="store_true",
                     help="normalize noise componenets to ±1 before mixing")
    cli.add_argument("--ignore_phase", action="store_true",
                     help="ignore phase information during synthesis")

    options = cli.parse_args()

    # NOTE(review): the input path is passed for both of the first two
    # positional parameters of ats_load -- confirm against its signature.
    ats_snd = ats_load(options.ats_file_in, options.ats_file_in)

    synth_kwargs = {
        "normalize": options.normalize,
        # the CLI flag is the inverse of the synth() parameter
        "compute_phase": not options.ignore_phase,
        "export_file": options.audio_file_out,
        "sine_pct": options.sine,
        "noise_pct": options.noise,
        "normalize_sine": options.normalize_sine,
        "normalize_noise": options.normalize_noise,
    }
    synth(ats_snd, **synth_kwargs)