# -*- coding: utf-8 -*-
# This source code is licensed under the BSD-style license found in the
# LICENSE.rst file in the root directory of this source tree.
# pyatsyn Copyright (c) <2023>, <Johnathan G Lyon>
# All rights reserved.
# Except where otherwise noted, ATSA and ATSH is Copyright (c) <2002-2004>
# <Oscar Pablo Di Liscia, Pete Moss, and Juan Pampin>
"""Synthesizer Methods for Rendering .ats Files to Audio
"""
from numpy import zeros, matmul, arange, cos, linspace, cumsum, sin, pi, real
from numpy.fft import fft, ifft
from numpy.random import uniform
import soundfile as sf
from math import tau
import argparse
from pyatsyn.atsa.critical_bands import ATS_CRITICAL_BAND_EDGES
from pyatsyn.atsa.utils import compute_frames
from pyatsyn.ats_io import ats_load
def _normalize_peak(signal):
    """Scale ``signal`` in place so its largest absolute sample is 1.0.

    Empty and all-zero signals are left untouched.

    Parameters
    ----------
    signal : ndarray[float]
        1D array of samples, modified in place
    """
    if signal.size == 0:
        return
    # ndarray.max() reduces in native code; the builtin max() would iterate
    # sample by sample and raise on an empty array.
    peak = abs(signal).max()
    if peak != 1.0 and peak > 0.0:
        signal /= peak


def _add_sine_partials(ats_snd, synthesized, compute_phase):
    """Add the sinusoidal partials of ``ats_snd`` into ``synthesized`` in place.

    Parameters
    ----------
    ats_snd : :obj:`~pyatsyn.ats_structure.AtsSound`
        source of ``sampling_rate``, ``frame_size``, ``frames``, ``partials``,
        ``frq``, ``amp`` and ``pha``
    synthesized : ndarray[float]
        1D output buffer the partials are accumulated into
    compute_phase : bool
        use cubic phase interpolation when ``ats_snd.pha`` is not empty
    """
    out_size = len(synthesized)
    frame_size = ats_snd.frame_size
    frames = ats_snd.frames
    n_partials = ats_snd.partials
    freq_to_radians_per_sample = tau / ats_snd.sampling_rate
    has_pha = compute_phase and len(ats_snd.pha) > 0

    # coefficient matrix for cubic polynomial interpolation of phase
    # credit: McAulay & Quatieri (1986)
    alpha_beta_coeffs = zeros([2, 2], "float64")
    alpha_beta_coeffs[0][0] = 3 / (frame_size**2)
    alpha_beta_coeffs[0][1] = -1 / frame_size
    alpha_beta_coeffs[1][0] = -2 / (frame_size**3)
    alpha_beta_coeffs[1][1] = 1 / (frame_size**2)
    alpha_beta_terms = zeros([2, 1], "float64")
    half_T = frame_size / 2

    samps = arange(frame_size, dtype='int64')
    samps_squared = samps ** 2
    samps_cubed = samps ** 3

    # running phase per partial, only needed when phase data is not used
    prior_partial_phases = None
    if not has_pha:
        prior_partial_phases = zeros(n_partials, "float64")

    last_frame = frames - 1
    fil_ptr = 0
    for frame_n in range(frames):
        if fil_ptr >= out_size:
            break
        # constrain number of samples we write at tail end of sound
        n_samps = min(frame_size, out_size - fil_ptr)
        # The final frame has no successor: hold its values instead of
        # indexing one column past the end of the analysis data.
        next_n = min(frame_n + 1, last_frame)
        t = samps[:n_samps]
        t_squared = samps_squared[:n_samps]
        t_cubed = samps_cubed[:n_samps]
        out_slice = slice(fil_ptr, fil_ptr + n_samps)

        for partial in range(n_partials):
            frq_0 = ats_snd.frq[partial][frame_n]
            frq_t = ats_snd.frq[partial][next_n]
            if frq_0 == 0.0 and frq_t == 0.0:
                continue

            # linear amplitude ramp across the frame
            amp_0 = ats_snd.amp[partial][frame_n]
            amp_t = ats_snd.amp[partial][next_n]
            amp_step = (amp_t - amp_0) / frame_size
            amp_env = (t * amp_step) + amp_0

            # a zero frequency on one side borrows the other side's value
            w_0 = frq_0 * freq_to_radians_per_sample
            w_t = frq_t * freq_to_radians_per_sample
            if w_0 == 0.0:
                w_0 = w_t
            elif w_t == 0.0:
                w_t = w_0

            if has_pha:
                pha_0 = ats_snd.pha[partial][frame_n]
                pha_t = ats_snd.pha[partial][next_n]
                # cubic polynomial interpolation of phase
                # credit: McAulay & Quatieri (1986)
                M = round((((pha_0 + (w_0 * frame_size) - pha_t) + (half_T * (w_t - w_0))) / tau))
                alpha_beta_terms[0] = pha_t - pha_0 - (w_0 * frame_size) + (tau * M)
                alpha_beta_terms[1] = w_t - w_0
                alpha, beta = matmul(alpha_beta_coeffs, alpha_beta_terms)
                synthesized[out_slice] += amp_env * \
                    cos(pha_0 + (w_0 * t) + (alpha * t_squared) + (beta * t_cubed))
            else:
                # phaseless version: integrate the linearly interpolated
                # frequency and carry the end phase into the next frame
                pha_0 = prior_partial_phases[partial]
                w = cumsum(linspace(w_0, w_t, frame_size))
                synthesized[out_slice] += amp_env * cos(w[:n_samps] + pha_0)
                prior_partial_phases[partial] = pha_0 + w[-1]

        fil_ptr += frame_size


def _band_limited_noise(ats_snd, out_size, noise_bands):
    """Build the enveloped band-limited noise signal for ``ats_snd``.

    White noise is filtered into bands by zeroing FFT bins (window length equals
    the sample rate, so each bin is 1 Hz wide), overlap-added with a Hann window,
    then each band is scaled by its linearly interpolated ``band_energy``.

    Parameters
    ----------
    ats_snd : :obj:`~pyatsyn.ats_structure.AtsSound`
        source of ``sampling_rate``, ``frame_size``, ``frames``, ``bands`` and
        ``band_energy``
    out_size : int
        number of output samples
    noise_bands : ndarray[float] or None
        band edges, or None to use
        :obj:`~pyatsyn.atsa.critical_bands.ATS_CRITICAL_BAND_EDGES`

    Returns
    -------
    ndarray[float]
        1D array of ``out_size`` noise samples
    """
    noise = zeros(out_size, "float64")
    if out_size == 0:
        return noise

    frame_size = ats_snd.frame_size
    frames = ats_snd.frames
    # used as an array length below; cast in case the rate is stored as a float
    win_size = int(ats_snd.sampling_rate)

    # using white noise -> band-limited noise fft resynthesis method
    window = sin(arange(win_size) * pi / win_size)**2  # using Hann window
    overlap = 0.5
    noise_hop = int(overlap * win_size)
    half_win = win_size // 2
    noise_frames = compute_frames(out_size, noise_hop)
    white_noise = uniform(-1, 1, int(noise_frames * win_size / overlap) + 1)
    banded_noise = zeros([len(ats_snd.bands), out_size])

    # indices for refolding a symmetric fft after clearing freq bins
    bin_indices = zeros(win_size, "int64")
    lower_half = arange(half_win, dtype="int64")
    bin_indices[:half_win] = lower_half
    bin_indices[win_size - half_win:] = lower_half[::-1]

    if noise_bands is None:
        noise_bands = ATS_CRITICAL_BAND_EDGES
    band_edges = [(band, int(noise_bands[band]), int(noise_bands[band + 1]))
                  for band in ats_snd.bands]

    # build band-limited noise
    in_ptr = 0
    out_ptr = -half_win
    for _ in range(noise_frames):
        # the windowed spectrum is the same for every band: compute it once
        spectrum = fft(white_noise[in_ptr:in_ptr + win_size] * window)

        # clip the part of this window that falls outside the output buffer
        front_pad = -out_ptr if out_ptr < 0 else 0
        back_pad = max(out_ptr + win_size - out_size, 0)
        dst = slice(out_ptr + front_pad, out_ptr + win_size - back_pad)
        src = slice(front_pad, win_size - back_pad)

        for band, lo, hi in band_edges:
            # keep only bins lo..hi, everything else stays zero
            band_spectrum = zeros(win_size, spectrum.dtype)
            band_spectrum[lo:hi + 1] = spectrum[lo:hi + 1]
            rev_fft = real(ifft(band_spectrum[bin_indices]))
            banded_noise[band][dst] += rev_fft[src]

        in_ptr += noise_hop
        out_ptr += noise_hop
        if out_ptr >= out_size:
            break

    # envelope bands
    last_frame = frames - 1
    fil_ptr = 0
    for frame_n in range(frames):
        if fil_ptr >= out_size:
            break
        # constrain number of samples we write at tail end of sound
        n_samps = min(frame_size, out_size - fil_ptr)
        # hold the final frame rather than reading past the last column
        next_n = min(frame_n + 1, last_frame)
        ramp = arange(n_samps)
        out_slice = slice(fil_ptr, fil_ptr + n_samps)

        for band in ats_snd.bands:
            amp_0 = ats_snd.band_energy[band][frame_n]
            amp_t = ats_snd.band_energy[band][next_n]
            if amp_0 == 0.0 and amp_t == 0.0:
                continue
            amp_step = (amp_t - amp_0) / frame_size
            noise[out_slice] += (amp_0 + (ramp * amp_step)) * banded_noise[band][out_slice]

        fil_ptr += frame_size

    return noise


def synth(ats_snd, normalize=False, compute_phase=True,
          export_file=None, sine_pct=1.0, noise_pct=0.0, noise_bands=None,
          normalize_sine=False, normalize_noise=False):
    """Function to synthesize audio from :obj:`~pyatsyn.ats_structure.AtsSound`

    Sine generator bank and band-limited noise synthesizer for .ats files. When
    phase information is ignored, phase is accumulated from linearly interpolated
    frequencies, starting from an initial phase of 0.0 for each partial.

    The last analysis frame has no successor to interpolate towards; its
    frequency, amplitude, phase and band energy values are held constant.

    The method for cubic polynomial interpolation of phase used is credited to:
    R. McAulay and T. Quatieri, "Speech analysis/Synthesis based on a
    sinusoidal representation," in IEEE Transactions on Acoustics,
    Speech, and Signal Processing, vol. 34, no. 4, pp. 744-754,
    August 1986
    `doi: 10.1109/TASSP.1986.1164910 <https://doi.org/10.1109/TASSP.1986.1164910>`_.

    Parameters
    ----------
    ats_snd : :obj:`~pyatsyn.ats_structure.AtsSound`
        the .ats file used to synthesize
    normalize : bool, optional
        normalize sound to ±1 before output (default: False)
    compute_phase : bool, optional
        use cubic polynomial interpolation of phase information during synthesis, if available (default: True)
    export_file : str
        audio file path to write synthesis to, or None for no file output (default: None)
    sine_pct : float
        gain applied to the sine components in the mix; values of 0.0 or less
        skip sine synthesis (default: 1.0)
    noise_pct : float
        gain applied to the noise components in the mix; values of 0.0 or less
        skip noise synthesis (default: 0.0)
    noise_bands : ndarray[float]
        1D array of band edges to use for noise analysis. Currently using other than 25 bands
        (i.e. 26 edges) is not fully supported. If None,
        :obj:`~pyatsyn.atsa.critical_bands.ATS_CRITICAL_BAND_EDGES` will be used. (default: None)
    normalize_sine : bool
        normalize sine components to ±1 before mixing (default: False)
    normalize_noise : bool
        normalize noise components to ±1 before mixing (default: False)

    Returns
    -------
    ndarray[float]
        A 1D array of amplitudes representing the synthesized sound
    """
    out_size = int(ats_snd.dur * ats_snd.sampling_rate)
    synthesized = zeros(out_size, "float64")

    if sine_pct > 0.0:
        _add_sine_partials(ats_snd, synthesized, compute_phase)
        if normalize_sine:
            _normalize_peak(synthesized)
        synthesized *= sine_pct

    # noise is only rendered when requested and residual data is present
    if noise_pct > 0.0 and len(ats_snd.band_energy) > 0:
        noise = _band_limited_noise(ats_snd, out_size, noise_bands)
        if normalize_noise:
            _normalize_peak(noise)
        synthesized += noise_pct * noise

    if normalize:
        _normalize_peak(synthesized)

    # export synthesized version to audio file
    if export_file is not None:
        sf.write(export_file, synthesized, ats_snd.sampling_rate)

    return synthesized
def synth_CLI():
    """Command line wrapper for :obj:`~pyatsyn.ats_synth.synth`

    Parses the command line, loads the requested .ats file and renders it to
    the requested audio file.

    Example
    -------
    Display usage details with help flag

    ::

        $ pyatsyn-synth -h

    Generate a wav file from a sine generator bank from an ats file

    ::

        $ pyatsyn-synth example.ats example.wav

    Generate a wav file from a sine generator bank and band-limited noise from an ats file

    ::

        $ pyatsyn-synth example.ats example.wav --noise 1.0

    """
    cli = argparse.ArgumentParser(
        description="Sine generator bank and band-limited noise synthesizer for .ats files",
    )

    # positional arguments: input analysis file, output audio file
    cli.add_argument("ats_file_in", help="the path to the .ats file to synthesize")
    cli.add_argument("audio_file_out", help="audio file path to synthesize to")

    # options, registered in the order they appear in the usage text
    cli.add_argument("-n", "--normalize", action="store_true",
                     help="normalize sound to ±1 before output")
    cli.add_argument("--sine", type=float, default=1.0,
                     help="percentage of sine components to mix (default 1.0)")
    cli.add_argument("--noise", type=float, default=0.0,
                     help="percentage of noise components to mix (default 0.0)")
    cli.add_argument("--normalize_sine", action="store_true",
                     help="normalize sine components to ±1 before mixing")
    cli.add_argument("--normalize_noise", action="store_true",
                     help="normalize noise componenets to ±1 before mixing")
    cli.add_argument("--ignore_phase", action="store_true",
                     help="ignore phase information during synthesis")

    opts = cli.parse_args()

    # NOTE(review): the input path is passed as both of ats_load's first two
    # positional arguments - confirm against ats_load's signature.
    sound = ats_load(opts.ats_file_in, opts.ats_file_in)

    synth_options = {
        "normalize": opts.normalize,
        "compute_phase": not opts.ignore_phase,  # the flag is the inverse of the keyword
        "export_file": opts.audio_file_out,
        "sine_pct": opts.sine,
        "noise_pct": opts.noise,
        "normalize_sine": opts.normalize_sine,
        "normalize_noise": opts.normalize_noise,
    }
    synth(sound, **synth_options)