# -*- coding: utf-8 -*-
# This source code is licensed under the BSD-style license found in the
# LICENSE.rst file in the root directory of this source tree.
# pyatsyn Copyright (c) <2023>, <Johnathan G Lyon>
# All rights reserved.
# Except where otherwise noted, ATSA and ATSH is Copyright (c) <2002-2004>
# <Oscar Pablo Di Liscia, Pete Moss, and Juan Pampin>
"""Utility Functions for ATS Analysis
Attributes
----------
MAX_DB_SPL : float
maximum DB_SPL level; used for converting amplitude units
ATS_MIN_SEGMENT_LENGTH : int
default minimum segment length
ATS_AMP_THRESHOLD : float
default amp threshold
ATS_NOISE_THRESHOLD : float
default noise threshold
"""
from numpy import inf, ceil, log2, log10
###################
# UTILITY CONSTANTS
###################
# Offset added to a plain decibel value by amp_to_db_spl to express it as dB SPL
MAX_DB_SPL = 100.0
# Fallback minimum track segment length (in frames) that optimize_tracks uses
# when it is called with a min_segment_length below 1
ATS_MIN_SEGMENT_LENGTH = 3
# Default amplitude threshold, in decibels; optimize_tracks converts it with
# db_to_amp when it is called with amp_threshold set to None
ATS_AMP_THRESHOLD = -60
# Default noise threshold
# NOTE(review): presumably in decibels like ATS_AMP_THRESHOLD; it is not
# referenced by the functions in this part of the file - confirm against callers
ATS_NOISE_THRESHOLD = -120
###################
# UTILITY FUNCTIONS
###################
def db_to_amp(db):
    """Convert a level in decibels to a linear amplitude: :math:`10^{dB / 20.0}`

    Parameters
    ----------
    db : float
        the level in decibels; negative infinity is accepted and maps to zero

    Returns
    -------
    float
        the amplitude equivalent of `db` (``0.0`` when `db` is negative infinity)
    """
    # negative infinity is answered directly instead of being exponentiated
    is_silence = db == -inf
    return 0.0 if is_silence else 10 ** (db / 20.0)
def amp_to_db(amp):
    """Convert a linear amplitude to decibels: :math:`20 * \\log_{10}{amp}`

    Parameters
    ----------
    amp : float
        the linear amplitude to convert

    Returns
    -------
    float
        the level of `amp` expressed in decibels
    """
    log_amp = log10(amp)
    return 20 * log_amp
def amp_to_db_spl(amp):
    """Convert a linear amplitude to decibel sound pressure level (dB SPL)

    The amplitude is first converted to decibels with :func:`amp_to_db` and
    the result is then offset by ``MAX_DB_SPL``.

    Parameters
    ----------
    amp : float
        the linear amplitude to convert

    Returns
    -------
    float
        the level of `amp` expressed in dB SPL
    """
    level_db = amp_to_db(amp)
    return MAX_DB_SPL + level_db
def next_power_of_2(num):
    """Return the smallest power of 2 that is greater than or equal to an input

    The result is computed with exact integer arithmetic. A floating point
    ``log2``/``ceil`` round trip loses precision for large inputs (for
    example ``2**52 + 1`` would yield ``2**52``, which is smaller than the
    input) and yields 0, which is not a power of 2, for inputs of 0.5 or less.

    Parameters
    ----------
    num : int
        a positive number; a non-integer value is rounded up to the next
        whole number before the power of 2 is selected

    Returns
    -------
    int
        the smallest power of 2 that is greater than or equal to `num`

    Raises
    ------
    ValueError
        if `num` is zero, negative or NaN
    """
    # round up to a whole number: int() truncates, so step up by one whenever
    # truncation dropped a fractional part
    whole = int(num)
    if whole < num:
        whole += 1
    if whole < 1:
        raise ValueError(
            "next_power_of_2 requires a positive number, got %r" % (num,))
    # (whole - 1).bit_length() is the exponent of the smallest power of 2
    # that is >= whole; it is 0 for whole == 1, which yields 1
    return 1 << (whole - 1).bit_length()
def compute_frames(total_samps, hop):
    """Compute the number of frames to use in the specified analysis

    One frame more than the number of hops needed to span the samples is
    returned; the extra frame prevents attenuation during windowing at the
    tail and allows for interpolation at the end of the soundfile.

    Parameters
    ----------
    total_samps : int
        number of samples in the analyzed sound duration
    hop : int
        distance between consecutive frames, in samples

    Returns
    -------
    int
        number of frames to use for STFT analysis
    """
    # hops needed to span all samples, rounded up to a whole hop
    hops_needed = ceil(total_samps / hop)
    return 1 + int(hops_needed)
def optimize_tracks(tracks, analysis_frames, min_segment_length,
                    amp_threshold, highest_frequency, lowest_frequency):
    """Function to run optimization routines on the established tracks.

    The optimizations performed are:
        * trim short partials
        * calculate and store maximum and average frq and amp
        * prune tracks below amplitude threshold
        * prune tracks outside frequency constraints
        * sort and renumber tracks and peaks in analysis_frames according to average frq

    NOTE: directly updates analysis_frames, pruning peaks corresponding to
    pruned tracks and renumbering the remaining peaks. The track objects are
    also updated in place (`frq`, `amp`, `amp_max`, `frq_max`, `frq_min` and
    `track`); the `tracks` sequence itself is not modified.

    Parameters
    ----------
    tracks : Sequence
        collection of established tracks, indexed by track number (the track
        for a peak is looked up as ``tracks[peak.track]``). Each track must
        expose `track`, `duration`, `frq`, `amp`, `amp_max`, `frq_max` and
        `frq_min`, with the max/min attributes already initialized.
        NOTE(review): presumably :obj:`~pyatsyn.ats_structure.AtsPeak`
        objects; the earlier documentation was inconsistent - confirm.
    analysis_frames : Sequence[Iterable[:obj:`~pyatsyn.ats_structure.AtsPeak`]]
        a collection storing the :obj:`~pyatsyn.ats_structure.AtsPeak` objects at each frame in time
    min_segment_length : int
        minimal size (in frames) of a valid track segment, otherwise it is pruned.
        Values below 1 default to :obj:`~pyatsyn.atsa.utils.ATS_MIN_SEGMENT_LENGTH`.
    amp_threshold : float
        amplitude threshold used to prune tracks. If None, will default to :obj:`~pyatsyn.atsa.utils.ATS_AMP_THRESHOLD` converted to amplitude.
    highest_frequency : float
        upper frequency threshold, tracks with maxima above this will be pruned
    lowest_frequency : float
        lower frequency threshold, tracks with minima below this will be pruned

    Returns
    -------
    tracks : list
        the optimized subset of input tracks, sorted by average frequency and
        renumbered so that each track's `track` equals its position in the list
    """
    if min_segment_length < 1:
        min_segment_length = ATS_MIN_SEGMENT_LENGTH

    # NOTE: amp_threshold is expected in amps, the default constant is in dB
    if amp_threshold is None:
        amp_threshold = db_to_amp(ATS_AMP_THRESHOLD)

    tracks_for_removal = set()

    # trim short partials
    for tk in tracks:
        if tk.duration < min_segment_length:
            tracks_for_removal.add(tk.track)
        else:
            # zero amp & frq so the averages can be accumulated below
            tk.frq = 0.0
            tk.amp = 0.0

    # get max & average values (store data on tracks)
    for frame in analysis_frames:
        for pk in frame:
            if pk.track in tracks_for_removal:
                continue
            tk = tracks[pk.track]
            tk.amp_max = max(tk.amp_max, pk.amp)
            tk.frq_max = max(tk.frq_max, pk.frq)
            tk.frq_min = min(tk.frq_min, pk.frq)
            # each peak contributes an equal share (1 / duration) to the averages
            alpha = 1 / tk.duration
            tk.frq += pk.frq * alpha
            tk.amp += pk.amp * alpha

    # process tracks again for amp & freq thresholds
    for tk in tracks:
        if (tk.amp_max < amp_threshold
                or tk.frq_max > highest_frequency
                or tk.frq_min < lowest_frequency):
            tracks_for_removal.add(tk.track)

    # maps an old track number to its new number; None marks a pruned track
    renumbering_tracks = [None] * len(tracks)

    # prune invalid tracks
    tracks = [tk for tk in tracks if tk.track not in tracks_for_removal]

    # sort tracks by average freq and build renumbering map and renumber tracks
    tracks.sort(key=lambda tk: tk.frq)
    for ind, tk in enumerate(tracks):
        # record the mapping before the old number is overwritten
        renumbering_tracks[tk.track] = ind
        tk.track = ind

    # renumber and prune peaks
    for frame_n, frame in enumerate(analysis_frames):
        kept_peaks = []
        for pk in frame:
            new_ind = renumbering_tracks[pk.track]
            if new_ind is not None:
                pk.track = new_ind
                kept_peaks.append(pk)
        analysis_frames[frame_n] = kept_peaks

    return tracks