# Source code for pyatsyn.atsa.utils

# -*- coding: utf-8 -*-

# This source code is licensed under the BSD-style license found in the
# LICENSE.rst file in the root directory of this source tree. 

# pyatsyn Copyright (c) <2023>, <Johnathan G Lyon>
# All rights reserved.

# Except where otherwise noted, ATSA and ATSH is Copyright (c) <2002-2004>
# <Oscar Pablo Di Liscia, Pete Moss, and Juan Pampin>

"""Utility Functions for ATS Analysis

Attributes
----------
MAX_DB_SPL : float
    maximum dB SPL level; added to a decibel value to convert it to dB SPL
ATS_MIN_SEGMENT_LENGTH : int
    default minimum track segment length, in frames
ATS_AMP_THRESHOLD : float
    default amplitude threshold, in decibels
ATS_NOISE_THRESHOLD : float
    default noise threshold
"""

from numpy import inf, ceil, log2, log10


###################
# UTILITY CONSTANTS
###################

# Reference level added to a decibel value by amp_to_db_spl to obtain dB SPL.
MAX_DB_SPL = 100.0
# Fallback used by optimize_tracks when min_segment_length is below 1.
ATS_MIN_SEGMENT_LENGTH = 3
# Default amplitude threshold in dB; optimize_tracks converts it with
# db_to_amp when no amp_threshold is supplied.
ATS_AMP_THRESHOLD = -60
# Default noise threshold; not referenced in this part of the file
# (presumably also in dB -- TODO confirm against its users).
ATS_NOISE_THRESHOLD = -120


###################
# UTILITY FUNCTIONS
###################

def db_to_amp(db):
    """Convert a decibel value to linear amplitude: :math:`10^{dB / 20.0}`

    Negative infinity is mapped explicitly to an amplitude of exactly 0.0.

    Parameters
    ----------
    db : float
        a level in decibels

    Returns
    -------
    float
        the equivalent linear amplitude
    """
    # anything other than "minus infinity dB" goes through the power law
    if db != -inf:
        return 10 ** (db / 20.0)
    return 0.0
def amp_to_db(amp):
    """Convert a linear amplitude to decibels: :math:`20 * \\log_{10}{amp}`

    Parameters
    ----------
    amp : float
        a linear amplitude value

    Returns
    -------
    float
        the equivalent level in decibels
    """
    decibels = 20 * log10(amp)
    return decibels
def amp_to_db_spl(amp):
    """Convert a linear amplitude to decibel sound pressure level (dB SPL)

    The amplitude is first converted to decibels with :obj:`amp_to_db`, then
    offset by :obj:`MAX_DB_SPL`.

    Parameters
    ----------
    amp : float
        a linear amplitude value

    Returns
    -------
    float
        the equivalent dB SPL value
    """
    relative_db = amp_to_db(amp)
    return MAX_DB_SPL + relative_db
def next_power_of_2(num):
    """Return the smallest power-of-2 integer greater than or equal to an input

    The result is computed with exact integer arithmetic, so it stays correct
    for integers too large to round-trip through floating point (a
    ``log2``/``ceil`` formulation returns a value *smaller* than the input
    for e.g. ``2**49 + 1``).

    Parameters
    ----------
    num : int
        a positive integer. Non-integral positive values are accepted and
        rounded up to the next integer first.

    Returns
    -------
    int
        the smallest power of 2 that is more than or equal to `num`
        (0 is returned for an input of 0)

    Raises
    ------
    ValueError
        if `num` is negative or NaN
    """
    if num < 0:
        raise ValueError("next_power_of_2 requires a non-negative input, got %r" % (num,))

    # integer ceiling that never converts a large int to float
    # (int() of NaN raises ValueError, int() of infinity raises OverflowError)
    whole = int(num)
    if whole < num:
        whole += 1

    # keep the historical result for a zero input
    if whole == 0:
        return 0

    # (whole - 1).bit_length() is the exponent of the next power of 2;
    # exact powers of 2 map onto themselves
    return 1 << (whole - 1).bit_length()
def compute_frames(total_samps, hop):
    """Compute the number of frames to use in the specified analysis.

    The sample count is divided by the hop size and rounded up, then one
    extra frame is added. The extra frame is intended to prevent attenuation
    during windowing at the tail and to allow for interpolation at the end
    of the soundfile.

    Parameters
    ----------
    total_samps : int
        number of samples in analyzed sound duration
    hop : int
        interframe distance in samples

    Returns
    -------
    int
        number of frames to use for STFT analysis
    """
    hops_needed = ceil(total_samps / hop)
    return 1 + int(hops_needed)
def optimize_tracks(tracks, analysis_frames, min_segment_length, amp_threshold, highest_frequency, lowest_frequency):
    """Function to run optimization routines on the established tracks.

    The optimizations performed are:
        * trim short partials
        * calculate and store maximum and average frq and amp
        * prune tracks below amplitude threshold
        * prune tracks outside frequency constraints
        * sort and renumber tracks and peaks in analysis_frames according to average frq

    NOTE: directly updates analysis_frames, pruning peaks corresponding to pruned tracks.
    The track objects are also updated in place: `frq` and `amp` are overwritten
    with averages, `amp_max`, `frq_max` and `frq_min` are updated from the peaks,
    and `track` is reassigned for the tracks that are kept.

    Parameters
    ----------
    tracks : Sequence[:obj:`~pyatsyn.ats_structure.AtsPeak`]
        collection of established tracks. It is indexed by each peak's `track`
        value, so every track is assumed to sit at the position given by its own
        `track` attribute. `amp_max`, `frq_max` and `frq_min` must already be
        initialized on each track.
    analysis_frames : MutableSequence[Iterable[:obj:`~pyatsyn.ats_structure.AtsPeak`]]
        a collection storing the :obj:`~pyatsyn.ats_structure.AtsPeak` objects at each frame in time
    min_segment_length : int
        minimal size (in frames) of a valid track segment, otherwise it is pruned.
        Values below 1 fall back to :obj:`~pyatsyn.atsa.utils.ATS_MIN_SEGMENT_LENGTH`.
    amp_threshold : float
        amplitude threshold (linear amplitude, not dB) used to prune tracks. If None, will default to
        :obj:`~pyatsyn.atsa.utils.ATS_AMP_THRESHOLD` converted to amplitude.
    highest_frequency : float
        upper frequency threshold, tracks with maxima above this will be pruned
    lowest_frequency : float
        lower frequency threshold, tracks with minima below this will be pruned

    Returns
    -------
    tracks : list[:obj:`~pyatsyn.ats_structure.AtsPeak`]
        the optimized subset of input tracks, sorted by average frequency and
        renumbered from 0
    """
    if min_segment_length < 1:
        min_segment_length = ATS_MIN_SEGMENT_LENGTH

    # NOTE: amp_threshold is expected in amps
    if amp_threshold is None:
        amp_threshold = db_to_amp(ATS_AMP_THRESHOLD)

    tracks_for_removal = set()

    # trim short partials
    for tk in tracks:
        if tk.duration < min_segment_length:
            tracks_for_removal.add(tk.track)
        else:
            # zero amp & frq so they can accumulate the averages below
            tk.frq = 0.0
            tk.amp = 0.0

    # get max & average values (store data on tracks)
    for frame in analysis_frames:
        for pk in frame:
            tk_ind = pk.track
            if tk_ind in tracks_for_removal:
                continue
            tk = tracks[tk_ind]
            tk.amp_max = max(tk.amp_max, pk.amp)
            tk.frq_max = max(tk.frq_max, pk.frq)
            tk.frq_min = min(tk.frq_min, pk.frq)
            # each peak contributes 1/duration of its value to the average
            alpha = 1 / tk.duration
            tk.frq += pk.frq * alpha
            tk.amp += pk.amp * alpha

    # process tracks again for amp & freq thresholds
    for tk in tracks:
        if tk.amp_max < amp_threshold or tk.frq_max > highest_frequency or tk.frq_min < lowest_frequency:
            tracks_for_removal.add(tk.track)

    # old track number -> new track number; None marks a pruned track.
    # Sized from the full input before `tracks` is rebound to the survivors.
    renumbering_tracks = [None] * len(tracks)

    # prune invalid tracks
    tracks = [tk for tk in tracks if tk.track not in tracks_for_removal]

    # sort tracks by average freq and build renumbering map and renumber tracks
    tracks.sort(key=lambda tk: tk.frq)
    for ind, tk in enumerate(tracks):
        renumbering_tracks[tk.track] = ind
        tk.track = ind

    # renumber and prune peaks
    for frame_n, frame in enumerate(analysis_frames):
        new_frame = []
        for pk in frame:
            new_track = renumbering_tracks[pk.track]
            if new_track is not None:
                pk.track = new_track
                new_frame.append(pk)
        analysis_frames[frame_n] = new_frame

    return tracks