Source code for thebeat.stats

# Copyright (C) 2022-2023  Jelle van der Werff
#
# This file is part of thebeat.
#
# thebeat is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# thebeat is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with thebeat.  If not, see <https://www.gnu.org/licenses/>.

from __future__ import annotations
from typing import Optional, Union
import scipy.stats
import scipy.fft
import numpy as np
import thebeat.core
from thebeat.helpers import sequence_to_binary
import matplotlib.pyplot as plt
import scipy.signal
import scipy.stats
from scipy.fft import rfft, rfftfreq
import pandas as pd
import thebeat.helpers
import Levenshtein
import warnings


[docs]def acf_df(sequence: thebeat.core.Sequence, resolution, smoothing_window: Optional[float] = None, smoothing_sd: Optional[float] = None) -> pd.DataFrame: """ Perform autocorrelation analysis on a :py:class:`Sequence` object, and return a :class:`Pandas.DataFrame` object containing the results. Parameters ---------- sequence A :py:class:`~thebeat.core.Sequence` object. resolution The temporal resolution. If the used Sequence is in seconds, you might want to use 0.001. If the Sequence is in milliseconds, try using 1. Incidentally, the number of lags for the autocorrelation function is calculated as ``n_lags = sequence_duration / resolution``. smoothing_window The window within which a normal probability density function is used for smoothing out the analysis. smoothing_sd The standard deviation of the normal probability density function used for smoothing out the analysis. Returns ------- :class:`pandas.DataFrame` DataFrame containing two columns: the timestamps, and the autocorrelation factor. Notes ----- This function is based on the procedure described in :cite:t:`ravignaniMeasuringRhythmicComplexity2017`. There, one can also find a more detailed description of the smoothing procedure. Examples -------- >>> rng = np.random.default_rng(seed=123) # for reproducability >>> seq = thebeat.core.Sequence.generate_random_uniform(n_events=10,a=400,b=600,rng=rng) >>> df = acf_df(seq, smoothing_window=50, smoothing_sd=20, resolution=10) >>> print(df.head(3)) timestamp correlation 0 0 1.000000 1 10 0.851373 2 20 0.590761 """ correlations = acf_values(sequence=sequence, resolution=resolution, smoothing_window=smoothing_window, smoothing_sd=smoothing_sd) correlations = correlations / np.max(correlations) timestamps = np.arange(start=0, stop=len(correlations) * resolution, step=resolution) df = pd.DataFrame( { "timestamp": timestamps, "correlation": correlations } ) return df
[docs]def acf_plot(sequence: thebeat.core.Sequence, resolution, max_lag: Optional[float] = None, smoothing_window: Optional[float] = None, smoothing_sd: Optional[float] = None, style: str = 'seaborn-v0_8', title: str = 'Autocorrelation', x_axis_label: str = 'Lag', y_axis_label: str = 'Correlation', figsize: Optional[tuple] = None, dpi: int = 100, ax: Optional[plt.Axes] = None, suppress_display: bool = False) -> tuple[plt.Figure, plt.Axes]: """ This function can be used for plotting an autocorrelation plot from a :class:`~thebeat.core.Sequence`. Parameters ---------- sequence A :py:class:`~thebeat.core.Sequence` object. resolution The temporal resolution. If the used Sequence is in seconds, you might want to use 0.001. If the Sequence is in milliseconds, try using 1. Incidentally, the number of lags for the autocorrelation function is calculated as ``n_lags = sequence_duration_in_ms / resolution``. max_lag The maximum lag to be plotted. Defaults to the sequence duration. smoothing_window The window (in milliseconds) within which a normal probability density function is used for smoothing out the analysis. smoothing_sd The standard deviation of the normal probability density function used for smoothing out the analysis. style Style used by matplotlib. See `matplotlib style sheets reference <https://matplotlib.org/stable/gallery/style_sheets/style_sheets_reference.html>`_. title If desired, one can provide a title for the plot. This takes precedence over using the :class:`~thebeat.core.Sequence` or :class:`~thebeat.core.SoundSequence` ``name`` attribute as the title of the plot (if the object has one). x_axis_label A label for the x axis. y_axis_label A label for the y axis. figsize A tuple containing the desired output size of the plot in inches, e.g. ``(4, 1)``. This refers to the ``figsize`` parameter in :func:`matplotlib.pyplot.figure`. dpi The desired output resolution of the plot in dots per inch (DPI). This refers to the ``dpi`` parameter in :func:`matplotlib.pyplot.figure`. ax If desired, one can provide an existing :class:`matplotlib.axes.Axes` object to plot the autocorrelation plot on. This is for instance useful if you want to plot multiple autocorrelation plots on the same figure. suppress_display If ``True``, :func:`matplotlib.pyplot.show` is not run. Notes ----- This function is based on the procedure described in :cite:t:`ravignaniMeasuringRhythmicComplexity2017`. There, one can also find a more detailed description of the smoothing procedure. """ onsets = sequence.onsets correlation = acf_values(sequence=sequence, resolution=resolution, smoothing_window=smoothing_window, smoothing_sd=smoothing_sd) x_step = resolution max_lag = int(max_lag // resolution) if max_lag else np.floor(np.max(onsets) / resolution).astype(int) # plot try: y = correlation[:max_lag] y = y / np.max(y) # normalize except ValueError: raise ValueError("We end up with an empty y axis. Try changing the resolution.") # Make x axis x = np.arange(start=0, stop=len(y) * x_step, step=x_step) with plt.style.context(style): if ax is None: fig, ax = plt.subplots(figsize=figsize, tight_layout=True, dpi=dpi) else: fig = ax.get_figure() ax.set_xlabel(x_axis_label) ax.set_ylabel(y_axis_label) ax.set_title(title) ax.plot(x, y) if not suppress_display and ax is not None: plt.show() return fig, ax
[docs]def acf_values(sequence: thebeat.core.Sequence, resolution, smoothing_window: Optional[float] = None, smoothing_sd: Optional[float] = None) -> np.ndarray: """ Perform autocorrelation. This function takes a :class:`~thebeat.core.Sequence` object, and returns an array with steps of ``resolution`` of unstandardized correlation factors. Parameters ---------- sequence A :class:`~thebeat.core.Sequence` object. resolution The temporal resolution. If the used Sequence is in seconds, you might want to use 0.001. If the Sequence is in milliseconds, try using 1. Incidentally, the number of lags for the autocorrelation function is calculated as ``n_lags = sequence_duration / resolution``. smoothing_window The window within which a normal probability density function is used for smoothing out the analysis. smoothing_sd The standard deviation of the normal probability density function used for smoothing out the analysis. Notes ----- This function is based on the procedure described in :cite:t:`ravignaniMeasuringRhythmicComplexity2017`. There, one can also find a more detailed description of the smoothing procedure. This function uses the :func:`numpy.correlate` to calculate the correlations. """ signal = thebeat.helpers.sequence_to_binary(sequence, resolution) # npdf if smoothing_window and smoothing_sd: x = np.arange(start=-smoothing_window / 2, stop=smoothing_window / 2, step=resolution) npdf = scipy.stats.norm.pdf(x, 0, smoothing_sd) npdf = npdf / np.max(npdf) signal_convoluted = np.convolve(signal, npdf) signal = signal_convoluted[round(resolution * smoothing_window / 2):] try: correlation = np.correlate(signal, signal, 'full') correlation = correlation[round(len(correlation) / 2) - 1:] except ValueError as e: raise ValueError("Error! Hint: Most likely your resolution is too large for the chosen smoothing_window" "and smoothing_sd. Try choosing a smaller resolution.") from e return correlation
[docs]def ccf_df(test_sequence: thebeat.core.Sequence, reference_sequence: thebeat.core.Sequence, resolution, smoothing_window: Optional[float] = None, smoothing_sd: Optional[float] = None) -> pd.DataFrame: """ Perform autocorrelation analysis on a :py:class:`Sequence` object, and return a :class:`Pandas.DataFrame` object containing the results. Parameters ---------- test_sequence The test sequence. reference_sequence The reference sequence. resolution The temporal resolution. If the used Sequence is in seconds, you might want to use 0.001. If the Sequence is in milliseconds, try using 1. Incidentally, the number of lags for the autocorrelation function is calculated as ``n_lags = sequence_duration / resolution``. smoothing_window The window within which a normal probability density function is used for smoothing out the analysis. smoothing_sd The standard deviation of the normal probability density function used for smoothing out the analysis. Returns ------- :class:`pandas.DataFrame` DataFrame containing two columns: the timestamps, and the cross-correlation factor. Notes ----- This function is based on the procedure described in :cite:t:`ravignaniMeasuringRhythmicComplexity2017`. There, one can also find a more detailed description of the smoothing procedure. """ correlations = ccf_values(test_sequence=test_sequence, reference_sequence=reference_sequence, resolution=resolution, smoothing_window=smoothing_window, smoothing_sd=smoothing_sd) # normalize correlations = correlations / np.max(correlations) timestamps = np.arange(start=0, stop=len(correlations) * resolution, step=resolution) df = pd.DataFrame( { "timestamp": timestamps, "correlation": correlations } ) return df
[docs]def ccf_plot(test_sequence: thebeat.core.Sequence, reference_sequence: thebeat.core.Sequence, resolution, smoothing_window: Optional[float] = None, smoothing_sd: Optional[float] = None, style: str = 'seaborn-v0_8', title: str = 'Cross-correlation', x_axis_label: str = 'Lag', y_axis_label: str = 'Correlation', figsize: Optional[tuple] = None, dpi: int = 100, ax: Optional[plt.Axes] = None, suppress_display: bool = False) -> tuple[plt.Figure, plt.Axes]: """ Calculate and plot the cross-correlation function (CCF) between two :class:`~thebeat.core.Sequence` objects. The test sequence is compared to the reference sequence. Parameters ---------- test_sequence The test sequence. reference_sequence The reference sequence. resolution The temporal resolution. If the used Sequence is in milliseconds, you probably want 1. If the Sequence is in seconds, try using 0.001. smoothing_window The window within which a normal probability density function is used for smoothing out the analysis. smoothing_sd The standard deviation of the normal probability density function used for smoothing out the analysis. style The matplotlib style to use. See `matplotlib style reference <https://matplotlib.org/stable/gallery/style_sheets/style_sheets_reference.html>`_. title The title of the plot. x_axis_label The label of the x axis. y_axis_label The label of the y axis. figsize A tuple containing the desired output size of the plot in inches, e.g. ``(4, 1)``. This refers to the ``figsize`` parameter in :func:`matplotlib.pyplot.figure`. dpi The resolution of the plot in dots per inch. This refers to the ``dpi`` parameter in :func:`matplotlib.pyplot.figure`. ax A :class:`matplotlib.axes.Axes` object. If ``None``, a new Figure and Axes is created. suppress_display If ``True``, the plot is not displayed. This is useful e.g. if you only want to save the plot to a file Returns ------- fig The :class:`matplotlib.figure.Figure` object. ax The :class:`matplotlib.axes.Axes` object. Notes ----- This function is based on the procedure described in :cite:t:`ravignaniMeasuringRhythmicComplexity2017`. There, one can also find a more detailed description of the smoothing procedure. """ # Get correlation factors correlation = ccf_values(test_sequence=test_sequence, reference_sequence=reference_sequence, resolution=resolution, smoothing_window=smoothing_window, smoothing_sd=smoothing_sd) # Make y axis x_step = resolution max_lag = np.floor(np.max(np.concatenate([test_sequence.onsets, reference_sequence.onsets])) / resolution).astype( int) try: y = correlation[:max_lag] y = y / np.max(y) # normalize except ValueError: raise ValueError("We end up with an empty y axis. Try changing the resolution.") # Make x axis x = np.arange(start=0, stop=len(y) * x_step, step=x_step) # Plot with plt.style.context(style): if ax is None: fig, ax = plt.subplots(figsize=figsize, tight_layout=True, dpi=dpi) else: fig = ax.get_figure() ax.set_xlabel(x_axis_label) ax.set_ylabel(y_axis_label) ax.set_title(title) ax.plot(x, y) if not suppress_display and ax is not None: plt.show() return fig, ax
[docs]def ccf_values(test_sequence: thebeat.core.Sequence, reference_sequence: thebeat.core.Sequence, resolution: float, smoothing_window: Optional[float] = None, smoothing_sd: Optional[float] = None) -> np.ndarray: """ Returns the unstandardized cross-correlation function (CCF) for two :class:`~thebeat.core.Sequence` objects. The test sequence is compared to the reference sequence. Parameters ---------- test_sequence The test sequence. reference_sequence The reference sequence. resolution The temporal resolution. If the used Sequence is in milliseconds, you probably want 1. If the Sequence is in seconds, try using 0.001. smoothing_window The window within which a normal probability density function is used for smoothing out the analysis. smoothing_sd The standard deviation of the normal probability density function used for smoothing out the analysis. Returns ------- correlation The unstandardized cross-correlation function. """ # Make into 0's and 1's test_signal = thebeat.helpers.sequence_to_binary(test_sequence, resolution) ref_signal = thebeat.helpers.sequence_to_binary(reference_sequence, resolution) # npdf if smoothing_window and smoothing_sd: x = np.arange(start=-smoothing_window / 2, stop=smoothing_window / 2, step=resolution) npdf = scipy.stats.norm.pdf(x, 0, smoothing_sd) npdf = npdf / np.max(npdf) test_signal_convoluted = np.convolve(test_signal, npdf) ref_signal_convoluted = np.convolve(ref_signal, npdf) test_signal = test_signal_convoluted[round(resolution * smoothing_window / 2):] ref_signal = ref_signal_convoluted[round(resolution * smoothing_window / 2):] # Make signals of equal length diff = len(ref_signal) - len(test_signal) if diff > 0: # ref_signal is longer test_signal = np.concatenate((test_signal, np.zeros(diff))) elif diff < 0: # test_signal is longer ref_signal = np.concatenate((ref_signal, np.zeros(-diff))) # Calculate cross-correlation try: correlation = np.correlate(test_signal, ref_signal, 'full') correlation = correlation[round(len(correlation) / 2) - 1:] except ValueError as e: raise ValueError("Error! Hint: Most likely your resolution is too large for the chosen smoothing_window" "and smoothing_sd. Try choosing a smaller resolution.") from e return correlation
[docs]def edit_distance_rhythm(test_rhythm: thebeat.music.Rhythm, reference_rhythm: thebeat.music.Rhythm, smallest_note_value: int = 16) -> float: """ Caculates edit/Levenshtein distance between two rhythms. The ``smallest_note_value`` determines the underlying grid that is used. If e.g. 16, the underlying grid is composed of 1/16th notes. Note ---- Based on the procedure described in :cite:t:`postEditDistanceMeasure2011`. Parameters ---------- test_rhythm The rhythm to be tested. reference_rhythm The rhythm to which ``test_rhythm`` will be compared. smallest_note_value The smallest note value that is used in the underlying grid. 16 means 1/16th notes, 4 means 1/4th notes, etc. Examples -------- >>> from thebeat.music import Rhythm >>> test_rhythm = Rhythm.from_fractions([1/4, 1/4, 1/4, 1/4]) >>> reference_rhythm = Rhythm.from_fractions([1/4, 1/8, 1/8, 1/4, 1/4]) >>> print(edit_distance_rhythm(test_rhythm, reference_rhythm)) 1 """ if not isinstance(test_rhythm, thebeat.music.Rhythm) or not isinstance(reference_rhythm, thebeat.music.Rhythm): raise TypeError("test_rhythm and reference_rhythm must be of type Rhythm") test_string = thebeat.helpers.rhythm_to_binary(rhythm=test_rhythm, smallest_note_value=smallest_note_value) reference_string = thebeat.helpers.rhythm_to_binary(rhythm=reference_rhythm, smallest_note_value=smallest_note_value) # calculate edit distance edit_distance = Levenshtein.distance(test_string, reference_string) return edit_distance
[docs]def edit_distance_sequence(test_sequence: thebeat.core.Sequence, reference_sequence: thebeat.core.Sequence, resolution: int) -> float: """ Calculates the edit/Levenshtein distance between two sequences. If Sequences are not quantized to ``resolution``, they will be quantized to that resolution first. Note ---- The resolution also represents the underlying grid. If, for example, the resolution is 50, that means that a grid will be created with steps of 50. The onsets of the sequence are then placed on the grid for both sequences. The resulting sequences consist of ones and zeros, where ones represent the event onsets. This string for ``test_sequence`` is compared to the string of the ``reference_sequence``. Note that ``test_sequence`` and ``reference_sequence`` can be interchanged without an effect on the results. Parameters ---------- test_sequence The sequence to be tested. reference_sequence The sequence to which ``test_sequence`` will be compared. resolution The resolution to which the sequences will be quantized. If the sequences are already quantized to this resolution, they will not be quantized again. """ if not isinstance(test_sequence, thebeat.core.Sequence) or not isinstance(reference_sequence, thebeat.core.Sequence): raise TypeError("test_sequence and reference_sequence must be of type Sequence") # Check whether we need to quantize the sequences if np.any(test_sequence.onsets % resolution != 0): warnings.warn(f"Test sequence has been quantized to onsets that are multiples of {resolution}.") test_sequence.quantize(to=resolution) if np.any(reference_sequence.onsets % resolution != 0): warnings.warn(f"Reference sequence has been quantized to onsets that are multiples of {resolution}.") reference_sequence.quantize(to=resolution) test_string = thebeat.helpers.sequence_to_binary(sequence=test_sequence, resolution=resolution) reference_string = thebeat.helpers.sequence_to_binary(sequence=reference_sequence, resolution=resolution) # calculate edit distance edit_distance = Levenshtein.distance(test_string, reference_string) return edit_distance
[docs]def fft_plot(sequence: thebeat.core.Sequence, unit_size: float, x_min: float = 0, x_max: Optional[float] = None, style: str = 'seaborn-v0_8', title: str = 'Fourier transform', x_axis_label: str = 'Cycles per unit', y_axis_label: str = 'Absolute power', figsize: Optional[tuple] = None, dpi: int = 100, ax: Optional[plt.Axes] = None, suppress_display: bool = False) -> tuple[plt.Figure, plt.Axes]: """ Plots the Fourier transform of a :class:`~thebeat.core.Sequence` object. The ``unit_size`` parameter is required, because Sequence objects are agnostic about the used time unit. You can use 1000 if the Sequence is in milliseconds, and 1 if the Sequence is in seconds. Note that the first frame is discarded since it will always have the highest power, yet is not informative. Parameters ---------- sequence The sequence. unit_size The size of the unit in which the sequence is measured. If the sequence is in milliseconds, you probably want 1000. If the sequence is in seconds, you probably want 1. x_min The minimum number of cycles per unit to be plotted. x_max The maximum number of cycles per unit to be plotted. style The matplotlib style to use. See `matplotlib style reference <https://matplotlib.org/stable/gallery/style_sheets/style_sheets_reference.html>`_. title The title of the plot. x_axis_label The label of the x axis. y_axis_label The label of the y axis. figsize A tuple containing the desired output size of the plot in inches, e.g. ``(4, 1)``. This refers to the ``figsize`` parameter in :func:`matplotlib.pyplot.figure`. dpi The resolution of the plot in dots per inch. ax A matplotlib Axes object to plot on. If not provided, a new figure and axes will be created. suppress_display If True, the plot will not be displayed. Returns ------- fig The matplotlib Figure object. ax The matplotlib Axes object. Examples -------- >>> from thebeat import Sequence >>> from thebeat.stats import fft_plot >>> seq = Sequence.generate_random_normal(n_events=100, mu=500, sigma=25) # milliseconds >>> fft_plot(seq, unit_size=1000) (<Figure size 800x550 with 1 Axes>, <AxesSubplot: title={'center': 'Fourier transform'}, xlabel='Cycles per unit', ylabel='Absolute power'>) >>> seq = Sequence.generate_random_normal(n_events=100, mu=0.5, sigma=0.025) # seconds >>> fft_plot(seq, unit_size=1, x_max=5) (<Figure size 800x550 with 1 Axes>, <AxesSubplot: title={'center': 'Fourier transform'}, xlabel='Cycles per unit', ylabel='Absolute power'>) """ # Calculate step size step_size = unit_size / 1000 # Make a sequence of ones and zeroes timeseries = sequence_to_binary(sequence, resolution=step_size) duration = np.max(sequence.onsets) x_length = np.ceil(duration / step_size).astype(int) # Do the fft yf = rfft(timeseries)[1:] xf = rfftfreq(x_length, d=step_size)[1:] * (step_size / 0.001) # Calculate reasonable max_freq max_freq_index = np.min(np.where(xf > x_max)) if x_max else len(xf) / 10 yf = yf[:int(max_freq_index)] xf = xf[:int(max_freq_index)] # Plot with plt.style.context(style): if ax is None: fig, ax = plt.subplots(figsize=figsize, tight_layout=True, dpi=dpi) ax_provided = False else: fig = ax.get_figure() ax_provided = True ax.plot(xf, np.abs(yf)) ax.set_xlabel(x_axis_label) ax.set_ylabel(y_axis_label) ax.set_xlim(x_min, None) ax.set_title(title) if not suppress_display and ax_provided is False: fig.show() return fig, ax
[docs]def ks_test(sequence: thebeat.core.Sequence, reference_distribution: str = 'normal', alternative: str = 'two-sided'): """ This function returns the `D` statistic and `p` value of a one-sample Kolmogorov-Smirnov test. It calculates how different the distribution of inter-onset intervals (IOIs) is compared to the provided reference distribution. If `p` is significant it means that the IOIs are not distributed according to the provided reference distribution. Parameters ---------- sequence A :class:`~thebeat.core.Sequence` object. reference_distribution Either 'normal' or 'uniform'. The distribution against which the distribution of inter-onset intervals (IOIs) is compared. alternative Either ‘two-sided’, ‘less’ or ‘greater’. See :func:`scipy.stats.kstest` for more information. Returns ------- :class:`scipy.stats._stats_py.KstestResult` A SciPy named tuple containing the `D` statistic and the `p` value. Notes ----- This function uses :func:`scipy.stats.kstest`. For more information about the use of the Kolmogorov-Smirnov test in rhythm research, see :cite:t:`jadoulSeekingTemporalPredictability2016` and :cite:t:`ravignaniMeasuringRhythmicComplexity2017`. Examples -------- >>> rng = np.random.default_rng(seed=123) >>> seq = thebeat.core.Sequence.generate_random_normal(n_events=100,mu=500,sigma=25,rng=rng) >>> print(ks_test(seq)) KstestResult(statistic=0.07176677141846549, pvalue=0.6608009345687911) """ sequence = sequence.iois if reference_distribution == 'normal': mean = np.mean(sequence) sd = np.std(sequence) dist = scipy.stats.norm(loc=mean, scale=sd).cdf return scipy.stats.kstest(sequence, dist, alternative=alternative) elif reference_distribution == 'uniform': a = min(sequence) b = max(sequence) scale = b - a dist = scipy.stats.uniform(loc=a, scale=scale).cdf return scipy.stats.kstest(sequence, dist, alternative=alternative) else: raise ValueError("Unknown distribution. Choose 'normal' or 'uniform'.")
[docs]def get_rhythmic_entropy(sequence: Union[thebeat.core.Sequence, thebeat.music.Rhythm], bin_fraction: float = 0.03125): """ Calculate Shannon entropy from bins. This is a measure of rhythmic complexity. If many different 'note durations' are present, entropy is high. If only a few are present, entropy is low. A sequence that is completely isochronous has a Shannon entropy of 0. The bin size is determined from the average inter-onset interval in the :py:class:`thebeat.core.Sequence` object (i.e. the tempo) and the ``bin_fraction``. The ``bin_fraction`` corresponds to temporal sensitivity. The default is 1/32th of the average IOI. This implies that the smallest note value that can be detected is a 1/32th note. Parameters ---------- sequence The :py:class:`thebeat.core.Sequence` object for which Shannon entropy is calculated. bin_fraction The fraction of the average inter-onset interval (IOI) that determines the bin size. It is multiplied by the average IOI to get the bin size. Example ------- A :py:class:`~thebeat.core.Sequence` has an average IOI of 500 ms. With a bin_fraction of 0.03125 (corresponding to 1/32th note value) the bins will have a size of 15.625 ms. The entropy will be calculated from the number of IOIs in each bin. References ---------- #todo add reference here for this type of entropy calculation. """ bin_size = np.mean(sequence.iois) * bin_fraction bins = np.arange(0, np.max(sequence.iois) + 2 * bin_size, bin_size) - bin_size / 2 # shift bins to center bin_counts = np.histogram(sequence.iois, bins=bins)[0] return scipy.stats.entropy(bin_counts)
[docs]def get_cov(sequence: thebeat.core.Sequence) -> np.float64: """ Calculate the coefficient of variantion of the inter-onset intervals (IOIS) in a :py:class:`thebeat.core.Sequence` object. Parameters ---------- sequence A :py:class:`thebeat.core.Sequence` object. Returns ------- float The covariance of the sequence. """ return np.float64(np.std(sequence.iois) / np.mean(sequence.iois))
[docs]def get_npvi(sequence: thebeat.core.Sequence) -> np.float64: """ This function calculates the normalized pairwise variability index (nPVI) for a provided :py:class:`Sequence` or :py:class:`SoundSequence` object, or for an interable of inter-onset intervals (IOIs). Parameters ---------- sequence Either a :py:class:`Sequence` or :py:class:`SoundSequence` object, or an iterable containing inter-onset intervals (IOIs). Returns ------- :class:`numpy.float64` The nPVI for the provided sequence. Notes ----- The normalied pairwise variability index (nPVI) is a measure of the variability of adjacent temporal intervals. The nPVI is zero for sequences that are perfectly isochronous. See :cite:t:`jadoulSeekingTemporalPredictability2016` and :cite:t:`ravignaniMeasuringRhythmicComplexity2017` for more information on its use in rhythm research. Examples -------- >>> seq = thebeat.core.Sequence.generate_isochronous(n_events=10, ioi=500) >>> print(get_npvi(seq)) 0.0 >>> rng = np.random.default_rng(seed=123) >>> seq = thebeat.core.Sequence.generate_random_normal(n_events=10,mu=500,sigma=50,rng=rng) >>> print(get_npvi(seq)) 37.6263174529546 """ if isinstance(sequence, (thebeat.core.Sequence, thebeat.core.SoundSequence)): iois = sequence.iois else: iois = np.array(sequence) npvi_values = [] for i in range(1, len(iois)): diff = iois[i] - iois[i - 1] mean = np.mean(iois[i] + iois[i - 1]) npvi_values.append(np.abs(diff / mean)) npvi = np.mean(npvi_values) * (100 * (len(iois) - 1)) return np.float64(npvi)
[docs]def get_ugof_isochronous(test_sequence: thebeat.core.Sequence, reference_ioi: float, output_statistic: str = 'mean') -> np.float64: r""" This function calculates the universal goodness of fit (``ugof``) measure. The ``ugof`` statistic quantifies how well a theoretical sequence describes a sequence at hand (the ``test_sequence``). This function can only calculate ``ugof`` using a theoretical sequence that is isochronous. The ``reference_ioi`` is the IOI of an isochronous theoretical sequence. Parameters ---------- test_sequence A :py:class:`~thebeat.core.Sequence` or :py:class:`~thebeat.core.SoundSequence` object that will be compared to ``reference_sequence``. reference_ioi A number (float or int) representing the IOI of an isochronous sequence. output_statistic Either 'mean' (the default) or 'median'. This determines whether for the individual ugof values we take the mean or the median as the output statistic. Returns ------- :class:`numpy.float64` The ugof statistic. Notes ----- This measure is described in :cite:t:`burchardtNovelIdeasFurther2021`. Please also refer to `this Github page <https://github.com/LSBurchardt/R_app_rhythm>`_ for an R implementation of the *ugof* measure. Examples -------- >>> seq = thebeat.core.Sequence.generate_isochronous(n_events=10, ioi=1000) >>> ugof = get_ugof_isochronous(seq, reference_ioi=68.21) >>> print(ugof) 0.41817915 """ # Input validation and getting onsets for test sequence if not isinstance(test_sequence, thebeat.core.sequence.Sequence): raise TypeError('test_sequence must be a Sequence object') test_onsets = test_sequence.onsets # Input validation and getting onsets for reference sequence if not isinstance(reference_ioi, (int, float)): raise TypeError('reference_sequence must be a number (int or float)') reference_onsets = np.arange(start=0, stop=np.max(test_onsets) + reference_ioi + 1, step=reference_ioi) # For each onset, get the closest theoretical beat and get the absolute difference minimal_deviations = np.min(np.abs(test_onsets[:, None] - reference_onsets), axis=1) maximal_deviation = reference_ioi / 2 # calculate ugofs ugof_values = minimal_deviations / maximal_deviation if output_statistic == 'mean': return np.float32(np.mean(ugof_values)) elif output_statistic == 'median': return np.float32(np.median(ugof_values)) else: raise ValueError("The output statistic can only be 'median' or 'mean'.")