From c40f06209ba4bb2c35382873eef6656d6b014f35 Mon Sep 17 00:00:00 2001
From: tuomaseerola
Date: Fri, 5 Apr 2024 15:20:28 +0100
Subject: [PATCH] fix site

---
 docs/Chapter1.qmd       |   28 +
 docs/Chapter10.1.qmd    |  183 ++++
 docs/Chapter10.2.qmd    |  235 ++++
 docs/Chapter10.3.qmd    |  119 ++++
 docs/Chapter10.4.qmd    |   54 ++
 docs/Chapter10.5.qmd    |  170 ++++
 docs/Chapter11.3.qmd    |   44 ++
 docs/Chapter3.qmd       |   73 +++
 docs/Chapter4.qmd       |  142 +++++
 docs/Chapter6.1.qmd     |  129 +++++
 docs/Chapter6.2.qmd     |   97 ++++
 docs/Chapter6.3.qmd     |  293 ++++
 docs/Chapter6.4.qmd     |  133 +++++
 docs/Chapter7.qmd       |  180 ++++
 docs/Chapter9.1.qmd     |  107 ++++
 docs/Chapter9.2.qmd     |  264 +++++
 docs/Chapter9.3.qmd     |   87 +++
 docs/Chapter9.4.qmd     |  123 ++++
 docs/Chapter9.5.qmd     |   83 +++
 docs/Citation.qmd       |   41 ++
 docs/Links.qmd          |   46 ++
 docs/Technical.qmd      |   32 ++
 docs/Version_Python.qmd |   25 +
 docs/Version_R.qmd      |   43 ++
 docs/index.html         |  190 +------
 docs/robots.txt         |    1 +
 docs/search.json        | 1177 ---------------------------------------
 docs/sitemap.xml        |    7 +
 28 files changed, 2768 insertions(+), 1338 deletions(-)
 create mode 100644 docs/Chapter1.qmd
 create mode 100644 docs/Chapter10.1.qmd
 create mode 100644 docs/Chapter10.2.qmd
 create mode 100644 docs/Chapter10.3.qmd
 create mode 100644 docs/Chapter10.4.qmd
 create mode 100644 docs/Chapter10.5.qmd
 create mode 100644 docs/Chapter11.3.qmd
 create mode 100644 docs/Chapter3.qmd
 create mode 100644 docs/Chapter4.qmd
 create mode 100644 docs/Chapter6.1.qmd
 create mode 100644 docs/Chapter6.2.qmd
 create mode 100644 docs/Chapter6.3.qmd
 create mode 100644 docs/Chapter6.4.qmd
 create mode 100644 docs/Chapter7.qmd
 create mode 100644 docs/Chapter9.1.qmd
 create mode 100644 docs/Chapter9.2.qmd
 create mode 100644 docs/Chapter9.3.qmd
 create mode 100644 docs/Chapter9.4.qmd
 create mode 100644 docs/Chapter9.5.qmd
 create mode 100755 docs/Citation.qmd
 create mode 100644 docs/Links.qmd
 create mode 100755 docs/Technical.qmd
 create mode 100644 docs/Version_Python.qmd
 create mode 100644 docs/Version_R.qmd
 create mode 100644 docs/robots.txt
 create mode 100644 docs/sitemap.xml

diff --git a/docs/Chapter1.qmd b/docs/Chapter1.qmd
new file mode 100644
index 0000000..155862e
--- /dev/null
+++ b/docs/Chapter1.qmd
@@ -0,0 +1,28 @@

Open In Colab

# Ch. 1 – Notebook basics

This first notebook is a simple demonstration of running R in a notebook: it calculates the correlation between ratings of energy and tension from an existing dataset.

## Preliminaries

To install the `MusicScienceData` package that contains several example datasets used in this book, run the following command.

```{r}
#| eval: false
#if (!require(devtools)) install.packages("devtools",quiet=TRUE)
devtools::install_github("tuomaseerola/MusicScienceData",quiet=TRUE)
```

## Code 1.1

This is the first R code example. It demonstrates loading a package that contains datasets, choosing one dataset, and then calculating the correlation between two rated concepts (energy and tension).

```{r}
# Code 1.1
library(MusicScienceData) # loads library w data
data <- MusicScienceData::soundtrack # pick data
cor.test(data$Energy, # calc. correlation
         data$Tension)
```

diff --git a/docs/Chapter10.1.qmd b/docs/Chapter10.1.qmd
new file mode 100644
index 0000000..3cd0cfa
--- /dev/null
+++ b/docs/Chapter10.1.qmd
@@ -0,0 +1,183 @@

# Ch. 10 – Basics (sines)

## Figure 10.1. Illustration of basic representations and transformations of audio using a 400 Hz sine wave and a complex tone consisting of 400, 600 and 1600 Hz sine waves.
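
All of the waveforms in this notebook are instances of the standard sine-wave expression that the code below implements, with amplitude $A$, frequency $f$ (in Hz), phase offset $\varphi$ (as a fraction of a cycle) and time $t$ (in s):

$$x(t) = A \sin\!\big(2\pi (f t - \varphi)\big)$$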

```{python}
#| echo: true
#| eval: true
#| label: libraries
#| code-fold: true
#| code-summary: "Show the code"
import numpy as np
from matplotlib import pyplot as plt
```

## Create sine waves

```{python}
#| echo: true
#| eval: true
#| label: onesine
#| warning: false

### Define the properties of a sine wave

frequency = 400   # Frequency (Hz)
duration = 0.01   # Duration of sound (s)
amplitude = 1.0   # Amplitude
phase = 0.75      # Phase (fraction of a cycle)
Fs = 22050        # Sampling rate (samples per second)

# Create the sine wave with the properties defined above
num_samples = int(Fs * duration)
t = np.arange(num_samples) / Fs
x = amplitude * np.sin(2 * np.pi * (frequency * t - phase))
fig, ax = plt.subplots(figsize=(7.5, 2.75))
ax.plot(t, x, color='red')
ax.set_xlabel('Time (s)')
ax.set_title("Sine (400 Hz)")
ax.set_ylabel('Air pressure deviation')
ax.set_ylim([-1.05, 1.05])
ax.set_yticks(np.arange(-1, 1.5, 1.0))
ax.set_xlim([0.0, 0.01])
ax.set_xticks(np.arange(0, 0.0125, 0.0025))
ax.grid()
ax.annotate('', xy=(0.0025, 0), xytext=(0.0025, 1),
            arrowprops=dict(arrowstyle='<->', mutation_scale=15,
                            color='0.3'), size=2)
ax.text(0.0025, 0.5, "Amplitude", size=12,
        color='0.3', ha="center", va="center")
ax.annotate('', xy=(0, 1), xytext=(0.0025, 1),
            arrowprops=dict(arrowstyle='<->', mutation_scale=19,
                            color='0.3'), size=2)
ax.text(0.00125, 0.85, "Period", size=12,
        color='0.3', ha="center", va="center")

plt.show()
```

## Complex sounds

Let's combine sine waves of different frequencies (400, 600 and 1600 Hz).

```{python}
#| echo: true
#| eval: true
#| label: threesines
#| warning: false
#| code-fold: true
#| code-summary: "Show the code"

import numpy as np
from matplotlib import pyplot as plt

fig = plt.figure()
fig.set_figheight(6)
fig.set_figwidth(10)

ax1 = plt.subplot2grid(shape=(6, 3), loc=(0, 1), colspan=2, rowspan=3)
ax3 = plt.subplot2grid(shape=(6, 3), loc=(3, 1), colspan=2, rowspan=3)
ax2 = plt.subplot2grid(shape=(6, 3), loc=(3, 0), colspan=1)
ax4 = plt.subplot2grid(shape=(6, 3), loc=(4, 0), colspan=1)
ax5 = plt.subplot2grid(shape=(6, 3), loc=(5, 0), colspan=1)

frequency = 400   # Frequency (Hz)
duration = 0.01   # Duration of sound (s)
amplitude = 1.0   # Amplitude
phase = 0.75      # Phase (fraction of a cycle)
Fs = 22050        # Sampling rate (samples per second)

num_samples = int(Fs * duration)
t = np.arange(num_samples) / Fs
x = amplitude * np.sin(2 * np.pi * (frequency * t - phase))

ax1.plot(t, x, color='red', linewidth=2.0, linestyle='-')
ax1.set_xlabel('Time (s)')
ax1.set_title("Sine (400 Hz)")
ax1.set_ylabel('Air pressure deviation')
ax1.set_ylim([-1.05, 1.05])
ax1.set_yticks(np.arange(-1, 1.5, 1.0))
ax1.set_xlim([0.0, 0.01])
ax1.set_xticks(np.arange(0, 0.0125, 0.0025))
ax1.grid()

ax1.annotate('', xy=(0.0025, 0), xytext=(0.0025, 1),
             arrowprops=dict(arrowstyle='<->',
                             mutation_scale=15, color='0.3'), size=2)
ax1.text(0.0025, 0.5, "Amplitude", size=12, color='0.3',
         ha="center", va="center")
ax1.annotate('', xy=(0, 1), xytext=(0.0025, 1),
             arrowprops=dict(arrowstyle='<->', mutation_scale=19,
                             color='0.3'), size=2)
ax1.text(0.00125, 0.85, "Period", size=12,
         color='0.3', ha="center", va="center")

# Combine several sine waves (here, three frequencies)
frequency1 = 400
frequency2 = 600
frequency3 = 1600
duration = 0.01
amplitude = 1.0
phase = 0.75
Fs = 22050

num_samples = int(Fs * duration)
t = np.arange(num_samples) / Fs
x1 = amplitude * np.sin(2 * np.pi * (frequency1 * t - phase)) # 1st sine
x2 = amplitude * np.sin(2 * np.pi * (frequency2 * t - phase)) # 2nd sine
x3 = amplitude * np.sin(2 * np.pi * (frequency3 * t - phase)) # 3rd sine

ax2.plot(t, x1, color='red')
ax4.plot(t, x2, color='red')
ax5.plot(t, x3, color='red')

ax2.set_title("400 Hz")
ax4.set_title("600 Hz")
ax5.set_title("1600 Hz")

ax2.set_xticks(np.arange(0, 0.0125, 0.0025))
ax2.set_xlim([0.0, 0.01])
ax2.set_yticks(np.arange(-1, 1.5, 1.0))

ax4.set_xticks(np.arange(0, 0.0125, 0.0025))
ax4.set_xlim([0.0, 0.01])
ax4.set_yticks(np.arange(-1, 1.5, 1.0))

ax5.set_xticks(np.arange(0, 0.0125, 0.0025))
ax5.set_xlim([0.0, 0.01])
ax5.set_yticks(np.arange(-1, 1.5, 1.0))

fig.subplots_adjust(hspace=.001, wspace=0.5)

# Combine all three (sum and divide by 3 to keep the amplitude in the original range)
x123 = (x1+x2+x3)/3

ax3.plot(t, x123, color='blue', linewidth=2.0, linestyle='-')
ax3.set_xlabel('Time (s)')
ax3.set_title("Complex tone (sines of 400 Hz + 600 Hz + 1600 Hz)")
ax3.set_ylabel('')
ax3.set_ylim([-1.01, 1.01])
ax3.set_xlim([0, 0.01])
ax3.set_xticks(np.arange(0, 0.0125, 0.0025))
ax3.set_yticks(np.arange(-1, 1.5, 1.0))
ax3.grid()
fig.tight_layout()

ax2.annotate('', xy=(1.11/100, -9.3), xytext=(1.01/100, 0),
             arrowprops=dict(width=0.5, headlength=3, headwidth=3,
                             color='0.3'), size=2, annotation_clip=False)
ax4.annotate('', xy=(1.063/100, 0), xytext=(1.01/100, 0),
             arrowprops=dict(width=0.5, headlength=3, headwidth=3,
                             color='0.3'), size=2, annotation_clip=False)
ax5.annotate('', xy=(1.11/100, 9.3), xytext=(1.01/100, 0),
             arrowprops=dict(width=0.5, headlength=3, headwidth=3,
                             color='0.3'), size=2, annotation_clip=False)
ax4.text(1.09/100, -0.6, r'$\sum$', size=9, backgroundcolor='0.8')

plt.show()

```

diff --git a/docs/Chapter10.2.qmd b/docs/Chapter10.2.qmd
new file mode 100644
index 0000000..9259eab
--- /dev/null
+++ b/docs/Chapter10.2.qmd
@@ -0,0 +1,235 @@

# Ch. 10 – Spectrum and envelope

## Figure 10.2. Spectrum and envelope of three instruments (violin, clarinet, and marimba).

The instrument samples are taken from the McGill University Master Samples (MUMS, Opolko & Wapnick, 2006), polished by Eerola and Ferrer (2008) and used in subsequent experiments (Eerola et al., 2012).

### Libraries

```{python}
#| echo: true
#| eval: true
#| label: libraries
#| code-fold: true
#| code-summary: "Show the code"
import numpy as np
import librosa
import librosa.display
from matplotlib import pyplot as plt
```

### 1. Violin properties (spectrum and envelope)

```{python}
#| echo: true
#| eval: true
#| label: violin
#| warning: false
#| code-fold: true
#| code-summary: "Show the code"

x, sr = librosa.load('data/63.wav')
stft = np.abs(librosa.stft(x))
freqs = librosa.fft_frequencies(sr=sr)

f0, voiced_flag, voiced_probs = librosa.pyin(x, fmin=librosa.note_to_hz('C2'),
                                             fmax=librosa.note_to_hz('C7'))
f = np.nanmedian(f0)      # Get the Hz of the F0 for nice labels
n = librosa.hz_to_note(f) # Convert Hz to note name
print(n)
X = np.arange(f, f*10, f)

fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(8.0, 5.0))

# 1. Spectrum of a tone
# collapse across time and plot a spectrum
Dmean = stft.mean(axis=1)/max(stft.mean(axis=1))
ax[0].plot(freqs, Dmean, color='blue')
ax[0].set_title("Violin Spectrum")
ax[0].set(xlim=[130, X.max()])
ax[0].set_ylabel("Norm. Ampl.")
ax[0].set_xlabel("Frequency (Hz)")
ax[0].grid()
ax[0].set_xticks(X)

# calculate spectral centroid and plot it
centroid = librosa.feature.spectral_centroid(y=x, sr=sr)
centroidM = centroid.mean()
print(centroidM.round(0))
centroidM_label = "Centroid " + str(int(centroidM.round(0)))+" Hz"
ax[0].annotate("", xy=(130, 0.75), xycoords='data', xytext=(centroidM, 0.75),
               arrowprops=dict(arrowstyle="<|-", connectionstyle="arc3",
                               color="0.3"), size=4)
ax[0].annotate("", xy=(centroidM, 0.75), xycoords='data',
               xytext=(X.max(), 0.75),
               arrowprops=dict(arrowstyle="-|>", connectionstyle="arc3",
                               color="0.3"), size=4)
ax[0].text(centroidM-120, 0.83, centroidM_label, size=10, color='0.2')

# Envelope
rms = librosa.feature.rms(y=x, frame_length=2048, hop_length=512)
times = librosa.times_like(rms)
ax[1].plot(times, rms[0], color='red')

librosa.display.waveshow(x, sr=sr, ax=ax[1], color='0.75', max_points=3000)
ax[1].grid()
ax[1].set(ylim=[-0.25, 0.25])
ax[1].text(0.25, 0.17, "A", size=12, color='0.2')
ax[1].text(1.20, 0.17, "S", size=12, color='0.2')
ax[1].text(1.85, 0.17, "D", size=12, color='0.2')
ax[1].annotate("", xy=(0.00, 0.15), xycoords='data', xytext=(0.50, 0.15),
               arrowprops=dict(arrowstyle="|-|", connectionstyle="arc3",
                               color='0.2'), size=4)
ax[1].annotate("", xy=(0.50, 0.15), xycoords='data', xytext=(1.79, 0.15),
               arrowprops=dict(arrowstyle="|-|", connectionstyle="arc3",
                               color='0.2'), size=4)
ax[1].annotate("", xy=(1.79, 0.15), xycoords='data', xytext=(2.0, 0.15),
               arrowprops=dict(arrowstyle="|-|", connectionstyle="arc3",
                               color='0.2'), size=4)
ax[1].set_ylabel("Amplitude")
ax[1].set_title("Violin Envelope")
ax[1].set_xlabel("Time (s)")

fig.tight_layout()
plt.show()

```


### 2. Clarinet properties (spectrum and envelope)

```{python}
#| echo: true
#| eval: true
#| label: clarinet
#| code-fold: true
#| code-summary: "Show the code"

x, sr = librosa.load('data/24.wav')
stft = np.abs(librosa.stft(x))
freqs = librosa.fft_frequencies(sr=sr)

f0, voiced_flag, voiced_probs = librosa.pyin(x, fmin=librosa.note_to_hz('C2'),
                                             fmax=librosa.note_to_hz('C7'))
f = np.nanmedian(f0)      # Get the Hz of the fundamental frequency for nice labels
n = librosa.hz_to_note(f) # Convert Hz to note name

X = np.arange(f, f*10, f)

fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(8.0, 5.0))

# collapse across time and plot a spectrum representation (energy across frequencies)
Dmean = stft.mean(axis=1)/max(stft.mean(axis=1))
ax[0].plot(freqs, Dmean, color='blue')
ax[0].set(xlim=[130, X.max()])
ax[0].set_ylabel("Norm. Ampl.")
ax[0].set_xlabel("Frequency (Hz)")
ax[0].grid()
ax[0].set_xticks(X)
ax[0].set_title("Clarinet Spectrum")

# calculate spectral centroid and plot it
centroid = librosa.feature.spectral_centroid(y=x, sr=sr)
centroidM = centroid.mean()
print(centroidM.round(0))
centroidM_label = "Centroid " + str(int(centroidM.round(0)))+" Hz"
ax[0].annotate("", xy=(130, 0.75), xycoords='data', xytext=(centroidM, 0.75),
               arrowprops=dict(arrowstyle="<|-", connectionstyle="arc3",
                               color="0.3"), size=4)
ax[0].annotate("", xy=(centroidM, 0.75), xycoords='data',
               xytext=(X.max(), 0.75),
               arrowprops=dict(arrowstyle="-|>", connectionstyle="arc3",
                               color="0.3"), size=4)
ax[0].text(centroidM-120, 0.83, centroidM_label, size=10, color='0.2')

rms = librosa.feature.rms(y=x, frame_length=2048, hop_length=512) # Extract dynamics (RMS)
times = librosa.times_like(rms)
ax[1].plot(times, rms[0], color='red')

librosa.display.waveshow(x, sr=sr, ax=ax[1], color='0.75', max_points=3000)
ax[1].grid()
ax[1].set(ylim=[-0.25, 0.25])

ax[1].text(0.00, 0.17, "A", size=12, color='0.2')
ax[1].text(0.90, 0.17, "S", size=12, color='0.2')
ax[1].text(1.85, 0.17, "D", size=12, color='0.2')
ax[1].annotate("", xy=(0.00, 0.15), xycoords='data', xytext=(0.07, 0.15),
               arrowprops=dict(arrowstyle="|-|", connectionstyle="arc3",
                               color='0.2'), size=4)
ax[1].annotate("", xy=(0.07, 0.15), xycoords='data', xytext=(1.77, 0.15),
               arrowprops=dict(arrowstyle="|-|", connectionstyle="arc3",
                               color='0.2'), size=4)
ax[1].annotate("", xy=(1.77, 0.15), xycoords='data', xytext=(2.0, 0.15),
               arrowprops=dict(arrowstyle="|-|", connectionstyle="arc3",
                               color='0.2'), size=4)
ax[1].set_ylabel("Amplitude")
ax[1].set_xlabel("Time (s)")
ax[1].set_title("Clarinet Envelope")

fig.tight_layout()
plt.show()
```


### 3. Marimba properties (spectrum and envelope)

```{python}
#| echo: true
#| eval: true
#| label: marimba
#| code-fold: true
#| code-summary: "Show the code"

x, sr = librosa.load('data/90.wav')

stft = np.abs(librosa.stft(x))
freqs = librosa.fft_frequencies(sr=sr)

f0, voiced_flag, voiced_probs = librosa.pyin(x, fmin=librosa.note_to_hz('C2'),
                                             fmax=librosa.note_to_hz('C7'))
f = np.nanmedian(f0)      # Get the Hz of the fundamental frequency for nice labels
n = librosa.hz_to_note(f) # Convert Hz to note name
print(n)
X = np.arange(f, f*10, f)

fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(8.0, 5.0))

# collapse across time and plot a spectrum representation (energy across frequencies)
Dmean = stft.mean(axis=1)/max(stft.mean(axis=1))
ax[0].plot(freqs, Dmean, color='blue')
ax[0].set_title("Marimba Spectrum")
ax[0].set(xlim=[130, X.max()])
ax[0].set_ylabel("Norm. Ampl.")
ax[0].set_xlabel("Frequency (Hz)")
ax[0].grid()
ax[0].set_xticks(X)

# calculate spectral centroid and plot it
centroid = librosa.feature.spectral_centroid(y=x, sr=sr)
centroidM = centroid.mean()
print(centroidM.round(0))
centroidM_label = "Centroid " + str(int(centroidM.round(0)))+" Hz"
ax[0].annotate("", xy=(130, 0.75), xycoords='data', xytext=(centroidM, 0.75),
               arrowprops=dict(arrowstyle="<|-", connectionstyle="arc3",
                               color="0.3"), size=4)
ax[0].annotate("", xy=(centroidM, 0.75), xycoords='data',
               xytext=(X.max(), 0.75),
               arrowprops=dict(arrowstyle="-|>", connectionstyle="arc3",
                               color="0.3"), size=4)
ax[0].text(centroidM-120, 0.83, centroidM_label, size=10, color='0.2')

rms = librosa.feature.rms(y=x, frame_length=2048, hop_length=512)
times = librosa.times_like(rms)
ax[1].plot(times, rms[0], color='red')
librosa.display.waveshow(x, sr=sr, ax=ax[1], color='0.75', max_points=3000)
ax[1].grid()
ax[1].set(ylim=[-0.25, 0.25])
ax[1].set(xlim=[0, 0.70])

ax[1].text(0.00, 0.17, "A", size=12, color='0.2')
ax[1].text(0.09, 0.17, "S", size=12, color='0.2')
ax[1].text(0.40, 0.17, "D", size=12, color='0.2')
ax[1].annotate("", xy=(0.00, 0.15), xycoords='data', xytext=(0.01, 0.15),
               arrowprops=dict(arrowstyle="|-|", connectionstyle="arc3",
                               color='0.2'), size=4)
ax[1].annotate("", xy=(0.01, 0.15), xycoords='data', xytext=(0.18, 0.15),
               arrowprops=dict(arrowstyle="|-|", connectionstyle="arc3",
                               color='0.2'), size=4)
ax[1].annotate("", xy=(0.18, 0.15), xycoords='data', xytext=(0.70, 0.15),
               arrowprops=dict(arrowstyle="|-|", connectionstyle="arc3",
                               color='0.2'), size=4)
ax[1].set_ylabel("Amplitude")
ax[1].set_title("Marimba Envelope")
ax[1].set_xlabel("Time (s)")

fig.tight_layout()
plt.show()
```

### References

- Eerola, T., Ferrer, R., & Alluri, V. (2012). Timbre and affect dimensions: Evidence from affect and similarity ratings and acoustic correlates of isolated instrument sounds. _Music Perception, 30(1)_, 49–70. https://doi.org/10.1525/mp.2012.30.1.49
- Eerola, T., & Ferrer, R. (2008). Instrument Library (MUMS) Revised. _Music Perception, 25(3)_, 253–255. http://caliber.ucpress.net/doi/abs/10.1525/mp.2008.25.3.253
- Opolko, F., & Wapnick, J. (2006). _The McGill University master samples collection on DVD (3 DVDs)_. Quebec, Canada: McGill University.

diff --git a/docs/Chapter10.3.qmd b/docs/Chapter10.3.qmd
new file mode 100644
index 0000000..4687de0
--- /dev/null
+++ b/docs/Chapter10.3.qmd
@@ -0,0 +1,119 @@

# Ch. 10 – Physical

## Figure 10.3. Waveform, loudness, and onset strengths (black curve) and estimated beats (dashed lines) of the intro to Help! by The Beatles.

### Libraries

```{python}
#| echo: true
#| eval: true
#| label: libraries
#| code-fold: true
#| code-summary: "Show the code"
import numpy as np
import librosa
import librosa.display
import IPython.display as ipd
from matplotlib import pyplot as plt
```

### 1. Read audio file

```{python}
#| echo: true
#| eval: true
#| label: read-audio
#| warning: false
x, sr = librosa.load('data/help.mp3', offset=1.05, duration=10.087)
ipd.display(ipd.Audio(data=x, rate=sr))
```

### 2. Plot waveform

```{python}
#| echo: true
#| eval: true
#| label: plot-waveform

fig, ax = plt.subplots(nrows=1, figsize=(7.5, 2.75))
librosa.display.waveshow(x, sr=sr, ax=ax, color='indigo')

ax.set_title("Waveform")
ax.set_xlabel("Time (s)")
ax.set_ylabel("Amplitude")
ax.set_xticks(range(0, 11, 1))
ax.set_xlim([0, 10])
ax.grid()
fig.tight_layout()
plt.show()
```

### 3. Plot loudness
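
Loudness is approximated here from the RMS energy of the signal, converted to decibels relative to the loudest frame. With `ref=np.max`, `librosa.amplitude_to_db` computes

$$L = 20 \log_{10}\!\frac{A}{A_{\max}},$$

so the loudest point of the excerpt sits at 0 dB and everything quieter is negative, as the comment in the code below notes.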

```{python}
#| echo: true
#| eval: true
#| label: features
fig, ax = plt.subplots(nrows=1, figsize=(7.5, 2.75))
rms = librosa.feature.rms(y=x)                 # Extract dynamics (RMS)
db = librosa.amplitude_to_db(rms, ref=np.max)  # Convert into dB. Note that this is a relative measure (loudest is now 0)
times = librosa.times_like(rms)

ax.plot(times, db[0], color='darkblue')
ax.set_title("Loudness")
ax.set_ylim([-15, 0])
ax.set_ylabel("Decibel")
ax.set_xlabel("Time (s)")
ax.set_xticks(range(0, 11, 1))
ax.set_xlim([0, 10])
ax.grid()
fig.tight_layout()
plt.show()
```

### 4. Plot onset strength, estimated and annotated beats

```{python}
#| echo: true
#| eval: true
#| label: onsets

fig, ax = plt.subplots(nrows=1, figsize=(7.5, 2.75))

# Onset strength summed across four frequency sub-bands
onset_subbands = librosa.onset.onset_strength_multi(y=x,
                                                    sr=sr,
                                                    channels=[0, 32, 64, 96, 128])
onset_subbands_s = onset_subbands.sum(axis=0)
times = librosa.times_like(onset_subbands_s, sr=sr)
ax.plot(times, onset_subbands_s, 'maroon')

# Estimate the beats and mark them with dashed lines
tempo, beats = librosa.beat.beat_track(y=x, sr=sr, trim=False)
ax.vlines(times[beats], 0, onset_subbands_s.max(), color='0.40', alpha=0.80,
          linestyle='--', label='Beats')

ax.set_title("Onset strength and estimated beats")
ax.set_ylabel("Strength")
ax.set_xlabel("Time (s)")
ax.set_ylim([0, 60])
ax.set_xticks(range(0, 11, 1))
ax.set_xlim([0, 10])
ax.grid()
fig.tight_layout()

# Overlay the annotated beats (from a CSV file) as numbered labels
data = np.loadtxt('data/Help_beats.csv')
ann_time = data[0:16, 0]-1.05
ann_label = data[0:16, 1]
for i in range(16):
    ax.text(ann_time[i], 53, int(ann_label[i]), size=6,
            backgroundcolor='0.8', weight='bold', ha='center')

plt.show()

```

diff --git a/docs/Chapter10.4.qmd b/docs/Chapter10.4.qmd
new file mode 100644
index 0000000..0a1b4c8
--- /dev/null
+++ b/docs/Chapter10.4.qmd
@@ -0,0 +1,54 @@

# Ch. 10 – Perceptual

## Figure 10.4. Auditory nerve fibre model and inner hair cell spiking for the earlier example (a complex tone consisting of 400 + 600 + 1600 Hz sine waves).

For more information, see the [Brian documentation](https://brian.readthedocs.io/en/stable/).

### Libraries

```{python}
#| echo: true
#| eval: true
#| label: libraries
#| code-fold: true
#| code-summary: "Show the code"
from brian2 import *
from brian2hears import *
import matplotlib.pyplot as plt
```


```{python}
#| echo: true
#| eval: true
#| label: brian

sound = loadsound('data/400_600_1600_hz.wav')

# Inner hair cell model
cfmin, cfmax, cfN = 20*Hz, 20*kHz, 3000
cf = erbspace(cfmin, cfmax, cfN)
gfb = Gammatone(sound, cf)
ihc = FunctionFilterbank(gfb, lambda x: 3*clip(x, 0, Inf)**(1.0/3.0))
# Leaky integrate-and-fire model with noise and refractoriness
eqs = '''
dv/dt = (I-v)/(1*ms)+0.2*xi*(2/(1*ms))**.5 : 1 (unless refractory)
I : 1
'''
G = FilterbankGroup(ihc, 'I', eqs, reset='v=0', threshold='v>1', refractory=5*ms)
# Run, and collect the spikes for a raster plot
M = SpikeMonitor(G)
run(sound.duration)

# Plot the results
fig, ax = plt.subplots(figsize=(8.0, 4.0))
ax.plot(M.t/ms, M.i, '.', alpha=0.5, color='tab:blue', ms=3)
ax.set_xlabel('Time (ms)')
ax.set_ylabel('Neuron number (inner hair cell)')
ax.set_ylim(0, 2000)
plt.show()

```

diff --git a/docs/Chapter10.5.qmd b/docs/Chapter10.5.qmd
new file mode 100644
index 0000000..54efee4
--- /dev/null
+++ b/docs/Chapter10.5.qmd
@@ -0,0 +1,170 @@

# Ch. 10 – Semantic

## Figure 10.6. Visualisation of the features of two songs (Help! and Yesterday) by the Beatles within the context of 500 other tracks from 1964-1966 using four high-level features retrieved from Spotify.

This code requires that the user supplies their own `client_id` and `client_secret`, which can be obtained after registering for a developer account for the Spotify API. The code will not run without these. However, the visualisation part of the code works with the saved data (`data/top_n_track_features2.csv`).

```{python}
#| echo: true
#| eval: false
#| label: libraries
#| code-fold: true
#| code-summary: "Show the code"
import pandas as pd
import numpy as np
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
```

## 1. Retrieve Spotify features

```{python}
#| echo: true
#| eval: false
#| label: retrieve
#| warning: false


client_id = 'YOUR_CLIENT_ID_HERE'
client_secret = 'YOUR_SECRET_KEY_HERE'

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id,
                                                           client_secret=client_secret))

results = sp.search(q='The Beatles', limit=20)
for idx, track in enumerate(results['tracks']['items']):
    print(idx, track['name'])

track = results['tracks']['items'][18] # Help! is at index 18
print(track['name'])
print(track['href'])
print(track['popularity'])
print("===========PREVIEW===========")
print(track['preview_url'])
print("===========PREVIEW===========")

a = sp.audio_features(track['id'])

print(a[0]['valence'])      # Help!: 0.763, Yesterday: 0.315
print(a[0]['energy'])       # Help!: 0.725, Yesterday: 0.179
print(a[0]['tempo'])        # Help!: 95.003, Yesterday: 96.53
print(a[0]['loudness'])     # Help!: -7.576, Yesterday: -11.83
print(a[0]['acousticness']) # Help!: 0.188, Yesterday: 0.879

print("===========GENRE===========")
name = []
Tid = []
valence = []
energy = []
tempo = []
loudness = []
instrumentalness = []
acousticness = []
danceability = []

# get 500 tracks, 10 at a time
offset_vals = np.arange(1, 500, 10)

for i in range(50):
    results = sp.search(q='genre:pop & year:1964-1966', limit=10,
                        offset=offset_vals[i])
    for idx, track in enumerate(results['tracks']['items']):
        name.append(track['name'])
        Tid.append(track['id'])
        a = sp.audio_features(track['id'])
        valence.append(a[0]['valence'])
        energy.append(a[0]['energy'])
        instrumentalness.append(a[0]['instrumentalness'])
        acousticness.append(a[0]['acousticness'])
        danceability.append(a[0]['danceability'])
        tempo.append(a[0]['tempo'])
        loudness.append(a[0]['loudness'])
        print(i, ':', idx)

# Store in data frame and save to a file
df = pd.DataFrame({'valence': valence, 'energy': energy, 'tempo': tempo,
                   'acousticness': acousticness,
                   'loudness': loudness, 'id': Tid})
df.to_csv('data/top_n_track_features2.csv')

```


## 2. Visualise

```{python}
#| echo: true
#| eval: true
#| label: visualise
#| warning: false

import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt


# Get data (from a previous process)
d = pd.read_csv('data/top_n_track_features2.csv')

# set graphic (seaborn) theme
sns.set_theme()
sns.set_style("whitegrid")

fig = plt.figure()
fig.set_figheight(8)
fig.set_figwidth(9)

# Define multiple plots
ax1 = plt.subplot2grid(shape=(2, 2), loc=(0, 0))
ax2 = plt.subplot2grid(shape=(2, 2), loc=(0, 1))
ax3 = plt.subplot2grid(shape=(2, 2), loc=(1, 0))
ax4 = plt.subplot2grid(shape=(2, 2), loc=(1, 1))

sns.histplot(x='valence', data=d, color='blue', ax=ax1)
ax1.set_xlabel('Valence (0-1)')
ax1.axes.axvline(0.763, color='red', linewidth=2, alpha=.7)
ax1.text(0.763, ax1.get_ylim()[1], "Help!", size=12, backgroundcolor='0.9',
         ha="center", va="top", alpha=0.85)
ax1.axes.axvline(0.315, color='green', linewidth=2, alpha=.7)
ax1.text(0.315, ax1.get_ylim()[1], "Yesterday", size=12, backgroundcolor='0.9',
         ha="center", va="top", alpha=0.85)
ax1.set_xlim([0, 1])
ax1.set_xticks(np.arange(0, 1.1, 0.20))

sns.histplot(x='energy', data=d, color='blue', ax=ax2)
ax2.set_xlabel('Energy (0-1)')
ax2.axes.axvline(0.725, color='red', linewidth=2, alpha=.7)
ax2.text(0.725, ax2.get_ylim()[1], "Help!", size=12, backgroundcolor='0.9',
         ha="center", va="top", alpha=0.85)
ax2.axes.axvline(0.179, color='green', linewidth=2, alpha=.7)
ax2.text(0.179, ax2.get_ylim()[1], "Yesterday", size=12, backgroundcolor='0.9',
         ha="center", va="top", alpha=0.85)
ax2.set_xlim([0, 1])
ax2.set_xticks(np.arange(0, 1.1, 0.20))

sns.histplot(x='tempo', data=d, color='blue', ax=ax3)
ax3.set_xlabel('Tempo (BPM)')
ax3.axes.axvline(95, color='red', linewidth=2, alpha=.7)
ax3.text(90, ax3.get_ylim()[1], "Help!", size=12, backgroundcolor='0.9',
         ha="right", va="top", alpha=0.85)
ax3.axes.axvline(96.5, color='green', linewidth=2, alpha=.7)
ax3.text(100, ax3.get_ylim()[1], "Yesterday", size=12, backgroundcolor='0.9',
         ha="left", va="top", alpha=0.85)

sns.histplot(x='acousticness', data=d, color='blue', ax=ax4)
ax4.set_xlabel('Acousticness (0-1)')
ax4.axes.axvline(0.188, color='red', linewidth=2, alpha=.7)
ax4.text(0.188, ax4.get_ylim()[1], "Help!", size=12, backgroundcolor='0.9',
         ha="center", va="top", alpha=0.85)
ax4.axes.axvline(0.879, color='green', linewidth=2, alpha=.7)
ax4.text(0.879, ax4.get_ylim()[1], "Yesterday", size=12, backgroundcolor='0.9',
         ha="center", va="top", alpha=0.85)
ax4.set_xlim([0, 1])
ax4.set_xticks(np.arange(0, 1.1, 0.20))

fig.tight_layout()
plt.show()

```
\ No newline at end of file

diff --git a/docs/Chapter11.3.qmd b/docs/Chapter11.3.qmd
new file mode 100644
index 0000000..1eef46b
--- /dev/null
+++ b/docs/Chapter11.3.qmd
@@ -0,0 +1,44 @@

# Ch. 11 – Synchrony

A corpus analysis example of onsets.

### Load libraries

```{r}
#| eval: true
#| echo: false
#| label: libraries
#| warning: false
library(onsetsync) # to handle onsets
library(dplyr)     # to handle summaries
```

```{r}
#| eval: false
#| echo: true
#| label: libraries2
library(onsetsync) # to handle onsets
library(dplyr)     # to handle summaries
```

### Get Cuban Salsa and Son materials

These are built into the `onsetsync` package and come from the [IEMP](https://osf.io/37fws/) collection.
The code runs an analysis of asynchrony across different Cuban Salsa and Son tracks (five in total) and creates a table of the Bass asynchronies with the Guitar and Tres (in milliseconds).

```{r}
#| eval: true
#| echo: true
#| label: onsetsynccorpus
#| output: asis
#| warning: false
#| message: false
corpus <- onsetsync::CSS_IEMP # Cuban Salsa & Son
D <- sync_sample_paired(corpus, 'Bass', 'Guitar', 0, 1, 'SD')
RES <- summarise(group_by(D$asynch, name), M = mean(asynch*1000))
D2 <- sync_sample_paired(corpus, 'Bass', 'Tres', 0, 1, 'SD')
RES2 <- summarise(group_by(D2$asynch, name), M = mean(asynch*1000))
names(RES)[2] <- 'Bass - Guitar (in ms)' # rename for clarity
RES$`Bass - Tres (in ms)` <- RES2$M      # add the Tres asynchronies
print(knitr::kable(RES, digits = 1))     # create table
```

diff --git a/docs/Chapter3.qmd b/docs/Chapter3.qmd
new file mode 100644
index 0000000..d7b72ed
--- /dev/null
+++ b/docs/Chapter3.qmd
@@ -0,0 +1,73 @@

# Ch. 3 – Historic profiles

## Load libraries

Load or install the necessary R packages.

```{r}
#| echo: true
#| eval: false
#| warning: false
#| message: false
#| label: libraries

if (!require(devtools)) install.packages("devtools",
                                         repos = "http://cran.us.r-project.org")
devtools::install_github("tuomaseerola/inconMore")
```

```{r}
#| echo: true
#| eval: true
#| warning: false
#| message: false
#| label: libraries2
library(inconMore)
library(ggplot2, quietly = TRUE)
library(tidyverse, quietly = TRUE)
options(repr.plot.width = 6, repr.plot.height = 4) # Default plot size for Colab
```

## Code 3.1

Frequency of intervals in Bach sinfonias (bars) and ratings of consonance of the intervals (lines, from Bowling, Purves & Gill, 2018). Interval frequencies recreated from Huron (2001).

```{r}
#| echo: true
#| eval: true
#| label: counts

IV <- c("P1","m2","M2","m3","M3","P4","TT","P5","m6","M6","m7","M7","P8")
Frequency <- c(15,7,26,87,58,50,35,52,65,88,32,3,23)/100 # approx. from Huron 2001, p. 20
library(inconMore)     # Let's use more recent data
a <- inconMore::bowl18 # Bowling et al., 2018 ratings for 12 intervals
Consonance <- scales::rescale(c(NA, a$rating[1:12]), to = c(0,1)) # No unison
df <- data.frame(IV, Consonance, Frequency)
df$Nro <- 1:13
```

Plot both.

```{r}
#| echo: true
#| eval: true
#| label: plot
#| warning: false

g1 <- ggplot(df) +
  geom_bar(aes(x=Nro, y=Frequency),stat="identity", fill="gray40",colour='black')+
  geom_line(aes(x=Nro, y=Consonance),stat="identity", group=1,linewidth=1.25,colour="gray80",alpha=0.80)+
  geom_point(aes(x=Nro, y=Consonance),stat="identity", group=1,size=3,alpha=0.80)+
  theme_bw()+
  xlab('Interval')+
  ylab('Normalized Freq./Consonance')+
  scale_x_continuous(breaks = seq(1,13,by=1),labels = IV,expand = c(0.01,0.01))+
  scale_y_continuous(breaks = seq(0,1,by=0.25),expand = c(0.01,0.01),limits = c(0,1))
g1
```

## References

- Bowling, D. L., Purves, D., & Gill, K. Z. (2018). Vocal similarity predicts the relative attraction of musical chords. _Proceedings of the National Academy of Sciences, 115(1)_, 216–221.
- Huron, D. (2001). Tone and voice: A derivation of the rules of voice-leading from perceptual principles. _Music Perception, 19(1)_, 1–64.

diff --git a/docs/Chapter4.qmd b/docs/Chapter4.qmd
new file mode 100644
index 0000000..87475ce
--- /dev/null
+++ b/docs/Chapter4.qmd
@@ -0,0 +1,142 @@

# Ch. 4 – Correlations
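
As a reminder of the quantity being illustrated in this chapter (and the one that `mvrnorm` with `empirical = TRUE` reproduces exactly in the simulations below), the Pearson product-moment correlation between two variables $x$ and $y$ is

$$r = \frac{\sum_i (x_i - \bar{x})(y_i - \bar{y})}{\sqrt{\sum_i (x_i - \bar{x})^2 \sum_i (y_i - \bar{y})^2}}.$$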

## Figure 4.1. Illustration of correlations

These data illustrate different correlation coefficients, taking inspiration from a study by Maruskin et al. (2012), who collected self-reports related to chills. As we do not have access to the data, the correlations are created by simulating a multivariate normal distribution (see `generate_data.R`), just to illustrate how the pattern of correlation changes.

```{r}
#### Libraries ------------------------------------------------
library(MASS)
library(ggplot2)
options(repr.plot.width = 6, repr.plot.height = 6) # Default plot size for Colab

#### Define a function -------------------------------------
generate_data <- function(N=NULL, r=NULL, m_x=NULL, range_x=NULL, m_y=NULL, range_y=NULL){
  # Generate data
  out <- as.data.frame(mvrnorm(N, mu = c(0,0),
                       Sigma = matrix(c(1,r,r,1), ncol = 2),
                       empirical = TRUE))
  # Calculations to create multiplication and addition factors for mean and range of X and Y
  mx.factor <- range_x/6
  addx.factor <- m_x - (mx.factor*3)
  my.factor <- range_y/6
  addy.factor <- m_y - (my.factor*3)

  # Adjust so that values are positive and include factors to match desired means and ranges
  out$V1.s <- (out$V1 - min(out$V1))*mx.factor + addx.factor
  out$V2.s <- (out$V2 - min(out$V2))*my.factor + addy.factor
  return(out)
}
```

```{r}
#### Correlations we want to simulate ------------------

N <- 362
r <- 0.32        # Desired correlation
d.mx <- 10       # Desired mean of X
d.rangex <- 17   # Desired range of X
d.my <- 10       # Desired mean of Y
d.rangey <- 17   # Desired range of Y
```

```{r}
#### Coldshivers and negative emotionality --------------
set.seed(101)    # for reproducibility
d1 <- generate_data(N, r, d.mx, d.rangex, d.my, d.rangey)

# Plot scatterplot along with regression line
g1 <- ggplot(d1, aes(x=V1.s, y=V2.s)) +
  geom_point(colour='gray25') +
  xlab('Coldshivers')+
  ylab('Negative emotionality')+
  annotate("text",x = 3.0, y=16,label = "italic(r)==0.32", parse=TRUE,size=4.5)+
  geom_smooth(formula = y ~ x, method='lm',color='gray50',fullrange=TRUE)+
  scale_x_continuous(limits = c(1,22),expand = c(0.00,0.00),breaks = seq(0,20,by=5))+
  scale_y_continuous(limits = c(1,22),expand = c(0.00,0.00),breaks = seq(0,20,by=5))+
  coord_fixed()+
  theme_bw()
print(g1)
```

```{r}
#### Coldshivers and Goosetingles --------------
set.seed(101)
r <- 0.65 # Desired correlation
d2 <- generate_data(N, r, d.mx, d.rangex, d.my, d.rangey)
g2 <- ggplot(d2, aes(x=V1.s, y=V2.s)) +
  geom_point(colour='gray25') +
  xlab('Goosetingles')+
  ylab('Coldshivers')+
  annotate("text",x = 3.0, y=16,label = "italic(r)==0.65", parse=TRUE,size=4.5)+
  geom_smooth(formula = y ~ x, method='lm',color='gray50',fullrange=TRUE)+
  scale_x_continuous(limits = c(1,22),expand = c(0.00,0.00),breaks = seq(0,20,by=5))+
  scale_y_continuous(limits = c(1,22),expand = c(0.00,0.00),breaks = seq(0,20,by=5))+
  coord_fixed()+
  theme_bw()
print(g2)
```

```{r}
#### Overall chills and Goosetingles --------------
set.seed(101)
r <- 0.91
d3 <- generate_data(N, r, d.mx, d.rangex, d.my, d.rangey)
# Plot scatterplot along with regression line
g3 <- ggplot(d3, aes(x=V1.s, y=V2.s)) +
  geom_point(colour='gray25') +
  xlab('Overall chills')+
  ylab('Goosetingles')+
  annotate("text",x = 3.0, y=16,label = "italic(r)==0.91", parse=TRUE,size=4.5)+
  geom_smooth(formula = y ~ x, method='lm',color='gray50',fullrange=TRUE)+
  scale_x_continuous(limits = c(1,18),expand = c(0.00,0.00),breaks = seq(0,20,by=5))+
  scale_y_continuous(limits = c(1,18),expand = c(0.00,0.00),breaks = seq(0,20,by=5))+
  coord_fixed()+
  theme_bw()
print(g3)
```

```{r}
#### Neuroticism and Goosetingles --------------
set.seed(101)
r <- 0.02
d4 <- generate_data(N, r, d.mx, d.rangex, d.my, d.rangey)
# Plot scatterplot along with regression line
g4 <- ggplot(d4, aes(x=V1.s, y=V2.s)) +
  geom_point(colour='gray25') +
  xlab('Neuroticism')+
  ylab('Goosetingles')+
  annotate("text",x = 3.0, y=16,label = "italic(r)==0.02", parse=TRUE,size=4.5)+
  geom_smooth(formula = y ~ x, method='lm',color='gray50',fullrange=TRUE)+
  scale_x_continuous(limits = c(1,18),expand = c(0.00,0.00),breaks = seq(0,20,by=5))+
  scale_y_continuous(limits = c(1,18),expand = c(0.00,0.00),breaks = seq(0,20,by=5))+
  coord_fixed()+
  theme_bw()
print(g4)
```

## Example 4.2 Meta-analysis

This recreates a forest plot with a small sample of 10 studies from the 105 experiments analysed by Cooper (2020).

```{r}
#install.packages("metafor",repos='http://cran.us.r-project.org',quiet=TRUE)
library(metafor, quietly=TRUE)

# 10 example studies from Cooper 2020
dat <- data.frame(
  id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
  yi = c(-0.71, -0.45, -0.25, 0.03, 0.17, 0.20, 0.36, 0.56, 0.72, 0.96),
  vi = c(0.143, 0.203, 0.039, 0.057, 0.107, 0.059, 0.046, 0.109, 0.12, 0.050),
  author = c("Mehr et al.", "Bhide et al.", "Gromko", "Rickard et al.", "Hanson et al.", "Schellenberg", "Ho et al.", "Bilhartz et al.", "Degé et al.", "Flaugnacco et al."),
  year = c(2013, 2013, 2005, 2012, 2003, 2004, 2003, 2000, 2011, 2015))

res.ee <- rma(yi, vi, data=dat, method="EE")

forest(res.ee, header=c("Experiment", "g [95% CR]"), top=2,
       xlab="Effect Size and Credibility Intervals",
       slab=paste(author, year, sep=", "), cex=0.9)
```

## References
- Cooper, P. K. (2020). It's all in your head: A meta-analysis on the effects of music training on cognitive measures in schoolchildren. _International Journal of Music Education, 38(3)_, 321–336.
- Maruskin, L. A., Thrash, T. M., & Elliot, A. J. (2012). The chills as a psychological construct: Content universe, factor structure, affective composition, elicitors, trait antecedents, and consequences. _Journal of Personality and Social Psychology, 103(1)_, 135–157.

diff --git a/docs/Chapter6.1.qmd b/docs/Chapter6.1.qmd
new file mode 100644
index 0000000..7e832cc
--- /dev/null
+++ b/docs/Chapter6.1.qmd
@@ -0,0 +1,129 @@

# Ch. 6 – Using R

This notebook demonstrates how to use R.


## Using R and RStudio

There are plenty of tools for data analysis and statistics available. I will only consider those that are open source and free to use, as this is the only way to guarantee that people are able to access the tools. Sadly, some fine tools such as _SPSS_, _JMP_, _Minitab_, _SAS_, or _Stata_ do not fulfil these principles. _R_ and _RStudio_, _JASP_, _Jamovi_ and _Python_ (made better with libraries such as _scipy_) and some others are free and open-source software that have become common research tools in the empirical sciences. In addition to being free and easily available, they have excellent capacities for sharing the analysis workflow, and some have tools to ensure replicability across years and different versions. I will focus on _R_ for the statistical analysis and explain why I think it is a good option for analysing empirical data.



## Why use R?

R is a versatile environment for analysing any data.
It is interactive and well suited to casual exploration of data that comes in many different forms (numbers, text strings) and different shapes (long and wide data). What is even more important is that R is fundamentally based on scripts, which serve as a blueprint of the analysis you have done and allow you, or anyone else, to replicate your analyses simply by running the same script. R is also:

1. Free and open source.
2. It works well on all operating systems: Windows, macOS, UNIX/Linux.
3. The community of R users is broad and active, and for this reason the resources for learning and asking questions are impressive and well developed (see Appendix).
4. It already has several music-specific tools (e.g., the `incon`, `gm` and `hrep` libraries) and datasets (`MusicScienceData`).
5. It is excellent for any statistical analysis.

Here is a quick tutorial for using R, but I would also recommend the [Getting Started with R](https://support.posit.co/hc/en-us/articles/201141096-Getting-Started-with-R) and [The Basics](https://posit.cloud/learn/primers/1) guides by the company that brings us RStudio ([Posit](https://posit.co/)).


### Nuts and bolts of R

The basic interface in R is the _R console_, which carries out commands as you type them. There are several ways to gain access to an R console. One way is to simply start R on your computer. The console looks something like this:

![](images/Screenshot0.png)

As an easy example, try using the console in _R_ to calculate the duration in seconds of an eighth note (or a quaver in the U.K.) in common time signature (4/4) when the tempo is quarter note = 120 beats per minute (BPM):

```{r}
0.5 * (60 / 120)
```

The code boxes show R code typed into the R console. Anything following a hashtag (#) is a comment and will not be interpreted by _R_. Here we see that one eighth note is 0.25 seconds (or 250 ms if you want) in this tempo. We got this by remembering that one quarter note lasts for 60 / tempo-in-BPM seconds. And since an eighth note is half of a quarter note, we expressed this as 0.5 and multiplied (`*`) it by the tempo expression (60/120). You would get the same result with `1/2 * 60/120`. Or you could define the duration and tempo as an equation:

```{r}
note <- 0.5
tempo <- 120
dur_in_seconds <- note * 60 / tempo # Calculate
print(dur_in_seconds * 1000) # Convert to milliseconds
```

This example demonstrates that there are several ways of doing calculations in R. In the last example we tried to be clear about the operations: we defined variables (`note` and `tempo`), used them to calculate the duration of the note, and finally transformed the value into milliseconds.

### RStudio, scripts and the whole development environment

_RStudio_ is a sleek and interactive _integrated development environment_ (IDE) that offers a number of great features for using R efficiently: it provides the console, along with panes for files, help pages, an index of what is in the memory, and a separate window for plots.

![](images/Screenshot2.png)

There are three main panes in RStudio. The left pane shows the R console. On the right, the top pane includes tabs such as _Environment_ and _History_, while the bottom pane shows five tabs: _Files_, _Plots_, _Packages_, _Help_, and _Viewer_.

#### Scripts

Scripts are collections of commands that can be edited with a text editor and executed in R.

To start a new script, you can click on _File_, then _New File_, then _R Script_.
This opens a new pane on the left, and it is here that you can start writing your script, which will contain your analysis. By documenting the analysis in the script, you, or anyone else, can replicate the steps very easily and get the same results afterwards just by running the script.

#### Running commands while editing scripts

_RStudio_ helps to make the code more readable and easier to use by colouring different parts of the code, and the indentation is adjusted automatically as you write. It will also close parentheses and suggest variable names as you go along, and warn if a line of code contains an error. If we look at our new script, we first need to give it a name. A good practice is to name scripts in a descriptive fashion: use lower-case letters, avoid spaces, use hyphens to separate words, and end with the suffix `.R`. We will call this script `code6.1.R`. It will grab data from a published dataset of emotions expressed by film soundtracks and plot the rated mean valence and energy of each track.

See the [Chapter7.R](https://tuomaseerola.github.io/emr/Chapter7.ipynb) R code at the
[https://tuomaseerola.github.io/emr/](https://tuomaseerola.github.io/emr/) site.

```{r}
# code6.1.R
#library(devtools) # add libraries from github
#devtools::install_github("tuomaseerola/MusicScienceData")
library(MusicScienceData) # loads library w data
library(ggplot2) # loads library
ggplot(MusicScienceData::soundtrack, # defines the data
       aes(x=Valence,
           y=Energy,
           colour=TARGET_EMOTION))+ # variables
  geom_point(size=4)+ # plots points
  theme_MusicScience() # applies style
```

Let's review this script. The first lines of code in an R script are used to load the libraries that are needed. Here we load the data from the `soundtrack` study, which is part of the `MusicScienceData` library. As an example, we will make a graph showing the position of 110 music excerpts in terms of their rated Valence and Energy. Once you have copied or typed the code above, you can run it by _executing_ the code: click the _Run_ button on the upper right side of the editing pane.

Once you run the code, you will see it appear in the R console and, in this case, the generated plot appears in the _Plots_ pane. The _Plots_ pane has a useful interface that permits you to click back and forward across different plots, zoom into a plot, or save plots as files. I recommend learning to save the plots as graphics directly in the script so that it is easier to control their size (width, height, and resolution) and type (pdf, tiff, or png), and to replicate identical plots afterwards.

![](images/Screenshot3.png)

To run one line at a time instead of the entire script, you can use _Control-Enter_ on Windows and _command-return_ on macOS.

### Installing R packages

R has thousands of packages available that offer data and functions. Not everybody needs all of these functionalities, so R offers them as _packages_ that are easy to install from within R. We will need a few of these, so let's see how this works.

In RStudio, the _Tools_ menu contains an option to install packages. We can load a package into our R session using the `library` function:

```{r}
library(ggplot2)
```

If this command gives you an error, you probably do not have this fabulous plotting library installed in your R yet.
This can be fixed by typing:

```{r}
#| echo: false
#| eval: false
#| label: hidden
install.packages("ggplot2", repos = "http://cran.us.r-project.org")
```

```{r}
#| echo: true
#| eval: false
install.packages("ggplot2")
```

After installation, you still need to make the library active in your session by invoking the `library` command described earlier. Different examples in this book will utilise different libraries. We only install the libraries once, because they remain installed and only need to be loaded with the `library` command.

::: {.callout-note}
The library where many examples come from, `MusicScienceData`, needs to be installed with the following code. This is because I have only released that library on GitHub for easier development, and R is not able to find it unless you have the extra package called `devtools` installed.
:::


```{r}
#| echo: true
#| eval: false
install.packages("devtools") # add libraries from github
devtools::install_github("tuomaseerola/MusicScienceData")
```

diff --git a/docs/Chapter6.2.qmd b/docs/Chapter6.2.qmd
new file mode 100644
index 0000000..994b4ff
--- /dev/null
+++ b/docs/Chapter6.2.qmd
@@ -0,0 +1,97 @@

# Ch. 6 – Data Organisation

This notebook demonstrates data organisation for analysis projects.

This section is based on an R template for analysing data from experiments and surveys, together with a justification for following certain conventions and structures. The template is available as rendered HTML at [https://tuomaseerola.github.io/R_template/](https://tuomaseerola.github.io/R_template/).

## Organisation

For each project, you should establish one repository that you can clone and share using an appropriate service (such as GitHub, GitLab, or other services; even Dropbox works for collaborations). Give the repository a compact but informative name (`chord_priming` or `emotion_recognition5`) that separates it from your other projects.

Within this repository, it is a good idea to establish separate folders for separate elements:

* `/data` Data in read-only format (preferably CSV or TSV format)
* `/munge` All operations to pre-process, recode, or trim data
* `/scr` All actual R scripts used in the analysis
* `/figures` Outputs from the scripts (optional if you use reporting formats)
* `/docs` Outputs from the reports (optional if you use reporting formats)

In this repository, `contents.R` is the file that compiles the full analysis and allows you to reproduce everything in R. Alternatively, this file can be an R Markdown file, which is a neat analysis and reporting format, or even a Quarto document, which is a more advanced version of the same idea. Either way, the summary document contains all the stages, structures and processes of the project, and is structured to be executed in a coherent order and manner (i.e., loading, transforming and screening the data, and then visualising, applying statistical analyses, and creating figures and tables).

`report.Rmd` will create the report that incorporates comments and the actual analyses, and produces either an HTML or a PDF file (`report.html`, `report.pdf`) in the `docs` folder.

### Data

Typically the data is in CSV (comma-separated values) or TSV (tab-separated values) format, as this is the output of most experiment software and is also easily exported from _Qualtrics_, psychophysiological measurement systems, and so on.
Sometimes the data might be in Excel format, which can also be read easily into R, but I would advise against making large numbers of edits in Excel, as you would lose the ability to tell what has been changed and why. **The rule is that we never edit, manipulate, fix or alter the raw data**, no matter what the format is.

It is good to store the raw original data with timestamps in the data folder, and if you get newer datasets or more observations, you add a new data file to the data folder with a new timestamp and keep the old one for reference. There are situations when the data has excess observations (pilot participants), typos and other issues, but it is easier and more transparent to handle these in the processing (munging) stage.

### Munging

Munging refers to preprocessing the raw data to be useful for the actual analysis. Often this means relabelling the names of the variables (columns) and possibly recoding the observations (as numeric responses or as factors). It is also very typical to pivot the data from wide format (numerous variables in columns) to long format, so that the key variables contain all the manipulations.

### Scripts

Often you develop the analysis in stages, starting with some form of quality control and moving on to descriptives and then inferential statistics. For enhanced clarity and easier debugging, it is a good idea to develop these as separate scripts (and possibly as functions) and store them in the scripts folder. The production of tables and figures can also be done explicitly with separate scripts.

In the end, you should have one file (which I call `contents.R`) that is able to produce the full analysis: reading the data, preprocessing the data, calculating quality-control indicators, summarising the data, producing the analyses, and creating tables and figures.

## Example repository and template analysis

Proceed to either the fuller explanation of the process at [https://tuomaseerola.github.io/R_template/](https://tuomaseerola.github.io/R_template/) or the Quarto slides about the [R_template in action](https://tuomaseerola.github.io/R_template/R_template_in_action.html) to explore the steps of the analysis process.

### Initialise the analysis

Start R and open up the `contents.R` file using your preferred editor. Check that the directory set by the first command (`setwd`) points to the location of your analysis directory, and run the first lines of the code:

```{r}
#| echo: true
#| eval: false
#| label: libraries
# contents.R
## INITIALISE: LOAD LIBRARIES
library(tidyverse, quietly = TRUE) # Loads the necessary R libraries
```

If you get errors at this stage with a new installation of R, they might refer to the special libraries that are loaded or installed in `libraries.R`. This script should install the required libraries (such as ggplot2) for you, but there might be issues with your particular setup.

### Load, preprocess and diagnose the data

Next, it is time to load the data with the scripts. The first one, `read_data_survey.R`, simply reads a TSV file exported from Qualtrics and stored in the data folder. I have taken the second, descriptive header row out of the data to simplify the process, but different datasets will have slightly different structures.

```{r}
#| echo: true
#| eval: false
#| label: read-data
source('scr/read_data_survey.R') # Produces data frame v
```

This should read the data into a variable called `v` in R, which contains a complex data frame.
In the next step this raw data will be munged, that is, pre-processed in several ways. Pre-processing can have multiple steps; here these have been broken into two:

The first operation carries out a long list of renamings of the variables (the columns in the data, `rename_variables.R`). This can be avoided if the data has these names already, and it is quite useful to try to embed meaningful variable names in the data collection itself (experiment, survey, or manual coding).

Recoding instruments (`recode_instruments.R`) has several steps, and it might be useful to study the steps separately. Finally, the responses are reshaped into the long form that is better suited for the analyses. This data frame will be called `df`.

```{r}
#| echo: true
#| eval: false
#| label: munge
source('munge/rename_variables.R')  # Renames the columns of v
source('munge/recode_instruments.R')# Produces df (long-form) from v
```

After the munging, it is prudent to check various aspects of the data.

1. Descriptives such as the N, age, and gender are echoed in order to remind us of the dataset properties (`demographics_info.R`).

2. We can also explore the consistency of the ratings across the participants to check whether people agreed on the ratings and generally understood the task (`interrater_reliability.R`).

3. We also want to look at the distributions of the collected data in order to learn whether one needs certain operations (transformations, or resorting to non-parametric statistics) in the subsequent analyses (`visualise.R`). This step will also include displaying correlations between the emotion scales, which is a useful operation for learning about the overlap of the concepts used in the tasks.

```{r}
#| echo: true
#| eval: false
#| label: demographics
source('scr/demographics_info.R') # Reports N, Age and other details
```

diff --git a/docs/Chapter6.3.qmd b/docs/Chapter6.3.qmd
new file mode 100644
index 0000000..52d7dc5
--- /dev/null
+++ b/docs/Chapter6.3.qmd
@@ -0,0 +1,293 @@

# Ch. 6 – Diagnostics

This notebook demonstrates data diagnostics and summaries.

## Preliminaries

Load or install the necessary R packages.

```{r}
#| label: libraries1
#| eval: true
#| echo: true
#| warning: false
library(tidyverse,quiet=TRUE)
library(ggplot2,quiet=TRUE)
#install.packages("cowplot",quiet=TRUE)
library(cowplot,quiet=TRUE)
options(repr.plot.width = 7, repr.plot.height = 5)
```

```{r}
#| eval: false
#| label: libraries2
#| warning: false
if (!require(devtools)) install.packages("devtools",quiet=TRUE)
devtools::install_github("tuomaseerola/MusicScienceData@main",quiet=TRUE)
library(MusicScienceData,quiet=TRUE)
```

```{r}
#| eval: true
#| label: libraries3
#| echo: false
library(MusicScienceData,quiet=TRUE)
```


## Code 6.1

```{r}
#| label: 6-1
#| warning: false
print(MusicScienceData::sadness[1:4,1:7])
```

## Code 6.2

```{r}
#| label: 6-2
#| warning: false
print(MusicScienceData::priming[1:3,1:6])
```

## Code 6.3

Figure 6.1. A histogram showing the distribution of responses to a particular question (no. 25) in the Attitudes towards Sad Music (ASM) instrument.

```{r}
#| label: 6-3
#| warning: false
sadness <- MusicScienceData::sadness
g1 <- sadness %>%
  drop_na(ASM25) %>%   # drop missing values
  ggplot(aes(x = ASM25))+
  geom_histogram(bins=7,fill="grey50", colour='black')+
  scale_x_continuous(breaks = seq(1,7,by=1))+
  ylab('Count')+
  xlab('1 = Strongly disagree, 7 = Strongly agree')+
  theme_MusicScience()
g1
```

## Code 6.5

Figure 6.4. A box plot showing the distribution of responses to a particular question (no. 25) in the Attitudes towards Sad Music (ASM) instrument split across gender.

```{r}
#| label: 6-5
#| warning: false
g5 <- sadness %>%
  drop_na(ASM25) %>%   # drop missing values
  ggplot(aes(y = ASM25,fill=gender))+
  geom_boxplot()+
  scale_y_continuous(breaks = seq(1,7,by=1))+
  scale_x_discrete()+
  scale_fill_grey(start = .4,end = .8,name='Gender')+
  ylab('1 = Strongly disagree, 7 = Strongly agree')+
  theme_MusicScience()
print(g5)
```

## Code 6.6

Figure 6.5. Histograms of the reaction time data from the priming study, plotted with different bin widths, with and without trimming, and with a density curve overlaid.

```{r}
#| label: 6-6
#| warning: false
options(repr.plot.width = 12, repr.plot.height = 10)
d <- MusicScienceData::priming

g1<-ggplot(d,aes(x=RT))+
  geom_histogram(binwidth=100,colour='grey50',fill='white')+
  ggtitle('Bin width 100')+
  ylab('Count')+
  xlab('Reaction time (ms)')+
  scale_x_continuous(breaks=seq(0,2000,by=400))+
  theme_MusicScience()


g2<-ggplot(d,aes(x=RT))+
  geom_histogram(binwidth=10,colour='grey50',fill='white')+
  ggtitle('Bin width 10')+
  ylab('Count')+
  xlab('Reaction time (ms)')+
  scale_x_continuous(breaks=seq(0,2000,by=400))+
  theme_MusicScience()

g3<-ggplot(dplyr::filter(d,RT>200 & RT<1500),aes(x=RT))+
  geom_histogram(binwidth=10,colour='grey50',fill='white')+
  ggtitle('Bin width 10 with trimming')+
  ylab('Count')+
  xlab('Reaction time (ms)')+
  scale_x_continuous(breaks=seq(200,1500,by=200),limits = c(0,2000))+
  geom_vline(xintercept = c(200,1500),linetype='dashed')+
  theme_MusicScience()


g4<-ggplot(dplyr::filter(d,RT>200 & RT<1500),aes(x=RT))+
  geom_histogram(binwidth=10,colour='grey50',fill='white')+
  geom_density(aes(y=10 * after_stat(count)),alpha=0.5,colour='black',fill=NA)+
  ggtitle('Bin width 10 density with trimming')+
  ylab('Count')+
  xlab('Reaction time (ms)')+
  scale_x_continuous(breaks=seq(200,1500,by=200))+
  theme_MusicScience()

G1 <- plot_grid(g1, g2, g3, g4, nrow = 2)
print(G1)
```

## Code 6.7

Table 6.1: The means of the ASM question 20 across age.

```{r}
#| label: 6-7
#| warning: false
library(Hmisc,quietly = TRUE)
table1 <- MusicScienceData::sadness %>%
  drop_na(ASM20) %>%   # drop missing values
  group_by(age) %>%
  summarise(n=n(),mean_cl_normal(ASM20))

colnames(table1) <- c('Age','N','M','95% CI LL','95% CI UL')
knitr::kable(table1,digits = 2, format='simple',
  caption = 'The means of the ASM question 20 across age.')
```

## Code 6.8

```{r}
#| label: 6-8
#| warning: false
mean(MusicScienceData::sadness$ASM20, na.rm=TRUE) # Mean (ignore missing values)
sd(MusicScienceData::sadness$ASM20,na.rm=TRUE)
```

## Code 6.9

Figure 6.6. A bar graph showing the means of the responses to question no. 20 in the Attitudes towards Sad Music (ASM) instrument across gender.
+ +```{r} +#| label: 6-9 +#| warning: false +g6 <- sadness %>% + drop_na(ASM20) %>% # drop missing values + group_by(gender) %>% + summarise(mean= mean(ASM20),ci = mean_cl_normal(ASM20)) %>% + ggplot(aes(x = gender,y = mean,fill=gender))+ + geom_col(colour='black',show.legend = FALSE)+ + geom_errorbar(aes(ymin=ci$ymin,ymax=ci$ymax),width=0.5)+ + scale_y_continuous(breaks = seq(1,7,by=1), expand = c(0,0))+ + scale_fill_grey(start=.25,end=.75)+ + coord_cartesian(ylim = c(1, 7)) + + ylab('Mean ± 95% CI')+ + xlab('Gender')+ + theme_MusicScience() +print(g6) +``` + +## Code 6.10 + +Figure 6.7. A bar graph showing the means of the responses to the question no. 6 in Attitudes towards Sad Music (ASM) instrument across musical expertise. + +```{r} +#| label: 6-10 +#| warning: false +g1 <- MusicScienceData::sadness %>% + drop_na(ASM1) %>% # drop missing values + ggplot(aes(x= ASM1,color=gender))+ + geom_density(adjust=1.25)+ + scale_color_grey(name='Gender')+ + scale_x_continuous(breaks = seq(1,7,by=1))+ + ggtitle(sadness_ASM_labels[1])+ + ylab('Density')+ + theme_bw()+ + theme(legend.justification=c(1,0), legend.position=c(0.95,0.75))+ + theme(plot.title = element_text(size=11)) + +tmp<-as_tibble(MusicScienceData::sadness) +tmp2<-tmp[,c(3,7:10)] +dfl <- pivot_longer(tmp2,cols = c(2:5)) + +g2 <- dfl %>% + drop_na(value) %>% # drop missing values + ggplot(aes(x=name,y = value,fill=gender))+ + geom_boxplot(outlier.shape ="")+ + scale_y_continuous(breaks = seq(1,7,by=1))+ + scale_x_discrete()+ + scale_fill_grey(start = .75, end=.25, name="Gender")+ + ggtitle('ASM items 1 to 4')+ + ylab('1 = Strongly disagree, 7 = Strongly agree')+ + xlab('Item')+ + theme_bw()+ + theme(legend.justification=c(1,0), legend.position=c(0.95,0.70)) + +g3 <- MusicScienceData::sadness %>% + drop_na(ASM12) %>% # drop missing values + ggplot(aes(x=1,y = ASM12))+ + geom_boxplot(fill='gray70')+ + geom_jitter(alpha=0.13,colour='black', width = 0.33)+ + scale_y_continuous(breaks = seq(1,7,by=1))+ + scale_x_discrete()+ + ggtitle(sadness_ASM_labels[12])+ + ylab('1 = Strongly disagree, 7 = Strongly agree')+ + xlab('ASM12')+ + theme_bw() + +g4 <- MusicScienceData::sadness %>% + drop_na(ASM13) %>% # drop missing values + ggplot(aes(x=1,y = ASM13))+ + geom_violin(fill='grey70',adjust=1.2,alpha=0.50)+ + scale_y_continuous(breaks = seq(1,7,by=1))+ + scale_x_discrete()+ + stat_summary(fun = median, fun.min = median, fun.max = median, + geom = "crossbar", width = 0.9)+ + stat_summary(fun = mean, fun.min = mean, fun.max = mean, + geom = "crossbar", width = 0.9,colour='gray50')+ + ggtitle(sadness_ASM_labels[13])+ + annotate("text",x=1.6,y=mean(MusicScienceData::sadness$ASM13,na.rm = TRUE),label='Mean',hjust=0)+ + annotate("text",x=1.6,y=median(MusicScienceData::sadness$ASM13,na.rm = TRUE),label='Median',hjust=0)+ + ylab('1 = Strongly disagree, 7 = Strongly agree')+ + xlab('ASM13')+ + theme_bw() + +G2 <- plot_grid(g1,g2,g3,g4,labels = c("A", "B", "C", "D"),ncol = 2, nrow = 2) +print(G2) +``` + +## Code 6.11 + +Figure 6.8. A scatterplot showing the means of the ratings to 110 film soundtrack excerpts using scales tension and valence in Eerola and Vuoskoski (2011). 
+ +```{r} +#| label: 6-11 +#| warning: false +g9 <- ggplot(soundtrack) + + aes(x = Valence, y = Tension, colour = TARGET_EMOTION, + label=Number, + shape= TARGET_FRAMEWORK) + + geom_point(size=4,alpha=0.80,show.legend=FALSE) + + coord_fixed(ratio = 1)+ + geom_smooth(aes(shape = NULL,colour=NULL),method="lm", + formula='y ~x',se=FALSE, fullrange=TRUE, + level=0.95, colour='grey50', # adds trendline + linetype='dashed',show.legend = FALSE)+ + geom_text(show.legend=FALSE,color='white',size=1.7)+ # labels + scale_colour_grey(name='Emotion',start = .6,end = 0)+ + scale_shape(name='Framework')+ + scale_x_continuous(breaks=seq(1,9,by=2),limits=c(1,9))+ + scale_y_continuous(breaks=seq(1,9,by=2),limits=c(1,9))+ + theme_MusicScience() +print(g9) +``` + +## References + +- Eerola, T., & Peltola, H.-R. (2016). Memorable experiences with sad music - reasons, reactions and mechanisms of three types of experiences. _PloS ONE, 11(6)_, e0157444. + +- Eerola, T., & Vuoskoski, J. K. (2011). A comparison of the discrete and dimensional models of emotion in music. _Psychology of Music, 39(1)_, 18–49. + diff --git a/docs/Chapter6.4.qmd b/docs/Chapter6.4.qmd new file mode 100644 index 0000000..71d1cff --- /dev/null +++ b/docs/Chapter6.4.qmd @@ -0,0 +1,133 @@ + +# Ch. 6 – Outliers + +This notebook is about outliers. + +## Outliers + +It is not uncommon to obtain data which has some missing observations, perhaps even some nearly impossible values and funky distributions. There are remedies and procedures in statistics to diagnose and tackle these kinds of issues, some easy, some more tricky and controversial. This text will not get deeply involved in the analysis of these issues, but I will present the basics. It is worth saying here that sometimes these problems may lead to insurmountable challenges for the analysis, but more often there are simple techniques that can mitigate the problem. The key in all these operations is that you are transparent in what you do and explain why an operation was done to the data and what the operation was. + +An unusually high or low value in the data may have a catastrophic impact on the descriptive and inferential statistics. The source of the unusual value, which we call an outlier, could be a typo in the data, or a conversion error (mistaking a comma for a full stop as the decimal separator, or something else), or, in a large sample, extreme values may simply appear in the data. Outliers will cause problems for traditional analysis operations such as calculating means and carrying out t-tests, correlations, and regressions, as these calculations usually assume a normal distribution of values, and an extreme value will likely violate this assumption. The practical reason for treating outliers in the first place is that they may render these calculations misleading, as the extremes wield a high leverage on otherwise relatively stable values that tend to centre around a mean. + +The first diagnostic action towards finding the potential outliers is to visualise the data. If you plot the histograms or boxplots of your variables or scatterplots between two variables, the outliers are usually quite easily visible in these visualisations (see Figure 6.3 for an example). It is a sensible idea always to get a feel for the distribution of the data by plotting the values in a meaningful fashion (boxplots are always a good starting point).
The shape of the distribution might reveal other unwanted issues, such as all values being clustered near one end of a scale (called a ceiling effect or a floor effect, where the measurement scale is attenuated because it is not sensitive enough, or it is oversensitive, scoring only a few values at the positive extreme of the scale). Or visualisation between two variables might reveal that the relationship between the two variables is not a linear one but still clear and regular, in a polynomial relation (e.g. a U-shaped or inverted U-shaped pattern). It is possible to diagnose the potential outliers using several quantitative techniques, but before mentioning two options, let me warn that there is no definite recommendation on what is classified as an outlier, as different types of data, distributions, and disciplinary orientations might have slightly different practices for dealing with these offending values. One of the most used measures, already introduced earlier in relation to boxplots, is to use the interquartile range (IQR) to define the range of acceptable values (outliers are above the 75% quantile plus 1.5 × IQR or below the 25% quantile minus 1.5 × IQR). + +```{r} +#| label: libraries1 +#| warning: false + +library(tidyverse,quiet=TRUE) +library(ggplot2,quiet=TRUE) +library(cowplot,quiet=TRUE) +library(MusicScienceData,quiet=TRUE) +options(repr.plot.width = 7, repr.plot.height = 4) # Default plot size for Colab +``` + +## Boxplot and outliers + +### Code 6.3 + +Figure 6.3. A box plot showing the distribution of responses to a particular question (no. 25) in the Attitudes towards Sad Music (ASM) instrument. + +```{r} +#| label: 6-3 +#| warning: false +sadness <- MusicScienceData::sadness +g1 <- sadness %>% + drop_na(ASM25) %>% # drop missing values + ggplot(aes(x = ASM25))+ + geom_boxplot(fill="grey50", colour='black')+ + scale_x_continuous(breaks = seq(1,7,by=1))+ + ylab('Count')+ + xlab('1 = Strongly disagree, 7 = Strongly agree')+ + theme_MusicScience() +print(g1) +``` + +The boxplot shows the distribution of answers to the attitudes towards sad music question (question 25). The boxplot diagnoses ratings of 1 as outliers (indicated by the dot at the value of 1). The median of that distribution is 6, the lower end of the IQR is 5, and the interquartile range is 2, so the lower threshold for the outliers is 2 (5 - 2 × 1.5) and therefore the few values of 1 are singled out as potential outliers. Let's verify the calculation so we understand the routine. + +### Code 6.4 + +```{r} +#| label: 6-4 +#| warning: false +MD <- median(sadness$ASM25,na.rm=TRUE) # median +print(paste('Median:',MD)) +IQR_range <- IQR(sadness$ASM25,na.rm=TRUE) # interquartile range +print(paste('Interquartile range:',IQR_range)) +lower_threshold <- (MD - IQR_range/2) - (IQR_range * 1.5) # lower end of the IQR (median - IQR/2) minus 1.5 x IQR +print(paste('Lower threshold for outliers:',lower_threshold)) +``` + +The second example comes from the priming study and the reaction time responses (see Figure 6.2). The mean response time was 632 ms, the upper threshold for outliers using the IQR-based technique is 930 ms, and the lower threshold is 254 ms, so any value below 254 ms or above 930 ms could be considered a potential outlier. To be fair, reaction times are not even supposed to be normally distributed; they have a strongly right-skewed shape caused by participants' tendency to respond asymmetrically (more responses towards the slow end of the response range than the fast).
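The same IQR logic is easy to verify directly in R. Below is a minimal sketch (assuming the `MusicScienceData` package is loaded, as above); the exact thresholds may differ marginally from the values quoted in the text depending on how missing values are handled.

```{r}
#| eval: false
rt <- MusicScienceData::priming$RT                  # reaction times in ms
Q <- quantile(rt, c(0.25, 0.75), na.rm = TRUE)      # 25% and 75% quantiles
fences <- c(Q[1] - 1.5 * IQR(rt, na.rm = TRUE),     # lower threshold
            Q[2] + 1.5 * IQR(rt, na.rm = TRUE))     # upper threshold
print(fences)                                       # values outside these are potential outliers
sum(rt < fences[1] | rt > fences[2], na.rm = TRUE)  # how many observations are flagged
```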
There is a specific way to eliminate too fast reactions (<200 ms) or too slow reactions (>1500 ms) (Brysbaert & Stevens, 2018), and even after this, the analysis of the reaction time data will utilise a statistical operation that is suited to the specific distribution of the data (e.g. a GLMM with a shifted log-normal distribution) or apply a log transformation of the data. But as we can see from the figure below, eliminating the over 40 timed-out responses (>2000 ms) does make the data much cleaner. + +### Code 6.6 + +```{r} +#| label: 6-6 +#| warning: false +options(repr.plot.width = 12, repr.plot.height = 10) # Default plot size for Colab +d <- MusicScienceData::priming + +g1<-ggplot(d,aes(x=RT))+ + geom_histogram(binwidth=100,colour='grey50',fill='white')+ + ggtitle('Bin width 100')+ + ylab('Count')+ + xlab('Reaction time (ms)')+ + scale_x_continuous(breaks=seq(0,2000,by=400))+ + theme_MusicScience() + +g2<-ggplot(d,aes(x=RT))+ + geom_histogram(binwidth=10,colour='grey50',fill='white')+ + ggtitle('Bin width 10')+ + ylab('Count')+ + xlab('Reaction time (ms)')+ + scale_x_continuous(breaks=seq(0,2000,by=400))+ + theme_MusicScience() + +g3<-ggplot(dplyr::filter(d,RT>200 & RT<1500),aes(x=RT))+ + geom_histogram(binwidth=10,colour='grey50',fill='white')+ + ggtitle('Bin width 10 with trimming')+ + ylab('Count')+ + xlab('Reaction time (ms)')+ + scale_x_continuous(breaks=seq(200,1500,by=200),limits = c(0,2000))+ + geom_vline(xintercept = c(200,1500),linetype='dashed')+ + theme_MusicScience() + +g4<-ggplot(dplyr::filter(d,RT>200 & RT<1500),aes(x=RT))+ + geom_histogram(binwidth=10,colour='grey50',fill='white')+ + geom_density(aes(y=10 * after_stat(count)),alpha=0.5,colour='black',fill=NA)+ + ggtitle('Bin width 10 density with trimming')+ + ylab('Count')+ + xlab('Reaction time (ms)')+ + scale_x_continuous(breaks=seq(200,1500,by=200))+ + theme_MusicScience() + +G1 <- plot_grid(g1, g2, g3, g4, nrow = 2) +print(G1) +``` + +If the IQR-based method is the first way to diagnose outliers, the other common way is to convert the variables into Z-scores, where the mean is 0 and the standard deviation is 1. A z-score of -4 would then mean that an observation is four standard deviations below the mean. One rule of thumb suggests that observations more than ±3 standard deviations from the mean are potential outliers. Besides these two simple metrics, there are more sophisticated ways to identify outliers, such as using the normal distribution (Grubbs' method) or a distance measure (the Mahalanobis method), but ultimately the yardstick for making decisions based on any technique is subjective and must be clearly explained and motivated. + +## Dealing with outliers + +After diagnosing that there are outliers in the data, you need to decide what to do with them. It is possible to keep the outliers in the data if the analysis can work with them without being disruptively influenced by them. For instance, if the analysis operations can be done with non-parametric inferential statistics that rely on ranks (the order of the values) – not the actual distances – between the observations, this can avoid the detrimental effect of the outliers on statistical inferences (see Chapter 7). There are also variant techniques for carrying out correlation and regression analyses that are designed to work with data that partially violate assumptions of normality (e.g., rank correlations, robust regression, and lasso regression).
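To see why rank-based measures resist outliers, here is a small sketch with simulated (hypothetical) data: a single extreme observation distorts the Pearson correlation but barely moves the rank-based Spearman correlation.

```{r}
#| eval: false
set.seed(101)                       # hypothetical data for illustration only
x <- rnorm(50)                      # predictor
y <- x + rnorm(50, sd = 0.5)        # strongly correlated response
x[50] <- 10; y[50] <- -10           # inject one extreme outlier
cor(x, y, method = "pearson")       # distance-based, heavily distorted
cor(x, y, method = "spearman")      # rank-based, largely unaffected
```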
Similar operations exist for comparing means, ranging from non-parametric variants of the t-test (the Mann-Whitney U test) and ANOVA (the Kruskal-Wallis test) to generalised linear mixed models (GLMMs), where one can change the assumed distribution from the normal distribution to something else that reflects the underlying data better. +A simpler option is either to eliminate the outliers or to replace them with the nearest plausible data (sometimes called Winsorising), where you trim the values to the edge of the definition of the outliers. The decision of what is an appropriate way to deal with the problematic observations depends on many issues, but the idea of trimming them to the edge of the outliers is to preserve the observations in the data while removing their leverage (the distance from the mean) by moving them into the acceptable range. Again, there is no hard guidance on what the best practice is for dealing with outliers, as sometimes data is extremely rare and throwing parts of it away can handicap the analysis. In any case, reporting the diagnosis (what diagnostic operation was used and how many outliers were detected) and the treatment of the outliers is always necessary. + + +## Missing Data + +Sometimes there are missing observations in the data, and the reasons for these might be as varied as the reasons for outliers. Also, you might have created more missing data if you decided to eliminate outliers, which could mean that those offending extreme observations are now considered missing. If the missing data is in the original observations, it is worth considering the reason for it before deciding what to do with it; it might tell you that a survey question was badly formed or related to a private issue that many people did not want to respond to, that participants skipped a question, or perhaps that the experiment had an erroneous coding for a trial. When the missing observations are clearly linked to such a data collection issue, it might be best to report this as it is. When the amount of missing data is low and not clearly linked to any known issue, there are several ways to deal with it. One of them is to allow the observations to be missing, as most of the analysis operations in competent statistical software suites can deal with the omissions, provided the missing observations are coded properly in the statistical software (e.g., NA in R) and not as values of any kind (coding missing values as zeros is downright dangerous, as further calculations will then treat them as actual values). For instance, in the examples above, I have dropped missing observations when constructing plots (line 3 in Code 6.6) and tables (Table 6.3) and when calculating means (the example just below the table above). + +In most cases, reporting how many missing observations there are and whether they specifically affect the study design is sufficient to press on with the analysis, keeping the data as it is (with the missing observations in the data frame, properly coded as missing). More advanced ways of dealing with the missing observations are to infer the missing values from the other variables (imputation) or to interpolate the missing observations from the other data (Howell, 2008), but the prudent use of either of these techniques requires sophisticated data analysis skills, and I would not recommend following the route of filling in the gaps in the data with educated guesses unless this is absolutely necessary and you know exactly what you are doing.
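As a concrete illustration of these simpler treatments, the sketch below Winsorises a small hypothetical vector to the IQR fences and shows why missing values must be declared as `NA` rather than coded as numbers.

```{r}
#| eval: false
x <- c(2, 3, 4, 5, 6, 7, 50, NA)                 # hypothetical data: one extreme value, one missing
Q <- quantile(x, c(0.25, 0.75), na.rm = TRUE)    # quartiles
fences <- c(Q[1] - 1.5 * diff(Q), Q[2] + 1.5 * diff(Q))
x_wins <- pmin(pmax(x, fences[1]), fences[2])    # Winsorise: clamp values to the fences
mean(x, na.rm = TRUE)                            # NA is ignored only because it is declared as NA
mean(x_wins, na.rm = TRUE)                       # leverage of the extreme value is removed
```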
+ +## Non-Normal Distributions + +The final issue of data quality relates to the distribution of the data. Most of the operations I have talked about – and will be talking about in Chapter 7 – assume that the observations follow the normal distribution, which is symmetric and governed by a mean and a variance (σ^2). When the observations have a wildly different distribution from this one, skewed in one direction (asymmetrical), or heteroscedastic (where the variation is uneven across the range of a variable), one might need to revert to statistical operations designed to handle non-normal distributions (non-parametric operations) or try to transform the observations into something closer to the normal distribution. There is nothing suspicious or problematic in the act of transforming a variable if it makes the analysis and interpretation easier, but again one must report and justify such operations carefully. +We have already come across one common transformation that is often applied to reaction time data, namely the logarithmic transform. Another transformation for data that has a positive skew is the square root transformation. There is also a technique called power transformation, which attempts to find the transformation that creates the closest match to the normal distribution (also known as the _Box-Cox_ technique). Statistical software packages come with routines that can identify violations of normality, such as the Kolmogorov-Smirnov or Shapiro-Wilk tests. Rather than blindly attempting to use a neat transformation to rescue a problematic variable, I would recommend a common-sense approach where the underlying reason for the non-normality of the distribution is considered. If it is something that typically happens with the measurements (such as reaction time data) and not just a poorly designed measure with ceiling or floor effects, the transformation is easy to motivate and apply. In other cases, it is probably wise to take a deep breath and consult an advanced statistics guide, e.g., Howell (2016), or see the recommendations at the end of the book. + +## References + +- Eerola, T., & Peltola, H.-R. (2016). Memorable experiences with sad music - reasons, reactions and mechanisms of three types of experiences. _PloS ONE, 11(6)_, e0157444. + +- Eerola, T., & Vuoskoski, J. K. (2011). A comparison of the discrete and dimensional models of emotion in music. _Psychology of Music, 39(1)_, 18–49. + diff --git a/docs/Chapter7.qmd b/docs/Chapter7.qmd new file mode 100644 index 0000000..8f15dea --- /dev/null +++ b/docs/Chapter7.qmd @@ -0,0 +1,180 @@ + +# Ch. 7 – Inferential statistics + +This notebook demonstrates running inferential statistical tests in R. + +## Preliminaries + +Load libraries and install the `MusicScienceData` package where the example data is stored. + +```{r} +#| label: libraries +#| warning: false +library(ggplot2,quietly = TRUE) +library(tidyverse,quietly = TRUE) +library(MusicScienceData,quiet=TRUE) +``` + +## Code 7.1 + +See text for the explanation.
+ +```{r} +#| label: 7-1 +#| warning: false +df <- MusicScienceData::sadness # define data +t <- t.test(ASM20 ~ gender, data=df) # t test +print(t$statistic) # show the t value + +print(scales::pvalue(t$p.value)) +dplyr::summarise(dplyr::group_by(df, gender), # means and SDs + M=mean(ASM20,na.rm=TRUE), + SD=sd(ASM20,na.rm=TRUE)) +``` + +## Code 7.2 + +```{r} +#| label: 7-2 +#| warning: false +df <- MusicScienceData::sadness # define data +model.aov <- aov(ASM20 ~ age, data=df) # run anova +F <- summary(model.aov) # summarise +print(F) +``` + +## Code 7.3 + +```{r} +#| label: 7-3 +#| warning: false +TABLE<-TukeyHSD(model.aov,conf.level = 0.95) +print(knitr::kable(TABLE$age,digits = 3, + caption = 'Comparison of age groups + for Item 20 in ASM survey.', + format = 'simple')) +``` + +## Code 7.4 + +```{r} +#| label: 7-4 +#| warning: false +df <- MusicScienceData::sadness # define data +model2.aov <- aov(ASM20 ~ age * gender, data=df) # run anova +F2 <- summary(model2.aov) +print(F2) +``` + +## Code 7.5 + +This analysis requires extra libraries and raw data read from GitHub. The installation might be slow in Colab because of package dependencies. + +```{r} +#| label: 7-5 +#| eval: false +#| warning: false +#install.packages("lme4",quiet=TRUE) # Required for LMM analysis +#install.packages("lmerTest",quiet=TRUE) # Optional +library(lme4,quiet=TRUE) +library(lmerTest,quiet=TRUE) +``` + +```{r} +#| label: load-data +#| eval: true +#| warning: false +library(lme4,quiet=TRUE) +library(lmerTest,quiet=TRUE) +d <- read.csv('https://raw.githubusercontent.com/tuomaseerola/emr/main/data/raw_ratings.csv') +d2 <- dplyr::filter(d,Emotion=='Dimensional') # dimensional ratings only +d3 <- dplyr::filter(d2, Category=='Anger' | + Category=='Fear' | + Category=='Happy' | + Category=='Sad' | + Category=='Tender') +m1 <- lmer(Valence ~ Category * Gender + (1|id) + (1|Track), data = d3) +s <- summary(m1,corr=FALSE) +S<-s$coefficients; S<-round(S,2); S[,5]<-scales::pvalue(S[,5]) +print(knitr::kable(S,format = 'simple', + caption = 'LMM results of Valence ratings.')) +``` + +## Code 7.6 + +```{r} +#| label: code7-6 +#| warning: false +d <- read.csv('https://raw.githubusercontent.com/tuomaseerola/emr/main/data/raw_ratings.csv') +S <- d %>% + filter(Category=='Sad') %>% + group_by(Category,Gender) %>% + summarise(M=mean(Valence,na.rm=T),SD=sd(Valence,na.rm=T), + .groups = 'drop') +print(S) +``` + +## Code 7.7 + +```{r} +#| label: 7-7 +#| warning: false +library(MusicScienceData) # loads library w data +gender_age_xtab <- table(MusicScienceData::sadness$age, + MusicScienceData::sadness$gender) +print(gender_age_xtab) + +result <- chisq.test(gender_age_xtab) # Chi^2 test +print(result) +``` + +## Code 7.8 + +```{r} +#| label: 7-8 +#| warning: false +library(MusicScienceData) # load library w data +data <- MusicScienceData::soundtrack # define data +r<-cor.test(data$Valence, data$Tension) # calculate correlation +print(r$estimate) # print coefficient +## cor +## -0.827 +print(scales::pvalue(r$p.value)) # print pretty p value +## [1] "<0.001" +print(r$parameter) # print df +``` + +## Code 7.9 + +```{r} +#| label: 7-9 +#| warning: false +library(MusicScienceData) # loads library w data +d1 <- MusicScienceData::soundtrack # get ratings +d2 <- MusicScienceData::soundtrack_features[,c(2:3,5:6)] # select only some features +d1[,17:20] <- as.data.frame(scale(d2)) # normalise the four features (columns 17 to 20) + +tmp <- cor(d1[,c(3,17:20)]) # get correlations +print(round(tmp[2:5,1],2)) # display first line +``` + +## Code 7.10 + +```{r} +#| label: 7-10 +#| warning: false +model.reg <-
lm(Energy ~ RMS + sp_centr + spec_rolloff + + spec_zcr, data = d1) +s <- summary(model.reg) # R2adj = 0.424 (Energy) +print(s) +``` + +## Code 7.11 + +```{r} +#| label: 7-11 +#| warning: false +r <- cor(d1$Energy, d1$RMS) +print( r^2 ) # print the squared correlation +summary(lm(Energy ~ RMS,data=d1)) # Summarise regression +``` diff --git a/docs/Chapter9.1.qmd b/docs/Chapter9.1.qmd new file mode 100644 index 0000000..3b2295f --- /dev/null +++ b/docs/Chapter9.1.qmd @@ -0,0 +1,107 @@ +--- +toc: true +--- + +Open In Colab + +# Ch. 9 - Music analysis + +This is a very simple demonstration of several built-in analytical functions in music21. We carry out a reduction of a Bach chorale, an analysis of harmony using Roman numerals, and add a Lerdahl and Jackendoff type of metrical hierarchy. + + +## Install Music21 (in Colab) + +::: {.callout-important} +The first code segment is to install `music21` and the other elements needed to run the environment. In Colab, press `play` and wait for all commands to be executed - this initial command might take some time as it needs to build the `music21` environment. +::: + +## Install Music21 and Musescore on a local machine + +::: {.callout-important} +For instructions on how to get music21 and Musescore working on a local machine, see [Installation guidelines from music21](https://web.mit.edu/music21/doc/installing/index.html). +::: + +::: {.callout-note} +The script below uses a workaround where the excerpts are first written to an xml file and then converted to a png image. +::: + +```{python} +#| echo: false +#| eval: false +#| id: install + +!pip install --upgrade music21 +!add-apt-repository ppa:mscore-ubuntu/mscore-stable -y +!apt-get update +!apt-get install musescore +!apt-get install xvfb +!sh -e /etc/init.d/x11-common start +import os +os.putenv('DISPLAY', ':99.0') +!start-stop-daemon --start --pidfile /var/run/xvfb.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -screen 0 1024x768x24 -ac +extension GLX +render -noreset +from music21 import * +us = environment.UserSettings() +us['musescoreDirectPNGPath'] = '/usr/bin/mscore' +us['directoryScratch'] = '/tmp' +``` + +## Harmonic and metrical analysis + +Harmonic and metrical analysis of an example excerpt `bach/bwv30.6` using `music21`. + +### Harmonic analysis – Reduction + +First get a Bach chorale. + +```{python} +#| echo: true +#| eval: true +#| label: analyse + +from music21 import * # activate library + +### 1 Select one example from Bach chorales +bwv30_6 = corpus.parse('bach/bwv30.6.xml')# Take an example +#bwv30_6.measures(1, 3).show() # Display 3 bars +bwv30_6.measures(1, 3).write('xml', fp='output.xml') +!mscore output.xml -o images/score1.png --trim-image 0 +``` + +![](images/score1-1.png) + +### Harmonic analysis + +Analyse chords using Roman numerals. + +```{python} +#| label: harmonic +bChords = bwv30_6.chordify() # Slice the chords +for c in bChords.recurse().getElementsByClass('Chord'): + c.closedPosition(forceOctave=4, inPlace=True) +# Run analysis and add Roman numerals as lyrics +for c in bChords.recurse().getElementsByClass('Chord'): + rn = roman.romanNumeralFromChord(c, key.Key('A')) + c.addLyric(str(rn.figure)) +bChords.measures(0, 3).show() # Display the result +bChords.measures(0, 3).write('xml', fp='output.xml') +!mscore output.xml -o images/score2.png --trim-image 0 +``` + +![](images/score2-1.png) + +### Metrical analysis + +Carry out metrical analysis.
+ +```{python} +#| label: metrical +bass = bwv30_6.getElementById('Bass') # Get the bass part +excerpt = bass.measures(1,3) # Bar 1 through 3 +analysis.metrical.labelBeatDepth(excerpt)# Metrical analysis +#excerpt.show() # Display the results +excerpt.write('xml', fp='output.xml') +!mscore output.xml -o images/score3.png --trim-image 0 +!rm output.xml +``` + +![](images/score3-1.png) \ No newline at end of file diff --git a/docs/Chapter9.2.qmd b/docs/Chapter9.2.qmd new file mode 100644 index 0000000..328631a --- /dev/null +++ b/docs/Chapter9.2.qmd @@ -0,0 +1,264 @@ + +# Ch. 9 - Event counts + +Install `music21` and the other elements needed to run this in the Colab environment. Press play and wait for all commands to be executed - this initial command might take some time as it needs to build the `music21` environment. + +```{python} +#| echo: false +#| eval: false +!pip install --upgrade music21 +!add-apt-repository ppa:mscore-ubuntu/mscore-stable -y +!apt-get update +!apt-get install musescore +!apt-get install xvfb +!sh -e /etc/init.d/x11-common start +import os +os.putenv('DISPLAY', ':99.0') +!start-stop-daemon --start --pidfile /var/run/xvfb.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -screen 0 1024x768x24 -ac +extension GLX +render -noreset +from music21 import * +us = environment.UserSettings() +us['musescoreDirectPNGPath'] = '/usr/bin/mscore' +us['directoryScratch'] = '/tmp' +``` + +## Event counts + +```{python} +#| echo: true +#| eval: false +from music21 import * # activate library +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt +``` + +```{python} +#| echo: true +#| eval: true +from music21 import * # activate library +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt +# Define pieces +# These two pieces are related: the same piece with a different harmonisation +bwv110_7 = corpus.parse('bach/bwv110.7.xml') # bwv110.7 +bwv40_3 = corpus.parse('bach/bwv40.3.xml') +``` + +### Extract key and transpose to a common tonic + +```{python} +#| echo: true +#| eval: true +k = bwv110_7.analyze('key') +print(k) +i = interval.Interval(k.tonic, pitch.Pitch('C')) +print(i) +bwv110_7 = bwv110_7.transpose(i) + +k = bwv40_3.analyze('key') +print(k) +i = interval.Interval(k.tonic, pitch.Pitch('C')) +print(i) +bwv40_3 = bwv40_3.transpose(i) + +print('====== Transposed') +t = bwv110_7.analyze('key') +print(t) +t = bwv40_3.analyze('key') +print(t) +``` + +### Calculate pitch-class distribution + +```{python} +#| echo: true +#| eval: true +# pitch-class +pcCount = analysis.pitchAnalysis.pitchAttributeCount(bwv110_7, 'pitchClass') +pc = range(0, 12) +pitchclass = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B') +l_pcCount = [[i, pcCount[i]]for i in pc] +d = pd.DataFrame(data=l_pcCount, columns=['pc_nro', 'count']) +d['Percentage'] = d['count'] / sum(d['count']) +d["Pitch-Class"] = pitchclass +d["Piece"] = 'BWV 110/7' + +pcCount2 = analysis.pitchAnalysis.pitchAttributeCount(bwv40_3, 'pitchClass') +l_pcCount2 = [[i, pcCount2[i]]for i in pc] +d2 = pd.DataFrame(data=l_pcCount2, columns=['pc_nro', 'count']) +d2['Percentage'] = d2['count'] / sum(d2['count']) +d2["Pitch-Class"] = pitchclass +d2["Piece"] = 'BWV 40/3' + +PC = pd.concat([d, d2]) +``` + +### Calculate interval distribution + +```{python} +#| echo: true +#| eval: true +# intervals +df=[] +df2=[] +counts = dict() # add empty dictionary +for x in range(-12, 13): + counts[x] = 0 + +for part in bwv110_7.recurse().parts: + p = part.recurse(classFilter=('Note', 'Rest')) # this is ok but loses rests
+ intervalStream1 = p.melodicIntervals(skipOctaves=True,skipRests=True) + items = [] + for i in intervalStream1.recurse(): + items.append(i.semitones) + for j in items: + counts[j] = counts.get(j, 0) + 1 + +df = pd.DataFrame({'Interval': list(counts.keys()), + 'Counts': list(counts.values())}) +df['Percentage'] = df['Counts'] / sum(df['Counts']) +df["Piece"] = 'BWV 110/7' + +counts = dict() # reset the counts so the second piece is tallied separately +for x in range(-12, 13): + counts[x] = 0 + +for part in bwv40_3.recurse().parts: + p = part.recurse(classFilter=('Note', 'Rest')) # this is ok but loses rests + intervalStream1 = p.melodicIntervals() + items = [] + for i in intervalStream1.recurse(): + items.append(i.semitones) + for j in items: + counts[j] = counts.get(j, 0) + 1 + +df2 = pd.DataFrame({'Interval': list(counts.keys()), + 'Counts': list(counts.values())}) +df2['Percentage'] = df2['Counts'] / sum(df2['Counts']) +df2["Piece"] = 'BWV 40/3' + +IV = pd.concat([df, df2]) +``` +### Calculate duration distribution + +```{python} +#| echo: true +#| eval: true +# durations + +part = bwv110_7.recurse().parts +p = part.recurse() +durCount = analysis.elements.attributeCount(p, 'quarterLength') +du = pd.DataFrame({'Duration': list(durCount.keys()), + 'Counts': list(durCount.values())}) +du['Percentage'] = du['Counts'] / sum(du['Counts']) + +filter = (du['Duration'] < 10) +du = du[filter] +filter = (du['Duration'] >= 0.25) +du = du[filter] +du["Piece"] = 'BWV 110/7' + +part = bwv40_3.recurse().parts +p = part.recurse() +durCount = analysis.elements.attributeCount(p, 'quarterLength') +du2 = pd.DataFrame({'Duration': list(durCount.keys()), + 'Counts': list(durCount.values())}) +du2['Percentage'] = du2['Counts'] / sum(du2['Counts']) + +filter = (du2['Duration'] < 10) +du2 = du2[filter] +filter = (du2['Duration'] >= 0.25) +du2 = du2[filter] +du2["Piece"] = 'BWV 40/3' + +DU = pd.concat([du, du2]) +``` + +## Create plots + +```{python} +#| echo: true +#| eval: true +#| warning: false + +## Set graphic params +sns.set_theme() +sns.set_style("whitegrid") +colors = ["#b8b6b6", "#636362"] +customPalette = sns.set_palette(sns.color_palette(colors)) +sns.set_palette(customPalette) + +plt.rcParams["figure.figsize"] = [7.6, 10.0] +plt.rcParams["figure.autolayout"] = True + +f, axes = plt.subplots(3, 1) +g = sns.barplot(x='Pitch-Class', y='Percentage', data=PC, + orient='v', ax=axes[0], hue='Piece') +g.legend_.remove() +axes[0].text(11, 0.18, "$\chi^2=7.2, p=0.70$", horizontalalignment='right', size='x-small', color='black') + +bar_plot = sns.barplot(x='Interval', y='Percentage', + data=IV, orient='v', ax=axes[1], hue='Piece') +for index, label in enumerate(bar_plot.get_xticklabels()): + if index % 2 == 1: + label.set_visible(True) + else: + label.set_visible(False) + +axes[1].text(25, 0.12, "$\chi^2=17.2, p=0.37$", horizontalalignment='right', size='x-small', color='black') + +h = sns.barplot(x='Duration', y='Percentage', data=DU, + orient='v', ax=axes[2], hue='Piece') + +axes[2].text(5.25, 0.45, "$\chi^2=3.9, p=0.55$", horizontalalignment='right', size = 'x-small', color='black') + +h.legend_.remove() +plt.show() +``` +## Statistics + +### Pitch-class + +```{python} +#| echo: true +#| eval: true +#| output: asis + +from scipy import stats +import numpy as np + +PC2 = PC.pivot(index='pc_nro', columns='Piece', values='count') +PC2['Sum'] = PC2.sum(axis=1) +PC3 = PC2[PC2.Sum != 0] +obs2 = np.array([PC3['BWV 110/7'], PC3['BWV 40/3']]) +c, p, dof, exp = stats.chi2_contingency(obs2) +print(f"_Chi_$^2$ value = {round(c,2)}, _p_-value = {round(p,3)}, _df_ = {dof}") +``` + +### Intervals + +```{python} +#| echo: true +#| eval: true +#| output: asis + +IV2 = IV.pivot(index='Interval', columns='Piece', values='Counts') +IV2['Sum'] = IV2.sum(axis=1) +IV3 = IV2[IV2.Sum != 0] +obs2 = np.array([IV3['BWV 110/7'], IV3['BWV 40/3']]) +c, p, dof, exp = stats.chi2_contingency(obs2) +print(f"_Chi_$^2$ value = {round(c,2)}, _p_-value = {round(p,3)}, _df_ = {dof}") +``` +### Durations + +```{python} +#| echo: true +#| eval: true +#| output: asis +DU2 = DU.pivot(index='Duration', columns='Piece', values='Counts') +DU2 = DU2.replace(np.nan,0) +DU2['Sum'] = DU2.sum(axis=1) +DU3 = DU2[DU2.Sum != 0] + +obs2 = np.array([DU3['BWV 110/7'], DU3['BWV 40/3']]) +c, p, dof, exp = stats.chi2_contingency(obs2) +print(f"_Chi_$^2$ value = {round(c,2)}, _p_-value = {round(p,3)}, _df_ = {dof}") +``` diff --git a/docs/Chapter9.3.qmd b/docs/Chapter9.3.qmd new file mode 100644 index 0000000..a083668 --- /dev/null +++ b/docs/Chapter9.3.qmd @@ -0,0 +1,87 @@ + +# Ch. 9 – Key-finding + +Install `music21` and the other elements needed to run the environment. +Press play and wait for all commands to be executed - this initial command might take some time as it needs to build the `music21` environment. + +```{python} +#| echo: false +#| eval: false +!pip install --upgrade music21 +!add-apt-repository ppa:mscore-ubuntu/mscore-stable -y +!apt-get update +!apt-get install musescore +!apt-get install xvfb +!sh -e /etc/init.d/x11-common start +import os +os.putenv('DISPLAY', ':99.0') +!start-stop-daemon --start --pidfile /var/run/xvfb.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -screen 0 1024x768x24 -ac +extension GLX +render -noreset +from music21 import * +us = environment.UserSettings() +us['musescoreDirectPNGPath'] = '/usr/bin/mscore' +us['directoryScratch'] = '/tmp' + +!wget https://raw.githubusercontent.com/tuomaseerola/music21/master/bwv306.musicxml +``` + +## Key-finding + +Key-finding algorithm applied to an example excerpt (`bach/bwv30.6`). + +```{python} +import sys +from music21 import * # activate library +import pandas as pd +``` + +```{python} +bwv30_6 = corpus.parse('bach/bwv30.6.xml')# 30.6 +print(bwv30_6.analyze('key.krumhanslkessler')) +bwv30_6_3meas = bwv30_6.measures(1,4) # Opening measures + +KK = analysis.discrete.KrumhanslKessler() # Key profile +wa = analysis.windowed.WindowedAnalysis(bwv30_6_3meas, KK) +a,b = wa.analyze(2, windowType='overlap') + +keyclar=[]; mode=[]; key=[] +for x in range(len(a)): + key.append(a[x][0]) + mode.append(a[x][1]) + keyclar.append(a[x][2]) +data=pd.DataFrame({'key':key,'mode':mode,'r':keyclar}) +print(data) +``` + +## Tension + +Analysis of tonal tension using the model by Herremans and Chew (2016), implemented in the `partitura` library for Python. + +```{python} +#| echo: false +#| eval: false +! pip install partitura +``` + +```{python} +#| echo: true +#| eval: true +import partitura +import numpy as np +``` + +```{python} +part = partitura.load_musicxml('data/bwv306.musicxml') +tonal_tension = partitura.musicanalysis.estimate_tonaltension(part, ss='onset') +x = tonal_tension['onset_beat'][0:50].tolist() # onset times in beats +y = tonal_tension['cloud_momentum'][0:50] + +d = {'beat': x,'tension': y} +df = pd.DataFrame(data=d) +print(df) +``` + +### References + +- Herremans, D., & Chew, E. (2016). Tension ribbons: Quantifying and visualising tonal tension. Second International Conference on Technologies for Music Notation and Representation. _TENOR, 2_.
+ + diff --git a/docs/Chapter9.4.qmd b/docs/Chapter9.4.qmd new file mode 100644 index 0000000..4624c1e --- /dev/null +++ b/docs/Chapter9.4.qmd @@ -0,0 +1,123 @@ + + +# Ch. 9 - Expressive Timing + +This notebook demonstrates expressive timing profiles of real performances, using data from [https://github.com/fosfrancesco/asap-dataset](https://github.com/fosfrancesco/asap-dataset). + +## Load libraries + +```{r} +#| echo: true +#| eval: true +#| warning: false +#| message: false +#| label: libraries +library(ggplot2, quietly = TRUE) +library(tidyverse, quietly = TRUE) +library(dplyr, quietly = TRUE) +``` + +## Get data from the ASAP project + +This gets the metadata from the ASAP project (see Foscarin et al., 2020) and selects the piece titled 'Preludes_op_23_4'. + +```{r} +#| echo: true +#| eval: true +#| output: asis +#| id: data +d <- read.csv("https://raw.githubusercontent.com/fosfrancesco/asap-dataset/master/metadata.csv",header = TRUE,sep = ',') +df<-dplyr::filter(d,title=='Preludes_op_23_4') +df<-df[1:3,] +print(knitr::kable(head(df[,1:3],3))) +``` + +## Read score annotations + +```{r} +#| eval: true +#| output: asis +#| label: read +basedir <-'https://raw.githubusercontent.com/tuomaseerola/emr/master/' +deadpan <- read.csv(paste0(basedir,'data/midi_score_annotations.txt'),header = FALSE, sep = '\t') +print(knitr::kable(head(deadpan,3))) +fn <- NULL +fn[1]<-'data/ChenGuang12M_annotations.txt' +fn[2]<-'data/MorozovS09_annotations.txt' +fn[3]<-'data/WuuE07M_annotations.txt' +Performer <- c('Chen Guang','Yevgeny Morozov','Elliot Wuu') +``` + +```{r} +#| echo: false +#| eval: true +#| label: add_function + +normperf <- function(d=NULL) { + # function to normalise performance timing given the score + # and calculate the deviation in ms + d$perf_N<-d$perf + d$perf_N<-d$perf_N - min(d$perf_N) + d$perf_N <- d$perf_N * (max(d$score)/max(d$perf_N)) + d$delta <- d$perf_N - d$score + d$delta2 <- c(0,diff(d$delta)*1000) + d$scoredelta <- c(0,diff(d$score)*1000) + d$rawperfdelta <- c(0,diff(d$perf)*1000) + d$rawperf_Ndelta <- c(0,diff(d$perf_N)*1000) + d$scoredelta_rawperf_Ndelta <- d$rawperf_Ndelta - d$scoredelta + d$annotation<-d$annotation + result <- d + return(result) +} +``` + +## Choose an extract from all performers + +```{r} +#| echo: true +#| eval: true +#| output: asis +#| label: extract +D <- NULL +for (k in 1:length(fn)) { + perf<-read.csv(paste0(basedir,fn[k]),header=F,sep='\t') + DF<-data.frame(score=deadpan$V1,perf=perf$V1, + annotation=deadpan$V3) + DF <- dplyr::filter(DF,score < 30) # Limit to first 10 bars = 3*10 beats + DF2 <- normperf(DF) # Defined previously + DF2$Performer<-Performer[k] + D<-rbind(D,DF2) +} + +options(encoding = "UTF-8") +#library(dplyr) +DF <- dplyr::filter(D,score < 30) # First 10 bars = 3*10 beats +print(knitr::kable(head(DF[,1:6],3))) +``` +## Plot expressive timing deviations + +```{r} +#| echo: true +#| eval: true +#| warning: false +#| label: plot +options(repr.plot.width = 12, repr.plot.height = 5) +g1 <- ggplot(DF,aes(x=perf_N,y=scoredelta_rawperf_Ndelta,colour=Performer,shape=Performer))+ + geom_line(alpha=0.85)+ + geom_point(alpha=0.85,size=2.5)+ + scale_color_grey(start = 0.30,end = 0.8)+ + geom_smooth(aes(colour = NULL,shape=NULL), method = "loess", span=0.2,se=FALSE,colour='black',linewidth=1.25)+ + scale_x_continuous(limits=c(0,30),breaks = seq(0,30,by=3),expand = c(0.02,0.002),labels=(seq(0,30,by=3)/3)+1) + + xlab('Bar')+ + ylab('Deviation in ms')+ + theme_bw()+ + theme(legend.position=c(.85, .80))+ + theme(legend.background = element_blank()) + # Remove overall border + theme(legend.key
= element_blank()) +print(g1) +``` + +## References + +- Foscarin, F., Mcleod, A., Rigaux, P., Jacquemard, F., & Sakai, M. (2020). ASAP: a dataset of aligned scores and performances for piano transcription. In _International Society for Music Information Retrieval Conference_ (pp. 534-541). + diff --git a/docs/Chapter9.5.qmd b/docs/Chapter9.5.qmd new file mode 100644 index 0000000..cb2c490 --- /dev/null +++ b/docs/Chapter9.5.qmd @@ -0,0 +1,83 @@ + +# Ch. 9 – Synchronization + +## Load libraries + +```{r} +#| echo: true +#| eval: true +#| warning: false +#| message: false +#| label: libraries +#if (!require(devtools)) install.packages("devtools",quiet=TRUE) +#devtools::install_github("tuomaseerola/onsetsync") +library(onsetsync) +library(dplyr,quiet=TRUE) +#install.packages("cowplot",quiet=TRUE) +library(cowplot) +``` + +## Explore synchronisation in Cuban Salsa and Son + +Take an example track from IEMP corpus and visualise beats and calculate the synchronies. + +```{r} +#| echo: true +#| eval: true +#| warning: false +#| message: false +#| label: plot +set.seed(1234) +CSS_Song2 <- onsetsync::CSS_IEMP[[2]] +fig1 <- plot_by_beat(df = CSS_Song2, + instr = c('Bass','Clave','Guitar','Tres'), + beat = 'SD', + virtual = 'Isochronous.SD.Time', + pcols = 2) +inst <- c('Clave','Bass','Guitar','Tres') # Define instruments +dn <- sync_execute_pairs(CSS_Song2,inst,100,1,'SD') +fig2 <- plot_by_pair(dn) # plot +G <- cowplot::plot_grid(fig1,fig2,nrow = 2) +print(G) +``` + +```{r} +#| echo: true +#| eval: true +#| warning: false +#| message: false +#| label: output1 +round(mean(dn$asynch$`Clave - Guitar`)*1000,1) +``` + +```{r} +#| echo: true +#| eval: true +#| warning: false +#| message: false +#| label: output2 +round(mean(dn$asynch$`Clave - Bass`)*1000,1) +``` + +```{r} +#| echo: true +#| eval: true +#| warning: false +#| message: false +#| label: output3 +round(mean(dn$asynch$`Bass - Guitar`)*1000,1) +``` + +```{r} +#| echo: true +#| eval: true +#| warning: false +#| message: false +#| label: output4 +round(mean(dn$asynch$`Bass - Tres`)*1000,1) +``` + +## References + +Poole, A. (2021). Groove in Cuban Son and Salsa Performance. _Journal of the Royal Musical Association, 146(1)_, 117-145. doi:10.1017/rma.2021.2 + diff --git a/docs/Citation.qmd b/docs/Citation.qmd new file mode 100755 index 0000000..15580a7 --- /dev/null +++ b/docs/Citation.qmd @@ -0,0 +1,41 @@ + +# Citation + +## Book bibliographic details in APA format: + +::: {.callout-note} +Eerola, T. (in press). _Music and Science – Guide to Empirical Music Research_. SEMPRE Studies in the Psychology of Music. London, UK: Routledge. +::: + +## Book bibliographic details in `bibtex` format: + +```{python} +#| eval: false +@book{eerola2024, + address = {London, UK}, + author = {Eerola, T.}, + publisher = {Routledge}, + series = {SEMPRE Studies in the Psychology of Music}, + title = {Music and Science -- Guide to Empirical Music Research}, + year = {2024} +} +``` + +## Website bibliographic details in APA format: + +::: {.callout-note} +Eerola, T. (2024). _Music and Science – Guide to Empirical Music Research_. 
[https://tuomaseerola.github.io/emr/](https://tuomaseerola.github.io/emr/) +::: + +## Website bibliographic details in `bibtex` format: + +```{python} +#| eval: false +@misc{eerola2024online, + author = {Eerola, T.}, + title = {Music and Science -- Guide to Empirical Music Research}, + year = {2024}, + url = {https://tuomaseerola.github.io/emr/}, + urldate = {2024-1-1} +} +``` diff --git a/docs/Links.qmd b/docs/Links.qmd new file mode 100644 index 0000000..218dd96 --- /dev/null +++ b/docs/Links.qmd @@ -0,0 +1,46 @@ + +# Links + +This notebook gives links to online sources related to R, Python, and statistics, with complete online examples. + + +## Online Tutorials (R and Python) + +[An Introduction to R](http://cran.r-project.org/doc/manuals/r-release/R-intro.html) +: The official guidance from _The Comprehensive R Archive Network (CRAN)_. It may not always be the most compelling introduction, but it is exhaustive at least. + +[Quick-R](http://www.statmethods.net) +: A really good source of R examples for almost all operations (manipulation, representation, functions, syntax, stats, figures, etc.). + +[R tutorials](http://www.cyclismo.org/tutorial/R/) +: Another fairly clear collection of tutorials. + +[RStudio online learning pages](http://www.rstudio.com/resources/training/online-learning/#R) +: [R Studio](http://www.rstudio.com) is a fancy and great visual GUI on top of R for all platforms, and they have released very useful documentation, tutorials, demos, etc. + +[Advanced R](http://adv-r.had.co.nz) +: Hadley Wickham, author of many of the best packages, has created this resource (book and online version). + +## Statistics Handbooks with complete R scripts (online) + +[Practical Regression and Anova using R](http://cran.r-project.org/doc/contrib/Faraway-PRA.pdf) +: A handbook of the basic statistical operations written by Julian Faraway. + +[Data Analysis and Graphics Using R - An Example-Based Approach](http://maths-people.anu.edu.au/~johnm/r-book/daagur3.html) +: A handbook in its 3rd printing, written by John Maindonald and John Braun. This source contains exercises, slides, the scripts for all graphs of the book, etc. + +## Other Online Resources + +[R blogger](http://www.r-bloggers.com) +: A multipurpose source for news and latest issues in R. + +[Collection of Resources at CRAN](http://cran.r-project.org/other-docs.html) +: A large collection of different resources (e.g. R for Matlab-minded, Fitting Distributions with R, Reference Cards, Data-mining with R, and so on). + +[R Documentation](http://www.rdocumentation.org) +: Searchable online documentation. + +[StackOverflow](http://stackoverflow.com/questions/tagged/r) +: A forum of questions and answers about computer programming, including R. Contains over 120,000 questions related to R. + + diff --git a/docs/Technical.qmd b/docs/Technical.qmd new file mode 100755 index 0000000..698ec70 --- /dev/null +++ b/docs/Technical.qmd @@ -0,0 +1,32 @@ + +# Technical Notes + +_Jupyter notebooks_ have been made to be compatible with Colab. For _Python_, the libraries included (as of December 2022) are: + +- Python version 3.10.12 +- matplotlib version 3.7.1 +- librosa version 0.10.1 +- numpy version 1.25.2 + +and for _R_, the versions in Colab are: + +- `R version 4.2.2` +- `tidyverse 1.3.1` +- `ggplot2 3.4.0` + +Some R notebooks install extra libraries. Most of the R notebooks require installing the `MusicScienceData` library available at https://github.com/tuomaseerola/MusicScienceData.
In R, this library can be installed as follows: + +```{r} +#| echo: true +#| eval: false +#| label: libraries +if (!require(devtools)) install.packages("devtools",quiet=TRUE) +devtools::install_github("tuomaseerola/MusicScienceData@main",quiet=TRUE) + +library(MusicScienceData) +``` + +- See the [Python](Version_Python.qmd) and [R](Version_R.qmd) version pages. + diff --git a/docs/Version_Python.qmd b/docs/Version_Python.qmd new file mode 100644 index 0000000..3a37031 --- /dev/null +++ b/docs/Version_Python.qmd @@ -0,0 +1,25 @@ + +# Establish Python library versions within Colab + +In March 2024 Colab had: + +- Python version 3.10.12 +- matplotlib version 3.7.1 +- librosa version 0.10.1 +- numpy version 1.25.2 + +```{python} +#| echo: true +#| eval: true +#| label: python-versions +print('Current system has:') +import platform +print('Python version ' + platform.python_version()) +import matplotlib +print('matplotlib version ' + matplotlib.__version__) +import librosa +import librosa.display +print('librosa version ' + librosa.__version__) +import numpy as np +print('numpy version ' + np.__version__) +``` diff --git a/docs/Version_R.qmd b/docs/Version_R.qmd new file mode 100644 index 0000000..060bbac --- /dev/null +++ b/docs/Version_R.qmd @@ -0,0 +1,43 @@ + +# Establish R library versions within Colab + +In March 2024, the Colab version of R was: + +```{r} +#| echo: true +#| eval: false +R version 4.3.3 (2024-02-29) +tidyverse 2.0.0 +ggplot2 3.4.4 +``` + +Test the Colab version of R. + +```{r} +#| echo: true +#| eval: true +#| label: versions +print(R.version.string) +``` + +## Libraries + +Show the installed library versions. + +```{r} +#| echo: true +#| eval: true +#| label: libraries +#print(installed.packages()) +packageVersion("tidyverse") +packageVersion("ggplot2") +``` + +## Show other information + +```{r} +#| echo: true +#| eval: true +#| label: sessioninfo +sessionInfo() +``` diff --git a/docs/index.html b/docs/index.html index fd5450f..fd4660b 100644 --- a/docs/index.html +++ b/docs/index.html @@ -116,24 +116,6 @@ @@ -340,8 +208,8 @@

Music and Science – Guide to Empirical Music Research (Book) I am currently working towards finishing this file. -->

This repository contains the electronic materials for Routledge book titled Music and Science – Guide to Empirical Music Research by Tuomas Eerola, at Durham University and Music and Science Lab. The book is scheduled to be published in 2024.

-

The materials are Jupyter notebooks with code examples in R or Python). The notebooks are designed to be run in Google Colab within a browser. Colab service is free and does not require any installations or software. It is also possible to run the notebooks within a Jupyter server in your own computer or to use the code directly in R or in Python. See technical notes about the notebooks and package versions in Colab.

-

The public-facing version of this repository is at https://tuomaseerola.github.io/emr/. See citation for bibliographic details of the book.

+

The materials are Jupyter notebooks with code examples in R or Python). The notebooks are designed to be run in Google Colab within a browser. Colab service is free and does not require any installations or software. It is also possible to run the notebooks within a Jupyter server in your own computer or to use the code directly in R or in Python. See technical notes about the notebooks and package versions in Colab.

+

The public-facing version of this repository is at https://tuomaseerola.github.io/emr/. See citation for bibliographic details of the book.

ver 5/4/2024

Contents

@@ -362,7 +230,7 @@

Contents

-  Ch. 1 - Notebook basics +  Ch. 1 - Notebook basics Chapter1.ipynb Colab R @@ -386,7 +254,7 @@

Contents

-  Ch. 3 - Historic profiles +  Ch. 3 - Historic profiles Chapter3.ipynb Colab R @@ -398,7 +266,7 @@

Contents

-  Ch. 4 - Correlations +  Ch. 4 - Correlations Chapter4.ipynb Colab R @@ -416,25 +284,25 @@

Contents

-  Ch. 6 - Using R +  Ch. 6 - Using R Chapter6.1.ipynb Colab R -  Ch. 6 - Data organisation +  Ch. 6 - Data organisation Chapter6.2.ipynb Colab R -  Ch. 6 - Diagnostics +  Ch. 6 - Diagnostics Chapter6.3.ipynb Colab R -  Ch. 6 - Outliers +  Ch. 6 - Outliers Chapter6.4.ipynb Colab R @@ -446,7 +314,7 @@

Contents

-  Ch. 7 - Inferential statistics +  Ch. 7 - Inferential statistics Chapter7.ipynb Colab R @@ -464,31 +332,31 @@

Contents

-  Ch. 9 - Music analysis +  Ch. 9 - Music analysis Chapter9.1.ipynb Colab Python -  Ch. 9 - Event counts +  Ch. 9 - Event counts Chapter9.2.ipynb Colab Python -  Ch. 9 - Key-Finding +  Ch. 9 - Key-Finding Chapter9.3.ipynb Colab Python -  Ch. 9 - Expressive timing +  Ch. 9 - Expressive timing Chapter9.4.ipynb Colab R -  Ch. 9 - Synchronisation +  Ch. 9 - Synchronisation Chapter9.5.ipynb Colab R @@ -500,31 +368,31 @@

Contents

-  Ch. 10 - Basics (sines) +  Ch. 10 - Basics (sines) Chapter10.1.ipynb Colab Python -  Ch. 10 - Spectrum and envelope +  Ch. 10 - Spectrum and envelope Chapter10.2.ipynb Colab Python -  Ch. 10 - Physical +  Ch. 10 - Physical Chapter10.3.ipynb Colab Python -  Ch. 10 - Perceptual +  Ch. 10 - Perceptual Chapter10.4.ipynb Colab Python -  Ch. 10 - Semantic +  Ch. 10 - Semantic Chapter10.5.ipynb Colab Python @@ -542,7 +410,7 @@

Contents

Python -  Ch. 11 - Synchrony +  Ch. 11 - Synchrony Chapter11.3.qmd Colab R @@ -560,31 +428,31 @@

Contents

-  Citation +  Citation Markdown -  Technical +  Technical Markdown -  Links +  Links Markdown -  Version (R) +  Version (R) Version_R.ipynb Colab R -  Version (Python) +  Version (Python) Version_Python.ipynb Colab Python @@ -677,7 +545,7 @@

Contents

}); var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//); var mailtoRegex = new RegExp(/^mailto:/); - var filterRegex = new RegExp('/' + window.location.host + '/'); + var filterRegex = new RegExp("https:\/\/tuomaseerola\.github\.io\/emr\/index\.html"); var isInternal = (href) => { return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href); } @@ -1019,12 +887,12 @@

Contents

diff --git a/docs/robots.txt b/docs/robots.txt new file mode 100644 index 0000000..5201bee --- /dev/null +++ b/docs/robots.txt @@ -0,0 +1 @@ +Sitemap: https://tuomaseerola.github.io/emr/index.html/sitemap.xml diff --git a/docs/search.json b/docs/search.json index be19430..ab722b0 100644 --- a/docs/search.json +++ b/docs/search.json @@ -1,1181 +1,4 @@ [ - { - "objectID": "Technical.html", - "href": "Technical.html", - "title": "Technical Notes", - "section": "", - "text": "Technical Notes\nJupyter notebooks have been made to be compatible with Colab. For Python, the libraries included are in December 2022:\n\nPython version 3.10.12\nmatplotlib version 3.7.1\nlibrosa version 0.10.1\nnumpy version 1.25.2\n\nand for R, the version in the Colab are:\n\nR version 4.2.2\ntidyverse 1.3.1\nggplot2 3.4.0\n\nSome R notebooks install extra libraries. Most of the R notebooks require installing MusicScienceData library available at https://github.com/tuomaseerola/MusicScienceData.\nIn R, this library can be installed:\n\nif (!require(devtools)) install.packages(\"devtools\",quiet=TRUE)\ndevtools::install_github(\"tuomaseerola/MusicScienceData@main\",quiet=TRUE)\n\nlibrary(MusicScienceData)\n\n\nSee Python and R version.\n\n\n\n\n\n Back to top", - "crumbs": [ - "Other", - "Technical Notes" - ] - }, - { - "objectID": "Version_Python.html", - "href": "Version_Python.html", - "title": "Establish Python library versions within Colab", - "section": "", - "text": "In March 2024 Colab had:\n\nPython version 3.10.12\nmatplotlib version 3.7.1\nlibrosa version 0.10.1\nnumpy version 1.25.2\n\n\nprint('Current system has:')\nimport platform\nprint('Python version ' + platform.python_version())\nimport matplotlib\nprint('matplotlib version ' + matplotlib.__version__)\nimport librosa\nimport librosa.display\nprint('librosa version ' + librosa.__version__)\nimport numpy as np\nprint('numpy version ' + np.__version__)\n\nCurrent system has:\nPython version 3.9.18\nmatplotlib version 3.7.2\nlibrosa version 0.10.1\nnumpy version 1.24.4\n\n\n\n\n\n Back to top" - }, - { - "objectID": "Links.html", - "href": "Links.html", - "title": "Links", - "section": "", - "text": "This notebook gives links to online sources related to R and Python and statistics with complete online examples..\n\n\n\nAn Introduction to R\n\nThe official guidance from The Comprehensive R Archive Network (CRAN). May not be always the most compelling introduction but exhaustive at least.\n\nQuick-R\n\nReally good source of R examples for almost all operations (manipulation, representation, functions, syntax, stats, figures, etc.).\n\nR tutorials\n\nAnother fairly clear collection of tutorials.\n\nRStudio online learning pages\n\nR Studio is fancy and great visual GUI on top of the R for all platforms and they have released very useful documentations, tutorials, demos, etc.\n\nAdvanced R\n\nAuthor of the best packages, Hadley Wickham, has created this resource (book and online version).\n\n\n\n\n\n\nPractical Regression and Anova using R\n\nA handbook of the basic statistical operations written by Julian Faraway.\n\nData Analysis and Graphics Using R - An Example-Based Approach\n\nHandbook in 3rd printing, written by John Maindonald and John Braun. This source contains exercises, slides, the scripts for all graphs of the book, etc.\n\n\n\n\n\n\nR blogger\n\nMultipurpose source for news and latest issues in R.\n\nCollection of Resources at CRAN\n\nLarge collection of different resources (e.g. 
R for Matlab-minded, Fitting Distributions with R, Reference Cards, Data-mining with R, and so on).\n\nR Documentation\n\nSearchable online documentation.\n\nStackOverflow\n\nForum of questions and answers about computer programming, including R. Contains over 120,000 questions related to R.", - "crumbs": [ - "Other", - "Links" - ] - }, - { - "objectID": "Chapter11.2.html#summary", - "href": "Chapter11.2.html#summary", - "title": "Ch. 11 – Genre classification", - "section": "Summary", - "text": "Summary\nThis notebook will look at a classic genre categorization study and dataset by Tzanetakis & Cook (2002) and will conduct a simple classification of genre based on acoustic features extracted from the audio files. The full data contains 100 audio excerpts from each of 10 different genres (1000 clips in total), but we are going to start with a smaller set to keep this light to run. It should be noted that the selection of the excerpts for this dataset was not particularly rigorous and represented the collection of music that George Tzanetakis had at his disposal at the time. This dataset has some quirks and imperfections, but I think it is still a fun, classic and illustrative example to explore.", - "crumbs": [ - "Corpus studies", - "Ch. 11 – Genre classification" - ] - }, - { - "objectID": "Chapter11.2.html#load-dataset", - "href": "Chapter11.2.html#load-dataset", - "title": "Ch. 
11 – Genre classification", - "section": "Load dataset", - "text": "Load dataset\nWe first install the mirdata library.\n\nimport sys\n!{sys.executable} -m pip install mirdata\n\nThen we initialise the library and download the audio excerpts needed. I only take 100 excerpts here, but you can take all 1000 excerpts by altering the script below.\n\nimport mirdata\n#print(mirdata.list_datasets())\ngtzan_genre = mirdata.initialize('gtzan_genre')\ngtzan = mirdata.initialize('gtzan_genre', version='mini') # This is 100 excerpts\n#gtzan = mirdata.initialize('gtzan_genre') # This is 1000 excerpts (uncomment if you want to analyse the full data)\ngtzan.download()\nprint('Downloaded',len(gtzan.track_ids),'tracks')\n\nINFO: Downloading ['mini', 'tempo_beat_annotations'] to /Users/tuomaseerola/mir_datasets/gtzan_genre\nINFO: [mini] downloading main.zip\nINFO: /Users/tuomaseerola/mir_datasets/gtzan_genre/main.zip already exists and will not be downloaded. Rerun with force_overwrite=True to delete this file and force the download.\nINFO: [tempo_beat_annotations] downloading annot.zip\nINFO: /Users/tuomaseerola/mir_datasets/gtzan_genre/annot.zip already exists and will not be downloaded. Rerun with force_overwrite=True to delete this file and force the download.\n\n\nDownloaded 100 tracks\n\n\nLet’s look at an example (track ID 88).\n\nID = 88\ntracks = gtzan.load_tracks()\nex = tracks[gtzan.track_ids[ID]]\nprint([\"Genre:\", ex.genre, \"Name:\", ex.track_id, \"Tempo:\",ex.tempo,])\nprint(ex.audio[1])\nplt.figure(figsize=(8, 2))\nlibrosa.display.waveshow(y = ex.audio[0], sr = ex.audio[1])\nipd.display(ipd.Audio(data = ex.audio[0], rate = ex.audio[1]))\n\n['Genre:', 'pop', 'Name:', 'pop.00008', 'Tempo:', 84.1]\n22050", - "crumbs": [ - "Corpus studies", - "Ch. 11 – Genre classification" - ] - }, - { - "objectID": "Chapter11.2.html#explore-features", - "href": "Chapter11.2.html#explore-features", - "title": "Ch. 11 – Genre classification", - "section": "Explore features", - "text": "Explore features\nLet’s look at some of the features across genres. Are there differences in dynamics or brightness?\n\n\n rmse spec_cent spec_bw rolloff zcr spec_ctr\n0 0.036233 1505.357461 1559.228895 2717.238764 0.098223 23.372866\n1 0.030610 1361.006486 1441.739951 2389.011463 0.087766 25.186866\n2 0.043828 1490.274810 1600.005082 2785.418914 0.090046 22.894315\n3 0.029426 1526.628932 1499.462050 2916.150271 0.108946 25.663545", - "crumbs": [ - "Corpus studies", - "Ch. 11 – Genre classification" - ] - }, - { - "objectID": "Chapter11.2.html#feature-space", - "href": "Chapter11.2.html#feature-space", - "title": "Ch. 11 – Genre classification", - "section": "Feature space", - "text": "Feature space\nHow large is our feature space, and do we have features that are redundant, that is, highly similar to each other? 
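The extraction step that produced the feature table above is not shown in this excerpt. A minimal sketch of how such summary features could be computed with librosa follows; the file name and the default frame parameters are assumptions rather than the notebook's own settings.

```{python}
#| eval: false
import numpy as np
import librosa

# Hypothetical example file; the notebook works with the downloaded GTZAN tracks
y, sr = librosa.load('pop.00008.wav')

# Frame-wise features, averaged over time into one summary value per track
features = {
    'rmse': np.mean(librosa.feature.rms(y=y)),
    'spec_cent': np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)),
    'spec_bw': np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr)),
    'rolloff': np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)),
    'zcr': np.mean(librosa.feature.zero_crossing_rate(y)),
    'spec_ctr': np.mean(librosa.feature.spectral_contrast(y=y, sr=sr)),
}
print(features)
```

Averaging the frame-wise curves into a single number per track is what gives the one-row-per-excerpt table above. With the features in hand, we can turn to the redundancy question.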
This can be easily explored by visualising the correlations between all features.\n\ncorr = df.iloc[0:99,1:27].corr() # Compute the correlation matrix\nmask = np.triu(np.ones_like(corr, dtype=bool)) # Generate a mask for the upper triangle\nf, ax = plt.subplots(figsize=(8, 8)) # Define matplotlib figure\ncmap = sns.diverging_palette(230, 20, as_cmap=True) # Custom colormap\nsns.heatmap(corr, mask=mask, cmap=cmap, vmax=1.00, center=0,\n square=True, linewidths=.5, cbar_kws={\"shrink\": .5}) # Draw the heatmap with the mask and correct aspect ratio\nplt.show()\n\n\n\n\n\n\n\n\nClassification and machine-learning algorithms typically deal well with numerous features, but here we have only 100 observations and 39 variables, which is not a healthy proportion (too many variables compared to observations). Usually it is a good idea to have a ratio of 10:1 or 15:1 or even 20:1 of observations to predictors. Based on the correlation matrix, what would you eliminate?\nFor instance, all chroma features have high positive correlations and some of the timbral features seem to be related. Let’s trim the selection, as we have quite little data when using the mini dataset.\n\ndf_trimmed = df.iloc[:,0:22]", - "crumbs": [ - "Corpus studies", - "Ch. 11 – Genre classification" - ] - }, - { - "objectID": "Chapter11.2.html#cross-validation-of-the-model", - "href": "Chapter11.2.html#cross-validation-of-the-model", - "title": "Ch. 11 – Genre classification", - "section": "Cross-validation of the model", - "text": "Cross-validation of the model\nWe cross-validate the model, which means that we split the data into training and testing sets. We first train the model on the training set, which here is a randomly selected 70% of the data. Once we have trained the model, we test it against the unseen data (test set, 30% of the data) to assess how the model performs. This could be done by alternating the selection of the training and testing sets; we could do this 10 times and average the results (this is called k-fold cross-validation).", - "crumbs": [ - "Corpus studies", - "Ch. 11 – Genre classification" - ] - }, - { - "objectID": "Chapter11.2.html#stratifying-the-sample", - "href": "Chapter11.2.html#stratifying-the-sample", - "title": "Ch. 11 – Genre classification", - "section": "Stratifying the sample", - "text": "Stratifying the sample\nWhen we randomly split the data into training and testing sets, we might want to stratify the data according to genre, which makes sure that we have a similar proportion of examples from each genre in both sets.", - "crumbs": [ - "Corpus studies", - "Ch. 11 – Genre classification" - ] - }, - { - "objectID": "Chapter11.2.html#normalize-variables", - "href": "Chapter11.2.html#normalize-variables", - "title": "Ch. 11 – Genre classification", - "section": "Normalize variables", - "text": "Normalize variables\nWe also want to normalize the variables. This is not so crucial for the random forest model that we are going to use, but usually it is a good idea to eliminate the effect that differences in feature ranges have on the model. 
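One caveat before the code below: sklearn's preprocessing.normalize scales each row (each observation) to unit norm, which is not the same operation as z-scoring the feature columns. A z-score version would use StandardScaler instead; here is a hedged sketch of that alternative, together with the 10-fold cross-validation described above (the variable names follow the code below, and the scores will differ from the recorded output):

```{python}
#| eval: false
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

# Column-wise z-scores: every feature ends up with mean 0 and SD 1
Xz = StandardScaler().fit_transform(X)

# 10-fold cross-validation: fit and test ten times, then average the accuracy
RF = RandomForestClassifier(n_estimators=1000, max_depth=10, random_state=0)
scores = cross_val_score(RF, Xz, y, cv=10)
print(round(scores.mean(), 4))
```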
To normalize the variables, we turn them into z-scores, where the mean is 0 and standard deviation is 1.\n\nimport pandas as pd\nimport sklearn as sk\nfrom sklearn.metrics import confusion_matrix\nfrom sklearn import preprocessing\nfrom sklearn.model_selection import train_test_split\nfrom sklearn import metrics\nfrom sklearn.ensemble import RandomForestClassifier\n\nX = df_trimmed.drop('genre', axis = 1)\nXn = preprocessing.normalize(X)\ny = df_trimmed['genre']\n\ntest_size = 0.30 # taking 70:30 training and test set\nseed = 9 # Random number seeding for repeatability of the code\nX_train, X_test, y_train, y_test = train_test_split(Xn, y, test_size=test_size, random_state=seed,stratify=y)\n\nRF = RandomForestClassifier(n_estimators=1000, max_depth=10, random_state=0).fit(X_train, y_train)\n#RF.predict(X_test)\n#print(round(RF.score(X_test, y_test), 4))\ny_pred_test = RF.predict(X_test)\n\nAnd we have the results:\n\nprint('Correct classification rate:',round(RF.score(X_test, y_test), 4))\n\nCorrect classification rate: 0.7\n\n\nIs this good? To answer that question, you should think about what a model that predicts at random would achieve by chance. You could also check how this model compares to the work published by Tzanetakis. Finally, you might want to consider what level of accuracy would be expected from listeners; there might even be studies about this to give you a benchmark.", - "crumbs": [ - "Corpus studies", - "Ch. 11 – Genre classification" - ] - }, - { - "objectID": "Chapter11.2.html#visualise-confusion-matrix", - "href": "Chapter11.2.html#visualise-confusion-matrix", - "title": "Ch. 11 – Genre classification", - "section": "Visualise confusion matrix", - "text": "Visualise confusion matrix\nLet’s explore what kind of mistakes the model makes. A confusion matrix is a useful way to visualise this.\n\nimport seaborn as sns\n\n# Reshape\nmatrix = confusion_matrix(y_test, y_pred_test)\nmatrix = matrix.astype('float') / matrix.sum(axis=1)[:, np.newaxis]\n\n# Build the plot\nplt.figure(figsize=(8,5))\nsns.set(font_scale=1.4)\nsns.heatmap(matrix, annot=True, annot_kws={'size':10},\n cmap=plt.cm.Blues, linewidths=0.2)\n\n# Add labels to the plot\nclass_names = RF.classes_ #np.unique(y_test)\ntick_marks = np.arange(len(class_names))\ntick_marks2 = tick_marks + 0.5\nplt.xticks(tick_marks+0.5, class_names, rotation=90)\nplt.yticks(tick_marks2, class_names, rotation=0)\nplt.xlabel('Predicted label')\nplt.ylabel('True label')\nplt.title('Confusion Matrix for Random Forest Model')\nplt.show()", - "crumbs": [ - "Corpus studies", - "Ch. 11 – Genre classification" - ] - }, - { - "objectID": "Chapter11.2.html#feature-importance", - "href": "Chapter11.2.html#feature-importance", - "title": "Ch. 11 – Genre classification", - "section": "Feature importance", - "text": "Feature importance\nLet’s plot the feature importance from the random forest classification.\n\nimport matplotlib.pyplot as plt\nfrom matplotlib.pyplot import figure\n\nimportance = RF.feature_importances_\nn = df_trimmed.columns[1:len(df.columns)]\nim = pd.DataFrame({'data': importance,'names': n})\nim2 = im.sort_values(by='data',ascending=False)\n# plot feature importance\nfig, ax = plt.subplots(figsize=(8, 5))\n#figure(figsize=(10, 5))\nplt.scatter(im2.names[0:9],im2.data[0:9],color='red')\nplt.plot(im2.names[0:9],im2.data[0:9])\nax.set_title('10 strongest features')\nplt.show()\n\n\n\n\n\n\n\n\nThe plot shows the 10 best features, and the first four seem to bring the greatest benefit to the model.", - "crumbs": [ - "Corpus studies", - "Ch. 
11 – Genre classification" - ] - }, - { - "objectID": "Chapter11.2.html#simplify-model", - "href": "Chapter11.2.html#simplify-model", - "title": "Ch. 11 – Genre classification", - "section": "Simplify model", - "text": "Simplify model\nWhat happens if we take the four best features and try building a simpler model with these features?\n\nfrom sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n\nX2 = df_trimmed.filter(['mfcc8','rmse', 'spec_ctr', 'mfcc7'])\n\ntest_size = 0.30 # taking 70:30 training and test set\nseed = 2022 # Random numbmer seeding for reapeatability of the code\nX_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=test_size, random_state=seed,stratify=y)\n\nRF = RandomForestClassifier(n_estimators=1000, max_depth=10, random_state=seed).fit(X_train, y_train)\nRF.predict(X_test)\n# Make predictions for the test set\ny_pred_test = RF.predict(X_test)\nprint(round(RF.score(X_test, y_test), 4))\n\n0.6333\n\n\nWhat do you think about the simplified model with 5 features? Is the model still good? You could look at the confusion to see what kind of mistakes the slimmer model starts to make.\nThere is concept call principle of parsimony or the idea behind that simpler models are more parsimonius than complex models, which stems from Occam’s razor. There are several statistical measures that assess the model fit and parsimoniousness (Akaike Information Criterion etc.). We are not entering into those calculations here but usually it is better to have a simple model and compromise the model accuracy a little bit than to gain few points in accuracy but having a complex model.", - "crumbs": [ - "Corpus studies", - "Ch. 11 – Genre classification" - ] - }, - { - "objectID": "Chapter11.2.html#summary-1", - "href": "Chapter11.2.html#summary-1", - "title": "Ch. 11 – Genre classification", - "section": "Summary", - "text": "Summary\nThere numerous other algorithms to classify the materials, SVMs (Support Vector Machines), K-nearest neighbour models (KNNs), Neural networks, and many others.\nWe could have focussed more on features, their calculation, the summary measures, and subsets, but overall we achieved a good success with a small set of features. We have to remember that this is a mini-version of the original dataset. You are welcome to try how the full dataset would improve the results.\nThis process is pretty generic for all kinds of classification tasks, so the same procedure could be applied to prediction emotion categories, meter, instrumentation and other properties of music.", - "crumbs": [ - "Corpus studies", - "Ch. 11 – Genre classification" - ] - }, - { - "objectID": "Chapter10.4.html", - "href": "Chapter10.4.html", - "title": "Ch. 
10 – Perceptual", - "section": "", - "text": "For more information, see Brian documentation.\n\n\n\n\nShow the code\nfrom brian2 import *\nfrom brian2hears import *\nimport matplotlib.pyplot as plt\n\n\n\nsound = loadsound('data/400_600_1600_hz.wav')\n\n# Inner hair cell model\ncfmin, cfmax, cfN = 20*Hz, 20*kHz, 3000 # was 3000\ncf = erbspace(cfmin, cfmax, cfN)\ngfb = Gammatone(sound, cf)\nihc = FunctionFilterbank(gfb, lambda x: 3*clip(x, 0, Inf)**(1.0/3.0))\n# Leaky integrate-and-fire model with noise and refractoriness\neqs = '''\ndv/dt = (I-v)/(1*ms)+0.2*xi*(2/(1*ms))**.5 : 1 (unless refractory)\nI : 1\n'''\nG = FilterbankGroup(ihc, 'I', eqs, reset='v=0', threshold='v>1', refractory=5*ms)\n# Run, and raster plot of the spikes\nM = SpikeMonitor(G)\nrun(sound.duration)\n\n# Plot the results\nfig, ax = plt.subplots(figsize=(8.0, 4.0))\nax.plot(M.t/ms, M.i, '.', alpha=0.5, color='tab:blue', ms=3)\nax.set_xlabel('Time (ms)')\nax.set_ylabel('Neuron number (inner hair cell)')\nylim(0, 2000)\nxlabel('Time (ms)')\nylabel('Neuron index');\nplt.show()\n\nINFO No numerical integration method specified for group 'neurongroup', using method 'euler' (took 0.03s, trying other methods took 0.00s). [brian2.stateupdaters.base.method_choice]", - "crumbs": [ - "Audio analysis", - "Ch. 10 – Perceptual" - ] - }, - { - "objectID": "Chapter10.4.html#figure-10.4-auditory-nerve-fibre-model-and-inner-hair-cells-spiking-for-the-earlier-example-a-complex-tone-consisting-of-400-600-1600-hz-sine-waves.", - "href": "Chapter10.4.html#figure-10.4-auditory-nerve-fibre-model-and-inner-hair-cells-spiking-for-the-earlier-example-a-complex-tone-consisting-of-400-600-1600-hz-sine-waves.", - "title": "Ch. 10 – Perceptual", - "section": "", - "text": "For more information, see Brian documentation.\n\n\n\n\nShow the code\nfrom brian2 import *\nfrom brian2hears import *\nimport matplotlib.pyplot as plt\n\n\n\nsound = loadsound('data/400_600_1600_hz.wav')\n\n# Inner hair cell model\ncfmin, cfmax, cfN = 20*Hz, 20*kHz, 3000 # was 3000\ncf = erbspace(cfmin, cfmax, cfN)\ngfb = Gammatone(sound, cf)\nihc = FunctionFilterbank(gfb, lambda x: 3*clip(x, 0, Inf)**(1.0/3.0))\n# Leaky integrate-and-fire model with noise and refractoriness\neqs = '''\ndv/dt = (I-v)/(1*ms)+0.2*xi*(2/(1*ms))**.5 : 1 (unless refractory)\nI : 1\n'''\nG = FilterbankGroup(ihc, 'I', eqs, reset='v=0', threshold='v>1', refractory=5*ms)\n# Run, and raster plot of the spikes\nM = SpikeMonitor(G)\nrun(sound.duration)\n\n# Plot the results\nfig, ax = plt.subplots(figsize=(8.0, 4.0))\nax.plot(M.t/ms, M.i, '.', alpha=0.5, color='tab:blue', ms=3)\nax.set_xlabel('Time (ms)')\nax.set_ylabel('Neuron number (inner hair cell)')\nylim(0, 2000)\nxlabel('Time (ms)')\nylabel('Neuron index');\nplt.show()\n\nINFO No numerical integration method specified for group 'neurongroup', using method 'euler' (took 0.03s, trying other methods took 0.00s). [brian2.stateupdaters.base.method_choice]", - "crumbs": [ - "Audio analysis", - "Ch. 10 – Perceptual" - ] - }, - { - "objectID": "Chapter10.2.html", - "href": "Chapter10.2.html", - "title": "Ch. 
10 – Spectrum and envelope", - "section": "", - "text": "The instrument samples are taken from McGill University Master Samples (MUMS, Opolko & Wapnick, 2006), polished by Eerola and Ferrer (2008) and used in subsequent experiments (Eerola et al., 2012).\n\n\n\n\nShow the code\nimport numpy as np\nimport librosa\nimport librosa.display\nfrom matplotlib import pyplot as plt \n\n\n\n\n\n\n\nShow the code\nx, sr = librosa.load('data/63.wav') \nstft = np.abs(librosa.stft(x))\nfreqs = librosa.fft_frequencies(sr=sr)\n\nf0, voiced_flag, voiced_probs = librosa.pyin(x, fmin=librosa.note_to_hz('C2'),\n fmax=librosa.note_to_hz('C7'))\nf = np.nanmedian(f0) # Get the Hz of the F0 for nice labels\nn = librosa.hz_to_note(f) # Convert Hz to note name\nprint(n)\nX = np.arange(f, f*10, f)\n\nfig, ax = plt.subplots(nrows=2, ncols=1, figsize=(8.0, 5.0))\n\n# 1. Spectrum of a tone\n# collapse across time and plot a spectrum \nDmean = stft.mean(axis=1)/max(stft.mean(axis=1))\nax[0].plot(freqs, Dmean, color='blue')\nax[0].set_title(\"Violin Spectrum\")\nax[0].set(xlim=[130, X.max()])\nax[0].set_ylabel(\"Norm. Ampl.\")\nax[0].set_xlabel(\"Frequency (Hz)\")\nax[0].grid()\nax[0].set_xticks(X)\n\n# calculate spectral centroid and plot it\ncentroid = librosa.feature.spectral_centroid(y=x, sr=sr)\ncentroidM = centroid.mean()\nprint(centroidM.round(0))\ncentroidM_label = \"Centroid \" + str(int(centroidM.round(0)))+\" Hz\"\nax[0].annotate(\"\", xy=(130, 0.75), xycoords='data', xytext=(centroidM, 0.75),\n arrowprops=dict(arrowstyle=\"<|-\", connectionstyle=\"arc3\", \n color=\"0.3\"), size=4)\nax[0].annotate(\"\", xy=(centroidM, 0.75), xycoords='data', \n xytext=(X.max(), 0.75),\n arrowprops=dict(arrowstyle=\"-|>\", connectionstyle=\"arc3\", \n color=\"0.3\"), size=4)\nax[0].text(centroidM-120, 0.83, centroidM_label, size=10, color='0.2')\n\n# Envelope\nrms = librosa.feature.rms(y=x, frame_length=2048, hop_length=512) \ntimes = librosa.times_like(rms)\nax[1].plot(times, rms[0], color='red')\n\nlibrosa.display.waveshow(x, sr=sr, ax=ax[1], color='0.75', max_points=3000)\nax[1].grid()\nax[1].set(ylim=[-0.25, 0.25])\nax[1].text(0.25, 0.17, \"A\", size=12, color='0.2')\nax[1].text(1.20, 0.17, \"S\", size=12, color='0.2')\nax[1].text(1.85, 0.17, \"D\", size=12, color='0.2')\nax[1].annotate(\"\", xy=(0.00, 0.15), xycoords='data', xytext=(0.50, 0.15),\n arrowprops=dict(arrowstyle=\"|-|\", connectionstyle=\"arc3\",\n color='0.2'), size=4)\nax[1].annotate(\"\", xy=(0.50, 0.15), xycoords='data', xytext=(1.79, 0.15),\n arrowprops=dict(arrowstyle=\"|-|\", connectionstyle=\"arc3\",\n color='0.2'), size=4)\nax[1].annotate(\"\", xy=(1.79, 0.15), xycoords='data', xytext=(2.0, 0.15),\n arrowprops=dict(arrowstyle=\"|-|\", connectionstyle=\"arc3\",\n color='0.2'), size=4)\nax[1].set_ylabel(\"Amplitude\")\nax[1].set_title(\"Violin Envelope\")\nax[1].set_xlabel(\"Time (s)\")\n\nfig.tight_layout()\nplt.show()\n\n\nD♯4\n1623.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nShow the code\nx, sr = librosa.load('data/24.wav') \nstft = np.abs(librosa.stft(x))\nfreqs = librosa.fft_frequencies(sr=sr)\n\nf0, voiced_flag, voiced_probs = librosa.pyin(x, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'))\nf=np.nanmedian(f0) # Get the Hz of the fundamental frequency for nice labels\nn=librosa.hz_to_note(f) # Convert Hz to note name\n\nX=np.arange(f,f*10,f)\n\nfig, ax = plt.subplots(nrows=2, ncols=1, figsize=(8.0, 5.0))\n\n# collapse across time and plot a spectrum representation (energy across 
frequencies)\nDmean=stft.mean(axis=1)/max(stft.mean(axis=1))\nax[0].plot(freqs,Dmean,color='blue')\n#ax[0].label_outer()\n#ax[0].set_title(\"Spectrum\")\nax[0].set(xlim=[130, X.max()])\nax[0].set_ylabel(\"Norm. Ampl.\")\nax[0].set_xlabel(\"Frequency (Hz)\")\nax[0].grid()\nax[0].set_xticks(X)\nax[0].set_title(\"Clarinet Spectrum\")\n\n# calculate spectral centroid and plot it\ncentroid = librosa.feature.spectral_centroid(y=x, sr=sr)\ncentroidM = centroid.mean()\nprint(centroidM.round(0))\ncentroidM_label = \"Centroid \" + str(int(centroidM.round(0)))+\" Hz\"\nax[0].annotate(\"\",xy=(130, 0.75), xycoords='data',xytext=(centroidM, 0.75), arrowprops=dict(arrowstyle=\"<|-\",connectionstyle=\"arc3\",color=\"0.3\"),size=4)\nax[0].annotate(\"\",xy=(centroidM, 0.75), xycoords='data',xytext=(X.max(), 0.75), arrowprops=dict(arrowstyle=\"-|>\",connectionstyle=\"arc3\",color=\"0.3\"),size=4)\nax[0].text(centroidM-120,0.83,centroidM_label,size=10,color='0.2')\n\nrms=librosa.feature.rms(y=x,frame_length=2048,hop_length=512) # Extra dynamics (RMS)\ntimes = librosa.times_like(rms)\nax[1].plot(times, rms[0],color='red')\n\nlibrosa.display.waveshow(x, sr=sr, ax=ax[1],color='0.75',max_points=3000)\nax[1].grid()\nax[1].set(ylim=[-0.25, 0.25])\n\nax[1].text(0.00,0.17,\"A\",size=12,color='0.2')\n#ax[0,1].text(0.50,0.99,\"D\",size=15)\nax[1].text(0.90,0.17,\"S\",size=12,color='0.2')\nax[1].text(1.85,0.17,\"D\",size=12,color='0.2')\nax[1].annotate(\"\",xy=(0.00, 0.15), xycoords='data',xytext=(0.07, 0.15),arrowprops=dict(arrowstyle=\"|-|\",connectionstyle=\"arc3\",color='0.2'),size=4)\n#ax[0,1].annotate(\"\",xy=(0.05, 0.14), xycoords='data',xytext=(0.20, 0.14),arrowprops=dict(arrowstyle=\"<->\",connectionstyle=\"arc3\"),size=15)\nax[1].annotate(\"\",xy=(0.07, 0.15), xycoords='data',xytext=(1.77, 0.15),arrowprops=dict(arrowstyle=\"|-|\",connectionstyle=\"arc3\",color='0.2'),size=4)\nax[1].annotate(\"\",xy=(1.77, 0.15), xycoords='data',xytext=(2.0, 0.15),arrowprops=dict(arrowstyle=\"|-|\",connectionstyle=\"arc3\",color='0.2'),size=4)\nax[1].set_ylabel(\"Amplitude\")\nax[1].set_xlabel(\"Time (s)\")\nax[1].set_title(\"Clarinet Envelope\")\n\nfig.tight_layout()\nplt.show()\n\n\n1701.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nShow the code\nx, sr = librosa.load('data/90.wav') \n\nstft = np.abs(librosa.stft(x))\nfreqs = librosa.fft_frequencies(sr=sr)\n\nf0, voiced_flag, voiced_probs = librosa.pyin(x, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'))\nf=np.nanmedian(f0) # Get the Hz of the fundamental frequency for nice labels\nn=librosa.hz_to_note(f) # Convert Hz to note name\nprint(n)\nX=np.arange(f,f*10,f)\n\nfig, ax = plt.subplots(nrows=2, ncols=1, figsize=(8.0, 5.0))\n\n\n# collapse across time and plot a spectrum representation (energy across frequencies)\nDmean=stft.mean(axis=1)/max(stft.mean(axis=1))\nax[0].plot(freqs,Dmean,color='blue')\nax[0].set_title(\"Marimba Spectrum\")\nax[0].set(xlim=[130, X.max()])\nax[0].set_ylabel(\"Norm. 
Ampl.\")\nax[0].set_xlabel(\"Frequency (Hz)\")\nax[0].grid()\nax[0].set_xticks(X)\n\n# calculate spectral centroid and plot it\ncentroid = librosa.feature.spectral_centroid(y=x, sr=sr)\ncentroidM = centroid.mean()\nprint(centroidM.round(0))\ncentroidM_label = \"Centroid \" + str(int(centroidM.round(0)))+\" Hz\"\nax[0].annotate(\"\",xy=(130, 0.75), xycoords='data',xytext=(centroidM, 0.75), arrowprops=dict(arrowstyle=\"<|-\",connectionstyle=\"arc3\",color=\"0.3\"),size=4)\nax[0].annotate(\"\",xy=(centroidM, 0.75), xycoords='data',xytext=(X.max(), 0.75), arrowprops=dict(arrowstyle=\"-|>\",connectionstyle=\"arc3\",color=\"0.3\"),size=4)\nax[0].text(centroidM-120,0.83,centroidM_label,size=10,color='0.2')\n\nrms=librosa.feature.rms(y=x,frame_length=2048,hop_length=512) \ntimes = librosa.times_like(rms)\nax[1].plot(times, rms[0],color='red')\nlibrosa.display.waveshow(x, sr=sr, ax=ax[1],color='0.75',max_points=3000)\nax[1].grid()\nax[1].set(ylim=[-0.25, 0.25])\nax[1].set(xlim=[0, 0.70])\n\nax[1].text(0.00,0.17,\"A\",size=12,color='0.2')\nax[1].text(0.09,0.17,\"S\",size=12,color='0.2')\nax[1].text(0.40,0.17,\"D\",size=12,color='0.2')\nax[1].annotate(\"\",xy=(0.00, 0.15), xycoords='data',xytext=(0.01, 0.15),arrowprops=dict(arrowstyle=\"|-|\",connectionstyle=\"arc3\",color='0.2'),size=4)\nax[1].annotate(\"\",xy=(0.01, 0.15), xycoords='data',xytext=(0.18, 0.15),arrowprops=dict(arrowstyle=\"|-|\",connectionstyle=\"arc3\",color='0.2'),size=4)\nax[1].annotate(\"\",xy=(0.18, 0.15), xycoords='data',xytext=(0.70, 0.15),arrowprops=dict(arrowstyle=\"|-|\",connectionstyle=\"arc3\",color='0.2'),size=4)\nax[1].set_ylabel(\"Amplitude\")\nax[1].set_title(\"Marimba Envelope\")\nax[1].set_xlabel(\"Time (s)\")\n\nfig.tight_layout()\nplt.show()\n\n\nD♯4\n1219.0\n\n\n\n\n\n\n\n\n\n\n\n\n\nEerola, T., Ferrer, R., & Alluri, V. (2012). Timbre and affect dimensions: Evidence from affect and similarity ratings and acoustic correlates of isolated instrument sounds. Music Perception, 30(1), 49-70. https://doi.org/10.1525/mp.2012.30.1.49\nEerola, T. & Ferrer, R. (2008). Instrument Library (MUMS) Revised. Music Perception, 25(3), 253-255. http://caliber.ucpress.net/doi/abs/10.1525/mp.2008.25.3.253\nOpolko, F., & Wapnick, J. (2006). The McGill University master samples collection on DVD (3 DVDs). Quebec, Canada: McGill University.", - "crumbs": [ - "Audio analysis", - "Ch. 10 – Spectrum and envelope" - ] - }, - { - "objectID": "Chapter10.2.html#figure-10.2.-spectrum-and-the-envelope-of-three-instruments-violin-clarinet-and-marimba.", - "href": "Chapter10.2.html#figure-10.2.-spectrum-and-the-envelope-of-three-instruments-violin-clarinet-and-marimba.", - "title": "Ch. 10 – Spectrum and envelope", - "section": "", - "text": "The instrument samples are taken from McGill University Master Samples (MUMS, Opolko & Wapnick, 2006), polished by Eerola and Ferrer (2008) and used in subsequent experiments (Eerola et al., 2012).\n\n\n\n\nShow the code\nimport numpy as np\nimport librosa\nimport librosa.display\nfrom matplotlib import pyplot as plt \n\n\n\n\n\n\n\nShow the code\nx, sr = librosa.load('data/63.wav') \nstft = np.abs(librosa.stft(x))\nfreqs = librosa.fft_frequencies(sr=sr)\n\nf0, voiced_flag, voiced_probs = librosa.pyin(x, fmin=librosa.note_to_hz('C2'),\n fmax=librosa.note_to_hz('C7'))\nf = np.nanmedian(f0) # Get the Hz of the F0 for nice labels\nn = librosa.hz_to_note(f) # Convert Hz to note name\nprint(n)\nX = np.arange(f, f*10, f)\n\nfig, ax = plt.subplots(nrows=2, ncols=1, figsize=(8.0, 5.0))\n\n# 1. 
Spectrum of a tone\n# collapse across time and plot a spectrum \nDmean = stft.mean(axis=1)/max(stft.mean(axis=1))\nax[0].plot(freqs, Dmean, color='blue')\nax[0].set_title(\"Violin Spectrum\")\nax[0].set(xlim=[130, X.max()])\nax[0].set_ylabel(\"Norm. Ampl.\")\nax[0].set_xlabel(\"Frequency (Hz)\")\nax[0].grid()\nax[0].set_xticks(X)\n\n# calculate spectral centroid and plot it\ncentroid = librosa.feature.spectral_centroid(y=x, sr=sr)\ncentroidM = centroid.mean()\nprint(centroidM.round(0))\ncentroidM_label = \"Centroid \" + str(int(centroidM.round(0)))+\" Hz\"\nax[0].annotate(\"\", xy=(130, 0.75), xycoords='data', xytext=(centroidM, 0.75),\n arrowprops=dict(arrowstyle=\"<|-\", connectionstyle=\"arc3\", \n color=\"0.3\"), size=4)\nax[0].annotate(\"\", xy=(centroidM, 0.75), xycoords='data', \n xytext=(X.max(), 0.75),\n arrowprops=dict(arrowstyle=\"-|>\", connectionstyle=\"arc3\", \n color=\"0.3\"), size=4)\nax[0].text(centroidM-120, 0.83, centroidM_label, size=10, color='0.2')\n\n# Envelope\nrms = librosa.feature.rms(y=x, frame_length=2048, hop_length=512) \ntimes = librosa.times_like(rms)\nax[1].plot(times, rms[0], color='red')\n\nlibrosa.display.waveshow(x, sr=sr, ax=ax[1], color='0.75', max_points=3000)\nax[1].grid()\nax[1].set(ylim=[-0.25, 0.25])\nax[1].text(0.25, 0.17, \"A\", size=12, color='0.2')\nax[1].text(1.20, 0.17, \"S\", size=12, color='0.2')\nax[1].text(1.85, 0.17, \"D\", size=12, color='0.2')\nax[1].annotate(\"\", xy=(0.00, 0.15), xycoords='data', xytext=(0.50, 0.15),\n arrowprops=dict(arrowstyle=\"|-|\", connectionstyle=\"arc3\",\n color='0.2'), size=4)\nax[1].annotate(\"\", xy=(0.50, 0.15), xycoords='data', xytext=(1.79, 0.15),\n arrowprops=dict(arrowstyle=\"|-|\", connectionstyle=\"arc3\",\n color='0.2'), size=4)\nax[1].annotate(\"\", xy=(1.79, 0.15), xycoords='data', xytext=(2.0, 0.15),\n arrowprops=dict(arrowstyle=\"|-|\", connectionstyle=\"arc3\",\n color='0.2'), size=4)\nax[1].set_ylabel(\"Amplitude\")\nax[1].set_title(\"Violin Envelope\")\nax[1].set_xlabel(\"Time (s)\")\n\nfig.tight_layout()\nplt.show()\n\n\nD♯4\n1623.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nShow the code\nx, sr = librosa.load('data/24.wav') \nstft = np.abs(librosa.stft(x))\nfreqs = librosa.fft_frequencies(sr=sr)\n\nf0, voiced_flag, voiced_probs = librosa.pyin(x, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'))\nf=np.nanmedian(f0) # Get the Hz of the fundamental frequency for nice labels\nn=librosa.hz_to_note(f) # Convert Hz to note name\n\nX=np.arange(f,f*10,f)\n\nfig, ax = plt.subplots(nrows=2, ncols=1, figsize=(8.0, 5.0))\n\n# collapse across time and plot a spectrum representation (energy across frequencies)\nDmean=stft.mean(axis=1)/max(stft.mean(axis=1))\nax[0].plot(freqs,Dmean,color='blue')\n#ax[0].label_outer()\n#ax[0].set_title(\"Spectrum\")\nax[0].set(xlim=[130, X.max()])\nax[0].set_ylabel(\"Norm. 
Ampl.\")\nax[0].set_xlabel(\"Frequency (Hz)\")\nax[0].grid()\nax[0].set_xticks(X)\nax[0].set_title(\"Clarinet Spectrum\")\n\n# calculate spectral centroid and plot it\ncentroid = librosa.feature.spectral_centroid(y=x, sr=sr)\ncentroidM = centroid.mean()\nprint(centroidM.round(0))\ncentroidM_label = \"Centroid \" + str(int(centroidM.round(0)))+\" Hz\"\nax[0].annotate(\"\",xy=(130, 0.75), xycoords='data',xytext=(centroidM, 0.75), arrowprops=dict(arrowstyle=\"<|-\",connectionstyle=\"arc3\",color=\"0.3\"),size=4)\nax[0].annotate(\"\",xy=(centroidM, 0.75), xycoords='data',xytext=(X.max(), 0.75), arrowprops=dict(arrowstyle=\"-|>\",connectionstyle=\"arc3\",color=\"0.3\"),size=4)\nax[0].text(centroidM-120,0.83,centroidM_label,size=10,color='0.2')\n\nrms=librosa.feature.rms(y=x,frame_length=2048,hop_length=512) # Extra dynamics (RMS)\ntimes = librosa.times_like(rms)\nax[1].plot(times, rms[0],color='red')\n\nlibrosa.display.waveshow(x, sr=sr, ax=ax[1],color='0.75',max_points=3000)\nax[1].grid()\nax[1].set(ylim=[-0.25, 0.25])\n\nax[1].text(0.00,0.17,\"A\",size=12,color='0.2')\n#ax[0,1].text(0.50,0.99,\"D\",size=15)\nax[1].text(0.90,0.17,\"S\",size=12,color='0.2')\nax[1].text(1.85,0.17,\"D\",size=12,color='0.2')\nax[1].annotate(\"\",xy=(0.00, 0.15), xycoords='data',xytext=(0.07, 0.15),arrowprops=dict(arrowstyle=\"|-|\",connectionstyle=\"arc3\",color='0.2'),size=4)\n#ax[0,1].annotate(\"\",xy=(0.05, 0.14), xycoords='data',xytext=(0.20, 0.14),arrowprops=dict(arrowstyle=\"<->\",connectionstyle=\"arc3\"),size=15)\nax[1].annotate(\"\",xy=(0.07, 0.15), xycoords='data',xytext=(1.77, 0.15),arrowprops=dict(arrowstyle=\"|-|\",connectionstyle=\"arc3\",color='0.2'),size=4)\nax[1].annotate(\"\",xy=(1.77, 0.15), xycoords='data',xytext=(2.0, 0.15),arrowprops=dict(arrowstyle=\"|-|\",connectionstyle=\"arc3\",color='0.2'),size=4)\nax[1].set_ylabel(\"Amplitude\")\nax[1].set_xlabel(\"Time (s)\")\nax[1].set_title(\"Clarinet Envelope\")\n\nfig.tight_layout()\nplt.show()\n\n\n1701.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nShow the code\nx, sr = librosa.load('data/90.wav') \n\nstft = np.abs(librosa.stft(x))\nfreqs = librosa.fft_frequencies(sr=sr)\n\nf0, voiced_flag, voiced_probs = librosa.pyin(x, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'))\nf=np.nanmedian(f0) # Get the Hz of the fundamental frequency for nice labels\nn=librosa.hz_to_note(f) # Convert Hz to note name\nprint(n)\nX=np.arange(f,f*10,f)\n\nfig, ax = plt.subplots(nrows=2, ncols=1, figsize=(8.0, 5.0))\n\n\n# collapse across time and plot a spectrum representation (energy across frequencies)\nDmean=stft.mean(axis=1)/max(stft.mean(axis=1))\nax[0].plot(freqs,Dmean,color='blue')\nax[0].set_title(\"Marimba Spectrum\")\nax[0].set(xlim=[130, X.max()])\nax[0].set_ylabel(\"Norm. 
Ampl.\")\nax[0].set_xlabel(\"Frequency (Hz)\")\nax[0].grid()\nax[0].set_xticks(X)\n\n# calculate spectral centroid and plot it\ncentroid = librosa.feature.spectral_centroid(y=x, sr=sr)\ncentroidM = centroid.mean()\nprint(centroidM.round(0))\ncentroidM_label = \"Centroid \" + str(int(centroidM.round(0)))+\" Hz\"\nax[0].annotate(\"\",xy=(130, 0.75), xycoords='data',xytext=(centroidM, 0.75), arrowprops=dict(arrowstyle=\"<|-\",connectionstyle=\"arc3\",color=\"0.3\"),size=4)\nax[0].annotate(\"\",xy=(centroidM, 0.75), xycoords='data',xytext=(X.max(), 0.75), arrowprops=dict(arrowstyle=\"-|>\",connectionstyle=\"arc3\",color=\"0.3\"),size=4)\nax[0].text(centroidM-120,0.83,centroidM_label,size=10,color='0.2')\n\nrms=librosa.feature.rms(y=x,frame_length=2048,hop_length=512) \ntimes = librosa.times_like(rms)\nax[1].plot(times, rms[0],color='red')\nlibrosa.display.waveshow(x, sr=sr, ax=ax[1],color='0.75',max_points=3000)\nax[1].grid()\nax[1].set(ylim=[-0.25, 0.25])\nax[1].set(xlim=[0, 0.70])\n\nax[1].text(0.00,0.17,\"A\",size=12,color='0.2')\nax[1].text(0.09,0.17,\"S\",size=12,color='0.2')\nax[1].text(0.40,0.17,\"D\",size=12,color='0.2')\nax[1].annotate(\"\",xy=(0.00, 0.15), xycoords='data',xytext=(0.01, 0.15),arrowprops=dict(arrowstyle=\"|-|\",connectionstyle=\"arc3\",color='0.2'),size=4)\nax[1].annotate(\"\",xy=(0.01, 0.15), xycoords='data',xytext=(0.18, 0.15),arrowprops=dict(arrowstyle=\"|-|\",connectionstyle=\"arc3\",color='0.2'),size=4)\nax[1].annotate(\"\",xy=(0.18, 0.15), xycoords='data',xytext=(0.70, 0.15),arrowprops=dict(arrowstyle=\"|-|\",connectionstyle=\"arc3\",color='0.2'),size=4)\nax[1].set_ylabel(\"Amplitude\")\nax[1].set_title(\"Marimba Envelope\")\nax[1].set_xlabel(\"Time (s)\")\n\nfig.tight_layout()\nplt.show()\n\n\nD♯4\n1219.0\n\n\n\n\n\n\n\n\n\n\n\n\n\nEerola, T., Ferrer, R., & Alluri, V. (2012). Timbre and affect dimensions: Evidence from affect and similarity ratings and acoustic correlates of isolated instrument sounds. Music Perception, 30(1), 49-70. https://doi.org/10.1525/mp.2012.30.1.49\nEerola, T. & Ferrer, R. (2008). Instrument Library (MUMS) Revised. Music Perception, 25(3), 253-255. http://caliber.ucpress.net/doi/abs/10.1525/mp.2008.25.3.253\nOpolko, F., & Wapnick, J. (2006). The McGill University master samples collection on DVD (3 DVDs). Quebec, Canada: McGill University.", - "crumbs": [ - "Audio analysis", - "Ch. 10 – Spectrum and envelope" - ] - }, - { - "objectID": "Chapter9.5.html", - "href": "Chapter9.5.html", - "title": "Ch. 
9 – Synchronization", - "section": "", - "text": "#if (!require(devtools)) install.packages(\"devtools\",quiet=TRUE)\n#devtools::install_github(\"tuomaseerola/onsetsync\")\nlibrary(onsetsync)\nlibrary(dplyr,quiet=TRUE)\n#install.packages(\"cowplot\",quiet=TRUE)\nlibrary(cowplot)\n\n\n\n\nTake an example track from IEMP corpus and visualise beats and calculate the synchronies.\n\nset.seed(1234)\nCSS_Song2 <- onsetsync::CSS_IEMP[[2]]\nfig1 <- plot_by_beat(df = CSS_Song2, \n instr = c('Bass','Clave','Guitar','Tres'), \n beat = 'SD', \n virtual = 'Isochronous.SD.Time',\n pcols = 2)\ninst <- c('Clave','Bass','Guitar','Tres') # Define instruments \ndn <- sync_execute_pairs(CSS_Song2,inst,100,1,'SD')\nfig2 <- plot_by_pair(dn) # plot\nG <- cowplot::plot_grid(fig1,fig2,nrow = 2)\nprint(G)\n\n\n\n\n\n\n\n\n\nround(mean(dn$asynch$`Clave - Guitar`)*1000,1)\n\n[1] 3.6\n\n\n\nround(mean(dn$asynch$`Clave - Bass`)*1000,1)\n\n[1] 15.9\n\n\n\nround(mean(dn$asynch$`Bass - Guitar`)*1000,1)\n\n[1] -17.6\n\n\n\nround(mean(dn$asynch$`Bass - Tres`)*1000,1)\n\n[1] -2.7\n\n\n\n\n\nPoole, A. (2021). Groove in Cuban Son and Salsa Performance. Journal of the Royal Musical Association, 146(1), 117-145. doi:10.1017/rma.2021.2", - "crumbs": [ - "Score analysis", - "Ch. 9 – Synchronization" - ] - }, - { - "objectID": "Chapter9.5.html#load-libraries", - "href": "Chapter9.5.html#load-libraries", - "title": "Ch. 9 – Synchronization", - "section": "", - "text": "#if (!require(devtools)) install.packages(\"devtools\",quiet=TRUE)\n#devtools::install_github(\"tuomaseerola/onsetsync\")\nlibrary(onsetsync)\nlibrary(dplyr,quiet=TRUE)\n#install.packages(\"cowplot\",quiet=TRUE)\nlibrary(cowplot)", - "crumbs": [ - "Score analysis", - "Ch. 9 – Synchronization" - ] - }, - { - "objectID": "Chapter9.5.html#explore-synchronisation-in-cuban-salsa-and-son", - "href": "Chapter9.5.html#explore-synchronisation-in-cuban-salsa-and-son", - "title": "Ch. 9 – Synchronization", - "section": "", - "text": "Take an example track from IEMP corpus and visualise beats and calculate the synchronies.\n\nset.seed(1234)\nCSS_Song2 <- onsetsync::CSS_IEMP[[2]]\nfig1 <- plot_by_beat(df = CSS_Song2, \n instr = c('Bass','Clave','Guitar','Tres'), \n beat = 'SD', \n virtual = 'Isochronous.SD.Time',\n pcols = 2)\ninst <- c('Clave','Bass','Guitar','Tres') # Define instruments \ndn <- sync_execute_pairs(CSS_Song2,inst,100,1,'SD')\nfig2 <- plot_by_pair(dn) # plot\nG <- cowplot::plot_grid(fig1,fig2,nrow = 2)\nprint(G)\n\n\n\n\n\n\n\n\n\nround(mean(dn$asynch$`Clave - Guitar`)*1000,1)\n\n[1] 3.6\n\n\n\nround(mean(dn$asynch$`Clave - Bass`)*1000,1)\n\n[1] 15.9\n\n\n\nround(mean(dn$asynch$`Bass - Guitar`)*1000,1)\n\n[1] -17.6\n\n\n\nround(mean(dn$asynch$`Bass - Tres`)*1000,1)\n\n[1] -2.7", - "crumbs": [ - "Score analysis", - "Ch. 9 – Synchronization" - ] - }, - { - "objectID": "Chapter9.5.html#references", - "href": "Chapter9.5.html#references", - "title": "Ch. 9 – Synchronization", - "section": "", - "text": "Poole, A. (2021). Groove in Cuban Son and Salsa Performance. Journal of the Royal Musical Association, 146(1), 117-145. doi:10.1017/rma.2021.2", - "crumbs": [ - "Score analysis", - "Ch. 9 – Synchronization" - ] - }, - { - "objectID": "Chapter9.3.html", - "href": "Chapter9.3.html", - "title": "Ch. 
9 – Key-finding", - "section": "", - "text": "Install music21 and other elements needed to run the environment Press play and wait for all commands to be executed - this initial command might take some time as it needs to build the music21 environment.", - "crumbs": [ - "Score analysis", - "Ch. 9 – Key-finding" - ] - }, - { - "objectID": "Chapter9.3.html#key-finding", - "href": "Chapter9.3.html#key-finding", - "title": "Ch. 9 – Key-finding", - "section": "Key-finding", - "text": "Key-finding\nKey-finding algorithm applied to an example excerpt (bach/bwv30.6).\n\nimport sys\nfrom music21 import * # activate library\nimport pandas as pd\n\n\nbwv30_6 = corpus.parse('bach/bwv30.6.xml')# 30.6\nprint(bwv30_6.analyze('key.krumhanslkessler'))\nbwv30_6_3meas = bwv30_6.measures(1,4) # First 3 measures\n\nKK = analysis.discrete.KrumhanslKessler() # Key profile\nwa = analysis.windowed.WindowedAnalysis(bwv30_6_3meas, KK)\na,b = wa.analyze(2, windowType='overlap')\n\nkeyclar=[]; mode=[]; key=[]\nfor x in range(len(a)):\n key.append(a[x][0])\n mode.append(a[x][1])\n keyclar.append(a[x][2])\ndata=pd.DataFrame({'key':key,'mode':mode,'r':keyclar})\nprint(data)\n\nA major\n key mode r\n0 E major 0.881687\n1 E major 0.892883\n2 A major 0.588537\n3 B major 0.833787\n4 E major 0.972757\n5 E major 0.901069\n6 F# minor 0.717810\n7 E major 0.847699\n8 E major 0.882310\n9 E major 0.807233\n10 F# minor 0.746200\n11 B major 0.694972\n12 B minor 0.684539\n13 B minor 0.696579\n14 E major 0.813827", - "crumbs": [ - "Score analysis", - "Ch. 9 – Key-finding" - ] - }, - { - "objectID": "Chapter9.3.html#tension", - "href": "Chapter9.3.html#tension", - "title": "Ch. 9 – Key-finding", - "section": "Tension", - "text": "Tension\nAnalysis of tonal tension using the model by Herremans and Chew (2016), implemented in partitura library for Python.\n\nimport partitura\nimport numpy as np\n\n\npart = partitura.load_musicxml('data/bwv306.musicxml')\ntonal_tension = partitura.musicanalysis.estimate_tonaltension(part, ss='onset')\nx = getattr(tonal_tension['onset_beat'][0:50], \"tolist\", lambda: value)()\ny = tonal_tension['cloud_momentum'][0:50]\n\nd = {'beat': x,'tension': y}\ndf = pd.DataFrame(data=d)\nprint(df)\n\n beat tension\n0 0.0 0.000000\n1 1.0 0.132809\n2 2.0 0.132809\n3 2.5 0.031124\n4 3.0 0.192431\n5 3.5 0.046758\n6 4.0 0.142699\n7 4.5 0.055152\n8 5.0 0.082517\n9 5.5 0.072674\n10 6.0 0.088245\n11 7.0 0.158890\n12 7.5 0.023576\n13 8.0 0.135350\n14 10.0 0.126068\n15 11.0 0.111489\n16 11.5 0.031124\n17 12.0 0.092913\n18 12.5 0.036120\n19 13.0 0.125584\n20 13.5 0.073635\n21 14.0 0.168273\n22 14.5 0.114459\n23 15.0 0.116256\n24 15.5 0.080099\n25 16.0 0.061819\n26 20.0 0.032064\n27 21.0 0.111489\n28 21.5 0.031124\n29 22.0 0.043444\n30 22.5 0.109472\n31 23.0 0.086467\n32 23.5 0.080719\n33 24.0 0.218836\n34 24.5 0.064623\n35 25.0 0.236635\n36 25.5 0.092383\n37 26.0 0.236347\n38 28.0 0.177259\n39 28.5 0.046247\n40 29.0 0.034470\n41 29.5 0.052403\n42 30.0 0.097112\n43 30.5 0.051889\n44 31.0 0.131294\n45 31.5 0.046758\n46 32.0 0.127003\n47 32.5 0.059613\n48 33.0 0.085597\n49 33.5 0.075891\n\n\n/Users/tuomaseerola/miniconda3/envs/relative_mode/lib/python3.9/site-packages/partitura/io/importmusicxml.py:421: UserWarning: Found repeat without start\nStarting point 0 is assumed\n warnings.warn(\n\n\n\nReferences\n\nHerremans, D., & Chew, E. (2016). Tension ribbons: Quantifying and visualising tonal tension. Second International Conference on Technologies for Music Notation and Representation. 
TENOR, 2.", - "crumbs": [ - "Score analysis", - "Ch. 9 – Key-finding" - ] - }, - { - "objectID": "Chapter9.1.html#install-music21-in-colab", - "href": "Chapter9.1.html#install-music21-in-colab", - "title": "Ch. 9 - Music analysis", - "section": "Install Music21 (in Colab)", - "text": "Install Music21 (in Colab)\n\n\n\n\n\n\nImportant\n\n\n\nThe first code segment is to install music21 and other elements needed to run the environment. In Colab, press play and wait for all commands to be executed - this initial command might take some time as it needs to build the musi21 environment.", - "crumbs": [ - "Score analysis", - "Ch. 9 - Music analysis" - ] - }, - { - "objectID": "Chapter9.1.html#install-music21-and-musescore-in-local-machine", - "href": "Chapter9.1.html#install-music21-and-musescore-in-local-machine", - "title": "Ch. 9 - Music analysis", - "section": "Install Music21 and Musescore in local machine", - "text": "Install Music21 and Musescore in local machine\n\n\n\n\n\n\nImportant\n\n\n\nFor instructions of how to get music21 and Musescore working on a local machine, see Installation guidelines from music21.\n\n\n\n\n\n\n\n\nNote\n\n\n\nThe script below uses a workaround where the excerpts are first written to a xml file and then converted to png image.", - "crumbs": [ - "Score analysis", - "Ch. 9 - Music analysis" - ] - }, - { - "objectID": "Chapter9.1.html#harmonic-and-metrical-analysis", - "href": "Chapter9.1.html#harmonic-and-metrical-analysis", - "title": "Ch. 9 - Music analysis", - "section": "Harmonic and metrical analysis", - "text": "Harmonic and metrical analysis\nHarmonic and metrical analysis of an example excerpt bach/bwv30.6 using music21.\n\nHarmonic analysis – Reduction\nFirst get a Bach chorale.\n\nfrom music21 import * # activate library\n\n### 1 Select one example from Bach chorales\nbwv30_6 = corpus.parse('bach/bwv30.6.xml')# Take an example\n#bwv30_6.measures(1, 3).show() # Display 3 bars\nbwv30_6.measures(1, 3).write('xml', fp='output.xml')\n!mscore output.xml -o images/score1.png --trim-image 0\n\nzsh:1: command not found: mscore\n\n\n\n\n\nHarmonic analysis\nAnalyse chords using Roman numerals.\n\nbChords = bwv30_6.chordify() # Slice the chords\nfor c in bChords.recurse().getElementsByClass('Chord'):\n c.closedPosition(forceOctave=4, inPlace=True)\n# Run analysis and add Roman numerals as lyrics\nfor c in bChords.recurse().getElementsByClass('Chord'):\n rn = roman.romanNumeralFromChord(c, key.Key('A'))\n c.addLyric(str(rn.figure))\nbChords.measures(0, 3).show() # Display the result\nbChords.measures(0, 3).write('xml', fp='output.xml')\n!mscore output.xml -o images/score2.png --trim-image 0\n\nzsh:1: command not found: mscore\n\n\n\n\n\nMetrical analysis\nCarry out metrical analysis.\n\nbass = bwv30_6.getElementById('Bass') # Get the bass part\nexcerpt = bass.measures(1,3) # Bar 1 through 3\nanalysis.metrical.labelBeatDepth(excerpt)# Metrical analysis\n#excerpt.show() # Display the results\nexcerpt.write('xml', fp='output.xml')\n!mscore output.xml -o images/score3.png --trim-image 0\n!rm output.xml\n\nzsh:1: command not found: mscore", - "crumbs": [ - "Score analysis", - "Ch. 9 - Music analysis" - ] - }, - { - "objectID": "Chapter6.4.html", - "href": "Chapter6.4.html", - "title": "Ch. 6 – Outliers", - "section": "", - "text": "This notebook is about outliers.\n\n\nIt is not uncommon to obtain data which has some missing observations, perhaps even some nearly impossible values and funky distributions. 
There are remedies and procedures to diagnose and tackle these kinds of issues in statistics, some easy, some more tricky and controversial. This text will not get deeply involved in the analysis of these issues, but I will present the basics. It is worth saying here that sometimes these problems may lead to insurmountable challenges for the analysis, but more often there are simple techniques that can mitigate the problem. The key in all these operations is that you are transparent in what you do and explain why an operation was done to the data and what the operation was.\nUnusually high or low values in the data may have a catastrophic impact on the descriptive and inferential statistics. The unusual value, which we call an outlier, could have been caused by a typo in the data or by a conversion error (mistaking the comma for the full stop as the decimal separator, or something else); sometimes, in a large sample, extreme values simply appear in the data. Outliers will cause problems for the traditional analysis operations such as calculating the means, carrying out t-tests, correlations, and regressions, as these calculations usually assume a normal distribution of values and an extreme value will likely violate this assumption. The practical reason for treating outliers in the first place is that they may render these calculations misleading, as the extremes wield a high leverage on otherwise relatively stable values that tend to centre around a mean.\nThe first diagnostic action towards finding out the potential outliers is to visualise the data. If you plot the histograms or boxplots of your variables or scatterplots between two variables, the outliers are usually quite easily visible in these visualisations (see Figure 6.3 for an example). It is a sensible idea always to get a feel for the distribution of the data by plotting the values in a meaningful fashion (boxplots are always a good starting point). The shape of the distribution might reveal other unwanted issues such as all values being clustered near one end of a scale (called a ceiling effect or a floor effect, where the measurement scale is attenuated because it is not sensitive enough, or it is oversensitive, scoring only a few values at the positive extreme of the scale). Or a visualisation of two variables might reveal that the relationship between them is not linear but still clear and regular, following a polynomial relation (e.g. a U-shaped or inverted U-shaped pattern). It is possible to diagnose the potential outliers using several quantitative techniques, but before mentioning two options, let me warn that there is no definite recommendation on what is classified as an outlier, as different types of data, distributions, and disciplinary orientations might have slightly different practices for dealing with these offending values. One of the most used measures, already introduced earlier in relation to boxplots, is to use the interquartile range (IQR) to define the range of acceptable values (outliers are above the 75% quantile plus 1.5 × IQR or below the 25% quantile minus 1.5 × IQR).\n\nlibrary(tidyverse,quiet=TRUE)\nlibrary(ggplot2,quiet=TRUE)\nlibrary(cowplot,quiet=TRUE)\nlibrary(MusicScienceData,quiet=TRUE)\noptions(repr.plot.width = 7, repr.plot.height = 4) # Default plot size for Colab\n\n\n\n\n\n\nFigure 6.1. A boxplot showing the distribution of responses to a particular question (no. 
25) in the Attitudes towards Sad Music (ASM) instrument.\n\nsadness <- MusicScienceData::sadness\ng1 <- sadness %>%\n drop_na(ASM25) %>% # drop missing values\n ggplot(aes(x = ASM25))+\n geom_boxplot(fill=\"grey50\", colour='black')+\n scale_x_continuous(breaks = seq(1,7,by=1))+\n ylab('Count')+\n xlab('1 = Strongly disagree, 7 = Strongly agree')+\n theme_MusicScience()\nprint(g1)\n\n\n\n\n\n\n\n\nThe boxplot shows the distribution of answers to the attitudes towards sad music question (no. 25). The boxplot diagnoses ratings of 1 as outliers (indicated by the dot at the value of 1). The median of that distribution is 6, the lower end of the IQR is 5, and the interquartile range is 2, so the lower threshold for the outliers is 2 (5-2×1.5), and therefore the few values of 1 are singled out as potential outliers. Let’s verify the calculation so we understand the routine.\n\n\n\n\nMD <- median(sadness$ASM25,na.rm=TRUE) # median\nprint(paste('Median:',MD))\n\n[1] \"Median: 6\"\n\nIQR_range <- IQR(sadness$ASM25,na.rm=TRUE) # interquartile range\nprint(paste('Interquartile range:',IQR_range))\n\n[1] \"Interquartile range: 2\"\n\nlower_threshold <- (MD - IQR_range/2) - (IQR_range * 1.5) # combine lower end of the IQR and IQR range x 1.5\nprint(paste('Lower threshold for outliers:',lower_threshold))\n\n[1] \"Lower threshold for outliers: 2\"\n\n\nThe second example comes from the priming study and the reaction time responses (see Figure 6.2). The mean response time was 632 ms, the upper threshold for outliers using the IQR-based technique is 930 ms and the lower threshold is 254 ms, so any value below 254 ms or above 930 ms could be considered a potential outlier. To be fair, reaction times are not even supposed to be normally distributed; they have a strongly right-skewed shape caused by participants’ tendency to respond asymmetrically (more responses towards the slow end than the fast). There is a specific way to eliminate too-fast reactions (<200 ms) or slow reactions (>1500 ms) (Brysbaert & Stevens, 2018), and even after this, the analysis of the reaction time data will utilise a statistical operation that is suited to the specific distribution of the data (e.g. a GLMM with a shifted log-normal distribution) or apply a log transformation of the data. 
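As a stand-alone version of the trimming rule just mentioned, a minimal R sketch using the same priming data (the thresholds follow Brysbaert & Stevens, 2018; the variable names are assumptions following the chapter's code):

```{r}
#| eval: false
d <- MusicScienceData::priming
sum(d$RT < 200 | d$RT > 1500, na.rm = TRUE)      # how many responses fall outside 200-1500 ms
d_trim <- dplyr::filter(d, RT > 200 & RT < 1500) # keep only the plausible reaction times
```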
But as we can see from the Figure below, eliminating over 40 timed-out responses (>2000 ms) does make the data much cleaner.\n\n\n\n\noptions(repr.plot.width = 12, repr.plot.height = 10) # Default plot size for Colab\nd <- MusicScienceData::priming\n\ng1<-ggplot(d,aes(x=RT))+\n geom_histogram(binwidth=100,colour='grey50',fill='white')+\n ggtitle('Bin width 100')+\n ylab('Count')+\n xlab('Reaction time (ms)')+\n scale_x_continuous(breaks=seq(0,2000,by=400))+\n theme_MusicScience()\n\ng2<-ggplot(d,aes(x=RT))+\n geom_histogram(binwidth=10,colour='grey50',fill='white')+\n ggtitle('Bin width 10')+\n ylab('Count')+\n xlab('Reaction time (ms)')+\n scale_x_continuous(breaks=seq(0,2000,by=400))+\n theme_MusicScience()\n\ng3<-ggplot(dplyr::filter(d,RT>200 & RT<1500),aes(x=RT))+\n geom_histogram(binwidth=10,colour='grey50',fill='white')+\n ggtitle('Bin width 10 with trimming')+\n ylab('Count')+\n xlab('Reaction time (ms)')+\n scale_x_continuous(breaks=seq(200,1500,by=200),limits = c(0,2000))+\n geom_vline(xintercept = c(200,1500),linetype='dashed')+\n theme_MusicScience()\n\ng4<-ggplot(dplyr::filter(d,RT>200 & RT<1500),aes(x=RT))+\n geom_histogram(binwidth=10,colour='grey50',fill='white')+\n geom_density(aes(y=10 * after_stat(count)),alpha=0.5,colour='black',fill=NA)+\n ggtitle('Bin width 10 density with trimming')+\n ylab('Count')+\n xlab('Reaction time (ms)')+\n scale_x_continuous(breaks=seq(200,1500,by=200))+\n theme_MusicScience()\n\nG1 <- plot_grid(g1, g2, g3, g4, nrow = 2)\nprint(G1)\n\n\n\n\n\n\n\n\nIf the IQR-based method is the first way to diagnose outliers, the other common way of diagnosing outliers is to convert the variables into Z-scores, where the mean is 0 and the standard deviation is 1. A z-score of -4 would mean that the observation is four standard deviations below the mean. One rule of thumb suggests that observations ±3 standard deviations from the mean are potential outliers. Besides these two simple metrics, there are more sophisticated ways to identify outliers, such as using the normal distribution (Grubbs’ method) or a distance measure (the Mahalanobis method), but ultimately the yardstick for making decisions based on any technique is subjective and must be clearly explained and motivated.\n\n\n\n\nAfter diagnosing that there are outliers in the data, you need to decide what to do with them. It is possible to keep the outliers in the data if the analysis can work with outliers without being disruptively influenced by them. For instance, if the analysis operations can be done with non-parametric inferential statistics that rely on ranks (the order of the values) – not the actual distances – between the observations, this can avoid the detrimental effect of the outliers on statistical inferences (see Chapter 7). There are also variant techniques to carry out correlation and regression analyses that are designed to work with data that partially violate assumptions of normality (e.g., rank correlations, robust regression, and lasso regression). Similar operations exist for comparing means, ranging from non-parametric variants of the t-test (Mann-Whitney U test) and ANOVA (Kruskal-Wallis test) to generalised linear mixed models (GLMMs), where one can change the underlying assumptions of the distribution from the normal distribution to something else that reflects the underlying data better. A simpler option is either to eliminate the outliers or replace them with the nearest plausible data (sometimes called Winsorising), where you trim the values to the edge of the definition of the outliers. 
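To make the z-score rule and Winsorising concrete, a small R sketch using the sadness data from above (the ±3 cut-off is the rule of thumb mentioned earlier, not a fixed standard):

```{r}
#| eval: false
x <- MusicScienceData::sadness$ASM25
z <- (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE) # z-scores: mean 0, SD 1
sum(abs(z) > 3, na.rm = TRUE)                          # count potential outliers

# Winsorise: move extreme values to the edge of the IQR-based acceptable range
lo <- quantile(x, 0.25, na.rm = TRUE) - 1.5 * IQR(x, na.rm = TRUE)
hi <- quantile(x, 0.75, na.rm = TRUE) + 1.5 * IQR(x, na.rm = TRUE)
x_wins <- pmin(pmax(x, lo), hi)
```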
The decision of what is an appropriate way to deal with the problematic observations depends on many issues, but the idea of trimming them to the edge of the outliers is to preserve the observations in the data but just to remove their leverage (the distance from the mean) by moving them to the acceptable range. Again, there is no hard guidance on what the best practice is for dealing with outliers, as sometimes data is extremely rare and throwing parts of it away can handicap the analysis. In any case, reporting the diagnosis (what diagnosis operation was used and how many outliers were detected) and the treatment of the outliers is always necessary.\n\n\n\nSometimes there are missing observations in the data, and the reasons for these might be as varied as the reasons for outliers. Also, you might have created more missing data if you decided to eliminate outliers, which could mean that those offending extreme observations are now considered missing. If the missingness is in the original observations, it is worth considering the reason for the missing data before deciding what to do with it; it might tell you that a survey question was badly formed or related to a private issue that many people did not want to respond to, so they skipped the question, or perhaps the experiment data had an erroneous coding for a trial. When the missing observations are clearly linked to such a data collection issue, it might be best to report this as it is. When the amount of missing data is low and not clearly linked to any known issue, there are several ways to deal with it. One of them is to allow the values to be missing, as most of the analysis operations in competent statistical software suites can deal with the omissions. This works if the missing observations are coded properly in the statistical software (e.g., NA in R) and not as values of any kind (coding missing values as zeros is downright dangerous, as further calculations will then start to treat them as actual values). For instance, in the examples above, I have dropped missing observations when constructing plots (line 3 in Code 6.6) and tables (Table 6.3) and calculating means (the example just below the table above).\nIn most cases, reporting how many missing observations there are and whether they are specifically affecting the study design is sufficient to press on with the analysis while keeping the data as it is (with the missing observations in the data frame, properly coded as missing). More advanced ways of dealing with the missing observations are to infer the missing values from the other variables (imputation) or to interpolate the missing observations from the other data (Howell, 2008), but the prudent use of either of these techniques requires sophisticated data analysis skills, and I would not recommend following the route of filling in the gaps in the data with educated guesses unless this is absolutely necessary and you know exactly what you are doing.\n\n\n\nThe final issue of data quality relates to the distribution of the data. Most of the operations I have talked about – and will be talking about in Chapter 7 – assume that the observations follow the normal distribution, which is symmetric and governed by the mean and a variance of a specific kind (σ^2). 
\n\n\nThe final issue of data quality relates to the distribution of the data. Most of the operations I have talked about – and will be talking about in Chapter 7 – assume that the observations follow the normal distribution, which is symmetric and governed by a mean and a variance of a specific kind (σ²). When the observations have a wildly different distribution from this one, skewed in one direction (asymmetrical), or heteroscedastic (where the variation is uneven across the range of a variable), one might need to revert to statistical operations designed to handle non-normal distributions (non-parametric operations) or try to transform the observations into something closer to the normal distribution. There is nothing suspicious or problematic in the act of transforming a variable if it makes the analysis and interpretation easier, but again one must report and justify such operations carefully. We have already come across one common transformation that is often applied to reaction time data, namely the logarithmic transform. Another transformation for data that has a positive skew is the square root transformation. There is also a technique called power transformation, which attempts to find the transformation that creates the closest match to the normal distribution (also known as the Box-Cox technique). Statistical software packages come with routines that can identify violations of normality, such as the Kolmogorov-Smirnov or Shapiro-Wilk tests. Rather than blindly attempting to use a neat transformation to rescue a problematic variable, I would recommend a common-sense approach where the underlying reason for the non-normality of the distribution is considered. If it is something that typically happens with the measurements (such as reaction time data) and not just a poorly designed measure with ceiling or floor effects, the transformation is easy to motivate and apply. In other cases, it is probably wise to take a deep breath and consult an advanced statistics guide, e.g., Howell (2016), or see the recommendations at the end of the book.
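A minimal sketch of both operations, assuming the priming data from this chapter contains well over 500 trials (shapiro.test accepts between 3 and 5000 non-missing values, hence the subsample):

```{r}
#| eval: false
set.seed(101)
rt <- na.omit(MusicScienceData::priming$RT)
shapiro.test(sample(rt, 500)) # Shapiro-Wilk test on a random subsample
rt_log <- log(rt)             # the log transform compresses the right tail
```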
\n\n\n\n\nEerola, T., & Peltola, H.-R. (2016). Memorable experiences with sad music - reasons, reactions and mechanisms of three types of experiences. PloS ONE, 11(6), e0157444. https://doi.org/10.1371/journal.pone.0157444\nEerola, T., & Vuoskoski, J. K. (2011). A comparison of the discrete and dimensional models of emotion in music. Psychology of Music, 39(1), 18–49." - }, - { - "objectID": "Chapter6.4.html#outliers", - "href": "Chapter6.4.html#outliers", - "title": "Ch. 6 – Outliers", - "section": "", - "text": "It is not uncommon to obtain data which has some missing observations, perhaps even some nearly impossible values and funky distributions. There are remedies and procedures in statistics to diagnose and tackle these kinds of issues, some easy, some more tricky and controversial. This text will not get deeply involved in the analysis of these issues, but I will present the basics. It is worth saying here that sometimes these problems may lead to insurmountable challenges for the analysis, but more often there are simple techniques that can mitigate the problem. The key in all these operations is that you are transparent in what you do and explain why an operation was done to the data and what the operation was.\nAn unusually high or low value in the data may have a catastrophic impact on the descriptive and inferential statistics. The source of the unusual value, which we call an outlier, could be a typo in the data, a conversion error (mistaking a comma for a full stop as the decimal separator, or something else), or sometimes, in a large sample, extreme values simply appear in the data. Outliers will cause problems for the traditional analysis operations, such as calculating means or carrying out t-tests, correlations, and regressions, as these calculations usually assume a normal distribution of values and an extreme value will likely violate this assumption. The practical reason for treating outliers in the first place is that they may render these calculations misleading, as the extremes wield high leverage over otherwise relatively stable values that tend to centre around a mean.\nThe first diagnostic action towards finding out the potential outliers is to visualise the data. If you plot the histograms or boxplots of your variables or scatterplots between two variables, the outliers are usually quite easily visible in these visualisations (see Figure 6.3 for an example). It is always a sensible idea to get a feel for the distribution of the data by plotting the values in a meaningful fashion (boxplots are always a good starting point). The shape of the distribution might reveal other unwanted issues, such as all values being clustered near one end of a scale (called a ceiling effect or a floor effect, where the measurement scale is attenuated because it is not sensitive enough, or is oversensitive, scoring only a few values at the positive extreme of the scale). Or a visualisation of two variables might reveal that their relationship is not linear but still clear and regular, following a polynomial relation (e.g., a U-shaped or inverted U-shaped pattern). It is possible to diagnose the potential outliers using several quantitative techniques, but before mentioning two options, let me warn that there is no definite recommendation on what is classified as an outlier, as different types of data, distributions, and disciplinary orientations might have slightly different practices for dealing with these offending values. One of the most commonly used measures, already introduced earlier in relation to boxplots, is the interquartile range (IQR), which defines the range of acceptable values (outliers are above the 75% quantile plus 1.5 × IQR or below the 25% quantile minus 1.5 × IQR).\n\nlibrary(tidyverse,quiet=TRUE)\nlibrary(ggplot2,quiet=TRUE)\nlibrary(cowplot,quiet=TRUE)\nlibrary(MusicScienceData,quiet=TRUE)\noptions(repr.plot.width = 7, repr.plot.height = 4) # Default plot size for Colab" - }, - { - "objectID": "Chapter6.4.html#boxplot-and-outliers", - "href": "Chapter6.4.html#boxplot-and-outliers", - "title": "Ch. 6 – Outliers", - "section": "", - "text": "Figure 6.1. A boxplot showing the distribution of responses to a particular question (no. 25) in the Attitudes towards Sad Music (ASM) instrument.\n\nsadness <- MusicScienceData::sadness\ng1 <- sadness %>%\n drop_na(ASM25) %>% # drop missing values\n ggplot(aes(x = ASM25))+\n geom_boxplot(fill=\"grey50\", colour='black')+\n scale_x_continuous(breaks = seq(1,7,by=1))+\n ylab('Count')+\n xlab('1 = Strongly disagree, 7 = Strongly agree')+\n theme_MusicScience()\nprint(g1)\n\n\n\n\n\n\n\n\nThe boxplot shows the distribution of answers to question 25 of the attitudes towards sad music instrument. The boxplot diagnoses ratings of 1 as outliers (indicated by the dot at the value of 1). The median of that distribution is 6, the lower end of the IQR is 5, and the interquartile range is 2, so the lower threshold for the outliers is 2 (5 - 2 × 1.5) and therefore the few values of 1 are singled out as potential outliers.
Let’s verify the calculation so we understand the routine.\n\n\n\n\nMD <- median(sadness$ASM25,na.rm=TRUE) # median\nprint(paste('Median:',MD))\n\n[1] \"Median: 6\"\n\nIQR_range <- IQR(sadness$ASM25,na.rm=TRUE) # interquartile range\nprint(paste('Interquartile range:',IQR_range))\n\n[1] \"Interquartile range: 2\"\n\nlower_threshold <- (MD - IQR_range/2) - (IQR_range * 1.5) # lower end of the IQR minus 1.5 x IQR\nprint(paste('Lower threshold for outliers:',lower_threshold))\n\n[1] \"Lower threshold for outliers: 2\"\n\n\nThe second example comes from the priming study and the reaction time responses (see Figure 6.2). The mean response time was 632 ms, the upper threshold for outliers using the IQR-based technique is 930 ms, and the lower threshold is 254 ms, so any value below 254 ms or above 930 ms could be considered a potential outlier. To be fair, reaction times are not even supposed to be normally distributed; they have a strongly right-skewed shape caused by participants’ tendency to respond asymmetrically (more responses towards the slow end than the fast end). There is a specific way to eliminate too-fast reactions (<200 ms) or slow reactions (>1500 ms) (Brysbaert & Stevens, 2018), and even after this, the analysis of the reaction time data will utilise a statistical operation that is suited to the specific distribution of the data (e.g. a GLMM with a shifted log-normal distribution) or apply a log transformation of the data. But as we can see from the Figure below, eliminating over 40 timed-out responses (>2000 ms) does make the data much cleaner.\n\n\n\n\noptions(repr.plot.width = 12, repr.plot.height = 10) # Default plot size for Colab\nd <- MusicScienceData::priming\n\ng1<-ggplot(d,aes(x=RT))+\n geom_histogram(binwidth=100,colour='grey50',fill='white')+\n ggtitle('Bin width 100')+\n ylab('Count')+\n xlab('Reaction time (ms)')+\n scale_x_continuous(breaks=seq(0,2000,by=400))+\n theme_MusicScience()\n\ng2<-ggplot(d,aes(x=RT))+\n geom_histogram(binwidth=10,colour='grey50',fill='white')+\n ggtitle('Bin width 10')+\n ylab('Count')+\n xlab('Reaction time (ms)')+\n scale_x_continuous(breaks=seq(0,2000,by=400))+\n theme_MusicScience()\n\ng3<-ggplot(dplyr::filter(d,RT>200 & RT<1500),aes(x=RT))+\n geom_histogram(binwidth=10,colour='grey50',fill='white')+\n ggtitle('Bin width 10 with trimming')+\n ylab('Count')+\n xlab('Reaction time (ms)')+\n scale_x_continuous(breaks=seq(200,1500,by=200),limits = c(0,2000))+\n geom_vline(xintercept = c(200,1500),linetype='dashed')+\n theme_MusicScience()\n\ng4<-ggplot(dplyr::filter(d,RT>200 & RT<1500),aes(x=RT))+\n geom_histogram(binwidth=10,colour='grey50',fill='white')+\n geom_density(aes(y=10 * after_stat(count)),alpha=0.5,colour='black',fill=NA)+\n ggtitle('Bin width 10 density with trimming')+\n ylab('Count')+\n xlab('Reaction time (ms)')+\n scale_x_continuous(breaks=seq(200,1500,by=200))+\n theme_MusicScience()\n\nG1 <- plot_grid(g1, g2, g3, g4, nrow = 2)\nprint(G1)\n\n\n\n\n\n\n\n\n
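As a side note, it is easy to check how much data such a trimming rule actually discards; a minimal sketch (not from the book's code) using the same priming data:

```{r}
#| eval: false
d <- MusicScienceData::priming
trimmed <- d$RT < 200 | d$RT > 1500         # trials outside the 200-1500 ms window
table(trimmed)                              # counts of retained vs trimmed trials
round(mean(trimmed, na.rm = TRUE) * 100, 2) # percentage of trials removed
```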
If the IQR-based method is the first way to diagnose outliers, the other common way is to convert the variables into Z-scores, where the mean is 0 and the standard deviation is 1. A z-score of -4 would then mean that an observation is four standard deviations below the mean. One rule of thumb suggests that observations more than ±3 standard deviations from the mean are potential outliers. Besides these two simple metrics, there are more sophisticated ways to identify outliers, such as using the normal distribution (Grubbs’ method) or a distance measure (the Mahalanobis method), but ultimately the yardstick for making decisions based on any technique is subjective and must be clearly explained and motivated." - },
- { - "objectID": "Chapter6.2.html", - "href": "Chapter6.2.html", - "title": "Ch. 6 – Data Organisation", - "section": "", - "text": "This notebook demonstrates Data Diagnostics and Summaries.\nThis section is based on an R template for analysing data from experiments and surveys, and a justification for following certain conventions and structures. This document is available as a rendered html at https://tuomaseerola.github.io/R_template/.\n\n\nFor each project, you should establish one repository that you can clone/share using an appropriate service (such as GitHub, GitLab, or other services; even Dropbox works for collaborations). Name the repository with a compact but informative name (chord_priming or emotion_recognition5) that separates it from your other projects.\nWithin this repository, it is a good idea to establish separate folders for separate elements:\n\n/data Data in read-only format (preferably CSV or TSV format)\n/munge All operations to pre-process, recode, or trim data\n/scr All actual R scripts used in the analysis\n/figures Outputs from the scripts (optional if using reporting languages)\n/docs Outputs from the reports (optional if using reporting languages)\n\nIn this repository, contents.R is the file that compiles the full analysis and allows you to reproduce everything in R. Alternatively, this file can be an Rmarkdown file, which is a neat analysis and reporting format, or even a Quarto document, which is a more advanced version of the same idea. Nevertheless, the summary document contains all the stages, structures and processes of the project. This is structured to be executed in a coherent order and manner (i.e., loading, transforming, and screening the data, and then visualising, applying statistical analyses, and creating figures and tables).\nreport.Rmd will create the report that incorporates comments and the actual analyses and produces either an html or a pdf file (report.html, report.pdf) in the docs folder.\n\n\nTypically the data is in CSV (comma-separated values) or TSV (tab-separated values) format, as this is the output from most experiment software solutions and is also easily exported from Qualtrics, psychophysiological measures and so on. Sometimes the data might be in Excel format, which can also be read easily into R, but I would advise against large amounts of editing in Excel, as you would lose the ability to tell what has been changed and why. The rule is that we never edit or manipulate or fix or alter the raw data, no matter what the format is.\nIt is good to store the raw original data with time stamps in the data folder, and if you get a newer dataset or more observations, you add a new data file to the data folder with a new timestamp and keep the old one for reference. There are situations when the data has excess observations (pilot participants), typos, and other issues, but it is easier and more transparent to handle these in the processing (munging) stage.
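As a minimal sketch of this convention (the timestamped file name is hypothetical, not part of the template), reading a raw export while leaving the file itself untouched might look like this:

```{r}
#| eval: false
library(readr)
# read-only: the raw export is never edited, only read into R
v <- read_tsv("data/2024-03-01_survey_export.tsv")
```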
\n\n\nMunging refers to preprocessing the raw data to make it useful for the actual analysis. Often this means relabelling the names of the variables (columns) and possibly recoding the observations (as numeric responses or as factors). It is also very typical to pivot the data from wide format (numerous variables in columns) to long format, so that the key variables contain all manipulations.\n\n\n\nOften you develop the analysis in stages, starting with some form of quality control and moving on to descriptives and then inferential statistics. For enhanced clarity and debugging, it is a good idea to develop these as separate scripts (and also possibly as functions) and store them in the scripts folder (scr). The production of tables and figures can also be explicitly done with separate scripts.\nIn the end, you should have one file (that I call contents.R) that is able to produce the full analysis, from reading the data and preprocessing it to calculating quality control indicators, summarising the data, producing the analyses, and creating tables and figures.\n\n\n\n\nProceed to either a fuller explanation of the process at https://tuomaseerola.github.io/R_template/ or check the Quarto slides about R_template in action to explore the steps of the analysis process.\n\n\nStart R and open up the contents.R file using your preferred editor. Check that the directory after the first command setwd is pointing to the location of your analysis directory and run the first lines of the code:\n\n# contents.R\n## INITIALISE: LOAD LIBRARIES\nlibrary(tidyverse, quietly = TRUE) # Loads the necessary R libraries\n\nIf you get errors at this stage with a new installation of R, they might refer to the special libraries that are loaded or installed in libraries.R. This script should install the required libraries for you, such as ggplot2, but there might be issues with your particular setup.\n\n\n\nNext, it is time to load the data with scripts; the first one, read_data_survey.R, simply reads a TSV file exported from Qualtrics and stored in the data folder. I’ve taken the second, descriptive header row out of the data to simplify the process, but different datasets will have slightly different structures.\n\nsource('scr/read_data_survey.R') # Produces data frame v \n\nThis should read the data into a variable called v in R, which contains a complex data frame. In the next step this raw data will be munged, that is, pre-processed in several ways. Pre-processing can have multiple steps; here these have been broken into two:\nThe first operation carries out a long list of renamings of the variables (columns in the data; rename_variables.R). This can be avoided if the data already has these names, and it is quite useful to try to embed meaningful variable names in the data collection (experiment, survey, or manual coding).\nRecoding instruments (recode_instruments.R) has several steps and it might be useful to study the steps separately. Finally, the responses are reshaped into a form called long-form, which is better suited for the analyses; a generic sketch of this reshaping is shown below.
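A generic illustration of that wide-to-long reshaping (the ASM column prefix and the df_long name are hypothetical; the template's own recode_instruments.R does more than this):

```{r}
#| eval: false
library(tidyr)
# pivot rating columns into item/rating pairs, one row per response
df_long <- pivot_longer(v, cols = starts_with("ASM"),
                        names_to = "item", values_to = "rating")
```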
This dataframe will be called df.\n\nsource('munge/rename_variables.R') # Renames the columns of v\nsource('munge/recode_instruments.R')# Produces df (long-form) from v\n\nAfter the munging, it is prudent to check various aspects of the data.\n\nDescriptives such as N, age, and gender are echoed in order to remind us of the dataset properties (demographics_info.R).\nWe can also explore the consistency of the ratings across the people to check whether people agreed on the ratings and generally understood the task (interrater_reliability.R); the sketch below illustrates the idea.\nWe also want to look at the distributions of the collected data in order to learn whether one needs to use certain operations (transformations, or resorting to non-parametric statistics) in the subsequent analyses (visualise.R). This step will also include displaying correlations between the emotion scales, which is a useful operation to learn about the overlap of the concepts used in the tasks.\n\n\nsource('scr/demographics_info.R') # Reports N, Age and other details
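A minimal sketch of the consistency idea (simulated ratings and base R only; the template's interrater_reliability.R is more elaborate):

```{r}
#| eval: false
set.seed(101)
ratings <- matrix(rnorm(100 * 10), nrow = 100, ncol = 10) # 100 items, 10 raters
r <- cor(ratings, use = "pairwise.complete.obs")          # rater-by-rater correlations
mean(r[lower.tri(r)])                                     # average inter-rater correlation
```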
", - "crumbs": [ - "Data & Statistics", - "Ch. 6 – Data Organisation" - ] - },
- { - "objectID": "Chapter3.html", - "href": "Chapter3.html", - "title": "Ch. 3 – Historic profiles", - "section": "", - "text": "Load or install necessary R packages.\n\nif (!require(devtools)) install.packages(\"devtools\",\n repos = \"http://cran.us.r-project.org\")\ndevtools::install_github(\"tuomaseerola/inconMore\")\n\n\nlibrary(inconMore)\nlibrary(ggplot2, quietly = TRUE)\nlibrary(tidyverse, quietly = TRUE)\noptions(repr.plot.width = 6, repr.plot.height = 4) # Default plot size for Colab\n\n\n\n\nFrequency of intervals in Bach sinfonias (bars) and ratings of consonance of the intervals (lines, from Bowling, Purves & Gill, 2018). Interval frequencies recreated from Huron (2001).\n\nIV<-c(\"P1\",\"m2\",\"M2\",\"m3\",\"M3\",\"P4\",\"TT\",\"P5\",\"m6\",\"M6\",\"m7\",\"M7\",\"P8\")\nFrequency <- c(15,7,26,87,58,50,35,52,65,88,32,3,23)/100 # approx. from Huron 2001, p. 20\nlibrary(inconMore) # Let's use more recent data\na <- inconMore::bowl18 # Bowling et al., 2018 ratings for 12 intervals\nConsonance <- scales::rescale(c(NA,a$rating[1:12]),to = c(0,1)) # No unison\ndf <- data.frame(IV,Consonance,Frequency)\ndf$Nro <- 1:13\n\nPlot both.\n\ng1 <- ggplot(df) +\n geom_bar(aes(x=Nro, y=Frequency),stat=\"identity\", fill=\"gray40\",colour='black')+\n geom_line(aes(x=Nro, y=Consonance),stat=\"identity\", group=1,linewidth=1.25,colour=\"gray80\",alpha=0.80)+\n geom_point(aes(x=Nro, y=Consonance),stat=\"identity\", group=1,size=3,alpha=0.80)+\n theme_bw()+\n xlab('Interval')+\n ylab('Normalized Freq./Consonance')+\n scale_x_continuous(breaks = seq(1,13,by=1),labels = IV,expand = c(0.01,0.01))+\n scale_y_continuous(breaks = seq(0,1,by=0.25),expand = c(0.01,0.01),limits = c(0,1))\ng1\n\n\n\n\n\n\n\n\n\n\n\n\nBowling, D. L., Purves, D., & Gill, K. Z. (2018). Vocal similarity predicts the relative attraction of musical chords. Proceedings of the National Academy of Sciences, 115(1), 216–221.\nHuron, D. (2001). Tone and voice: A derivation of the rules of voice-leading from perceptual principles. Music Perception, 19(1), 1–64.", - "crumbs": [ - "Background", - "Ch. 3 – Historic profiles" - ] - },
3 – Historic profiles", - "section": "", - "text": "Load or install necessary R packages.\n\nif (!require(devtools)) install.packages(\"devtools\",\n repos = \"http://cran.us.r-project.org\")\ndevtools::install_github(\"tuomaseerola/inconMore\")\n\n\nlibrary(inconMore)\nlibrary(ggplot2, quietly = TRUE)\nlibrary(tidyverse, quietly = TRUE)\noptions(repr.plot.width = 6, repr.plot.height = 4) # Default plot size for Colab", - "crumbs": [ - "Background", - "Ch. 3 – Historic profiles" - ] - }, - { - "objectID": "Chapter3.html#code-3.1", - "href": "Chapter3.html#code-3.1", - "title": "Ch. 3 – Historic profiles", - "section": "", - "text": "Frequency of intervals in Bach sinfonias (bars) and ratings of consonance of the intervals (lines, from Bowling, Purves & Gill, 2018). Interval frequencies recreated from Huron 2001.\n\nIV<-c(\"P1\",\"m2\",\"M2\",\"m3\",\"M3\",\"P4\",\"TT\",\"P5\",\"m6\",\"M6\",\"m7\",\"M7\",\"P8\")\nFrequency <- c(15,7,26,87,58,50,35,52,65,88,32,3,23)/100 # approx. from Huron 2001, p. 20\nlibrary(inconMore) # Let's use more recent data\na <- inconMore::bowl18 # Bowling et al., 2018 ratings for 12 intervals\nConsonance <- scales::rescale(c(NA,a$rating[1:12]),to = c(0,1)) # No unison\ndf <- data.frame(IV,Consonance,Frequency)\ndf$Nro <- 1:13\n\nPlot both.\n\ng1 <- ggplot(df) +\n geom_bar(aes(x=Nro, y=Frequency),stat=\"identity\", fill=\"gray40\",colour='black')+\n geom_line(aes(x=Nro, y=Consonance),stat=\"identity\", group=1,linewidth=1.25,colour=\"gray80\",alpha=0.80)+\n geom_point(aes(x=Nro, y=Consonance),stat=\"identity\", group=1,size=3,alpha=0.80)+\n theme_bw()+\n xlab('Interval')+\n ylab('Normalized Freq./Consonance')+\n scale_x_continuous(breaks = seq(1,13,by=1),labels = IV,expand = c(0.01,0.01))+\n scale_y_continuous(breaks = seq(0,1,by=0.25),expand = c(0.01,0.01),limits = c(0,1))\ng1", - "crumbs": [ - "Background", - "Ch. 3 – Historic profiles" - ] - }, - { - "objectID": "Chapter3.html#references", - "href": "Chapter3.html#references", - "title": "Ch. 3 – Historic profiles", - "section": "", - "text": "Bowling, D. L., Purves, D., & Gill, K. Z. (2018). Vocal similarity predicts the relative attraction of musical chords. Proceedings of the National Academy of Sciences, 115(1), 216–221.\nHuron, D. (2001). Tone and voice: A derivation of the rules of voice-leading from perceptual principles. Music Perception, 19(1), 1–64.", - "crumbs": [ - "Background", - "Ch. 3 – Historic profiles" - ] - }, - { - "objectID": "Chapter1.html#preliminaries", - "href": "Chapter1.html#preliminaries", - "title": "Ch. 1 – Notebook basics", - "section": "Preliminaries", - "text": "Preliminaries\nTo install the MusicScienceData package that contains several example datasets used in this book, run the following command.\n\n#if (!require(devtools)) install.packages(\"devtools\",quiet=TRUE)\ndevtools::install_github(\"tuomaseerola/MusicScienceData\",quiet=TRUE)", - "crumbs": [ - "Background", - "Ch. 1 – Notebook basics" - ] - }, - { - "objectID": "Chapter1.html#code-1.1", - "href": "Chapter1.html#code-1.1", - "title": "Ch. 1 – Notebook basics", - "section": "Code 1.1", - "text": "Code 1.1\nThis is the first R code example, which demonstrates loading package that contains datasets, choosing one dataset, and then calculating correlation between two rated concepts (energy and tension).\n\n# Code 1.1\nlibrary(MusicScienceData) # loads library w data\ndata <- MusicScienceData::soundtrack # pick data\ncor.test(data$Energy, # calc. 
correlation\n data$Tension)\n\n\n Pearson's product-moment correlation\n\ndata: data$Energy and data$Tension\nt = 7.3396, df = 108, p-value = 4.222e-11\nalternative hypothesis: true correlation is not equal to 0\n95 percent confidence interval:\n 0.4368271 0.6896336\nsample estimates:\n cor \n0.576884", - "crumbs": [ - "Background", - "Ch. 1 – Notebook basics" - ] - }, - { - "objectID": "Chapter4.html", - "href": "Chapter4.html", - "title": "Ch. 4 – Correlations", - "section": "", - "text": "This data illustrates different correlation coefficients by taking inspiration from a study by Maruskin et al. (2012), who collected self-reports related to chills. As we don’t have access to the data, the correlations are created by simulating a multivariate normal distribution (see generate_data.R) just to illustrate the way the pattern of correlation changes.\n\n#### Libraries ------------------------------------------------\nlibrary(MASS)\nlibrary(ggplot2)\noptions(repr.plot.width = 6, repr.plot.height = 6) # Default plot size for Colab\n\n#### define a function -------------------------------------\ngenerate_data <- function(N=NULL,r=NULL,m_x=NULL,range_x=NULL,m_y=NULL,range_y=NULL){\n # Generate data\n out <- as.data.frame(mvrnorm(N, mu = c(0,0), \n Sigma = matrix(c(1,r,r,1), ncol = 2), \n empirical = TRUE))\n # Calculations to create multiplication and addition factors for mean and range of X and Y\n mx.factor <- range_x/6\n addx.factor <- m_x - (mx.factor*3)\n my.factor <- range_y/6\n addy.factor <- m_y - (my.factor*3)\n \n # Adjust so that values are positive and include factors to match desired means and ranges\n out$V1.s <- (out$V1 - min(out$V1))*mx.factor + addx.factor\n out$V2.s <- (out$V2 - min(out$V2))*my.factor + addy.factor\n return(out)\n}\n\n\n#### Correlations we want to simulate ------------------\n\nN <- 362\nr <- 0.32 # Desired correlation\nd.mx <- 10 # Desired mean of X\nd.rangex <- 17 # Desired range of X\nd.my <- 10 # Desired mean of Y\nd.rangey <- 17 # Desired range of Y\n\n\n#### Coldshivers and negative emotionality --------------\nd1 <- generate_data(N, r, d.mx, d.rangex, d.my, d.rangey)\n\n# Plot scatterplot along with regression line\ng1 <- ggplot(d1, aes(x=V1.s, y=V2.s)) + \n geom_point(colour='gray25') + \n xlab('Coldshivers')+\n ylab('Negative emotionality')+\n annotate(\"text\",x = 3.0, y=16,label = \"italic(r)==0.32\", parse=TRUE,size=4.5)+\n geom_smooth(formula = y ~ x, method='lm',color='gray50',fullrange=TRUE)+\n scale_x_continuous(limits = c(1,22),expand = c(0.00,0.00),breaks = seq(0,20,by=5))+\n scale_y_continuous(limits = c(1,22),expand = c(0.00,0.00),breaks = seq(0,20,by=5))+\n coord_fixed()+\n theme_bw()\nprint(g1)\n\n\n\n\n\n\n\n\n\n#### Coldshivers and Goosetingles --------------\nset.seed(101)\nr <- 0.65 # Desired correlation\nd2 <- generate_data(N, r, d.mx, d.rangex, d.my, d.rangey)\ng2 <- ggplot(d2, aes(x=V1.s, y=V2.s)) + \n geom_point(colour='gray25') + \n xlab('Goosetingles')+\n ylab('Coldshivers')+\n annotate(\"text\",x = 3.0, y=16,label = \"italic(r)==0.65\", parse=TRUE,size=4.5)+\n geom_smooth(formula = y ~ x, method='lm',color='gray50',fullrange=TRUE)+\n scale_x_continuous(limits = c(1,22),expand = c(0.00,0.00),breaks = seq(0,20,by=5))+\n scale_y_continuous(limits = c(1,22),expand = c(0.00,0.00),breaks = seq(0,20,by=5))+\n coord_fixed()+\n theme_bw()\nprint(g2)\n\n\n\n\n\n\n\n\n\n#### Overall chills and Goosetingles --------------\nset.seed(101)\nr <- 0.91\nd3 <- generate_data(N, r, d.mx, d.rangex, d.my, d.rangey)\n# Plot scatterplot along
with regression line\ng3 <- ggplot(d3, aes(x=V1.s, y=V2.s)) + \n geom_point(colour='gray25') + \n xlab('Overall chills')+\n ylab('Goosetingles')+\n annotate(\"text\",x = 3.0, y=16,label = \"italic(r)==0.91\", parse=TRUE,size=4.5)+\n geom_smooth(formula = y ~ x, method='lm',color='gray50',fullrange=TRUE)+\n scale_x_continuous(limits = c(1,18),expand = c(0.00,0.00),breaks = seq(0,20,by=5))+\n scale_y_continuous(limits = c(1,18),expand = c(0.00,0.00),breaks = seq(0,20,by=5))+\n coord_fixed()+\n theme_bw()\nprint(g3)\n\nWarning: Removed 2 rows containing missing values or values outside the scale range\n(`geom_smooth()`).\n\n\n\n\n\n\n\n\n\n\n#### Neuroticism and Goosetingles --------------\nset.seed(101)\nr <- 0.02\nd4 <- generate_data(N, r, d.mx, d.rangex, d.my, d.rangey)\n# Plot scatterplot along with regression line\ng4 <- ggplot(d4, aes(x=V1.s, y=V2.s)) + \n geom_point(colour='gray25') + \n xlab('Neuroticism')+\n ylab('Goosetingles')+\n annotate(\"text\",x = 3.0, y=16,label = \"italic(r)==0.02\", parse=TRUE,size=4.5)+\n geom_smooth(formula = y ~ x, method='lm',color='gray50',fullrange=TRUE)+\n scale_x_continuous(limits = c(1,18),expand = c(0.00,0.00),breaks = seq(0,20,by=5))+\n scale_y_continuous(limits = c(1,18),expand = c(0.00,0.00),breaks = seq(0,20,by=5))+\n coord_fixed()+\n theme_bw()\nprint(g4)\n\n\n\n\n\n\n\n\n\n\n\nThis recreates a forest plot with a small sample of studies (10 of the 105 experiments) analysed by Cooper (2020).\n\n#install.packages(\"metafor\",repos='http://cran.us.r-project.org',quiet=TRUE)\nlibrary(metafor,quiet=TRUE)\n\n\nLoading the 'metafor' package (version 4.6-0). For an\nintroduction to the package please type: help(metafor)\n\n# 10 example studies from Cooper 2020\ndat <- data.frame(\n id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),\n yi = c(-0.71, -0.45, -0.25, 0.03, 0.17, 0.20, 0.36, 0.56, 0.72, 0.96),\n vi = c(0.143, 0.203, 0.039, 0.057, 0.107, 0.059, 0.046, 0.109, 0.12, 0.050),\n author = c(\"Mehr et al.\", \"Bhide et al.\", \"Gromko\", \"Rickar et al.\", \"Hanson et al.\", \"Schellenberg\", \"Ho et al.\", \"Bilhartz et al.\", \"Dege et al.\", \"Flaugnacco et al.\"),\n year = c(2013, 2013, 2005, 2012, 2003, 2004, 2003, 2000, 2011, 2015))\n\nres.ee <- rma(yi, vi, data=dat, method=\"EE\")\n\nforest(res.ee, header=c(\"Experiment\", \"g [95% CR]\"), top=2, xlab=\"Effect Size and Credibility Intervals\",slab=paste(author, year, sep=\", \"),cex=0.9)\n
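As a minimal sketch of what the equal-effects (EE) model computes, the pooled estimate is just a precision-weighted mean of the effect sizes; this reuses the dat frame above and should match coef(res.ee):

```{r}
#| eval: false
w <- 1 / dat$vi                    # inverse-variance weights
pooled <- sum(w * dat$yi) / sum(w) # precision-weighted mean effect
se_pooled <- sqrt(1 / sum(w))      # standard error of the pooled effect
round(c(estimate = pooled, se = se_pooled), 3)
```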
\n\n\n\n\n\n\n\n\n\n\nCooper, P. K. (2020). It’s all in your head: A meta-analysis on the effects of music training on cognitive measures in schoolchildren. International Journal of Music Education, 38(3), 321–336.\nMaruskin, L. A., Thrash, T. M., & Elliot, A. J. (2012). The chills as a psychological construct: Content universe, factor structure, affective composition, elicitors, trait antecedents, and consequences. Journal of Personality and Social Psychology, 103(1), 135–157.", - "crumbs": [ - "Background", - "Ch. 4 – Correlations" - ] - },
- { - "objectID": "Chapter6.1.html", - "href": "Chapter6.1.html", - "title": "Ch. 6 – Using R", - "section": "", - "text": "This notebook demonstrates how to use R.\n\n\nThere are plenty of tools for data analysis and statistics available. I will only consider those that are open source and free to use, as this is the only way to guarantee that people are able to access the tools. Sadly, some fine tools such as SPSS, JMP, Minitab, SAS, or Stata don’t fulfil these principles. R and RStudio, JASP, Jamovi and Python (made better with libraries such as scipy) and some others are free and open-source software packages that have become common research tools in empirical sciences.
In addition to being free and easily available, they have excellent capacities for sharing the analysis workflow, and some have tools to ensure replicability over years and different versions. I will focus on R in the statistical analysis and explain why I think this is a good option for analysing empirical data.\n\n\n\n\nR is a versatile environment for analysing any data. It is interactive and well suited to casual exploration of data that comes in many different forms (numbers, text strings) and different shapes (long and wide data). What is even more important in R is that it is fundamentally based on scripts that serve as a blueprint of the analysis you have done and allow you, or anyone else, to replicate your analyses simply by running the same script. R is also:\n\nFree and open source.\nIt works well on all operating systems: Windows, macOS, UNIX/Linux.\nThe community of R users is broad and active, and for this reason the resources for learning and asking questions are impressive and well-developed (see Appendix).\nIt already has several music-specific tools (e.g., the incon, gm, and hrep libraries) and datasets (MusicScienceData).\nIt is excellent for any statistical analysis.\n\nHere is a quick tutorial for using R, but I would also recommend the Getting Started with R and The Basics guides by the company who brings us RStudio (Posit).\n\n\nThe basic interface in R is called the R console, which will carry out commands as you type them. There are several ways to gain access to an R console. One way is to simply start R on your computer. The console looks something like this:\n\nAs an easy example, try using the console in R to calculate the duration in seconds of an eighth note (or a quaver in the U.K.) in common time (4/4) when the tempo is quarter note = 120 beats per minute (BPM):\n\n0.5 * (60 / 120)\n\n[1] 0.25\n\n\nThe code boxes show R code typed into the R console. Anything following a hashtag (#) is a comment and will not be interpreted by R. Here we see that one eighth note is 0.25 seconds (or 250 ms if you want) in this tempo. We got this by remembering that one quarter note lasts for 60 / tempo in BPM seconds. And since an eighth note is half of a quarter note, we expressed this as 0.5 and multiplied (*) it by the tempo expression (60/120). You would get the same result with 1/2 * 60/120. Or you could define the duration and tempo as variables:\n\nnote <- 0.5\ntempo <- 120\ndur_in_seconds <- note * 60 / tempo # Calculate\nprint(dur_in_seconds * 1000) # Convert to milliseconds\n\n[1] 250\n\n\nThis example demonstrates that there are several ways of doing calculations in R. In the last example we tried to be clear about the operations and defined variables (note and tempo), used them to calculate the duration of the note, and finally transformed the value into milliseconds.\n\n\n\nRStudio is a sleek and interactive integrated development environment (IDE) that offers a number of great features for using R efficiently, as it offers the console, panes for folders, help files, an index of what is in the memory, and a separate window for plots.\n\nThere are three main panes in RStudio. The left pane shows the R console. On the right, the top pane includes tabs such as Environment and History, while the bottom pane shows five tabs: File, Plots, Packages, Help, and Viewer.\n\n\nScripts are a collection of commands that can be edited with a text editor and executed in R.\nTo start a new script, you can click on File, then New File, then R Script.
This starts a new pane on the left, and it is here that you can start writing your script. This will contain your analysis. By documenting the analysis in the script, you, or anyone else, can replicate the steps very easily and get the same results afterwards just by running the script.\n\n\n\nRStudio helps to make the code more readable and easier to use by displaying different parts of the code in different colours, and the indentation is automatically adjusted as you write the code. It will also close parentheses and suggest variable names as you go along, and warn if a line of code contains an error. If we look at our new script, we first need to give it a name. A good practice is to name scripts in a descriptive fashion, use lower-case letters, avoid spaces, use hyphens to separate words, and end with the suffix .R. We will call this script code6.1.R. This one will grab data from a published dataset of emotions expressed by film soundtracks and plot the rated mean valence and energy of each track.\nSee the Chapter7.R code at the https://tuomaseerola.github.io/emr/ site.\n\n# code6.1.R\n#library(devtools) # add libraries from github\n#devtools::install_github(\"tuomaseerola/MusicScienceData\")\nlibrary(MusicScienceData) # loads library w data\nlibrary(ggplot2) # loads library\nggplot(MusicScienceData::soundtrack, # defines the data\n aes(x=Valence,\n y=Energy,\n colour=TARGET_EMOTION))+ # variables\n geom_point(size=4)+ # plots points\n theme_MusicScience() # applies style\n\n\n\n\n\n\n\n\nLet’s review this script. The first four lines of code in an R script are used to load the libraries that are needed. Here we load the data from the soundtrack study, which is part of the MusicScienceData library. As an example, we will make a graph showing the position of 110 music excerpts in terms of their rated Valence and Energy. Once you have copied or typed the code above, you can run it by clicking on the Run button on the upper right side of the editing pane.\nOnce you run the code, you will see it appear in the R console and, in this case, the generated plot appears in the plots pane. The plots pane has a useful interface that permits you to click back and forward across different plots, zoom in to the plot, or save the plots as files. I recommend learning to save the plots as graphics directly in the script so that it is easier to control their size (width, height, and resolution) and type (pdf, tiff, or png), and to replicate the identical plots afterwards.\n\nTo run one line at a time instead of the entire script, you can use Control-Enter on Windows and Command-Return on macOS.\n\n\n\n\nR has thousands of packages available that offer data and functions. Not everybody needs all these functionalities, so R offers these as packages that are easy to install within R. We will need a few of these, so let’s see how this works.\nIn RStudio, the Tools tab contains an option to install packages. We can load a package into our R session using the library function:\n\nlibrary(ggplot2)\n\nIf this command gives you an error, you probably do not have this fabulous plotting library installed in your R yet. This can be fixed by typing:\n\ninstall.packages(\"ggplot2\")\n\nAfter installation, you still need to make the library active in your session by invoking the library command described earlier. Different examples in this book will utilise different libraries.
Note\n\nThe library where many examples come from, MusicScienceData, needs to be installed with the following code. This is because I have released that library only on GitHub for easier development, and R is not able to find it unless you have the extra package called devtools installed.\n\ninstall.packages(\"devtools\") # needed to install from GitHub\ndevtools::install_github(\"tuomaseerola/MusicScienceData\")", - "crumbs": [ - "Data & Statistics", - "Ch. 6 – Using R" - ] - },
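The diagnostics notebook below works through a survey dataset from the same package. As a first step with any new dataset, it is worth checking its dimensions and the summary of a variable of interest; this is a sketch for illustration, assuming MusicScienceData is installed as shown above.

library(MusicScienceData)  # assumes the package is installed (see above)
dim(sadness)               # number of rows and columns in the ASM survey data
summary(sadness$ASM20)     # quartiles, mean, and NA count for one survey item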
- { - "objectID": "Chapter6.3.html", - "href": "Chapter6.3.html", - "title": "Ch. 6 – Diagnostics", - "section": "", - "text": "This notebook demonstrates Data Diagnostics and Summaries.\n\n\nLoad or install the necessary R packages.\n\nlibrary(tidyverse,quiet=TRUE)\nlibrary(ggplot2,quiet=TRUE)\n#install.packages(\"cowplot\",quiet=TRUE)\nlibrary(cowplot,quiet=TRUE)\noptions(repr.plot.width = 7, repr.plot.height = 5)\n\n\nif (!require(devtools)) install.packages(\"devtools\",quiet=TRUE)\ndevtools::install_github(\"tuomaseerola/MusicScienceData@main\",quiet=TRUE)\nlibrary(MusicScienceData,quiet=TRUE)\n\n\n\n\nprint(MusicScienceData::sadness[1:4,1:7])\n\n# A tibble: 4 × 7\n subj age gender listen expert listensad ASM1\n <fct> <fct> <fct> <fct> <chr> <fct> <int>\n1 1 35 to 44 Female d MusicL Sometimes 6\n2 2 45 to 54 Female mult./d MusicL Often 2\n3 3 18 to 24 Female d NM Sometimes 6\n4 4 25 to 34 Male d Amat. Sometimes 5\n\n\n\n\n\nprint(MusicScienceData::priming[1:3,1:6])\n\n# A tibble: 3 × 6\n Participant Prime_V Target_V RT Correct Age\n <fct> <fct> <fct> <int> <fct> <int>\n1 1 Positive Negative 444 Correct 24\n2 1 Positive Negative 437 Correct 24\n3 1 Negative Negative 453 Correct 24\n\n\n\n\nFigure 6.1. A histogram showing the distribution of responses to a particular question (no.
25) in Attitudes towards Sad Music (ASM) instrument.\n\nsadness <- MusicScienceData::sadness\ng1 <- sadness %>%\n drop_na(ASM25) %>% # drop missing values\n ggplot(aes(x = ASM25))+\n geom_histogram(bins=7,fill=\"grey50\", colour='black')+\n scale_x_continuous(breaks = seq(1,7,by=1))+\n ylab('Count')+\n xlab('1 = Strongly disagree, 7 = Strongly agree')+\n theme_MusicScience()\ng1\n\n\n\n\n\n\n\n\n\n\n\nFigure 6.4. A box plot showing the distribution of responses to a particular question (no. 23) in Attitudes towards Sad Music (ASM) instrument split across gender.\n\ng5 <- sadness %>%\n drop_na(ASM25) %>% # drop missing values\n ggplot(aes(y = ASM25,fill=gender))+\n geom_boxplot()+\n scale_y_continuous(breaks = seq(1,7,by=1))+\n scale_x_discrete()+\n scale_fill_grey(start = .4,end = .8,name='Gender')+\n ylab('1 = Strongly disagree, 7 = Strongly agree')+\n theme_MusicScience()\nprint(g5)\n\n\n\n\n\n\n\n\n\n\n\nFigure 6.5. Alternative visualisations of data. A: density plot across gender, B: multiple boxplots, C: boxplot overlaid with original data, D: violin plot with mean and median overlaid.\n\noptions(repr.plot.width = 12, repr.plot.height = 10)\nd <- MusicScienceData::priming\n\ng1<-ggplot(d,aes(x=RT))+\n geom_histogram(binwidth=100,colour='grey50',fill='white')+\n ggtitle('Bin width 100')+\n ylab('Count')+\n xlab('Reaction time (ms)')+\n scale_x_continuous(breaks=seq(0,2000,by=400))+\n theme_MusicScience()\n\n\ng2<-ggplot(d,aes(x=RT))+\n geom_histogram(binwidth=10,colour='grey50',fill='white')+\n ggtitle('Bin width 10')+\n ylab('Count')+\n xlab('Reaction time (ms)')+\n scale_x_continuous(breaks=seq(0,2000,by=400))+\n theme_MusicScience()\n\ng3<-ggplot(dplyr::filter(d,RT>200 & RT<1500),aes(x=RT))+\n geom_histogram(binwidth=10,colour='grey50',fill='white')+\n ggtitle('Bin width 10 with trimming')+\n ylab('Count')+\n xlab('Reaction time (ms)')+\n scale_x_continuous(breaks=seq(200,1500,by=200),limits = c(0,2000))+\n geom_vline(xintercept = c(200,1500),linetype='dashed')+\n theme_MusicScience()\n\n\ng4<-ggplot(dplyr::filter(d,RT>200 & RT<1500),aes(x=RT))+\n geom_histogram(binwidth=10,colour='grey50',fill='white')+\n geom_density(aes(y=10 * after_stat(count)),alpha=0.5,colour='black',fill=NA)+\n ggtitle('Bin width 10 density with trimming')+\n ylab('Count')+\n xlab('Reaction time (ms)')+\n scale_x_continuous(breaks=seq(200,1500,by=200))+\n theme_MusicScience()\n\nG1 <- plot_grid(g1, g2, g3, g4, nrow = 2)\nprint(G1)\n\n\n\n\n\n\n\n\n\n\n\nTable 6.1: The means of the ASM question 20 across the age.\n\nlibrary(Hmisc,quietly = TRUE)\ntable1 <- MusicScienceData::sadness %>%\n drop_na(ASM20) %>% # drop missing values\n group_by(age) %>%\n summarise(n=n(),mean_cl_normal(ASM20))\n\ncolnames(table1) <- c('Age','N','M','95% CI LL','95% CI UL')\nknitr::kable(table1,digits = 2, format='simple',\n caption = 'The means of the ASM question 20 across the age.')\n\n\nThe means of the ASM question 20 across the age.\n\n\nAge\nN\nM\n95% CI LL\n95% CI UL\n\n\n\n\n18 to 24\n355\n4.51\n4.38\n4.64\n\n\n25 to 34\n497\n4.64\n4.52\n4.76\n\n\n35 to 44\n329\n4.74\n4.60\n4.88\n\n\n45 to 54\n213\n4.75\n4.55\n4.95\n\n\n55 to 64\n136\n5.00\n4.77\n5.23\n\n\n65 to 74\n40\n4.92\n4.50\n5.35\n\n\n\n\n\n\n\n\n\nmean(MusicScienceData::sadness$ASM20, na.rm=TRUE) # Mean (ignore missing values)\n\n[1] 4.684076\n\nsd(MusicScienceData::sadness$ASM20,na.rm=TRUE)\n\n[1] 1.34759\n\n\n\n\n\nFigure 6.6. A bar graph showing the means of the responses to the question no. 
20 in Attitudes towards Sad Music (ASM) instrument across gender.\n\ng6 <- sadness %>%\n drop_na(ASM20) %>% # drop missing values\n group_by(gender) %>%\n summarise(mean= mean(ASM20),ci = mean_cl_normal(ASM20)) %>%\n ggplot(aes(x = gender,y = mean,fill=gender))+\n geom_col(colour='black',show.legend = FALSE)+\n geom_errorbar(aes(ymin=ci$ymin,ymax=ci$ymax),width=0.5)+\n scale_y_continuous(breaks = seq(1,7,by=1), expand = c(0,0))+\n scale_fill_grey(start=.25,end=.75)+\n coord_cartesian(ylim = c(1, 7)) +\n ylab('Mean ± 95% CI')+\n xlab('Gender')+\n theme_MusicScience()\nprint(g6)\n\n\n\n\n\n\n\n\n\n\n\nFigure 6.7. A bar graph showing the means of the responses to the question no. 6 in Attitudes towards Sad Music (ASM) instrument across musical expertise.\n\ng1 <- MusicScienceData::sadness %>%\n drop_na(ASM1) %>% # drop missing values\n ggplot(aes(x= ASM1,color=gender))+\n geom_density(adjust=1.25)+\n scale_color_grey(name='Gender')+\n scale_x_continuous(breaks = seq(1,7,by=1))+\n ggtitle(sadness_ASM_labels[1])+\n ylab('Density')+\n theme_bw()+\n theme(legend.justification=c(1,0), legend.position=c(0.95,0.75))+\n theme(plot.title = element_text(size=11))\n\ntmp<-as_tibble(MusicScienceData::sadness)\ntmp2<-tmp[,c(3,7:10)]\ndfl <- pivot_longer(tmp2,cols = c(2:5))\n\ng2 <- dfl %>%\n drop_na(value) %>% # drop missing values\n ggplot(aes(x=name,y = value,fill=gender))+\n geom_boxplot(outlier.shape =\"\")+\n scale_y_continuous(breaks = seq(1,7,by=1))+\n scale_x_discrete()+\n scale_fill_grey(start = .75, end=.25, name=\"Gender\")+\n ggtitle('ASM items 1 to 4')+\n ylab('1 = Strongly disagree, 7 = Strongly agree')+\n xlab('Item')+\n theme_bw()+\n theme(legend.justification=c(1,0), legend.position=c(0.95,0.70))\n\ng3 <- MusicScienceData::sadness %>%\n drop_na(ASM12) %>% # drop missing values\n ggplot(aes(x=1,y = ASM12))+\n geom_boxplot(fill='gray70')+\n geom_jitter(alpha=0.13,colour='black', width = 0.33)+\n scale_y_continuous(breaks = seq(1,7,by=1))+\n scale_x_discrete()+\n ggtitle(sadness_ASM_labels[12])+\n ylab('1 = Strongly disagree, 7 = Strongly agree')+\n xlab('ASM12')+\n theme_bw()\n\ng4 <- MusicScienceData::sadness %>%\n drop_na(ASM13) %>% # drop missing values\n ggplot(aes(x=1,y = ASM13))+\n geom_violin(fill='grey70',adjust=1.2,alpha=0.50)+\n scale_y_continuous(breaks = seq(1,7,by=1))+\n scale_x_discrete()+\n stat_summary(fun = median, fun.min = median, fun.max = median,\n geom = \"crossbar\", width = 0.9)+\n stat_summary(fun = mean, fun.min = mean, fun.max = mean,\n geom = \"crossbar\", width = 0.9,colour='gray50')+\n ggtitle(sadness_ASM_labels[13])+\n annotate(\"text\",x=1.6,y=mean(MusicScienceData::sadness$ASM13,na.rm = TRUE),label='Mean',hjust=0)+\n annotate(\"text\",x=1.6,y=median(MusicScienceData::sadness$ASM13,na.rm = TRUE),label='Median',hjust=0)+\n ylab('1 = Strongly disagree, 7 = Strongly agree')+\n xlab('ASM13')+\n theme_bw()\n\nG2 <- plot_grid(g1,g2,g3,g4,labels = c(\"A\", \"B\", \"C\", \"D\"),ncol = 2, nrow = 2)\nprint(G2)\n\n\n\n\n\n\n\n\n\n\n\nFigure 6.8. 
A scatterplot showing the means of the ratings of 110 film soundtrack excerpts on the tension and valence scales from Eerola and Vuoskoski (2011).\n\ng9 <- ggplot(soundtrack) +\n aes(x = Valence, y = Tension, colour = TARGET_EMOTION,\n label=Number,\n shape= TARGET_FRAMEWORK) +\n geom_point(size=4,alpha=0.80,show.legend=FALSE) +\n coord_fixed(ratio = 1)+\n geom_smooth(aes(shape = NULL,colour=NULL),method=\"lm\",\n formula='y ~ x',se=FALSE, fullrange=TRUE,\n level=0.95, colour='grey50', # adds trendline\n linetype='dashed',show.legend = FALSE)+\n geom_text(show.legend=FALSE,color='white',size=1.7)+ # labels\n scale_colour_grey(name='Emotion',start = .6,end = 0)+\n scale_shape(name='Framework')+\n scale_x_continuous(breaks=seq(1,9,by=2),limits=c(1,9))+\n scale_y_continuous(breaks=seq(1,9,by=2),limits=c(1,9))+\n theme_MusicScience()\nprint(g9)\n\n\n\nEerola, T., & Peltola, H.-R. (2016). Memorable experiences with sad music: reasons, reactions and mechanisms of three types of experiences. PLoS ONE, 11(6), e0157444. https://doi.org/10.1371/journal.pone.0157444\nEerola, T., & Vuoskoski, J. K. (2011). A comparison of the discrete and dimensional models of emotion in music. Psychology of Music, 39(1), 18–49.", - "crumbs": [ - "Data & Statistics", - "Ch. 6 – Diagnostics" - ] - },
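Table 6.1 above attached 95% confidence intervals to group means via mean_cl_normal(). For readers who want to see where those intervals come from, the same interval can be computed by hand from the t distribution. This is a sketch for illustration, not code from the book, though it should agree with mean_cl_normal() up to rounding.

x <- na.omit(MusicScienceData::sadness$ASM20)      # drop missing values
m <- mean(x)                                       # sample mean
se <- sd(x) / sqrt(length(x))                      # standard error of the mean
m + c(-1, 1) * qt(0.975, df = length(x) - 1) * se  # 95% confidence interval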
- { - "objectID": "Chapter7.html", - "href": "Chapter7.html", - "title": "Ch. 7 – Inferential statistics", - "section": "", - "text": "This notebook demonstrates running inferential statistical tests in R.\n\n\nLoad the libraries and install the MusicScienceData package, where the example data is stored.\n\nlibrary(ggplot2,quietly = TRUE)\nlibrary(tidyverse,quietly = TRUE)\nlibrary(MusicScienceData,quiet=TRUE)\n\n\n\n\nSee the text for the explanation.\n\ndf <- MusicScienceData::sadness # define data\nt <- t.test(ASM20 ~ gender, data=df) # t test\nprint(t$statistic) # show the t value\n\n t \n-5.054596 \n\nprint(scales::pvalue(t$p.value))\n\n[1] \"<0.001\"\n\ndplyr::summarise(dplyr::group_by(df, gender), # means and SDs\n M=mean(ASM20,na.rm=TRUE),\n SD=sd(ASM20,na.rm=TRUE))\n\n# A tibble: 2 × 3\n gender M SD\n <fct> <dbl> <dbl>\n1 Female 4.59 1.37\n2 Male 4.96 1.24\n\n\n\n\ndf <- MusicScienceData::sadness # define data\nmodel.aov <- aov(ASM20 ~ age, data=df) # run anova\nF <- summary(model.aov) # summarise\nprint(F)\n\n Df Sum Sq Mean Sq F value Pr(>F) \nage 5 29.9 5.986 3.321 0.00548 **\nResiduals 1564 2819.4 1.803 \n---\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n7 observations deleted due to missingness\n\n\n\n\nTABLE<-TukeyHSD(model.aov,conf.level = 0.95)\nprint(knitr::kable(TABLE$age,digits = 3,\n caption = 'Comparison of age groups\n for Item 20 in ASM survey.',\n format = 'simple'))\n\n\n\nTable: Comparison of age groups\n for Item 20 in ASM survey.\n\n diff lwr upr p adj\n------------------ ------- ------- ------ ------\n25 to 34-18 to 24 0.133 -0.133 0.399 0.713\n35 to 44-18 to 24 0.232 -0.062 0.525 0.214\n45 to 54-18 to 24 0.244 -0.088 0.576 0.289\n55 to 64-18 to 24 0.493 0.107 0.879 0.004\n65 to 74-18 to 24 0.418 -0.221 1.057 0.423\n35 to 44-25 to 34 0.099 -0.174 0.371 0.906\n45 to 54-25 to 34 0.111 -0.202 0.425 0.914\n55 to 64-25 to 34 0.360 -0.011 0.731 0.063\n65 to 74-25 to 34 0.285 -0.344 0.915 0.789\n45 to 54-35 to 44 0.013 -0.324 0.349 1.000\n55 to 64-35 to 44 0.261 -0.129 0.652 0.396\n65 to 74-35 to 44 0.186 -0.455 0.828 0.962\n55 to 64-45 to 54 0.249 -0.172 0.669 0.540\n65 to 74-45 to 54 0.174 -0.486 0.834 0.975\n65 to 74-55 to 64 -0.075 -0.764 0.614 1.000\n\n\n\n\ndf <- MusicScienceData::sadness # define data\nmodel2.aov <- aov(ASM20 ~ age * gender, data=df) # run anova\nF2 <- summary(model2.aov)\nprint(F2)\n\n Df Sum Sq Mean Sq F value Pr(>F) \nage 5 29.9 5.99 3.377 0.00488 ** \ngender 1 45.7 45.69 25.773 4.3e-07 ***\nage:gender 5 11.5 2.31 1.303 0.25997 \nResiduals 1558 2762.1 1.77 \n---\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n7 observations deleted due to missingness\n\n\n\n\nThis analysis requires extra libraries and raw data read from GitHub.
The installation might be slow in Colab because of package dependencies.\n\n#install.packages(\"lme4\",quiet=TRUE) # Required for LMM analysis\n#install.packages(\"lmerTest\",quiet=TRUE) # Optional\nlibrary(lme4,quiet=TRUE)\nlibrary(lmerTest,quiet=TRUE)\n\n\nlibrary(lme4,quiet=TRUE)\nlibrary(lmerTest,quiet=TRUE)\nd <- read.csv('https://raw.githubusercontent.com/tuomaseerola/emr/main/data/raw_ratings.csv')\nd2 <- dplyr::filter(d,Emotion=='Dimensional') #\nd3 <- dplyr::filter(d2, Category=='Anger' |\n Category=='Fear' |\n Category=='Happy' |\n Category=='Sad' |\n Category=='Tender')\nm1 <- lmer(Valence ~ Category * Gender + (1|id) + (1|Track), data = d3)\ns <- summary(m1,corr=FALSE)\nS<-s$coefficients; S<-round(S,2); S[,5]<-scales::pvalue(S[,5])\nprint(knitr::kable(S,format = 'simple',\n caption = 'LMM results of Valence ratings.'))\n\n\n\nTable: LMM results of Valence ratings.\n\n Estimate Std. Error df t value Pr(>|t|) \n-------------------------- --------- ----------- ------- -------- ---------\n(Intercept) 3.43 0.25 58.17 13.51 <0.001 \nCategoryFear 0.07 0.34 47.43 0.19 0.850 \nCategoryHappy 4.16 0.34 47.43 12.24 <0.001 \nCategorySad 1.63 0.34 47.43 4.79 <0.001 \nCategoryTender 3.4 0.34 47.43 10.01 <0.001 \nGenderMale -0.09 0.21 110.04 -0.45 0.650 \nCategoryFear:GenderMale -0.07 0.19 2348 -0.34 0.730 \nCategoryHappy:GenderMale -0.04 0.19 2348 -0.22 0.820 \nCategorySad:GenderMale -0.46 0.19 2348 -2.41 0.020 \nCategoryTender:GenderMale 0 0.19 2348 0.01 0.990 \n\n\n\n\n\n\nd <- read.csv('https://raw.githubusercontent.com/tuomaseerola/emr/main/data/raw_ratings.csv')\nS <- d %>%\n filter(Category=='Sad') %>%\n group_by(Category,Gender) %>%\n summarise(M=mean(Valence,na.rm=T),SD=sd(Valence,na.rm=T),\n .groups = 'drop')\nprint(S)\n\n# A tibble: 2 × 4\n Category Gender M SD\n <chr> <chr> <dbl> <dbl>\n1 Sad Female 5.05 1.69\n2 Sad Male 4.5 1.54\n\n\n\n\n\n\nlibrary(MusicScienceData) # loads library w data\ngender_age_xtab <- table(MusicScienceData::sadness$age,\n MusicScienceData::sadness$gender)\nprint(gender_age_xtab)\n\n \n Female Male\n 18 to 24 269 87\n 25 to 34 361 137\n 35 to 44 231 101\n 45 to 54 158 55\n 55 to 64 118 19\n 65 to 74 34 7\n\nresult <- chisq.test(gender_age_xtab) # Chi^2 test\nprint(result)\n\n\n Pearson's Chi-squared test\n\ndata: gender_age_xtab\nX-squared = 16.649, df = 5, p-value = 0.005215\n\n\n\n\n\n\nlibrary(MusicScienceData) # load library w data\ndata <- MusicScienceData::soundtrack # define data\nr<-cor.test(data$Valence, data$Tension) # calculate correlation\nprint(r$estimate) # print coefficient\n\n cor \n-0.8269947 \n\n## cor\n## -0.827\nprint(scales::pvalue(r$p.value)) # print pretty p value\n\n[1] \"<0.001\"\n\n## [1] \"<0.001\"\nprint(r$parameter) # print df\n\n df \n108 \n\n\n\n\n\n\nlibrary(MusicScienceData) # loads library w data\nd1 <- MusicScienceData::soundtrack # get ratings\nd2 <- MusicScienceData::soundtrack_features[,c(2:3,5:6)] # select only some features\nd1[,17:21] <- as.data.frame(scale(d2)) # normalise\n\ntmp <- cor(d1[,c(3,17:20)]) # get correlations\nprint(round(tmp[2:5,1],2)) # display first line\n\n RMS sp_centr spec_rolloff spec_zcr \n 0.58 0.36 0.40 0.32 \n\n\n\n\n\n\nmodel.reg <- lm(Energy ~ RMS + sp_centr + spec_rolloff +\n spec_zcr, data = d1)\ns <- summary(model.reg) # R2adj = 0.424 (Energy)\nprint(s)\n\n\nCall:\nlm(formula = Energy ~ RMS + sp_centr + spec_rolloff + spec_zcr, \n data = d1)\n\nResiduals:\n Min 1Q Median 3Q Max \n-2.4719 -1.1042 -0.2064 0.9427 3.4504 \n\nCoefficients:\n Estimate Std. 
Error t value Pr(>|t|) \n(Intercept) 5.4865 0.1309 41.905 < 2e-16 ***\nRMS 0.9067 0.1397 6.491 2.88e-09 ***\nsp_centr -1.9069 1.2245 -1.557 0.122 \nspec_rolloff 1.9663 0.9502 2.069 0.041 * \nspec_zcr 0.5995 0.4170 1.438 0.154 \n---\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n\nResidual standard error: 1.373 on 105 degrees of freedom\nMultiple R-squared: 0.4504, Adjusted R-squared: 0.4295 \nF-statistic: 21.52 on 4 and 105 DF, p-value: 5.528e-13\n\n\n\n\nr <- cor(d1$Energy, d1$RMS)\nprint( r^2 ) # print the squared correlation\n\n[1] 0.3378173\n\nsummary(lm(Energy ~ RMS,data=d1)) # Summarise regression\n\n\nCall:\nlm(formula = Energy ~ RMS, data = d1)\n\nResiduals:\n Min 1Q Median 3Q Max \n-2.6644 -1.1921 -0.3852 1.1875 3.3296 \n\nCoefficients:\n Estimate Std. Error t value Pr(>|t|) \n(Intercept) 5.4865 0.1417 38.717 < 2e-16 ***\nRMS 1.0567 0.1424 7.423 2.79e-11 ***\n---\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n\nResidual standard error: 1.486 on 108 degrees of freedom\nMultiple R-squared: 0.3378, Adjusted R-squared: 0.3317 \nF-statistic: 55.1 on 1 and 108 DF, p-value: 2.788e-11", - "crumbs": [ - "Data & Statistics", - "Ch. 7 – Inferential statistics" - ] - },
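When regression results feed into text or tables, the parts of a fitted model can be extracted programmatically instead of being read off the printed summary. A brief sketch reusing the model.reg object fitted in Code 7.10 above; the calls are standard base R, but the snippet itself is not from the book.

s <- summary(model.reg)  # model.reg was fitted above with lm()
round(coef(s), 3)        # coefficient table as a plain matrix
s$adj.r.squared          # adjusted R-squared on its own
confint(model.reg)       # 95% confidence intervals for the coefficients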
- { - "objectID": "Chapter9.2.html", - "href": "Chapter9.2.html", - "title": "Ch. 9 - Event counts", - "section": "", - "text": "Install music21 and the other elements needed to run this notebook in the Colab environment. Press play and wait for all commands to be executed; this initial command may take some time, as it needs to build the music21 environment.", - "crumbs": [ - "Score analysis", - "Ch. 9 - Event counts" - ] - }, - { - "objectID": "Chapter9.2.html#event-counts", - "href": "Chapter9.2.html#event-counts", - "title": "Ch. 9 - Event counts", - "section": "Event counts", - "text": "Event counts\n\nfrom music21 import * # activate library\nimport pandas as pd\nimport seaborn as sns\nimport matplotlib.pyplot as plt\n\n\nfrom music21 import * # activate library\nimport pandas as pd\nimport seaborn as sns\nimport matplotlib.pyplot as plt\n# Define pieces\n# These two pieces are related: the same piece with different harmonisations\nbwv110_7 = corpus.parse('bach/bwv110.7.xml') # bwv110.7\nbwv40_3 = corpus.parse('bach/bwv40.3.xml')\n\n\nExtract the key and transpose to a common tonic\n\nk = bwv110_7.analyze('key')\nprint(k)\ni = interval.Interval(k.tonic, pitch.Pitch('C'))\nprint(i)\nbwv110_7 = bwv110_7.transpose(i)\n\nk = bwv40_3.analyze('key')\nprint(k)\ni = interval.Interval(k.tonic, pitch.Pitch('C'))\nprint(i)\nbwv40_3 = bwv40_3.transpose(i)\n\nprint('====== Transposed')\nt = bwv110_7.analyze('key')\nprint(t)\nt = bwv40_3.analyze('key')\nprint(t)\n\nb minor\n<music21.interval.Interval M-7>\ng minor\n<music21.interval.Interval P-5>\n====== Transposed\nc minor\nc minor\n\n\n\nCalculate the pitch-class distribution\n\n# pitch-class\npcCount = analysis.pitchAnalysis.pitchAttributeCount(bwv110_7, 'pitchClass')\npc = range(0, 12)\npitchclass = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')\nl_pcCount = [[i, pcCount[i]] for i in pc]\nd = pd.DataFrame(data=l_pcCount, columns=['pc_nro', 'count'])\nd['Percentage'] = d['count'] / sum(d['count'])\nd[\"Pitch-Class\"] = pitchclass\nd[\"Piece\"] = 'BWV 110/7'\n\npcCount2 = analysis.pitchAnalysis.pitchAttributeCount(bwv40_3, 'pitchClass')\nl_pcCount2 = [[i, pcCount2[i]] for i in pc]\nd2 = pd.DataFrame(data=l_pcCount2, columns=['pc_nro', 'count'])\nd2['Percentage'] = d2['count'] / sum(d2['count'])\nd2[\"Pitch-Class\"] = pitchclass\nd2[\"Piece\"] = 'BWV 40/3'\n\nPC = pd.concat([d, d2])\n\n\n\nCalculate interval
distribution\n\n# intervals\n#| echo: true\n#| eval: true\ndf=[]\ndf2=[]\ncounts = dict() # add empty dictionary\nfor x in range(-12, 13):\n counts[x] = 0\n\nfor part in bwv110_7.recurse().parts:\n p = part.recurse(classFilter=('Note', 'Rest')) # this is ok but loses rests\n intervalStream1 = p.melodicIntervals(skipOctaves=True,skipRests=True)\n items = []\n for i in intervalStream1.recurse():\n items.append(i.semitones)\n for j in items:\n counts[j] = counts.get(j, 0) + 1\n\ndf = pd.DataFrame({'Interval': list(counts.keys()),\n 'Counts': list(counts.values())})\ndf['Percentage'] = df['Counts'] / sum(df['Counts'])\ndf[\"Piece\"] = 'BWV 110/7'\n\nfor part in bwv40_3.recurse().parts:\n p = part.recurse(classFilter=('Note', 'Rest')) # this is ok but loses rests\n intervalStream1 = p.melodicIntervals()\n items = []\n for i in intervalStream1.recurse():\n items.append(i.semitones)\n for j in items:\n counts[j] = counts.get(j, 0) + 1\n\ndf2 = pd.DataFrame({'Interval': list(counts.keys()),\n 'Counts': list(counts.values())})\ndf2['Percentage'] = df2['Counts'] / sum(df2['Counts'])\ndf2[\"Piece\"] = 'BWV 40/3'\n\nIV = pd.concat([df, df2])\n\n/var/folders/b0/vtr2rd_96119zlr64t5hvlgr0000gp/T/ipykernel_65284/4086787063.py:12: StreamIteratorInefficientWarning: melodicIntervals is not defined on StreamIterators. Call .stream() first for efficiency\n intervalStream1 = p.melodicIntervals(skipOctaves=True,skipRests=True)\n/var/folders/b0/vtr2rd_96119zlr64t5hvlgr0000gp/T/ipykernel_65284/4086787063.py:26: StreamIteratorInefficientWarning: melodicIntervals is not defined on StreamIterators. Call .stream() first for efficiency\n intervalStream1 = p.melodicIntervals()\n\n\n\n\nCalculate duration distribution\n\n# durations\n\npart = bwv110_7.recurse().parts\np = part.recurse()\ndurCount = analysis.elements.attributeCount(p, 'quarterLength')\ndu = pd.DataFrame({'Duration': list(durCount.keys()),\n 'Counts': list(durCount.values())})\ndu['Percentage'] = du['Counts'] / sum(du['Counts'])\n\nfilter = (du['Duration'] < 10)\ndu = du[filter]\nfilter = (du['Duration'] >= 0.25)\ndu = du[filter]\ndu[\"Piece\"] = 'BWV 110/7'\n\npart = bwv40_3.recurse().parts\np = part.recurse()\ndurCount = analysis.elements.attributeCount(p, 'quarterLength')\ndu2 = pd.DataFrame({'Duration': list(durCount.keys()),\n 'Counts': list(durCount.values())})\ndu2['Percentage'] = du2['Counts'] / sum(du2['Counts'])\n\nfilter = (du2['Duration'] < 10)\ndu2 = du2[filter]\nfilter = (du2['Duration'] >= 0.25)\ndu2 = du2[filter]\ndu2[\"Piece\"] = 'BWV 40/3'\n\nDU = pd.concat([du, du2])\n\n/var/folders/b0/vtr2rd_96119zlr64t5hvlgr0000gp/T/ipykernel_65284/1590200641.py:4: StreamIteratorInefficientWarning: recurse is not defined on StreamIterators. Call .stream() first for efficiency\n p = part.recurse()\n/var/folders/b0/vtr2rd_96119zlr64t5hvlgr0000gp/T/ipykernel_65284/1590200641.py:17: StreamIteratorInefficientWarning: recurse is not defined on StreamIterators. Call .stream() first for efficiency\n p = part.recurse()", - "crumbs": [ - "Score analysis", - "Ch. 9 - Event counts" - ] - }, - { - "objectID": "Chapter9.2.html#create-plots", - "href": "Chapter9.2.html#create-plots", - "title": "Ch. 
9 - Event counts", - "section": "Create plots", - "text": "Create plots\n\n## Set graphic params\nsns.set_theme()\nsns.set_style(\"whitegrid\")\ncolors = [\"#b8b6b6\", \"#636362\"]\ncustomPalette = sns.set_palette(sns.color_palette(colors))\nsns.set_palette(customPalette)\n\nplt.rcParams[\"figure.figsize\"] = [7.6, 10.0]\nplt.rcParams[\"figure.autolayout\"] = True\n\nf, axes = plt.subplots(3, 1)\ng = sns.barplot(x='Pitch-Class', y='Percentage', data=PC,\n orient='v', ax=axes[0], hue='Piece')\ng.legend_.remove()\naxes[0].text(11, 0.18, \"$\\chi^2=7.2, p=0.70$\", horizontalalignment='right', size='x-small', color='black')\n\nbar_plot = sns.barplot(x='Interval', y='Percentage',\n data=IV, orient='v', ax=axes[1], hue='Piece')\nfor index, label in enumerate(bar_plot.get_xticklabels()):\n if index % 2 == 1:\n label.set_visible(True)\n else:\n label.set_visible(False)\n\naxes[1].text(25, 0.12, \"$\\chi^2=17.2, p=0.37$\", horizontalalignment='right', size='x-small', color='black')\n\nh = sns.barplot(x='Duration', y='Percentage', data=DU,\n orient='v', ax=axes[2], hue='Piece')\n\naxes[2].text(5.25, 0.45, \"$\\chi^2=3.9, p=0.55$\", horizontalalignment='right', size = 'x-small', color='black')\n\nh.legend_.remove()\nplt.show()", - "crumbs": [ - "Score analysis", - "Ch. 9 - Event counts" - ] - }, - { - "objectID": "Chapter9.2.html#statistics", - "href": "Chapter9.2.html#statistics", - "title": "Ch. 9 - Event counts", - "section": "Statistics", - "text": "Statistics\n\nPitch-class\nfrom scipy import stats\nimport numpy as np\n\nPC2 = PC.pivot(index='pc_nro', columns='Piece', values='count')\nPC2['Sum'] = PC2.sum(axis=1)\nPC3 = PC2[PC2.Sum != 0]\nobs2 = np.array([PC3['BWV 110/7'], PC3['BWV 40/3']])\nc, p, dof, exp = stats.chi2_contingency(obs2)\nprint(f\"_Chi_$^2$ value = {round(c,2)}, _p_-value = {round(p,3)}, _df_ = {dof}\")\nChi\\(^2\\) value = 7.25, p-value = 0.702, df = 10\n\n\nIntervals\nIV2 = IV.pivot(index='Interval', columns='Piece', values='Counts')\nIV2['Sum'] = IV2.sum(axis=1)\nIV3 = IV2[IV2.Sum != 0]\nobs2 = np.array([IV3['BWV 110/7'], IV3['BWV 40/3']])\nc, p, dof, exp = stats.chi2_contingency(obs2)\nprint(f\"_Chi_$^2$ value = {round(c,2)}, _p_-value = {round(p,3)}, _df_ = {dof}\")\nChi\\(^2\\) value = 17.2, p-value = 0.373, df = 16\n\n\nDurations\nDU2 = DU.pivot(index='Duration', columns='Piece', values='Counts')\nDU2 = DU2.replace(np.nan,0)\nDU2['Sum'] = DU2.sum(axis=1)\nDU3 = DU2[DU2.Sum != 0]\n\nobs2 = np.array([DU3['BWV 110/7'], DU3['BWV 40/3']])\nc, p, dof, exp = stats.chi2_contingency(obs2)\nprint(f\"_Chi_$^2$ value = {round(c,2)}, _p_-value = {round(p,3)}, _df_ = {dof}\")\nChi\\(^2\\) value = 3.94, p-value = 0.558, df = 5", - "crumbs": [ - "Score analysis", - "Ch. 9 - Event counts" - ] - }, - { - "objectID": "Chapter9.4.html", - "href": "Chapter9.4.html", - "title": "Ch. 
9 - Expressive Timing", - "section": "", - "text": "This notebook demonstrates expressive timing profiles from real performances in the ASAP dataset (https://github.com/fosfrancesco/asap-dataset).\n\n\nlibrary(ggplot2, quietly = TRUE)\nlibrary(tidyverse, quietly = TRUE)\nlibrary(dplyr, quietly = TRUE)\n\n\n\nThis gets the metadata from the ASAP project (see Foscarin et al., 2020) and selects the Prelude op. 23 no. 4.\nd <- read.csv(\"https://raw.githubusercontent.com/fosfrancesco/asap-dataset/master/metadata.csv\",header = TRUE,sep = ',')\ndf<-dplyr::filter(d,title=='Preludes_op_23_4')\ndf<-df[1:3,]\nprint(knitr::kable(head(df[,1:3],3)))\n\n\n\ncomposer\ntitle\nfolder\n\n\n\n\nRachmaninoff\nPreludes_op_23_4\nRachmaninoff/Preludes_op_23/4\n\n\nRachmaninoff\nPreludes_op_23_4\nRachmaninoff/Preludes_op_23/4\n\n\nRachmaninoff\nPreludes_op_23_4\nRachmaninoff/Preludes_op_23/4\n\n\n\n\n\n\nbasedir <-'https://raw.githubusercontent.com/tuomaseerola/emr/master/'\ndeadpan <- read.csv(paste0(basedir,'data/midi_score_annotations.txt'),header = FALSE, sep = '\\t')\nprint(knitr::kable(head(deadpan,3)))\n\n\n\nV1\nV2\nV3\n\n\n\n\n0.0\n0.0\ndb,3/4,2\n\n\n1.2\n1.2\nb\n\n\n2.4\n2.4\nb\n\n\n\nfn <- NULL\nfn[1]<-'data/ChenGuang12M_annotations.txt'\nfn[2]<-'data/MorozovS09_annotations.txt'\nfn[3]<-'data/WuuE07M_annotations.txt'\nPerformer <- c('Chen Guang','Yevgeny Morozov','Elliot Wuu')\n\n\n\nD <- NULL\nfor (k in 1:length(fn)) {\n perf<-read.csv(paste0(basedir,fn[k]),header=F,sep='\\t')\n DF<-data.frame(score=deadpan$V1,perf=perf$V1,\n annotation=deadpan$V3)\n DF <- dplyr::filter(DF,score < 30) # Limit to first 10 bars = 3*10 beats\n DF2 <- normperf(DF) # Defined previously\n DF2$Performer<-Performer[k]\n D<-rbind(D,DF2)\n}\n\noptions(encoding = \"UTF-8\")\n#library(dplyr)\nDF <- dplyr::filter(D,score < 30) # First 10 bars = 3*10 beats\nprint(knitr::kable(head(DF[,1:6],3)))\n\n\n\nscore\nperf\nannotation\nperf_N\ndelta\ndelta2\n\n\n\n\n0.0\n0.000000\ndb,3/4,2\n0.000000\n0.0000000\n0.00000\n\n\n1.2\n1.916667\nb\n1.935339\n0.7353393\n735.33933\n\n\n2.4\n3.009115\nb\n3.038430\n0.6384300\n-96.90928\n\n\n\n\n\n\n\noptions(repr.plot.width = 12, repr.plot.height = 5)\ng1 <- ggplot(DF,aes(x=perf_N,y=delta_raw,colour=Performer,shape=Performer))+\n geom_line(alpha=0.85)+\n geom_point(alpha=0.85,size=2.5)+\n scale_color_grey(start = 0.30,end = 0.8)+\n geom_smooth(aes(colour = NULL,shape=NULL), method = \"loess\", span=0.2,se=FALSE,colour='black',linewidth=1.25)+\n scale_x_continuous(limits=c(0,30),breaks = seq(0,30,by=3),expand = c(0.02,0.002),labels=(seq(0,30,by=3)/3)+1) +\n xlab('Bar')+\n ylab('Deviation in ms')+\n theme_bw()+\n theme(legend.position=c(.85, .80))+\n theme(legend.background = element_blank()) + # Remove overall border\n theme(legend.key = element_blank())\nprint(g1)
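Beat-level annotations also support a simple tempo curve: successive differences of the onset times give inter-beat intervals, and their reciprocals give a beats-per-minute profile. This sketch is illustrative only; it assumes, as the table above suggests, that the perf column holds performed onset times in seconds and that normperf() preserves it.

ioi <- diff(DF$perf[DF$Performer == 'Elliot Wuu'])  # inter-beat intervals (s), one performer
bpm <- 60 / ioi                                     # instantaneous tempo in BPM
summary(bpm)                                        # crude tempo profile of the excerpt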
9 - Expressive Timing" - ] - }, - { - "objectID": "Chapter9.4.html#get-data-from-asap-project", - "href": "Chapter9.4.html#get-data-from-asap-project", - "title": "Ch. 9 - Expressive Timing", - "section": "", - "text": "This gets the metadata from ASAP project (see Foscarin et al., 2020) and selects Preludes op 23.4.\nd <- read.csv(\"https://raw.githubusercontent.com/fosfrancesco/asap-dataset/master/metadata.csv\",header = TRUE,sep = ',')\ndf<-dplyr::filter(d,title=='Preludes_op_23_4')\ndf<-df[1:3,]\nprint(knitr::kable(head(df[,1:3],3)))\n\n\n\ncomposer\ntitle\nfolder\n\n\n\n\nRachmaninoff\nPreludes_op_23_4\nRachmaninoff/Preludes_op_23/4\n\n\nRachmaninoff\nPreludes_op_23_4\nRachmaninoff/Preludes_op_23/4\n\n\nRachmaninoff\nPreludes_op_23_4\nRachmaninoff/Preludes_op_23/4", - "crumbs": [ - "Score analysis", - "Ch. 9 - Expressive Timing" - ] - }, - { - "objectID": "Chapter9.4.html#read-score-annotations", - "href": "Chapter9.4.html#read-score-annotations", - "title": "Ch. 9 - Expressive Timing", - "section": "", - "text": "basedir <-'https://raw.githubusercontent.com/tuomaseerola/emr/master/'\ndeadpan <- read.csv(paste0(basedir,'data/midi_score_annotations.txt'),header = FALSE, sep = '\\t')\nprint(knitr::kable(head(deadpan,3)))\n\n\n\nV1\nV2\nV3\n\n\n\n\n0.0\n0.0\ndb,3/4,2\n\n\n1.2\n1.2\nb\n\n\n2.4\n2.4\nb\n\n\n\nfn <- NULL\nfn[1]<-'data/ChenGuang12M_annotations.txt'\nfn[2]<-'data/MorozovS09_annotations.txt'\nfn[3]<-'data/WuuE07M_annotations.txt'\nPerformer <- c('Chen Guang','Yevgeny Morozov','Elliot Wuu')", - "crumbs": [ - "Score analysis", - "Ch. 9 - Expressive Timing" - ] - }, - { - "objectID": "Chapter9.4.html#choose-extract-from-all-performers", - "href": "Chapter9.4.html#choose-extract-from-all-performers", - "title": "Ch. 9 - Expressive Timing", - "section": "", - "text": "D <- NULL\nfor (k in 1:length(fn)) {\n perf<-read.csv(paste0(basedir,fn[k]),header=F,sep='\\t')\n DF<-data.frame(score=deadpan$V1,perf=perf$V1,\n annotation=deadpan$V3)\n DF <- dplyr::filter(DF,score < 30) # Limit to first 10 bars = 3*10 beats\n DF2 <- normperf(DF) # Defined previouslys\n DF2$Performer<-Performer[k]\n D<-rbind(D,DF2)\n}\n\noptions(encoding = \"UTF-8\")\n#library(dplyr)\nDF <- dplyr::filter(D,score < 30) # First 10 bars = 3*10 beats\nprint(knitr::kable(head(DF[,1:6],3)))\n\n\n\nscore\nperf\nannotation\nperf_N\ndelta\ndelta2\n\n\n\n\n0.0\n0.000000\ndb,3/4,2\n0.000000\n0.0000000\n0.00000\n\n\n1.2\n1.916667\nb\n1.935339\n0.7353393\n735.33933\n\n\n2.4\n3.009115\nb\n3.038430\n0.6384300\n-96.90928", - "crumbs": [ - "Score analysis", - "Ch. 9 - Expressive Timing" - ] - }, - { - "objectID": "Chapter9.4.html#plot-expressive-timing-deviations", - "href": "Chapter9.4.html#plot-expressive-timing-deviations", - "title": "Ch. 9 - Expressive Timing", - "section": "", - "text": "options(repr.plot.width = 12, repr.plot.height = 5)\ng1 <- ggplot(DF,aes(x=perf_N,y=scoredelta_rawperf_Ndelta,colour=Performer,shape=Performer))+\n geom_line(alpha=0.85)+\n geom_point(alpha=0.85,size=2.5)+\n scale_color_grey(start = 0.30,end = 0.8)+\n geom_smooth(aes(colour = NULL,shape=NULL), method = \"loess\", span=0.2,se=FALSE,colour='black',linewidth=1.25)+\n scale_x_continuous(limits=c(0,30),breaks = seq(0,30,by=3),expand = c(0.02,0.002),labels=(seq(0,30,by=3)/3)+1) +\n xlab('Bar')+\n ylab('Deviation in ms')+\n theme_bw()+\n theme(legend.position=c(.85, .80))+\n theme(legend.background = element_blank()) + # Remove overall border\n theme(legend.key = element_blank())\nprint(g1)", - "crumbs": [ - "Score analysis", - "Ch. 
9 - Expressive Timing" - ] - }, - { - "objectID": "Chapter9.4.html#references", - "href": "Chapter9.4.html#references", - "title": "Ch. 9 - Expressive Timing", - "section": "", - "text": "Foscarin, F., Mcleod, A., Rigaux, P., Jacquemard, F., & Sakai, M. (2020). ASAP: a dataset of aligned scores and performances for piano transcription. In International Society for Music Information Retrieval Conference (pp. 534-541).", - "crumbs": [ - "Score analysis", - "Ch. 9 - Expressive Timing" - ] - }, - { - "objectID": "Chapter10.1.html", - "href": "Chapter10.1.html", - "title": "Ch. 10 – Basics (sines)", - "section": "", - "text": "Show the code\nimport numpy as np\nfrom matplotlib import pyplot as plt", - "crumbs": [ - "Audio analysis", - "Ch. 10 – Basics (sines)" - ] - }, - { - "objectID": "Chapter10.1.html#figure-10.1.-illustration-of-basic-representations-and-transformations-of-audio-using-a-400-hz-sine-wave-and-complex-tone-consisting-of-400-600-and-1600-hz-sine-waves.", - "href": "Chapter10.1.html#figure-10.1.-illustration-of-basic-representations-and-transformations-of-audio-using-a-400-hz-sine-wave-and-complex-tone-consisting-of-400-600-and-1600-hz-sine-waves.", - "title": "Ch. 10 – Basics (sines)", - "section": "", - "text": "Show the code\nimport numpy as np\nfrom matplotlib import pyplot as plt", - "crumbs": [ - "Audio analysis", - "Ch. 10 – Basics (sines)" - ] - }, - { - "objectID": "Chapter10.1.html#create-sine-waves", - "href": "Chapter10.1.html#create-sine-waves", - "title": "Ch. 10 – Basics (sines)", - "section": "Create sine waves", - "text": "Create sine waves\n\n### Define the properties of a sine wave\n\nfrequency = 400 # Frequency\nduration = 0.01 # Duration of sound\namplitude = 1.0 # Amplitude\nphase = 0.75 # Phase\nFs = 22050 # Sampling rate (per second)\n\n# This code creates the sine wave with the properties you detailed above\nnum_samples = int(Fs * duration) \nt = np.arange(num_samples) / Fs\nx = amplitude * np.sin(2 * np.pi * (frequency * t - phase))\nfig, ax = plt.subplots(figsize=(7.5, 2.75))\nax.plot(t, x, color='red')\nax.set_xlabel('Time (s)')\nax.set_title(\"Sine (400 Hz)\")\nax.set_ylabel('Air pressure deviation')\nax.set_ylim([-1.05, 1.05])\nax.set_yticks(np.arange(-1, 1.5, 1.0)) \nax.set_xlim([0.0, 0.01])\nax.set_xticks(np.arange(0, 0.0125, 0.0025)) \nax.grid()\nax.annotate('', xy=(0.0025, 0), xytext=(0.0025, 1), \n arrowprops=dict(arrowstyle='<->', mutation_scale=15, \n color='0.3'), size=2)\nax.text(0.0025, 0.5, \"Amplitude\", size=12, \n color='0.3', ha=\"center\", va=\"center\")\nax.annotate('', xy=(0, 1), xytext=(0.0025, 1), \n arrowprops=dict(arrowstyle='<->', mutation_scale=19, \n color='0.3'), size=2)\nax.text(0.00125, 0.85, \"Period\", size=12, \n color='0.3', ha=\"center\", va=\"center\")\n\nplt.show()", - "crumbs": [ - "Audio analysis", - "Ch. 10 – Basics (sines)" - ] - }, - { - "objectID": "Chapter10.1.html#complex-sounds", - "href": "Chapter10.1.html#complex-sounds", - "title": "Ch. 
10 – Basics (sines)", - "section": "Complex sounds", - "text": "Complex sounds\nLet’s combine sine waves of different frequency (400, 600, 1600 Hz).\n\n\nShow the code\nimport numpy as np\nfrom matplotlib import pyplot as plt \n\nfig = plt.figure()\nfig.set_figheight(6)\nfig.set_figwidth(10)\n\nax1 = plt.subplot2grid(shape=(6, 3), loc=(0, 1), colspan=2, rowspan=3)\nax3 = plt.subplot2grid(shape=(6, 3), loc=(3, 1), colspan=2, rowspan=3)\nax2 = plt.subplot2grid(shape=(6, 3), loc=(3, 0), colspan=1)\nax4 = plt.subplot2grid(shape=(6, 3), loc=(4, 0), colspan=1)\nax5 = plt.subplot2grid(shape=(6, 3), loc=(5, 0), colspan=1)\n\nfrequency = 400 # Frequency\nduration = 0.01 # Duration of sound\namplitude = 1.0 # Amplitude\nphase = 0.75 # Phase\nFs = 22050 # Sampling rate (per second)\n\nnum_samples = int(Fs * duration) \nt = np.arange(num_samples) / Fs\nx = amplitude * np.sin(2 * np.pi * (frequency * t - phase))\n\nax1.plot(t, x, color='red', linewidth=2.0, linestyle='-')\nax1.set_xlabel('Time (s)')\nax1.set_title(\"Sine (400 Hz)\")\nax1.set_ylabel('Air pressure deviation')\nax1.set_ylim([-1.05, 1.05])\nax1.set_yticks(np.arange(-1, 1.5, 1.0)) \nax1.set_xlim([0.0, 0.01])\nax1.set_xticks(np.arange(0, 0.0125, 0.0025)) \nax1.grid()\n\nax1.annotate('', xy=(0.0025, 0), xytext=(0.0025, 1), \n arrowprops=dict(arrowstyle='<->', \n mutation_scale=15, color='0.3'), size=2)\nax1.text(0.0025, 0.5, \"Amplitude\", size=12, color='0.3', \n ha=\"center\", va=\"center\")\nax1.annotate('', xy=(0, 1), xytext=(0.0025, 1), \n arrowprops=dict(arrowstyle='<->', mutation_scale=19, \n color='0.3'), size=2)\nax1.text(0.00125, 0.85, \"Period\", size=12, \n color='0.3', ha=\"center\", va=\"center\")\n\n# Combine several sine waves (here are three frequencies)\nfrequency1 = 400 \nfrequency2 = 600\nfrequency3 = 1600\nduration = 0.01\namplitude = 1.0\nphase = 0.75\nFs = 20050\n\nnum_samples = int(Fs * duration)\nt = np.arange(num_samples) / Fs\nx1 = amplitude * np.sin(2 * np.pi * (frequency1 * t - phase)) # 1st sine\nx2 = amplitude * np.sin(2 * np.pi * (frequency2 * t - phase)) # 2nd sine\nx3 = amplitude * np.sin(2 * np.pi * (frequency3 * t - phase)) # 3rd sine\n\nax2.plot(t, x1, color='red')\nax4.plot(t, x2, color='red')\nax5.plot(t, x3, color='red')\n\nax2.set_title(\"400 Hz\")\nax4.set_title(\"600 Hz\")\nax5.set_title(\"1600 Hz\")\n\nax2.set_xticks(np.arange(0, 0.0125, 0.0025)) \nax2.set_xlim([0.0, 0.01])\nax2.set_yticks(np.arange(-1, 1.5, 1.0)) \n\nax4.set_xticks(np.arange(0, 0.0125, 0.0025)) \nax4.set_xlim([0.0, 0.01])\nax4.set_yticks(np.arange(-1, 1.5, 1.0)) \n\nax5.set_xticks(np.arange(0, 0.0125, 0.0025)) \nax5.set_xlim([0.0, 0.01])\nax5.set_yticks(np.arange(-1, 1.5, 1.0)) \n\nfig.subplots_adjust(hspace=.001, wspace=0.5)\n\n# Combine all three (sum and divide by 3 to keep the amplitude as original)\nx123 = (x1+x2+x3)/3\n\nax3.plot(t, x123, color='blue', linewidth=2.0, linestyle='-')\nax3.set_xlabel('Time (s)')\nax3.set_title(\"Complex tone (sines of 400 Hz + 600 Hz + 1600 Hz)\")\nax3.set_ylabel('')\nax3.set_ylim([-1.01, 1.01])\nax3.set_xlim([0, 0.01])\nax3.set_xticks(np.arange(0, 0.0125, 0.0025)) \nax3.set_yticks(np.arange(-1, 1.5, 1.0)) \nax3.grid()\nfig.tight_layout()\n\n\nax2.annotate('', xy=(1.11/100, -9.3), xytext=(1.01/100, 0), \n arrowprops=dict(width=0.5, headlength=3, headwidth=3, \n color='0.3'), size=2, annotation_clip=False)\nax4.annotate('', xy=(1.063/100, 0), xytext=(1.01/100, 0), \n arrowprops=dict(width=0.5, headlength=3, headwidth=3, \n color='0.3'), size=2, annotation_clip=False)\nax5.annotate('', 
-  {
-    "objectID": "Chapter10.3.html",
-    "href": "Chapter10.3.html",
-    "title": "Ch. 10 – Physical",
-    "section": "",
-    "text": "Show the code\nimport numpy as np\nimport librosa\nimport librosa.display\nimport IPython.display as ipd\nfrom matplotlib import pyplot as plt\n\n\n\n\n\nx, sr = librosa.load('data/help.mp3', offset=1.05, duration=10.087)\nipd.display(ipd.Audio(data=x, rate=sr))\n\n\n\n\n\nfig, ax = plt.subplots(nrows=1, figsize=(7.5, 2.75))\nlibrosa.display.waveshow(x, sr=sr, ax=ax, color='indigo')\n\nax.set_title(\"Waveform\")\nax.set_xlabel(\"Time (s)\")\nax.set_ylabel(\"Amplitude\")\nax.set_xticks(range(0, 11, 1))\nax.set_xlim([0, 10])\nax.grid()\nfig.tight_layout()\nplt.show()\n\n\n\n\n\nfig, ax = plt.subplots(nrows=1, figsize=(7.5, 2.75))\nrms = librosa.feature.rms(y=x) # Extract dynamics (RMS)\ndb = librosa.amplitude_to_db(rms, ref=np.max) # Convert into dB. Note that this is a relative measure (loudest is now 0)\ntimes = librosa.times_like(rms)\n\nax.plot(times, db[0], color='darkblue')\nax.set_title(\"Loudness\")\nax.set_ylim([-15,0])\nax.set_ylabel(\"Decibel\")\nax.set_xlabel(\"Time (s)\")\nax.set_xticks(range(0, 11, 1))\nax.set_xlim([0, 10])\nax.grid()\nfig.tight_layout()\nplt.show()\n\n\n\n\n\nfig, ax = plt.subplots(nrows=1, figsize=(7.5, 2.75))\n\nonset_subbands = librosa.onset.onset_strength_multi(y=x,\n sr=sr,\n channels=[0, 32, 64, 96, 128])\n\nonset_subbands_s = np.sum(onset_subbands, axis=0) # Sum across sub-bands\nax.plot(times, onset_subbands_s, 'maroon')\n\ntempo, beats = librosa.beat.beat_track(y=x, sr=sr, trim=False)\nplt.vlines(times[beats], 0, onset_subbands_s.max(), color='0.40', alpha=0.80,\n linestyle='--', label='Beats')\n\no_env = librosa.onset.onset_strength(y=x, sr=sr)\ntimes = librosa.times_like(o_env, sr=sr)\nonset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)\n\nax.set_title(\"Onset strength and estimated beats\")\nax.set_ylabel(\"Strength\")\nax.set_xlabel(\"Time (s)\")\nax.set_ylim([0, 60])\nax.set_xticks(range(0, 11, 1))\nax.set_xlim([0, 10])\nax.grid()\nfig.tight_layout()\n\ndata = np.loadtxt('data/Help_beats.csv')\nann_time = data[0:16, 0]-1.05\nann_label = data[0:16, 1]\nfor i in range(16):\n ax.text(ann_time[i], 53, int(ann_label[i]), size=6,\n backgroundcolor='0.8', weight='bold', ha='center')\n\nplt.show()",
-    "crumbs": [
-      "Audio analysis",
-      "Ch. 10 – Physical"
-    ]
-  },
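The beat tracker behind the dashed lines also returns a global tempo estimate; a small check on the same excerpt (assumes data/help.mp3 is available as above):

```python
import librosa

# Load the same 10-second intro excerpt used in the figure.
x, sr = librosa.load('data/help.mp3', offset=1.05, duration=10.087)
tempo, beats = librosa.beat.beat_track(y=x, sr=sr, trim=False)
print("Estimated tempo (BPM):", tempo)
print("First beats (s):", librosa.frames_to_time(beats, sr=sr)[:4])
```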
10 – Physical", - "section": "", - "text": "Show the code\nimport numpy as np\nimport librosa\nimport librosa.display\nimport IPython.display as ipd\nfrom matplotlib import pyplot as plt \n\n\n\n\n\n\nx, sr = librosa.load('data/help.mp3', offset=1.05, duration=10.087)\nipd.display(ipd.Audio(data=x, rate=sr))\n\n\n \n \n Your browser does not support the audio element.\n \n \n\n\n\n\n\n\nfig, ax = plt.subplots(nrows=1,figsize=(7.5, 2.75))\nlibrosa.display.waveshow(x, sr=sr, ax=ax, color='indigo')\n\nax.set_title(\"Waveform\")\nax.set_xlabel(\"Time (s)\")\nax.set_ylabel(\"Amplitude\")\nax.set_xticks(range(0, 11, 1))\nax.set_xlim([0, 10])\nax.grid()\nfig.tight_layout()\nplt.show()\n\n\n\n\n\n\n\n\n\n\n\n\nfig, ax = plt.subplots(nrows=1, figsize=(7.5, 2.75))\nrms = librosa.feature.rms(y=x) # Extra dynamics (RMS)\ndb = librosa.amplitude_to_db(rms, ref=np.max) # Convert into dB. Note that this is a relative measure (loudest is now 0) \ntimes = librosa.times_like(rms)\n\nax.plot(times, db[0], color='darkblue')\nax.set_title(\"Loudness\")\nax.set_ylim([-15,0])\nax.set_ylabel(\"Decibel\")\nax.set_xlabel(\"Time (s)\")\nax.set_xticks(range(0, 11, 1))\nax.set_xlim([0, 10])\nax.grid()\nfig.tight_layout()\nplt.show()\n\n\n\n\n\n\n\n\n\n\n\n\nfig, ax = plt.subplots(nrows=1, figsize=(7.5, 2.75))\n\n\nonset_subbands = librosa.onset.onset_strength_multi(y=x, \n sr=sr, \n channels=[0, 32, 64, 96, 128])\n\nonset_subbands_s = sum(onset_subbands, 1)\nax.plot(times, onset_subbands_s, 'maroon')\n\ntempo, beats = librosa.beat.beat_track(y=x, sr=sr, trim=False)\nplt.vlines(times[beats], 0, onset_subbands_s.max(), color='0.40', alpha=0.80,\n linestyle='--', label='Beats')\n\no_env = librosa.onset.onset_strength(y=x, sr=sr)\ntimes = librosa.times_like(o_env, sr=sr)\nonset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)\n\nax.set_title(\"Onset strength and estimated beats\")\nax.set_ylabel(\"Strength\")\nax.set_xlabel(\"Time (s)\")\nax.set_ylim([0, 60])\nax.set_xticks(range(0, 11, 1))\nax.set_xlim([0, 10])\nax.grid()\nfig.tight_layout()\n\ndata = np.loadtxt('data/Help_beats.csv') \nann_time = data[0:16, 0]-1.05\nann_label = data[0:16, 1]\nfor x in range(16):\n ax.text(ann_time[x], 53, int(ann_label[x]), size=6, \n backgroundcolor='0.8', weight='bold', ha='center')\n\nplt.show()", - "crumbs": [ - "Audio analysis", - "Ch. 10 – Physical" - ] - }, - { - "objectID": "Chapter10.5.html", - "href": "Chapter10.5.html", - "title": "Ch. 10 – Semantic", - "section": "", - "text": "This code requires that the user supplements their own client_id and client_secret which can be obtained after registering to developer account for Spotify API. The code will not run without these. However, the visualisation part of the code works with the save data (data/top_n_track_features2.csv).\n\n\nShow the code\nimport pandas as pd\nimport numpy as np\nimport spotipy\nfrom spotipy.oauth2 import SpotifyClientCredentials", - "crumbs": [ - "Audio analysis", - "Ch. 10 – Semantic" - ] - }, - { - "objectID": "Chapter10.5.html#figure-10.6.-visualisation-of-the-features-of-two-songs-help-and-yesterday-by-the-beatles-within-the-context-of-500-other-tracks-from-1964-1966-using-four-high-level-features-retrieved-from-spotify.", - "href": "Chapter10.5.html#figure-10.6.-visualisation-of-the-features-of-two-songs-help-and-yesterday-by-the-beatles-within-the-context-of-500-other-tracks-from-1964-1966-using-four-high-level-features-retrieved-from-spotify.", - "title": "Ch. 
10 – Semantic", - "section": "", - "text": "This code requires that the user supplements their own client_id and client_secret which can be obtained after registering to developer account for Spotify API. The code will not run without these. However, the visualisation part of the code works with the save data (data/top_n_track_features2.csv).\n\n\nShow the code\nimport pandas as pd\nimport numpy as np\nimport spotipy\nfrom spotipy.oauth2 import SpotifyClientCredentials", - "crumbs": [ - "Audio analysis", - "Ch. 10 – Semantic" - ] - }, - { - "objectID": "Chapter10.5.html#retrieve-spotify-features", - "href": "Chapter10.5.html#retrieve-spotify-features", - "title": "Ch. 10 – Semantic", - "section": "1. Retrieve Spotify features", - "text": "1. Retrieve Spotify features\n\nclient_id = 'YOUR_CLIENT_ID_HERE'\nclient_secret = 'YOUR_SECRET_KEY_HERE'\n\nsp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id,\n client_secret=client_secret))\n\nresults = sp.search(q='The Beatles', limit=20)\nfor idx, track in enumerate(results['tracks']['items']):\n print(idx, track['name'])\n\ntrack = results['tracks']['items'][18] # help is 18\nprint(track['name'])\nprint(track['href'])\nprint(track['popularity'])\nprint(\"===========PREVIEW===========\")\nprint(track['preview_url'])\nprint(\"===========PREVIEW===========\")\n\na = sp.audio_features(track['id'])\n\nprint(a[0]['valence']) # Help!: 0.763, Yesterday: 0.315\nprint(a[0]['energy']) # Help!: 0.725, Yesterday: 0.179\nprint(a[0]['tempo']) # Help!: 95.003, Yesterday: 96.53\nprint(a[0]['loudness']) # Help!: -7.576, Yesterday: -11.83\nprint(a[0]['acousticness']) # Help!: 0.188, Yesterday: 0.879\n\nprint(\"===========GENRE===========\")\nname = []\nTid = []\nvalence = []\nenergy = []\ntempo = []\nloudness = []\ninstrumentalness = []\nacousticness = []\ndanceability = []\n\n# get 500 tracks, 50 each time\noffset_vals = np.arange(1, 500, 10)\n\nfor i in range(50):\n results = sp.search(q='genre:pop & year:1964-1966', limit=10, \n offset=offset_vals[i])\n for idx, track in enumerate(results['tracks']['items']):\n name.append(track['name'])\n Tid.append(track['id'])\n a = sp.audio_features(track['id'])\n valence.append(a[0]['valence'])\n energy.append(a[0]['energy'])\n instrumentalness.append(a[0]['instrumentalness'])\n acousticness.append(a[0]['acousticness'])\n danceability.append(a[0]['danceability'])\n tempo.append(a[0]['tempo'])\n loudness.append(a[0]['loudness'])\n print(i, ':', idx)\n\n# Store in data frame and save to a file\ndf = pd.DataFrame({'valence': valence, 'energy': energy, 'tempo': tempo,\n 'acousticness': acousticness,\n 'loudness': loudness, 'id': Tid})\ndf.to_csv('data/top_n_track_features2.csv')", - "crumbs": [ - "Audio analysis", - "Ch. 10 – Semantic" - ] - }, - { - "objectID": "Chapter10.5.html#visualise", - "href": "Chapter10.5.html#visualise", - "title": "Ch. 10 – Semantic", - "section": "2. Visualise", - "text": "2. 
-  {
-    "objectID": "Chapter10.5.html#visualise",
-    "href": "Chapter10.5.html#visualise",
-    "title": "Ch. 10 – Semantic",
-    "section": "2. Visualise",
-    "text": "2. Visualise\n\nimport pandas as pd\nimport numpy as np\nimport seaborn as sns\nfrom matplotlib import pyplot as plt\n\n\n# Get data (from a previous process)\nd = pd.read_csv('data/top_n_track_features2.csv')\n\n# set graphic (seaborn) theme\nsns.set_theme()\nsns.set_style(\"whitegrid\")\n\nfig = plt.figure()\nfig.set_figheight(8)\nfig.set_figwidth(9)\n\n# Define multiple plots\nax1 = plt.subplot2grid(shape=(2, 2), loc=(0, 0))\nax2 = plt.subplot2grid(shape=(2, 2), loc=(0, 1))\nax3 = plt.subplot2grid(shape=(2, 2), loc=(1, 0))\nax4 = plt.subplot2grid(shape=(2, 2), loc=(1, 1))\n\nsns.histplot(x='valence', data=d, color='blue', ax=ax1)\nax1.set_xlabel('Valence (0-1)')\nax1.axes.axvline(0.763, color='red', linewidth=2, alpha=.7)\nax1.text(0.763, ax1.get_ylim()[1], \"Help!\", size=12, backgroundcolor='0.9',\n ha=\"center\", va=\"top\", alpha=0.85)\nax1.axes.axvline(0.315, color='green', linewidth=2, alpha=.7)\nax1.text(0.315, ax1.get_ylim()[1], \"Yesterday\", size=12, backgroundcolor='0.9',\n ha=\"center\", va=\"top\", alpha=0.85)\nax1.set_xlim([0, 1])\nax1.set_xticks(np.arange(0, 1.1, 0.20))\n\nsns.histplot(x='energy', data=d, color='blue', ax=ax2)\nax2.set_xlabel('Energy (0-1)')\nax2.axes.axvline(0.725, color='red', linewidth=2, alpha=.7)\nax2.text(0.725, ax2.get_ylim()[1], \"Help!\", size=12, backgroundcolor='0.9',\n ha=\"center\", va=\"top\", alpha=0.85)\nax2.axes.axvline(0.179, color='green', linewidth=2, alpha=.7)\nax2.text(0.179, ax2.get_ylim()[1], \"Yesterday\", size=12, backgroundcolor='0.9',\n ha=\"center\", va=\"top\", alpha=0.85)\nax2.set_xlim([0, 1])\nax2.set_xticks(np.arange(0, 1.1, 0.20))\n\nsns.histplot(x='tempo', data=d, color='blue', ax=ax3)\nax3.set_xlabel('Tempo (BPM)')\nax3.axes.axvline(95, color='red', linewidth=2, alpha=.7)\nax3.text(90, ax3.get_ylim()[1], \"Help!\", size=12, backgroundcolor='0.9',\n ha=\"right\", va=\"top\", alpha=0.85)\nax3.axes.axvline(96.5, color='green', linewidth=2, alpha=.7)\nax3.text(100, ax3.get_ylim()[1], \"Yesterday\", size=12, backgroundcolor='0.9',\n ha=\"left\", va=\"top\", alpha=0.85)\n\nsns.histplot(x='acousticness', data=d, color='blue', ax=ax4)\nax4.set_xlabel('Acousticness (0-1)')\nax4.axes.axvline(0.188, color='red', linewidth=2, alpha=.7)\nax4.text(0.188, ax4.get_ylim()[1], \"Help!\", size=12, backgroundcolor='0.9',\n ha=\"center\", va=\"top\", alpha=0.85)\nax4.axes.axvline(0.879, color='green', linewidth=2, alpha=.7)\nax4.text(0.879, ax4.get_ylim()[1], \"Yesterday\", size=12, backgroundcolor='0.9',\n ha=\"center\", va=\"top\", alpha=0.85)\nax4.set_xlim([0, 1])\nax4.set_xticks(np.arange(0, 1.1, 0.20))\n\nfig.tight_layout()\nplt.show()",
-    "crumbs": [
-      "Audio analysis",
-      "Ch. 10 – Semantic"
-    ]
-  },
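The four histograms could also be condensed into a single scatter; a sketch using the same saved data and the Help!/Yesterday feature values quoted above:

```python
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

d = pd.read_csv('data/top_n_track_features2.csv')
ax = sns.scatterplot(x='valence', y='energy', data=d, color='grey', alpha=0.5)
ax.plot(0.763, 0.725, 'r^', markersize=10, label='Help!')       # values from above
ax.plot(0.315, 0.179, 'gs', markersize=10, label='Yesterday')   # values from above
ax.legend()
plt.show()
```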
-  {
-    "objectID": "Chapter11.3.html",
-    "href": "Chapter11.3.html",
-    "title": "EMR",
-    "section": "",
-    "text": "Corpus analysis example of onsets.\n\n\n\nlibrary(onsetsync) # to handle onsets\nlibrary(dplyr) # to handle summaries\n\n\n\n\nThe example data are built into the onsetsync package and come from the IEMP collection. The code runs an analysis of asynchrony across different Cuban Salsa and Son tracks (five in total) and creates a table of the Bass asynchronies with Guitar and Tres (in milliseconds).\ncorpus <- onsetsync::CSS_IEMP # Cuban Salsa & Son\nD <- sync_sample_paired(corpus,'Bass','Guitar',0,1,'SD')\nRES <- summarise(group_by(D$asynch,name), M = mean(asynch*1000))\nD2 <- sync_sample_paired(corpus,'Bass','Tres',0,1,'SD')\nRES2 <- summarise(group_by(D2$asynch,name), M = mean(asynch*1000))\nnames(RES)[2] <- 'Bass - Guitar (in ms)' # rename for clarity\nRES$`Bass - Tres (in ms)` <- RES2$M # add the Tres comparison\nprint(knitr::kable(RES,digits=1)) # create table\n\n\n\nname\nBass - Guitar (in ms)\nBass - Tres (in ms)\n\n\n\n\nEl Cantante\n-5.2\n14.6\n\n\nHabanera\n-11.3\n6.6\n\n\nPalo Santo\n-16.1\n-3.5\n\n\nTumbao Sangreao\n-12.0\n5.2\n\n\nYo Naci En Un Sola\n-7.1\n-4.4",
-    "crumbs": [
-      "Corpus studies",
-      "Ch. 11 – Synchrony"
-    ]
-  },
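For readers without the corpus installed, the printed means can be re-entered directly to summarise across the five tracks; a sketch using only the values from the table above:

```python
import pandas as pd

# Mean asynchronies (ms) copied from the table above.
res = pd.DataFrame({
    'name': ['El Cantante', 'Habanera', 'Palo Santo',
             'Tumbao Sangreao', 'Yo Naci En Un Sola'],
    'bass_guitar_ms': [-5.2, -11.3, -16.1, -12.0, -7.1],
    'bass_tres_ms': [14.6, 6.6, -3.5, 5.2, -4.4],
})
print(res[['bass_guitar_ms', 'bass_tres_ms']].mean())   # grand means across tracks
```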
London, UK: Routledge.", - "crumbs": [ - "Other", - "Citation" - ] - }, - { - "objectID": "Citation.html#book-bibliographic-details-in-bibtex-format", - "href": "Citation.html#book-bibliographic-details-in-bibtex-format", - "title": "Citation", - "section": "Book bibliographic details in bibtex format:", - "text": "Book bibliographic details in bibtex format:\n\n@book{eerola2024,\n address = {London, UK},\n author = {Eerola, T.},\n publisher = {Routledge},\n series = {SEMPRE Studies in the Psychology of Music},\n title = {Music and Science -- Guide to Empirical Music Research},\n year = {2024}\n}", - "crumbs": [ - "Other", - "Citation" - ] - }, - { - "objectID": "Citation.html#website-bibliographic-details-in-apa-format", - "href": "Citation.html#website-bibliographic-details-in-apa-format", - "title": "Citation", - "section": "Website bibliographic details in APA format:", - "text": "Website bibliographic details in APA format:\n\n\n\n\n\n\nNote\n\n\n\nEerola, T. (2024). Music and Science – Guide to Empirical Music Research. https://tuomaseerola.github.io/emr/", - "crumbs": [ - "Other", - "Citation" - ] - }, - { - "objectID": "Citation.html#website-bibliographic-details-in-bibtex-format", - "href": "Citation.html#website-bibliographic-details-in-bibtex-format", - "title": "Citation", - "section": "Website bibliographic details in bibtex format:", - "text": "Website bibliographic details in bibtex format:\n\n@misc{eerola2024online,\n author = {Eerola, T.},\n title = {Music and Science -- Guide to Empirical Music Research},\n year = {2024},\n url = {https://tuomaseerola.github.io/emr/},\n urldate = {2024-1-1}\n}", - "crumbs": [ - "Other", - "Citation" - ] - }, - { - "objectID": "Version_R.html", - "href": "Version_R.html", - "title": "Establish R library versions within Colab", - "section": "", - "text": "In March 2024 Colab version of R is:\n\nR version 4.3.3 (2024-02-29)\ntidyverse 2.0.0’\nggplot2 3.4.4’\n\nTest the Colab version of R.\n\nprint(R.version.string)\n\n[1] \"R version 4.3.3 (2024-02-29)\"\n\n\n\n\nShow libraries\n\n#print(installed.packages())\npackageVersion(\"tidyverse\")\n\n[1] '2.0.0'\n\npackageVersion(\"ggplot2\")\n\n[1] '3.5.0'\n\n\n\n\n\n\nsessionInfo()\n\nR version 4.3.3 (2024-02-29)\nPlatform: x86_64-apple-darwin20 (64-bit)\nRunning under: macOS Ventura 13.6.4\n\nMatrix products: default\nBLAS: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib \nLAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0\n\nlocale:\n[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8\n\ntime zone: Europe/London\ntzcode source: internal\n\nattached base packages:\n[1] stats graphics grDevices utils datasets methods base \n\nloaded via a namespace (and not attached):\n [1] htmlwidgets_1.6.4 compiler_4.3.3 fastmap_1.1.1 cli_3.6.2 \n [5] tools_4.3.3 htmltools_0.5.8 yaml_2.3.8 rmarkdown_2.26 \n [9] knitr_1.45 jsonlite_1.8.8 xfun_0.43 digest_0.6.35 \n[13] rlang_1.1.3 evaluate_0.23" - }, - { - "objectID": "Version_R.html#libraries", - "href": "Version_R.html#libraries", - "title": "Establish R library versions within Colab", - "section": "", - "text": "Show libraries\n\n#print(installed.packages())\npackageVersion(\"tidyverse\")\n\n[1] '2.0.0'\n\npackageVersion(\"ggplot2\")\n\n[1] '3.5.0'" - }, - { - "objectID": "Version_R.html#show-other-information", - "href": "Version_R.html#show-other-information", - "title": "Establish R library versions within Colab", - "section": "", - "text": 
"sessionInfo()\n\nR version 4.3.3 (2024-02-29)\nPlatform: x86_64-apple-darwin20 (64-bit)\nRunning under: macOS Ventura 13.6.4\n\nMatrix products: default\nBLAS: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib \nLAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0\n\nlocale:\n[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8\n\ntime zone: Europe/London\ntzcode source: internal\n\nattached base packages:\n[1] stats graphics grDevices utils datasets methods base \n\nloaded via a namespace (and not attached):\n [1] htmlwidgets_1.6.4 compiler_4.3.3 fastmap_1.1.1 cli_3.6.2 \n [5] tools_4.3.3 htmltools_0.5.8 yaml_2.3.8 rmarkdown_2.26 \n [9] knitr_1.45 jsonlite_1.8.8 xfun_0.43 digest_0.6.35 \n[13] rlang_1.1.3 evaluate_0.23" - }, { "objectID": "index.html", "href": "index.html", diff --git a/docs/sitemap.xml b/docs/sitemap.xml new file mode 100644 index 0000000..eb13e4f --- /dev/null +++ b/docs/sitemap.xml @@ -0,0 +1,7 @@ + + + + https://tuomaseerola.github.io/emr/index.html/index.html + 2024-04-05T14:10:43.999Z + +