-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathExtractionScript.py
More file actions
53 lines (36 loc) · 1.46 KB
/
ExtractionScript.py
File metadata and controls
53 lines (36 loc) · 1.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
"""
:author: Marcus Tran
:date: February 15th, 2020
:description: Functions that walk a dataset's metadata and extract MFCC features from the
audio files found under a given dataset path.
Based on a Medium article and its example code.
"""
import pandas as pd
import os
import librosa
import numpy as np
def extract_features(fileName):
    """Return a fixed-length MFCC feature vector for one audio file.

    The file is loaded with ``librosa.load`` (``res_type='kaiser_fast'``),
    40 MFCCs are computed, and the transposed MFCC matrix is averaged along
    axis 0, giving one mean value per coefficient.

    This is deliberately best-effort: any failure while loading or analysing
    the file is reported on stdout and signalled with ``None`` so that a
    single bad file does not abort a whole extraction run.

    :param fileName: path of the audio file to process.
    :return: array of per-coefficient MFCC means, or ``None`` if the file
        could not be processed.
    """
    try:
        audio, sample_rate = librosa.load(fileName, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        mfccsscaled = np.mean(mfccs.T, axis=0)
    except Exception as error:
        # Broad catch is intentional (decoder/backends raise many unrelated
        # exception types), but include the cause so failures are diagnosable.
        print("Error encountered while parsing file: ", fileName, "-", repr(error))
        return None
    return mfccsscaled
def create_DataFrame(DataSetPath):
    """Build a DataFrame of extracted audio features and their class labels.

    Reads ``metadata.json`` (pandas ``orient='split'`` layout) from
    ``DataSetPath``. For every metadata row the audio file is expected at
    ``<DataSetPath>/fold<fold>/<slice_file_name>``; it is passed to
    ``extract_features`` and the result is paired with the row's
    ``class_name``.

    :param DataSetPath: root directory of the dataset. A trailing path
        separator is optional.
    :return: DataFrame with columns ``feature`` and ``class_label``, one row
        per metadata row. ``feature`` holds whatever ``extract_features``
        returned for that file, which is ``None`` when the file could not be
        parsed.
    """
    # Join instead of concatenating so the path is correct whether or not the
    # caller supplied a trailing separator.
    metadata = pd.read_json(os.path.join(DataSetPath, 'metadata.json'), orient='split')

    # The absolute dataset root does not change per row; resolve it once.
    dataset_root = os.path.abspath(DataSetPath)

    features = []
    # Extract features from each sound file listed in the metadata
    for _, row in metadata.iterrows():
        fileName = os.path.join(
            dataset_root,
            'fold' + str(row["fold"]),
            str(row["slice_file_name"]),
        )
        features.append([extract_features(fileName), row["class_name"]])

    # Convert the collected [feature, label] pairs into a data frame
    FeatureDataFrame = pd.DataFrame(features, columns=['feature', 'class_label'])
    print('Finished extracting from ', len(FeatureDataFrame), ' files')
    return FeatureDataFrame