Skip to content

Commit 891ef7e

Browse files
authored
Merge pull request #8 from jsingh811/setup
Setup
2 parents 74e096e + 0ce38a1 commit 891ef7e

File tree

5 files changed

+279
-5
lines changed

5 files changed

+279
-5
lines changed

pyAudioProcessing/__init__.py

Whitespace-only changes.
Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Copyright (c) 1998, Malcolm Slaney <malcolm@interval.com>
5+
Copyright (c) 2009, Dan Ellis <dpwe@ee.columbia.edu>
6+
Copyright (c) 2014, Jason Heeris <jason.heeris@gmail.com>
7+
All rights reserved.
8+
9+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
10+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
11+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
12+
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
13+
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
14+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
15+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
16+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
17+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
18+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19+
"""
20+
21+
import numpy as np
22+
import scipy as sp
23+
from scipy import signal as sgn
24+
25+
DEFAULT_FILTER_NUM = 100
26+
DEFAULT_LOW_FREQ = 100
27+
DEFAULT_HIGH_FREQ = 44100 / 4
28+
29+
30+
def erb_point(low_freq, high_freq, fraction):
31+
"""
32+
Calculates a single point on an ERB scale between ``low_freq`` and
33+
``high_freq``, determined by ``fraction``. When ``fraction`` is ``1``,
34+
``low_freq`` will be returned. When ``fraction`` is ``0``, ``high_freq``
35+
will be returned.
36+
37+
``fraction`` can actually be outside the range ``[0, 1]``, which in general
38+
isn't very meaningful, but might be useful when ``fraction`` is rounded a
39+
little above or below ``[0, 1]`` (eg. for plot axis labels).
40+
"""
41+
# Change the following three parameters if you wish to use a different ERB
42+
# scale. Must change in MakeERBCoeffs too.
43+
# TODO: Factor these parameters out
44+
ear_q = 9.26449 # Glasberg and Moore Parameters
45+
min_bw = 24.7
46+
order = 1
47+
48+
# All of the following expressions are derived in Apple TR #35, "An
49+
# Efficient Implementation of the Patterson-Holdsworth Cochlear Filter
50+
# Bank." See pages 33-34.
51+
erb_point = (
52+
-ear_q * min_bw
53+
+ np.exp(
54+
fraction * (
55+
-np.log(high_freq + ear_q * min_bw)
56+
+ np.log(low_freq + ear_q * min_bw)
57+
)
58+
) *
59+
(high_freq + ear_q * min_bw)
60+
)
61+
62+
return erb_point
63+
64+
65+
def erb_space(
66+
low_freq=DEFAULT_LOW_FREQ,
67+
high_freq=DEFAULT_HIGH_FREQ,
68+
num=DEFAULT_FILTER_NUM):
69+
"""
70+
This function computes an array of ``num`` frequencies uniformly spaced
71+
between ``high_freq`` and ``low_freq`` on an ERB scale.
72+
73+
For a definition of ERB, see Moore, B. C. J., and Glasberg, B. R. (1983).
74+
"Suggested formulae for calculating auditory-filter bandwidths and
75+
excitation patterns," J. Acoust. Soc. Am. 74, 750-753.
76+
"""
77+
return erb_point(
78+
low_freq,
79+
high_freq,
80+
np.arange(1, num + 1) / num
81+
)
82+
83+
84+
def centre_freqs(fs, num_freqs, cutoff):
85+
"""
86+
Calculates an array of centre frequencies (for :func:`make_erb_filters`)
87+
from a sampling frequency, lower cutoff frequency and the desired number of
88+
filters.
89+
90+
:param fs: sampling rate
91+
:param num_freqs: number of centre frequencies to calculate
92+
:type num_freqs: int
93+
:param cutoff: lower cutoff frequency
94+
:return: same as :func:`erb_space`
95+
"""
96+
return erb_space(cutoff, fs / 2, num_freqs)
97+
98+
99+
def make_erb_filters(fs, centre_freqs, width=1.0):
100+
"""
101+
This function computes the filter coefficients for a bank of
102+
Gammatone filters. These filters were defined by Patterson and Holdworth for
103+
simulating the cochlea.
104+
105+
The result is returned as a :class:`ERBCoeffArray`. Each row of the
106+
filter arrays contains the coefficients for four second order filters. The
107+
transfer function for these four filters share the same denominator (poles)
108+
but have different numerators (zeros). All of these coefficients are
109+
assembled into one vector that the ERBFilterBank can take apart to implement
110+
the filter.
111+
112+
The filter bank contains "numChannels" channels that extend from
113+
half the sampling rate (fs) to "lowFreq". Alternatively, if the numChannels
114+
input argument is a vector, then the values of this vector are taken to be
115+
the center frequency of each desired filter. (The lowFreq argument is
116+
ignored in this case.)
117+
118+
Note this implementation fixes a problem in the original code by
119+
computing four separate second order filters. This avoids a big problem with
120+
round off errors in cases of very small cfs (100Hz) and large sample rates
121+
(44kHz). The problem is caused by roundoff error when a number of poles are
122+
combined, all very close to the unit circle. Small errors in the eigth order
123+
coefficient, are multiplied when the eigth root is taken to give the pole
124+
location. These small errors lead to poles outside the unit circle and
125+
instability. Thanks to Julius Smith for leading me to the proper
126+
explanation.
127+
128+
Execute the following code to evaluate the frequency response of a 10
129+
channel filterbank::
130+
131+
fcoefs = MakeERBFilters(16000,10,100);
132+
y = ERBFilterBank([1 zeros(1,511)], fcoefs);
133+
resp = 20*log10(abs(fft(y')));
134+
freqScale = (0:511)/512*16000;
135+
semilogx(freqScale(1:255),resp(1:255,:));
136+
axis([100 16000 -60 0])
137+
xlabel('Frequency (Hz)'); ylabel('Filter Response (dB)');
138+
139+
| Rewritten by Malcolm Slaney@Interval. June 11, 1998.
140+
| (c) 1998 Interval Research Corporation
141+
|
142+
| (c) 2012 Jason Heeris (Python implementation)
143+
"""
144+
T = 1 / fs
145+
# Change the followFreqing three parameters if you wish to use a different
146+
# ERB scale. Must change in ERBSpace too.
147+
# TODO: factor these out
148+
ear_q = 9.26449 # Glasberg and Moore Parameters
149+
min_bw = 24.7
150+
order = 1
151+
152+
erb = width*((centre_freqs / ear_q) ** order + min_bw ** order) ** ( 1 /order)
153+
B = 1.019 * 2 * np.pi * erb
154+
155+
arg = 2 * centre_freqs * np.pi * T
156+
vec = np.exp(2j * arg)
157+
158+
A0 = T
159+
A2 = 0
160+
B0 = 1
161+
B1 = -2 * np.cos(arg) / np.exp(B * T)
162+
B2 = np.exp(-2 * B * T)
163+
164+
rt_pos = np.sqrt(3 + 2 ** 1.5)
165+
rt_neg = np.sqrt(3 - 2 ** 1.5)
166+
167+
common = -T * np.exp(-(B * T))
168+
169+
# TODO: This could be simplified to a matrix calculation involving the
170+
# constant first term and the alternating rt_pos/rt_neg and +/-1 second
171+
# terms
172+
k11 = np.cos(arg) + rt_pos * np.sin(arg)
173+
k12 = np.cos(arg) - rt_pos * np.sin(arg)
174+
k13 = np.cos(arg) + rt_neg * np.sin(arg)
175+
k14 = np.cos(arg) - rt_neg * np.sin(arg)
176+
177+
A11 = common * k11
178+
A12 = common * k12
179+
A13 = common * k13
180+
A14 = common * k14
181+
182+
gain_arg = np.exp(1j * arg - B * T)
183+
184+
gain = np.abs(
185+
(vec - gain_arg * k11)
186+
* (vec - gain_arg * k12)
187+
* (vec - gain_arg * k13)
188+
* (vec - gain_arg * k14)
189+
* ( T * np.exp(B * T)
190+
/ (-1 / np.exp(B * T) + 1 + vec * (1 - np.exp(B * T)))
191+
)**4
192+
)
193+
194+
allfilts = np.ones_like(centre_freqs)
195+
196+
fcoefs = np.column_stack([
197+
A0 * allfilts, A11, A12, A13, A14, A2*allfilts,
198+
B0 * allfilts, B1, B2,
199+
gain
200+
])
201+
202+
return fcoefs
203+
204+
205+
def erb_filterbank(wave, coefs):
206+
"""
207+
:param wave: input data (one dimensional sequence)
208+
:param coefs: gammatone filter coefficients
209+
210+
Process an input waveform with a gammatone filter bank. This function takes
211+
a single sound vector, and returns an array of filter outputs, one channel
212+
per row.
213+
214+
The fcoefs parameter, which completely specifies the Gammatone filterbank,
215+
should be designed with the :func:`make_erb_filters` function.
216+
217+
| Malcolm Slaney @ Interval, June 11, 1998.
218+
| (c) 1998 Interval Research Corporation
219+
| Thanks to Alain de Cheveigne' for his suggestions and improvements.
220+
|
221+
| (c) 2013 Jason Heeris (Python implementation)
222+
"""
223+
output = np.zeros((coefs[:,9].shape[0], wave.shape[0]))
224+
225+
gain = coefs[:, 9]
226+
# A0, A11, A2
227+
As1 = coefs[:, (0, 1, 5)]
228+
# A0, A12, A2
229+
As2 = coefs[:, (0, 2, 5)]
230+
# A0, A13, A2
231+
As3 = coefs[:, (0, 3, 5)]
232+
# A0, A14, A2
233+
As4 = coefs[:, (0, 4, 5)]
234+
# B0, B1, B2
235+
Bs = coefs[:, 6:9]
236+
237+
# Loop over channels
238+
for idx in range(0, coefs.shape[0]):
239+
# These seem to be reversed (in the sense of A/B order), but that's what
240+
# the original code did...
241+
# Replacing these with polynomial multiplications reduces both accuracy
242+
# and speed.
243+
y1 = sgn.lfilter(As1[idx], Bs[idx], wave)
244+
y2 = sgn.lfilter(As2[idx], Bs[idx], y1)
245+
y3 = sgn.lfilter(As3[idx], Bs[idx], y2)
246+
y4 = sgn.lfilter(As4[idx], Bs[idx], y3)
247+
output[idx, :] = y4 / gain[idx]
248+
249+
return output

pyAudioProcessing/features/getGfcc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import math
1212
import numpy
1313

14-
from gammatone import filters
14+
from pyAudioProcessing.features.gammatone import filters
1515

1616
################################################################################
1717
# Classes

requirements/requirements.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,4 @@ simplejson==3.16.0
66
eyed3==0.9.5
77
pydub==0.23.1
88
python-magic-bin==0.4.14
9-
git+https://github.com/detly/gammatone.git@0626328ef7c31d3b33214db2fdcd52e8601eb4c5#egg=gammatone
10-
git+https://github.com/tyiannak/pyAudioAnalysis.git@68e5859c0ec943096ab7d2d614bffc00107dbd9d#egg=pyAudioAnalysis
9+
pyAudioAnalysis==0.2.5

setup.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,38 @@
1+
import os
2+
import codecs
13
import setuptools
24

5+
PROJECT = os.path.abspath(os.path.dirname(__file__))
6+
REQUIREMENTS_PATH = "requirements/requirements.txt"
7+
# The text of the README file
8+
with open("README.md", "r") as f:
9+
long_description = f.read()
10+
11+
def read(path):
12+
with codecs.open(os.path.join(PROJECT, path), "rb", "utf-8") as f:
13+
return f.read()
14+
15+
def get_requirements(path=REQUIREMENTS_PATH):
16+
"""
17+
Returns a list of requirements defined by REQUIREMENTS_PATH.
18+
"""
19+
requirements = []
20+
for line in read(path).splitlines():
21+
requirements.append(line.strip())
22+
return requirements
23+
324
setuptools.setup(
425
name='pyAudioProcessing',
5-
version='1.0',
26+
version='1.1.3',
627
description='Audio processing-feature extraction and building machine learning models from audio data.',
28+
long_description=long_description,
29+
long_description_content_type="text/markdown",
730
author='Jyotika Singh',
8-
packages=['pyAudioProcessing'],
31+
packages=setuptools.find_packages(where=PROJECT),
932
url="https://github.com/jsingh811/pyAudioProcessing",
33+
install_requires=get_requirements(),
34+
python_requires='>=3.6, <4',
35+
py_modules=["pyAudioProcessing"],
1036
classifiers=[
1137
"Programming Language :: Python :: 3",
1238
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",

0 commit comments

Comments
 (0)