#!pip install tinymlgen
The Fast Fourier Transform¶
#!c:\users\pcimed\appdata\local\programs\python\python37\python.exe -m pip install --upgrade pip
Use¶
from tinymlgen import port
if __name__ == '__main__':
tf_model = create_tf_model()
c_code = port(tf_model)
Configuration¶
You can pass a few parameters to the port function:
optimize (=True): apply optimizers to the exported model. Can either be a list of optimizers or a boolean, in which case OPTIMIZE_FOR_SIZE is applied variable_name (='model_data'): give the exported array a custom name pretty_print (=False): print the array in a nicely formatted arrangement
Fourier transform This is where the Fourier Transform comes in. This method makes use of the fact that every periodic function can be represented as a sum of (infinitely many) sine waves. In the figure below this is illustrated, as a step function is approximated by a sum of sine waves.
N = number of samples
n = current sample
xn = value of the signal at time n
k = current frequency (0 Hz to N-1 Hz)
Xk = Result of the DFT (amplitude and phase)
Note that a dot product is defined as:
import matplotlib.pyplot as plt
import numpy as np
import copy
import time
import ctypes
from ctypes import *
# from https://www.ritchievink.com/blog/2017/04/23/understanding-the-fourier-transform-by-example/
def DFT(x):
    """
    Compute the discrete Fourier Transform of the 1D array x.

    :param x: (array) input signal samples
    :return: complex array of DFT coefficients, same length as x
    """
    # Build the N x N matrix of complex exponentials e^{-2j*pi*k*n/N}
    # and apply it to the signal in one matrix-vector product.
    num_samples = x.size
    sample_idx = np.arange(num_samples)
    freq_idx = sample_idx.reshape((num_samples, 1))
    basis = np.exp(-2j * np.pi * freq_idx * sample_idx / num_samples)
    return basis @ x
ts=time.time_ns()
test=np.int32([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15])
te=time.time_ns()
print(te-ts)
print(test)
56025 [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
ts=time.time_ns()
DFT(test)
te=time.time_ns()
print(te-ts)
20256818
# Quarter-wave sine lookup table: index 0..127 maps 0..90 degrees, values
# scaled to 0..236.  The Arduino original declares this as `byte`, i.e.
# UNSIGNED 8-bit; np.byte is SIGNED int8 and cannot hold the entries above
# 127 (129..236 would overflow / raise in modern NumPy), so np.uint8 is the
# correct dtype.
isin_data = np.array([
    0, 1, 3, 4, 5, 6, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20,
    22, 23, 24, 26, 27, 28, 29, 31, 32, 33, 35, 36, 37, 39, 40, 41, 42,
    44, 45, 46, 48, 49, 50, 52, 53, 54, 56, 57, 59, 60, 61, 63, 64, 65,
    67, 68, 70, 71, 72, 74, 75, 77, 78, 80, 81, 82, 84, 85, 87, 88, 90,
    91, 93, 94, 96, 97, 99, 100, 102, 104, 105, 107, 108, 110, 112, 113, 115, 117,
    118, 120, 122, 124, 125, 127, 129, 131, 133, 134, 136, 138, 140, 142, 144, 146, 148,
    150, 152, 155, 157, 159, 161, 164, 166, 169, 171, 174, 176, 179, 182, 185, 188, 191,
    195, 198, 202, 206, 210, 215, 221, 227, 236], dtype=np.uint8)
# Powers of two used to clip N and size the FFT stages.
Pow2 = np.array([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096],
                dtype=np.uint16)
# Correction-step counts for the fastRSS root-sum-square approximation
# (unsigned, matching the Arduino `byte` declaration).
RSSdata = np.array([7, 6, 6, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2],
                   dtype=np.uint8)
def fastRSS(a, b):
    """
    Fast integer approximation of sqrt(a^2 + b^2) (root-sum-square).

    Port of the Arduino fastRSS(): instead of a true square root it walks a
    small correction table (module-global RSSdata) so that only shifts, adds
    and compares are needed.

    :param a: first component (any sign)
    :param b: second component (any sign)
    :return: approximate magnitude, a non-negative integer
    """
    a = np.int16(a)
    b = np.int16(b)
    if a == 0 and b == 0:
        return 0
    # Work with magnitudes only.
    if a < 0:
        a = -a
    if b < 0:
        b = -b
    # Renamed from the original's `max`/`min`, which shadowed the builtins.
    if a > b:
        big, small = a, b
    else:
        big, small = b, a
    # If one component dominates (> 3x), it already approximates the norm.
    if big > (small + small + small):
        return big
    # Otherwise add a table-driven correction proportional to small/8.
    step = small >> 3
    if step == 0:
        step = 1
    level = 0
    reach = small
    while reach < big:
        reach = reach + step
        level = level + 1
    n_steps = RSSdata[level]
    step = step >> 1
    for _ in range(n_steps):
        big = big + step
    return big
def fast_sine(Amp, th):
    """
    Fast integer approximation of Amp * sin(th).

    Port of the Arduino fast_sine(): the angle is in units where 1024 == 2*pi
    (360 deg).  A binary search over the quarter-wave lookup table (module
    global isin_data) refines the amplitude, so only shifts/adds/compares are
    needed.  Dead lines from the original port (type-object assignments and
    the unused `occuracy` typo) are removed.

    :param Amp: amplitude to scale the sine by
    :param th: angle, 1024 units per full turn (wrapped into range)
    :return: approximate Amp*sin(th) as an integer
    """
    Amp = np.int16(Amp)
    th = np.int16(th)
    # Tuning knob from the original: 1 (fastest) .. 7 (most accurate);
    # 5 is the recommended value for typical applications.
    accuracy = 5
    # Wrap the angle into [0, 1024).
    while th > 1024:
        th = th - 1024
    while th < 0:
        th = th + 1024
    # Reduce to the first quadrant; remember the quadrant for the sign.
    quad = th >> 8
    if quad == 1:
        th = 512 - th
    elif quad == 2:
        th = th - 512
    elif quad == 3:
        th = 1024 - th
    lo = 0
    hi = 128  # the lookup table spans one quarter wave (128 entries)
    step = Amp >> 1  # original computed (0 + Amp) >> 1
    Amp = step
    # Binary search: each pass halves the correction step.
    for _ in range(accuracy):
        probe = (lo + hi) >> 1
        step = step >> 1
        if th > isin_data[probe]:
            lo = probe
            Amp = Amp + step
        elif th < isin_data[probe]:
            hi = probe
            Amp = Amp - step
    # Quadrants 2 and 3 are the negative half of the wave.
    if quad == 2:
        Amp = 0 - Amp
    elif quad == 3:
        Amp = 0 - Amp
    return Amp
def fast_cosine(Amp, th):
    """Fast integer approximation of Amp * cos(th), angle in 1024-per-turn units."""
    # Identity: cos(th) = sin(90 deg - th); 256 angle units == 90 degrees.
    return fast_sine(Amp, 256 - th)
fast_sine( 1024, 0)
16
'''
int in[],int N,float Frequency
Code to perform High speed and Accurate FFT on arduino,
setup:
1. in[] : Data array,
2. N : Number of sample (recommended sample size 2,4,8,16,32,64,128,256,512...)
3. Frequency: sampling frequency required as input (Hz)
It will by default return frequency with max amplitude,
if you need complex output or magnitudes uncomment required sections
If sample size is not in power of 2 it will be clipped to lower side of number.
i.e, for 150 number of samples, code will consider first 128 sample, remaining sample will be omitted.
For Arduino nano, FFT of more than 256 sample not possible due to mamory limitation
Code by ABHILASH
Contact: abhilashpatel121@gmail.com
Documentation & details: https://www.instructables.com/member/abhilash_patel/instructables/
Update(06/05/21): Correction made for support on Arduino Due
'''
def Approx_FFT(inn, N, Frequency):
    """
    Integer approximate FFT, ported from the Arduino Approx_FFT().

    Uses only shift/add arithmetic plus the fast_sine/fast_cosine lookup
    helpers and a bit-reversal reorder.  The sample count is clipped down to
    the nearest power of two (via the module-global Pow2 table).

    Structure restored 1:1 from the C reference implementation (the notebook
    export had lost the indentation).

    :param inn: input samples (converted to int16; scaled in place)
    :param N: number of samples (clipped to a power of two)
    :param Frequency: sampling frequency in Hz (kept for interface parity;
                      unused in this truncated port, which returns bins
                      instead of the peak frequency)
    :return: (real, imag) int16 arrays of the first N/2 FFT bins
    """
    inn = np.int16(inn)
    N = np.int16(N)
    Frequency = np.float32(Frequency)
    # `scale` / `check` kept as plain Python ints: the original np.byte
    # would overflow at `scale + 128` (int8 max is 127).
    scale = 0
    check = 0
    # All-zero input: nothing to transform.
    if not inn.any():
        return inn, inn

    data_max = 0
    data_avg = 0
    data_min = 0
    # Find o such that Pow2[o] is the largest power of two <= N.
    o = 0
    for i in range(12):
        if Pow2[i] <= N:
            o = i
    a = Pow2[o]

    out_r = np.zeros(a, dtype=np.int16)   # real part of the transform
    out_im = np.zeros(a, dtype=np.int16)  # imaginary part of the transform
    # Min / max / average of the input, used for scaling below.
    for i in range(a):
        data_avg = data_avg + inn[i]
        if inn[i] > data_max:
            data_max = inn[i]
        if inn[i] < data_min:
            data_min = inn[i]
    data_avg = data_avg >> o
    data_mag = data_max - data_min
    temp11 = data_mag

    # Scale the (mean-removed) data into roughly -512..+512.
    if data_mag > 1024:
        while temp11 > 1024:
            temp11 = temp11 >> 1
            scale = scale + 1
    if data_mag < 1024:
        while temp11 < 1024:
            temp11 = temp11 << 1
            scale = scale + 1
    if data_mag > 1024:
        for i in range(a):
            inn[i] = inn[i] - data_avg
            inn[i] = inn[i] >> scale
        scale = 128 - scale  # scale > 128 later means "input was shifted up"
    if data_mag < 1024:
        scale = scale - 1
        for i in range(a):
            inn[i] = inn[i] - data_avg
            inn[i] = inn[i] << scale
        scale = 128 + scale

    # Bit-reversal order, built incrementally in out_im.
    x = 0
    for b in range(o):
        c1 = Pow2[b]
        f = Pow2[o] // (c1 + c1)  # integer stride (original used float `/`)
        for j in range(c1):
            x = x + 1
            out_im[x] = out_im[j] + f
    # Reorder the input as per the bit-reversed index table.
    for i in range(a):
        out_r[i] = inn[out_im[i]]
        out_im[i] = 0

    # Butterfly passes.
    for i in range(o):
        i10 = Pow2[i]                     # butterfly half-size
        i11 = int(Pow2[o] / Pow2[i + 1])  # butterflies per twiddle angle
        e = 1024 / Pow2[i + 1]            # angle step; 1024 == 360 deg
        e = 0 - e
        n1 = 0
        for j in range(i10):
            c = e * j  # twiddle angle for this column (1024 units per turn)
            while c < 0:
                c = c + 1024
            while c > 1024:
                c = c - 1024
            n1 = j
            for k in range(i11):
                temp4 = i10 + n1
                # Exact multiples of 90 deg bypass the sine approximation.
                if c == 0:
                    tr = out_r[temp4]
                    ti = out_im[temp4]
                elif c == 256:
                    tr = -out_im[temp4]
                    ti = out_r[temp4]
                elif c == 512:
                    tr = -out_r[temp4]
                    ti = -out_im[temp4]
                elif c == 768:
                    tr = out_im[temp4]
                    ti = -out_r[temp4]
                elif c == 1024:
                    tr = out_r[temp4]
                    ti = out_im[temp4]
                else:
                    # fast sine/cosine give (approx) A*sin / A*cos directly.
                    tr = fast_cosine(out_r[temp4], c) - fast_sine(out_im[temp4], c)
                    ti = fast_sine(out_r[temp4], c) + fast_cosine(out_im[temp4], c)
                out_r[n1 + i10] = out_r[n1] - tr
                out_r[n1] = out_r[n1] + tr
                # int16 can only hold about +-31000: flag for rescale.
                if out_r[n1] > 15000 or out_r[n1] < -15000:
                    check = 1
                out_im[n1 + i10] = out_im[n1] - ti
                out_im[n1] = out_im[n1] + ti
                if out_im[n1] > 15000 or out_im[n1] < -15000:
                    check = 1
                n1 = n1 + i10 + i10
        if check == 1:
            # Halve everything to prevent int16 overflow; track in `scale`.
            for m in range(a):
                out_r[m] = out_r[m] >> 1
                out_im[m] = out_im[m] >> 1
            check = 0
            scale = scale - 1

    # Undo the input scaling where possible (see the scale encoding above).
    if scale > 128:
        scale = scale - 128
        for i in range(a):
            out_r[i] = out_r[i] >> scale
            out_im[i] = out_im[i] >> scale
        scale = 0
    else:
        # Result is a multiple of 2**scale (could not shift back safely).
        scale = 128 - scale

    half = np.int16(N / 2)
    return out_r[:half], out_im[:half]
'''
oo_r=out_r
oo_i=out_im
fout=fm=fstp=np.int32;
fstep=np.float64(Frequency/N)
fstp=fstep;
fout=0;fm=0;
for i in range(1,Pow2[o-1]): # getting amplitude from compex number
out_r[i]=fastRSS(out_r[i],out_im[i]);
#Approx RSS function used to calculated magnitude quickly
out_im[i]=out_im[i-1]+fstp;
if fout<out_r[i]:
fm=i
fout=out_r[i]
#un comment to print Amplitudes (1st value (offset) is not printed)
#Serial.print(out_r[i]); Serial.print("\t");
#Serial.print("*2^");Serial.println(scale);
#end for
print(out_r[i],"*2^")
#float fa,fb,fc;
fa=out_r[fm-1];
fb=out_r[fm];
fc=out_r[fm+1];
fstep=(fa*(fm-1)+fb*fm+fc*(fm+1))/(fa+fb+fc);
#return fstep*Frequency/N
print(fstep*Frequency/N)
return oo_r, oo_i #out_r, out_im
'''
'\n oo_r=out_r\n oo_i=out_im\n fout=fm=fstp=np.int32;\n \n fstep=np.float64(Frequency/N)\n fstp=fstep;\n fout=0;fm=0;\n\n for i in range(1,Pow2[o-1]): # getting amplitude from compex number\n out_r[i]=fastRSS(out_r[i],out_im[i]);\n\n #Approx RSS function used to calculated magnitude quickly \n out_im[i]=out_im[i-1]+fstp;\n if fout<out_r[i]:\n fm=i\n fout=out_r[i]\n \n #un comment to print Amplitudes (1st value (offset) is not printed)\n #Serial.print(out_r[i]); Serial.print("\t"); \n #Serial.print("*2^");Serial.println(scale); \n #end for\n print(out_r[i],"*2^")\n\n #float fa,fb,fc;\n fa=out_r[fm-1];\n fb=out_r[fm]; \n fc=out_r[fm+1];\n fstep=(fa*(fm-1)+fb*fm+fc*(fm+1))/(fa+fb+fc);\n\n #return fstep*Frequency/N\n print(fstep*Frequency/N)\n return oo_r, oo_i #out_r, out_im\n \n'
#test=np.ones(16,dtype=np.int32)
test=np.array([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],dtype=np.int32)
test
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
dtype=int32)
print(Approx_FFT(test,16,1000))
(array([ 8, -7, -8, -4, -8, -13, -8, -10], dtype=int16), array([ 0, 50, 23, 18, 8, 3, 7, 2], dtype=int16))
np.fft.fft(test)
array([120. +0.j , -8.+40.21871594j, -8.+19.3137085j ,
-8.+11.9728461j , -8. +8.j , -8. +5.3454291j ,
-8. +3.3137085j , -8. +1.59129894j, -8. +0.j ,
-8. -1.59129894j, -8. -3.3137085j , -8. -5.3454291j ,
-8. -8.j , -8.-11.9728461j , -8.-19.3137085j ,
-8.-40.21871594j])
ts=time.time_ns()
Approx_FFT(test,16,1000)
te=time.time_ns()
print(te-ts)
1243869
ts=time.time_ns()
np.fft.fft(test)
te=time.time_ns()
print(te-ts)
print(np.fft.fft(test).real)
print(np.fft.fft(test).imag)
130885 [120. -8. -8. -8. -8. -8. -8. -8. -8. -8. -8. -8. -8. -8. -8. -8.] [ 0. 40.21871594 19.3137085 11.9728461 8. 5.3454291 3.3137085 1.59129894 0. -1.59129894 -3.3137085 -5.3454291 -8. -11.9728461 -19.3137085 -40.21871594]
data_test=np.int64(56)
oo=int(5)
np.right_shift(data_test,oo)
1
import matplotlib.pyplot as plt
import numpy as np
import copy
import time
from ctypes import *
import ctypes
'''
import ctypes
import numpy
c_float_p = ctypes.POINTER(ctypes.c_float)
data = numpy.array([[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]])
data = data.astype(numpy.float32)
data_p = data.ctypes.data_as(c_float_p)
'''
test=np.arange(16)
#test=np.ones(16,dtype=np.int32);
test=test.astype(np.int32)
test=test.ctypes.data_as(POINTER(ctypes.c_int32))
xx = CDLL('imfft4.so')
xx.imff.argtypes = POINTER(c_int32),c_size_t,POINTER(c_int32)
xx.imff.restype = None
out = (c_int32*32)()
inn = (c_int16*32)()
ts=time.time_ns()
xx.imff(test,16,out)
te=time.time_ns()
print(te-ts)
#print(list(test))
print(list(out))
testnp=np.int32(out)
104206 [8, -8, -8, -9, -8, -8, -8, -9, -8, -9, -8, -8, -8, -9, -8, -8, 0, 40, 19, 11, 8, 5, 3, 1, 0, -2, -4, -6, -8, -12, -20, -41]
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
--------------------------------------------------------------------------- NameError Traceback (most recent call last) /tmp/ipykernel_3251782/2711954765.py in <module> ----> 1 print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU'))) NameError: name 'tf' is not defined
zz=np.zeros(16,dtype=np.int16);
zz=zz.astype(np.int32)
zz=zz.ctypes.data_as(POINTER(c_int32))
print(zz)
<__main__.LP_c_int object at 0x7fffc32b89c0>
ts=time.time_ns()
ts=time.time_ns()
xx.imff(zz,16,out)
te=time.time_ns()
print(te-ts)
47770
testnp
from ctypes import *
def imfft(inn, N):
    """
    Run the native integer FFT from imfft4.so on `inn` and return complex bins.

    The shared library writes 2*N int32 values into `out`: the first N are
    the real parts, the last N the imaginary parts.

    :param inn: input samples (numpy array; converted to int32)
    :param N: number of samples
    :return: complex numpy array of N approximate FFT bins
    """
    n_out = np.int32(N * 2)
    inn = inn.astype(np.int32)
    inn_ptr = inn.ctypes.data_as(POINTER(c_int32))
    out = (ctypes.c_int32 * n_out)()
    # `xx` is the CDLL('imfft4.so') handle loaded at module level.
    xx.imff(inn_ptr, N, out)
    outnp = np.int32(out)
    # First half of the buffer = real parts, second half = imaginary parts.
    return np.vectorize(complex)(outnp[0:N], outnp[N:])
ts=time.time_ns()
test=np.arange(16)
te=time.time_ns()
print(te-ts)
tt=imfft(test,16)
print(tt)
54983 [ 8. +0.j -8.+40.j -8.+19.j -9.+11.j -8. +8.j -8. +5.j -8. +3.j -9. +1.j -8. +0.j -9. -2.j -8. -4.j -8. -6.j -8. -8.j -9.-12.j -8.-20.j -8.-41.j]
zz=np.ones(16,dtype=np.int32)
ts=time.time_ns()
r=imfft(zz,16)
te=time.time_ns()
print(te-ts)
print(r)
586168 [0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j]
#fun(ctypes.c_void_p(indata.ctypes.data), ctypes.c_int(5), ctypes.c_int(6),
# ctypes.c_void_p(outdata.ctypes.data))
ts=time.time_ns()
r=np.fft.fft(np.arange(16))
te=time.time_ns()
print(te-ts)
print(r)
1183070 [120. +0.j -8.+40.21871594j -8.+19.3137085j -8.+11.9728461j -8. +8.j -8. +5.3454291j -8. +3.3137085j -8. +1.59129894j -8. +0.j -8. -1.59129894j -8. -3.3137085j -8. -5.3454291j -8. -8.j -8.-11.9728461j -8.-19.3137085j -8.-40.21871594j]
np.fft.fft(test).real
array([120., -8., -8., -8., -8., -8., -8., -8., -8., -8., -8.,
-8., -8., -8., -8., -8.])
np.fft.fft(test).imag
array([ 0. , 40.21871594, 19.3137085 , 11.9728461 ,
8. , 5.3454291 , 3.3137085 , 1.59129894,
0. , -1.59129894, -3.3137085 , -5.3454291 ,
-8. , -11.9728461 , -19.3137085 , -40.21871594])
array([496.+0.00000000e+00j, -16.+1.62450726e+02j, -16.+8.04374319e+01j,
-16.+5.27449313e+01j, -16.+3.86274170e+01j, -16.+2.99338946e+01j,
-16.+2.39456922e+01j, -16.+1.94960564e+01j, -16.+1.60000000e+01j,
-16.+1.31308607e+01j, -16.+1.06908582e+01j, -16.+8.55217818e+00j,
-16.+6.62741700e+00j, -16.+4.85354694e+00j, -16.+3.18259788e+00j,
-16.+1.57586245e+00j, -16.+4.93916428e-14j, -16.-1.57586245e+00j,
-16.-3.18259788e+00j, -16.-4.85354694e+00j, -16.-6.62741700e+00j,
-16.-8.55217818e+00j, -16.-1.06908582e+01j, -16.-1.31308607e+01j,
-16.-1.60000000e+01j, -16.-1.94960564e+01j, -16.-2.39456922e+01j,
-16.-2.99338946e+01j, -16.-3.86274170e+01j, -16.-5.27449313e+01j,
-16.-8.04374319e+01j, -16.-1.62450726e+02j])
n = 128
# definition de a
a = np.zeros(n)
a[1] = 1
# visualisation de a
# on ajoute a droite la valeur de gauche pour la periodicite
plt.subplot(311)
plt.plot( np.append(a, a[0]) )
# calcul de A
A = np.fft.fft(a)
# visualisation de A
# on ajoute a droite la valeur de gauche pour la periodicite
B = np.append(A, A[0])
plt.subplot(312)
plt.plot(np.real(B))
plt.ylabel("partie reelle")
plt.subplot(313)
plt.plot(np.imag(B))
plt.ylabel("partie imaginaire")
plt.show()
n = 128
# definition de a
a = np.zeros(n)
a[1] = 100
# visualisation de a
# on ajoute a droite la valeur de gauche pour la periodicite
plt.subplot(311)
plt.plot( np.append(a, a[0]) )
# calcul de A
#a=imhann(a)
A = imfft(a,128)
# visualisation de A
# on ajoute a droite la valeur de gauche pour la periodicite
B = np.append(A, A[0])
plt.subplot(312)
plt.plot(np.real(B))
plt.ylabel("partie reelle")
plt.subplot(313)
plt.plot(np.imag(B))
plt.ylabel("partie imaginaire")
plt.show()
Visualisation des valeurs complexes avec une échelle colorée ¶
# visualisation de a
# on ajoute a droite la valeur de gauche pour la periodicite
plt.subplot(211)
plt.plot( np.append(a, a[0]) )
# calcul de k
k = np.arange(n)
# visualisation de A - Attention au changement de variable
# on ajoute a droite la valeur de gauche pour la periodicite
plt.subplot(212)
x = np.append(k, k[-1]+k[1]-k[0]) # calcul d'une valeur supplementaire
z = np.append(A, A[0])
X = np.array([x,x])
y0 = np.zeros(len(x))
y = np.abs(z)
Y = np.array([y0,y])
Z = np.array([z,z])
C = np.angle(Z)
plt.plot(x,y,'k')
plt.pcolormesh(X, Y, C, shading="gouraud", cmap=plt.cm.hsv, vmin=-np.pi, vmax=np.pi)
plt.colorbar()
plt.show()
Exploring the FFT ¶
Let’s write some code to find out what an FFT is actually doing.
First we define a simple signal containing an addition of two sine waves. One with a frequency of 40 Hz and one with a frequency of 90 Hz.
t = np.linspace(0, 0.5, 256)
s = np.sin(40 * 2 * np.pi * t) + 0.5 * np.sin(90 * 2 * np.pi * t)*16000
plt.ylabel("Amplitude")
plt.xlabel("Time [s]")
plt.plot(t, s)
plt.show()
Complex¶
In order to retrieve a spectrum of the frequency of the time signal mentioned above we must take a FFT on that sequence.
fft = np.fft.fft(s)
for i in range(2):
print("Value at index {}:\t{}".format(i, fft[i + 1]), "\nValue at index {}:\t{}".format(fft.size -1 - i, fft[-1 - i]))
Value at index 0: (1.9473561461272766-158.67688860630324j) Value at index 255: (1.9473561461295503+158.67688860629698j) Value at index 1: (7.800987376524063-317.7769954775117j) Value at index 254: (7.800987376525427+317.77699547750626j)
fft = imfft(s,len(s))
for i in range(2):
print("Value at index {}:\t{}".format(i, fft[i + 1]), "\nValue at index {}:\t{}".format(fft.size -1 - i, fft[-1 - i]))
Value at index 0: (-28-17j) Value at index 255: (-18-24j) Value at index 1: (-18-18j) Value at index 254: (-10-6j)
Because the second half of the sequence gives us no new information we can already conclude that the half of the FFT sequence is the output we need.¶
The complex output numbers of the FFT contains the following information:
Amplitude of a certain frequency sine wave (energy). Phase offset of a certain frequency sine wave. The amplitude is retrieved by taking the absolute value of the number and the phase offset is obtained by computing the angle of the number.
Spectrum We are interested in the energy of each frequency, so we can determine the absolute value of the FFT’s output. To get a good insight in the spectrum the energy should be plotted against the frequency. Each discrete number output of the FFT corresponds to a certain frequency. The frequency resolution is determined by:
Δf=fs/N
Putting it all together we can plot the frequency spectrum for our simple sine wave function. We plot only half of the spectrum, because that is the only half giving us real information.
fft = np.fft.fft(imhann(s))
T = t[1] - t[0] # sampling interval
N = s.size
# 1/T = frequency
f = np.linspace(0, 1 / T, N)
plt.ylabel("Amplitude")
plt.xlabel("Frequency [Hz]")
plt.bar(f[:N // 2], np.abs(fft)[:N // 2] * 1 / N, width=1.5) # 1 / N is a normalization factor
plt.show()
fft = imfft(imhann(s),len(s))
T = t[1] - t[0] # sampling interval
N = s.size
# 1/T = frequency
f = np.linspace(0, 1 / T, N)
plt.ylabel("Amplitude")
plt.xlabel("Frequency [Hz]")
plt.bar(f[:N // 2], np.abs(fft)[:N // 2] * 1 / N, width=1.5) # 1 / N is a normalization factor
plt.show()
def imhann(signal):
    """
    Apply a Hamming window to `signal` in place and return it.

    Bug fix: the original computed 0.54 - (1 - cos(2*pi*r)), which ranges
    from -1.46 to 0.54 and is not a window function; the 0.54 coefficient
    shows a Hamming window was intended:
        w(i) = 0.54 - 0.46 * cos(2*pi*i/(n-1))

    :param signal: mutable numeric sequence (e.g. float numpy array);
                   NOTE an integer array will truncate the weighted values
    :return: the same object, windowed in place
    """
    nb = len(signal)
    if nb < 2:
        # A 0/1-sample signal has no meaningful window (and i/(nb-1) would
        # divide by zero below).
        return signal
    last = nb - 1
    for i in range(nb):
        ratio = i / last
        weight = 0.54 - 0.46 * np.cos(2 * np.pi * ratio)
        signal[i] = signal[i] * weight
    return signal
s=imhann(s)
fft = imfft(s,len(s))
T = t[1] - t[0] # sampling interval
N = s.size
# 1/T = frequency
f = np.linspace(0, 1 / T, N)
plt.ylabel("Amplitude")
plt.xlabel("Frequency [Hz]")
plt.bar(f[:N // 2], np.abs(fft)[:N // 2] * 1 / N, width=1.5) # 1 / N is a normalization factor
plt.show()
As we can see the FFT works! It has given us information about the frequencies of the waves in the time signal.
A FFT is a trade-off between time information and frequency information. By taking a FFT of a time signal, all time information is lost in return for frequency information. To keep information about time and frequencies in one spectrum, we must make a spectrogram. These are DFT’s taken on discrete time windows.
Alright By taking a FFT result of the time signal of Kendrick Lamar's song, we get the spectrum shown below. The frequency scale is plotted on log scale. As we assumed before the natural frequency of my windows is about 100 Hz. In the figure we can see that the most dominant frequencies occur between 10^1.5 and 10^2.2 Hz (30-158 Hz). My windows' natural frequency is right in the middle of the dominant frequencies of the song and thus they may resonate due to the high volume.
Now it is too premature to say it wouldn’t be safe to listen to this song on full volume. However if I really want to be sure about my windows I maybe should examine the frequency of another song.
def spectrogram(samples, sample_rate, stride_ms=10.0,
                window_ms=20.0, max_freq=None, eps=1e-14):
    """
    Compute a log-power spectrogram with a Hanning-windowed short-time FFT.

    :param samples: 1-D numpy array of audio samples
    :param sample_rate: sampling rate in Hz
    :param stride_ms: hop between successive windows, in milliseconds
    :param window_ms: window length, in milliseconds
    :param max_freq: highest frequency (Hz) to keep; defaults to Nyquist
                     (the original crashed on its documented default None
                     when evaluating `freqs <= None`)
    :param eps: floor added before the log to avoid log(0)
    :return: 2-D array of shape (freq_bins, frames) with log power values
    """
    if max_freq is None:
        max_freq = sample_rate / 2.0  # keep everything up to Nyquist
    stride_size = int(0.001 * sample_rate * stride_ms)
    window_size = int(0.001 * sample_rate * window_ms)

    # Extract strided windows (zero-copy view: each column is one frame).
    truncate_size = (len(samples) - window_size) % stride_size
    samples = samples[:len(samples) - truncate_size]
    nshape = (window_size, (len(samples) - window_size) // stride_size + 1)
    nstrides = (samples.strides[0], samples.strides[0] * stride_size)
    windows = np.lib.stride_tricks.as_strided(samples,
                                              shape=nshape, strides=nstrides)
    assert np.all(windows[:, 1] == samples[stride_size:(stride_size + window_size)])

    # Window weighting, squared FFT magnitude, power scaling.
    weighting = np.hanning(window_size)[:, None]
    fft = np.fft.rfft(windows * weighting, axis=0)
    fft = np.absolute(fft)
    fft = fft**2
    scale = np.sum(weighting**2) * sample_rate
    fft[1:-1, :] *= (2.0 / scale)   # double interior bins (one-sided spectrum)
    fft[(0, -1), :] /= scale        # DC and Nyquist bins are not doubled

    # Frequency of each rfft bin.
    freqs = float(sample_rate) / window_size * np.arange(fft.shape[0])

    # Keep bins up to max_freq and take the log power.
    ind = np.where(freqs <= max_freq)[0][-1] + 1
    specgram = np.log(fft[:ind, :] + eps)
    return specgram
Arduino Code ¶
//---------------------------------lookup data------------------------------------//
// Quarter-wave sine lookup: isin_data[i] ~ 236*sin(90deg * i/127), used by
// the binary search in fast_sine().
byte isin_data[128]=
{0, 1, 3, 4, 5, 6, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20,
22, 23, 24, 26, 27, 28, 29, 31, 32, 33, 35, 36, 37, 39, 40, 41, 42,
44, 45, 46, 48, 49, 50, 52, 53, 54, 56, 57, 59, 60, 61, 63, 64, 65,
67, 68, 70, 71, 72, 74, 75, 77, 78, 80, 81, 82, 84, 85, 87, 88, 90,
91, 93, 94, 96, 97, 99, 100, 102, 104, 105, 107, 108, 110, 112, 113, 115, 117,
118, 120, 122, 124, 125, 127, 129, 131, 133, 134, 136, 138, 140, 142, 144, 146, 148,
150, 152, 155, 157, 159, 161, 164, 166, 169, 171, 174, 176, 179, 182, 185, 188, 191,
195, 198, 202, 206, 210, 215, 221, 227, 236};
// Powers of two used to clip N and size the FFT stages.
// NOTE(review): declared with 14 slots but only 13 initializers; the last
// element is zero-initialized -- confirm that is intended.
unsigned int Pow2[14]={1,2,4,8,16,32,64,128,256,512,1024,2048,4096};
// Correction-step counts for the fastRSS magnitude approximation.
byte RSSdata[20]={7,6,6,5,5,5,4,4,4,4,3,3,3,3,3,3,3,2,2,2};
//---------------------------------------------------------------------------------//
//int data[256]={};
// One-time board initialization: open the serial port for debug output.
void setup()
{
Serial.begin(115200); // 115200 baud, must match the serial monitor setting
Serial.println("OK"); // startup marker
}
// Demo loop: run the approximate FFT on a 16-sample ramp and print the
// dominant frequency it reports, then idle.
void loop() {
Serial.println("!");
int dd[]={0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; // test input: 0..15 ramp
Serial.println("before");
float f=Approx_FFT(dd,16,100); // 16 samples at a (nominal) 100 Hz rate
Serial.println("after");
Serial.println(f); // estimated dominant frequency (Hz)
delay(100000); // ~100 s pause between runs
}
//-----------------------------FFT Function----------------------------------------------//
/*
Code to perform High speed and Accurate FFT on arduino,
setup:
1. in[] : Data array,
2. N : Number of sample (recommended sample size 2,4,8,16,32,64,128,256,512...)
3. Frequency: sampling frequency required as input (Hz)
It will by default return frequency with max amplitude,
if you need complex output or magnitudes uncomment required sections
If sample size is not in power of 2 it will be clipped to lower side of number.
i.e, for 150 number of samples, code will consider first 128 sample, remaining sample will be omitted.
For Arduino nano, FFT of more than 256 sample not possible due to memory limitation
Code by ABHILASH
Contact: abhilashpatel121@gmail.com
Documentation & details: https://www.instructables.com/member/abhilash_patel/instructables/
*/
// Integer approximate FFT (see the usage comment above this function).
// Pipeline: scale input to roughly -512..+512, reorder by bit reversal,
// run shift/add butterfly passes using the fast_sine/fast_cosine lookups,
// then convert the peak bins to a frequency estimate via a weighted average
// of the three bins around the maximum magnitude.  Returns that frequency
// in Hz.  `in` is modified in place; N is clipped down to a power of two.
float Approx_FFT(int in[],int N,float Frequency)
{
int a,c1,f,o,x,data_max,data_min=0;
long data_avg,data_mag,temp11;
byte scale,check=0;
data_max=0;
data_avg=0;
data_min=0;
for(int i=0;i<12;i++) //calculating the levels
{
if(Pow2[i]<=N)
{
o=i;
}
}
a=Pow2[o];
int out_r[a]; //real part of transform
int out_im[a]; //imaginory part of transform
for(int i=0;i<a;i++) //getting min max and average for scalling
{
out_r[i]=0; out_im[i]=0;
data_avg=data_avg+in[i];
if(in[i]>data_max)
{
data_max=in[i];
}
if(in[i]<data_min)
{
data_min=in[i];
}
}
data_avg=data_avg>>o;
scale=0;
data_mag=data_max-data_min;
temp11=data_mag;
//scalling data from +512 to -512
if(data_mag>1024)
{
while(temp11>1024)
{
temp11=temp11>>1;
scale=scale+1;
}
}
if(data_mag<1024)
{
while(temp11<1024)
{
temp11=temp11<<1;
scale=scale+1;
}
}
if(data_mag>1024)
{
for(int i=0;i<a;i++)
{
in[i]=in[i]-data_avg;
in[i]=in[i]>>scale;
}
scale=128-scale; // scale>128 encodes "input was shifted down"
}
if(data_mag<1024)
{
scale=scale-1;
for(int i=0;i<a;i++)
{
in[i]=in[i]-data_avg;
in[i]=in[i]<<scale;
}
scale=128+scale;
}
x=0;
for(int b=0;b<o;b++) // bit reversal order stored in im_out array
{
c1=Pow2[b];
f=Pow2[o]/(c1+c1);
for(int j=0;j<c1;j++)
{
x=x+1;
out_im[x]=out_im[j]+f;
}
}
for(int i=0;i<a;i++) // update input array as per bit reverse order
{
out_r[i]=in[out_im[i]];
out_im[i]=0;
}
int i10,i11,n1,tr,ti;
float e;
int c,s,temp4;
for(int i=0;i<o;i++) //fft
{
i10=Pow2[i]; // overall values of sine/cosine
i11=Pow2[o]/Pow2[i+1]; // loop with similar sine cosine
e=1024/Pow2[i+1]; //1024 is equivalent to 360 deg
e=0-e;
n1=0;
for(int j=0;j<i10;j++)
{
c=e*j; //c is angle as where 1024 unit is 360 deg
while(c<0){c=c+1024;}
while(c>1024){c=c-1024;}
n1=j;
for(int k=0;k<i11;k++)
{
temp4=i10+n1;
// exact multiples of 90 deg bypass the sine approximation
if(c==0) {tr=out_r[temp4];
ti=out_im[temp4];}
else if(c==256) {tr= -out_im[temp4];
ti=out_r[temp4];}
else if(c==512) {tr=-out_r[temp4];
ti=-out_im[temp4];}
else if(c==768) {tr=out_im[temp4];
ti=-out_r[temp4];}
else if(c==1024){tr=out_r[temp4];
ti=out_im[temp4];}
else{
tr=fast_cosine(out_r[temp4],c)-fast_sine(out_im[temp4],c); //the fast sine/cosine function gives direct (approx) output for A*sinx
ti=fast_sine(out_r[temp4],c)+fast_cosine(out_im[temp4],c);
}
out_r[n1+i10]=out_r[n1]-tr;
out_r[n1]=out_r[n1]+tr;
if(out_r[n1]>15000 || out_r[n1]<-15000){check=1;} //check for int size, it can handle only +31000 to -31000,
out_im[n1+i10]=out_im[n1]-ti;
out_im[n1]=out_im[n1]+ti;
if(out_im[n1]>15000 || out_im[n1]<-15000){check=1;}
n1=n1+i10+i10;
} // for int k=
}// for int j=
if(check==1)
{ // scalling the matrics if value higher than 15000 to prevent varible from overflowing
for(int i=0;i<a;i++)
{
out_r[i]=out_r[i]>>1;
out_im[i]=out_im[i]>>1;
}
check=0;
scale=scale-1; // tracking overall scalling of input data
}
} //for int i=
if(scale>128)
{
scale=scale-128;
for(int i=0;i<a;i++)
{
out_r[i]=out_r[i]>>scale;
out_im[i]=out_im[i]>>scale;
}
scale=0;
} // revers all scalling we done till here,
else
{
scale=128-scale;
} // in case of nnumber getting higher than 32000, we will represent in as multiple of 2^scale
for(int i=0;i<a;i++)
{
Serial.print(out_r[i]);Serial.print("\t"); // un comment to print RAW o/p
Serial.print(out_im[i]);
Serial.print("i");Serial.print("\t");
Serial.print("*2^");Serial.println(scale);
}
//---> here onward out_r contains amplitude and our_in conntains frequency (Hz)
int fout,fm,fstp;
float fstep;
fstep=Frequency/N;
fstp=fstep;
fout=0;fm=0;
for(int i=1;i<Pow2[o-1];i++) // getting amplitude from compex number
{
out_r[i]=fastRSS(out_r[i],out_im[i]);
// Approx RSS function used to calculated magnitude quickly
out_im[i]=out_im[i-1]+fstp;
if (fout<out_r[i])
{
fm=i;
fout=out_r[i];
}
// un comment to print Amplitudes (1st value (offset) is not printed)
Serial.print(out_r[i]); Serial.print("\t");
Serial.print("*2^");Serial.println(scale);
}
// weighted average of the three bins around the peak refines the estimate
float fa,fb,fc;
fa=out_r[fm-1];
fb=out_r[fm];
fc=out_r[fm+1];
fstep=(fa*(fm-1)+fb*fm+fc*(fm+1))/(fa+fb+fc);
return(fstep*Frequency/N);
}
//---------------------------------fast sine/cosine---------------------------------------//
// Approximate Amp*sin(th) using only integer shifts/adds and the isin_data
// quarter-wave lookup table.  Angle units: 1024 == 360 degrees.
int fast_sine(int Amp, int th)
{
int temp3,m1,m2;
byte temp1,temp2, test,quad,accuracy;
accuracy=5; // set it value from 1 to 7, where 7 being most accurate but slowest
// accuracy value of 5 recommended for typical applicaiton
while(th>1024)
{
th=th-1024;
} // here 1024 = 2*pi or 360 deg
while(th<0)
{
th=th+1024;
}
quad=th>>8; // quadrant 0..3; reduce to first quadrant, remember for sign
if(quad==1){th= 512-th;}
else if(quad==2){th= th-512;}
else if(quad==3){th= 1024-th;}
temp1= 0;
temp2= 128; //2 multiple
m1=0;
m2=Amp;
temp3=(m1+m2)>>1;
Amp=temp3;
for(int i=0;i<accuracy;i++)
{
test=(temp1+temp2)>>1; // binary-search midpoint into isin_data
temp3=temp3>>1; // halve the correction each pass
if(th>isin_data[test])
{
temp1=test;
Amp=Amp+temp3;
m1=Amp;
}
else
if(th<isin_data[test])
{
temp2=test;
Amp=Amp-temp3;
m2=Amp;
}
}
if(quad==2) // quadrants 2 and 3 are the negative half of the wave
{
Amp= 0-Amp;
}
else
if(quad==3)
{
Amp= 0-Amp;
}
return Amp;
}
// Approximate Amp*cos(th) via the identity cos(th) = sin(90deg - th);
// 256 angle units == 90 degrees.
int fast_cosine(int Amp, int th)
{
th=256-th; //cos th = sin (90-th) formula
return(fast_sine(Amp,th));
}
//--------------------------------------------------------------------------------//
//--------------------------------Fast RSS----------------------------------------//
// Approximate sqrt(a*a + b*b) (root-sum-square) with shifts/adds plus the
// RSSdata correction table -- no multiply, divide or sqrt.
int fastRSS(int a, int b)
{
if(a==0 && b==0)
{
return(0);
}
int min,max,temp1,temp2;
byte clevel;
if(a<0) // work with magnitudes only
{
a=-a;
}
if(b<0)
{
b=-b;
}
clevel=0;
if(a>b)
{
max=a;
min=b;
}
else
{
max=b;
min=a;
}
if(max>(min+min+min)) // one side dominates (>3x): max is close enough
{
return max;
}
else
{
temp1=min>>3; if(temp1==0){temp1=1;} // correction step ~ min/8
temp2=min;
while(temp2<max) // count steps separating min from max
{
temp2=temp2+temp1;
clevel=clevel+1;
}
temp2=RSSdata[clevel]; // table-driven number of half-steps to add
temp1=temp1>>1;
for(int i=0;i<temp2;i++)
{
max=max+temp1;
}
return max ;
}
}
File "/tmp/ipykernel_2114829/2462532610.py", line 1 //---------------------------------lookup data------------------------------------// ^ SyntaxError: invalid syntax
Python version ¶
#!pip install librosa
#!pip install seaborn
#!pip uninstall tensorflow -y
import os
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
from IPython import display
# Set the seed value for experiment reproducibility.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)
print(tf.__version__)
2.8.0
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
Num GPUs Available: 0
2022-03-29 20:12:17.264737: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64: 2022-03-29 20:12:17.264770: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
DATASET_PATH = 'data/mini_speech_commands'
data_dir = pathlib.Path(DATASET_PATH)
if not data_dir.exists():
tf.keras.utils.get_file(
'mini_speech_commands.zip',
origin="http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
extract=True,
cache_dir='.', cache_subdir='data')
DATASET_PATH = 'data/imds'
data_dir = pathlib.Path(DATASET_PATH)
commands = np.array(tf.io.gfile.listdir(str(data_dir)))
commands = commands[commands != 'README.md']
print('Commands:', commands)
Commands: ['right' 'go' 'no' 'left' 'stop' 'up' 'down' 'yes']
filenames = tf.io.gfile.glob(str(data_dir) + '/*/*')
filenames = tf.random.shuffle(filenames)
num_samples = len(filenames)
print('Number of total examples:', num_samples)
print('Number of examples per label:',
len(tf.io.gfile.listdir(str(data_dir/commands[0]))))
print('Example file tensor:', filenames[0])
Number of total examples: 8000 Number of examples per label: 1000 Example file tensor: tf.Tensor(b'data/mini_speech_commands/up/918a2473_nohash_1.wav', shape=(), dtype=string)
train_files = filenames[:6400]
val_files = filenames[6400: 6400 + 800]
test_files = filenames[-800:]
print('Training set size', len(train_files))
print('Validation set size', len(val_files))
print('Test set size', len(test_files))
Training set size 6400 Validation set size 800 Test set size 800
print(filenames[5])
tf.Tensor(b'data/imds/avance/sd1.raw', shape=(), dtype=string)
Lire les fichiers audio et leurs étiquettes¶
Dans cette section, vous allez prétraiter l'ensemble de données, en créant des tenseurs décodés pour les formes d'onde et les étiquettes correspondantes. Noter que:
Chaque fichier WAV contient des données de séries chronologiques avec un nombre défini d'échantillons par seconde. Chaque échantillon représente l' amplitude du signal audio à ce moment précis. Dans un système 16 bits , comme les fichiers WAV du jeu de données Mini Speech Commands, les valeurs d'amplitude vont de -32 768 à 32 767. Le taux d'échantillonnage pour cet ensemble de données est de 16 kHz. La forme du tenseur renvoyé par tf.audio.decode_wav est [samples, channels] , où channels est 1 pour mono ou 2 pour stéréo. Le jeu de données mini Speech Commands ne contient que des enregistrements mono.
test_file = tf.io.read_file(DATASET_PATH+'/down/0a9f9af7_nohash_0.wav')
test_audio, _ = tf.audio.decode_wav(contents=test_file)
test_audio.shape
TensorShape([13654, 1])
Définissons maintenant une fonction qui prétraite les fichiers audio WAV bruts de l'ensemble de données en tenseurs audio :¶
def decode_audio(audio_binary):
    """Decode a WAV-encoded binary string into a 1-D float32 waveform.

    `tf.audio.decode_wav` normalizes sample values to the [-1.0, 1.0]
    range; the dataset is mono, so the channels axis is dropped.
    """
    waveform, _ = tf.audio.decode_wav(contents=audio_binary)
    # Mono input: squeeze away the channels axis to get shape (samples,).
    return tf.squeeze(waveform, axis=-1)
def decode_audio(audio_binary):
    """Identity pass-through: return the input unchanged.

    This cell overrides the earlier WAV-decoding variant so that raw,
    already-decoded audio buffers flow through the pipeline untouched.
    """
    return audio_binary
Définissez une fonction qui crée des étiquettes à l'aide des répertoires parents pour chaque fichier :¶
Divisez les chemins de fichiers en tf.RaggedTensor s (tenseurs aux dimensions irrégulières, avec des tranches pouvant avoir des longueurs différentes).
def get_label(file_path):
    """Return the class label of a dataset path: its parent directory name."""
    path_parts = tf.strings.split(
        input=file_path,
        sep=os.path.sep)
    # Indexing (rather than tuple unpacking) keeps this usable inside a
    # TensorFlow graph, where the number of parts is not statically known.
    return path_parts[-2]
Définissez une autre fonction d'assistance — get_waveform_and_label — qui rassemble le tout :¶
L'entrée est le nom du fichier audio WAV. La sortie est un tuple contenant l'audio et les tenseurs d'étiquettes prêts pour l'apprentissage supervisé.
def get_waveform_and_label(file_path):
    """Load one supervised example: (waveform tensor, label tensor)."""
    raw_bytes = tf.io.read_file(file_path)
    return decode_audio(raw_bytes), get_label(file_path)
Créez l'ensemble d'entraînement pour extraire les paires d'étiquettes audio :¶
Créez un tf.data.Dataset avec Dataset.from_tensor_slices et Dataset.map , en utilisant get_waveform_and_label défini précédemment. Vous créerez les ensembles de validation et de test à l'aide d'une procédure similaire ultérieurement.
# Build the training dataset of (waveform, label) pairs; AUTOTUNE lets
# tf.data choose the parallelism level for the map.
AUTOTUNE = tf.data.AUTOTUNE
files_ds = tf.data.Dataset.from_tensor_slices(train_files)
waveform_ds = files_ds.map(
    map_func=get_waveform_and_label,
    num_parallel_calls=AUTOTUNE)
# Plot the first 9 waveforms in a 3x3 grid, each titled with its label.
rows = 3
cols = 3
n = rows * cols
fig, axes = plt.subplots(rows, cols, figsize=(10, 12))
for i, (audio, label) in enumerate(waveform_ds.take(n)):
    r = i // cols
    c = i % cols
    ax = axes[r][c]
    ax.plot(audio.numpy())
    # Fixed y-ticks make amplitude ranges comparable across subplots.
    ax.set_yticks(np.arange(-1.2, 1.2, 0.2))
    label = label.numpy().decode('utf-8')
    ax.set_title(label)
plt.show()
Traçons quelques formes d'onde audio :¶
def get_spectrogram(waveform):
    """Convert a waveform into a magnitude spectrogram with a channels axis.

    Clips/zero-pads the input to exactly 16,000 samples, applies a
    short-time Fourier transform, and returns |STFT| with shape
    (frames, bins, 1) so it can feed convolution layers like an image.
    """
    target_len = 16000
    clipped = waveform[:target_len]
    # Zero-pad up to the fixed clip length so every example has equal size.
    pad = tf.zeros([target_len] - tf.shape(clipped), dtype=tf.float32)
    padded = tf.concat([tf.cast(clipped, dtype=tf.float32), pad], 0)
    # STFT, then keep only the magnitude of the complex result.
    magnitude = tf.abs(tf.signal.stft(
        padded, frame_length=255, frame_step=128))
    # Trailing channels dimension makes the output image-like.
    return magnitude[..., tf.newaxis]
# Inspect one example: label, waveform/spectrogram shapes, audio playback.
for waveform, label in waveform_ds.take(1):
    label = label.numpy().decode('utf-8')
    spectrogram = get_spectrogram(waveform)
print('Label:', label)
print('Waveform shape:', waveform.shape)
print('Spectrogram shape:', spectrogram.shape)
print('Audio playback')
display.display(display.Audio(waveform, rate=16000))
Label: up Waveform shape: (15153,) Spectrogram shape: (124, 129, 1) Audio playback
def plot_spectrogram(spectrogram, ax):
    """Render a log-scaled spectrogram on matplotlib axes `ax`.

    :param spectrogram: 2-D (time, freq) array, or 3-D with a trailing
        singleton channels axis (which is squeezed away).
    :param ax: matplotlib Axes (anything exposing `pcolormesh`).
    """
    # Collapse a singleton channels axis so we work with a 2-D array.
    if spectrogram.ndim > 2:
        assert spectrogram.ndim == 3
        spectrogram = np.squeeze(spectrogram, axis=-1)
    # Transpose so time runs along x (columns); eps avoids log(0).
    eps = np.finfo(float).eps
    log_spec = np.log(spectrogram.T + eps)
    n_freq, n_time = log_spec.shape
    x_coords = np.linspace(0, np.size(spectrogram), num=n_time, dtype=int)
    ax.pcolormesh(x_coords, range(n_freq), log_spec)
# Show the waveform (top) and its log spectrogram (bottom) stacked.
fig, axes = plt.subplots(2, figsize=(12, 8))
timescale = np.arange(waveform.shape[0])
axes[0].plot(timescale, waveform.numpy())
axes[0].set_title('Waveform')
axes[0].set_xlim([0, 16000])
plot_spectrogram(spectrogram.numpy(), axes[1])
axes[1].set_title('Spectrogram')
plt.show()
def get_spectrogram_and_label_id(audio, label):
    """Map (audio, label-string) to (spectrogram, integer label id)."""
    # The label id is the index of `label` in the global `commands` array.
    label_id = tf.argmax(label == commands)
    return get_spectrogram(audio), label_id
# Map the waveform dataset to (spectrogram, label_id) pairs.
spectrogram_ds = waveform_ds.map(
    map_func=get_spectrogram_and_label_id,
    num_parallel_calls=AUTOTUNE)
# Plot the first 9 spectrograms in a 3x3 grid, titled with their commands.
rows = 3
cols = 3
n = rows*cols
fig, axes = plt.subplots(rows, cols, figsize=(10, 10))
for i, (spectrogram, label_id) in enumerate(spectrogram_ds.take(n)):
    r = i // cols
    c = i % cols
    ax = axes[r][c]
    plot_spectrogram(spectrogram.numpy(), ax)
    ax.set_title(commands[label_id.numpy()])
    ax.axis('off')
plt.show()
#print(spectrogram.numpy()[:10])
print(spectrogram.shape)
(124, 129, 1)
Arduino Spectrogram FFT ¶
def im_stft(signal, frame_length=128, frame_step=64):
    """Short-time Fourier transform built on the custom integer `imfft` kernel.

    :param signal: 1-D array-like of samples (cast to int32 below).
    :param frame_length: samples per analysis frame.
    :param frame_step: hop between consecutive frames.
    :return: complex64 tensor of shape (nbsteps, frame_length/2).

    NOTE(review): unlike tf.signal.stft this keeps only frame_length/2 bins
    (tf keeps frame_length//2 + 1), and nbsteps is floor-truncated, so the
    final partial step is dropped — confirm both are intended.
    """
    # cast signal to np.int32 (fixed-point input for the integer FFT)
    signal=np.int32(signal)
    nbsamples=np.size(signal)
    nbsteps=np.int16((nbsamples-frame_length)/frame_step)
    spec_out=np.ndarray(shape=(nbsteps,np.int16(frame_length/2)), dtype=np.complex64)
    print(nbsamples, nbsteps,frame_length,frame_step)
    for i in np.arange(nbsteps):
        # `imfft` is defined elsewhere in this notebook; it returns the
        # complex FFT of one frame. Keep only the first half of the bins.
        spec_out[i]=imfft(signal[i*frame_step:i*frame_step+frame_length],frame_length)[:np.int16(frame_length/2)]
    ret=tf.convert_to_tensor (spec_out)
    return ret
def get_im_spectrogram(waveform):
    """Spectrogram via the integer-math `im_stft` (Arduino-style FFT).

    Clips/zero-pads the input to 4,000 samples, applies `im_stft`, and
    returns the magnitude with a trailing channels axis, mirroring the
    structure of `get_spectrogram`.
    """
    target_len = 4000
    clipped = waveform[:target_len]
    # Zero-pad up to the fixed clip length so every example has equal size.
    pad = tf.zeros([target_len] - tf.shape(clipped), dtype=tf.float32)
    padded = tf.concat([tf.cast(clipped, dtype=tf.float32), pad], 0)
    # im_stft returns complex STFT values; keep only their magnitude.
    magnitude = tf.abs(im_stft(padded, frame_length=128, frame_step=64))
    # Trailing channels dimension makes the output image-like.
    return magnitude[..., tf.newaxis]
# REPL inspection of the current `waveform` type (a NumPy array here).
type(waveform)
numpy.ndarray
# Load a raw 8-bit recording directly from disk (no WAV header to decode).
waveform=np.fromfile('data/imds/avance/sd8.raw', dtype='uint8')
ww=waveform
print(ww.shape)
(4000,)
# Same inspection as the earlier cell, but with the integer-FFT spectrogram.
# NOTE(review): this cell raised InvalidArgumentError (traceback below) —
# the slice inside get_im_spectrogram failed on a 0-dim input during
# tracing; verify what waveform_ds actually yields here.
for waveform, label in waveform_ds.take(1):
    label = label.numpy().decode('utf-8')
    spectrogram = get_im_spectrogram(waveform)
print('Label:', label)
print('Waveform shape:', waveform.shape)
print('Spectrogram shape:', spectrogram.shape)
print('Audio playback')
display.display(display.Audio(waveform, rate=4000))
2022-03-28 16:45:16.051429: W tensorflow/core/framework/op_kernel.cc:1745] OP_REQUIRES failed at strided_slice_op.cc:108 : INVALID_ARGUMENT: Index out of range using input dim 0; input has only 0 dims
--------------------------------------------------------------------------- InvalidArgumentError Traceback (most recent call last) /tmp/ipykernel_3251782/2707579568.py in <module> 1 for waveform, label in waveform_ds.take(1): 2 label = label.numpy().decode('utf-8') ----> 3 spectrogram = get_im_spectrogram(waveform) 4 5 print('Label:', label) /tmp/ipykernel_3251782/2567817311.py in get_im_spectrogram(waveform) 2 # Zero-padding for an audio waveform with less than 16,000 samples. 3 input_len = 4000 ----> 4 waveform = waveform[:input_len] 5 zero_padding = tf.zeros( 6 [4000] - tf.shape(waveform), ~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/util/traceback_utils.py in error_handler(*args, **kwargs) 151 except Exception as e: 152 filtered_tb = _process_traceback_frames(e.__traceback__) --> 153 raise e.with_traceback(filtered_tb) from None 154 finally: 155 del filtered_tb ~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/ops.py in raise_from_not_ok_status(e, name) 7184 def raise_from_not_ok_status(e, name): 7185 e.message += (" name: " + name if name is not None else "") -> 7186 raise core._status_to_exception(e) from None # pylint: disable=protected-access 7187 7188 InvalidArgumentError: Index out of range using input dim 0; input has only 0 dims [Op:StridedSlice] name: strided_slice/
# The loop variable still holds the last waveform; confirm its shape.
print(waveform.shape)
(4000,)
# Slicing past the end of the 4000-sample array yields an empty result.
print(ww[12000:12100])
[]
def plot_spectrogram(spectrogram, ax):
    """Render a log-scaled spectrogram on matplotlib axes `ax`.

    (Redefinition of the identical helper earlier in the notebook.)

    :param spectrogram: 2-D (time, freq) array, or 3-D with a trailing
        singleton channels axis (which is squeezed away).
    :param ax: matplotlib Axes to draw on.
    """
    if len(spectrogram.shape) > 2:
        assert len(spectrogram.shape) == 3
        spectrogram = np.squeeze(spectrogram, axis=-1)
    # Convert the frequencies to log scale and transpose, so that the time is
    # represented on the x-axis (columns).
    # Add an epsilon to avoid taking a log of zero.
    log_spec = np.log(spectrogram.T + np.finfo(float).eps)
    height = log_spec.shape[0]
    width = log_spec.shape[1]
    X = np.linspace(0, np.size(spectrogram), num=width, dtype=int)
    Y = range(height)
    ax.pcolormesh(X, Y, log_spec)
# Show the raw waveform (top) and its spectrogram (bottom) stacked.
fig, axes = plt.subplots(2, figsize=(12, 8))
timescale = np.arange(waveform.shape[0])
axes[0].plot(timescale, waveform)
axes[0].set_title('Waveform')
axes[0].set_xlim([0, 4000])
plot_spectrogram(spectrogram.numpy(), axes[1])
axes[1].set_title('Spectrogram')
plt.show()
#print(spectrogram.numpy()[:10])
#print(spectrogram.shape)
Maintenant, définissez une fonction qui transforme l'ensemble de données de forme d'onde en spectrogrammes et leurs étiquettes correspondantes en identifiants entiers :
def get_spectrogram_and_label_id(audio, label):
    """Map (audio, label-string) to (spectrogram, integer label id).

    The normalized waveform is rescaled by 16000 before the STFT so its
    amplitude range resembles raw sample values.
    """
    # (Removed a dead bare `type(audio)` expression left from debugging.)
    scaled = audio * 16000
    spectrogram = get_spectrogram(scaled)
    # The label id is the index of `label` in the global `commands` array.
    label_id = tf.argmax(label == commands)
    return spectrogram, label_id
def get_spectrogram_and_label_id_im(audio, label):
    """Map (audio, label-string) to (integer-FFT spectrogram, label id).

    NOTE(review): `tf.compat.v1.Session().run(ww)` inside a function traced
    by `Dataset.map` is incompatible with TF2 graph tracing — the map call
    below raises (see traceback). Consider wrapping the NumPy-dependent
    path in `tf.py_function` instead; confirm against the TF2 migration
    guide.
    """
    type(audio)  # no-op bare expression, left from debugging
    # Rescale the normalized [-1, 1] waveform by 16000 before the FFT.
    ww=audio*16000
    spectrogram = get_im_spectrogram(tf.compat.v1.Session().run(ww))
    label_id = tf.argmax(label == commands)
    return spectrogram, label_id
Mappez get_spectrogram_and_label_id sur les éléments de l'ensemble de données avec Dataset.map :
# Map to (spectrogram, label_id) pairs with the TF STFT pipeline.
spectrogram_ds = waveform_ds.map(
    map_func=get_spectrogram_and_label_id,
    num_parallel_calls=AUTOTUNE)
type(spectrogram_ds)
from tensorflow.python.ops.math_ops import reduce_prod
# Attempt the same mapping with the integer-FFT variant.
# NOTE(review): this call failed during function tracing (traceback below).
spectrogram_ds_im = waveform_ds.map(
    map_func=get_spectrogram_and_label_id_im,
    num_parallel_calls=AUTOTUNE)
--------------------------------------------------------------------------- InvalidArgumentError Traceback (most recent call last) /tmp/ipykernel_3081950/4286887239.py in <module> ----> 1 spectrogram_ds_im = waveform_ds.map( 2 map_func=get_spectrogram_and_label_id_im, 3 num_parallel_calls=AUTOTUNE) ~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py in map(self, map_func, num_parallel_calls, deterministic, name) 2016 return MapDataset(self, map_func, preserve_cardinality=True, name=name) 2017 else: -> 2018 return ParallelMapDataset( 2019 self, 2020 map_func, ~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py in __init__(self, input_dataset, map_func, num_parallel_calls, deterministic, use_inter_op_parallelism, preserve_cardinality, use_legacy_function, name) 5232 self._input_dataset = input_dataset 5233 self._use_inter_op_parallelism = use_inter_op_parallelism -> 5234 self._map_func = structured_function.StructuredFunctionWrapper( 5235 map_func, 5236 self._transformation_name(), ~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/structured_function.py in __init__(self, func, transformation_name, dataset, input_classes, input_shapes, input_types, input_structure, add_to_graph, use_legacy_function, defun_kwargs) 269 fn_factory = trace_tf_function(defun_kwargs) 270 --> 271 self._function = fn_factory() 272 # There is no graph to add in eager mode. 273 add_to_graph &= not context.executing_eagerly() ~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py in get_concrete_function(self, *args, **kwargs) 3068 or `tf.Tensor` or `tf.TensorSpec`. 
3069 """ -> 3070 graph_function = self._get_concrete_function_garbage_collected( 3071 *args, **kwargs) 3072 graph_function._garbage_collector.release() # pylint: disable=protected-access ~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _get_concrete_function_garbage_collected(self, *args, **kwargs) 3034 args, kwargs = None, None 3035 with self._lock: -> 3036 graph_function, _ = self._maybe_define_function(args, kwargs) 3037 seen_names = set() 3038 captured = object_identity.ObjectIdentitySet( ~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs) 3290 3291 self._function_cache.add_call_context(cache_key.call_context) -> 3292 graph_function = self._create_graph_function(args, kwargs) 3293 self._function_cache.add(cache_key, cache_key_deletion_observer, 3294 graph_function) ~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes) 3128 arg_names = base_arg_names + missing_arg_names 3129 graph_function = ConcreteFunction( -> 3130 func_graph_module.func_graph_from_py_func( 3131 self._name, 3132 self._python_function, ~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes, acd_record_initial_resource_uses) 1159 _, original_func = tf_decorator.unwrap(python_func) 1160 -> 1161 func_outputs = python_func(*func_args, **func_kwargs) 1162 1163 # invariant: `func_outputs` contains only Tensors, CompositeTensors, ~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/structured_function.py in wrapped_fn(*args) 246 attributes=defun_kwargs) 247 def wrapped_fn(*args): # pylint: disable=missing-docstring --> 248 
ret = wrapper_helper(*args) 249 ret = structure.to_tensor_list(self._output_structure, ret) 250 return [ops.convert_to_tensor(t) for t in ret] ~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/structured_function.py in wrapper_helper(*args) 175 if not _should_unpack(nested_args): 176 nested_args = (nested_args,) --> 177 ret = autograph.tf_convert(self._func, ag_ctx)(*nested_args) 178 if _should_pack(ret): 179 ret = tuple(ret) ~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs) 690 except Exception as e: # pylint:disable=broad-except 691 if hasattr(e, 'ag_error_metadata'): --> 692 raise e.ag_error_metadata.to_exception(e) 693 else: 694 raise InvalidArgumentError: in user code: File "/tmp/ipykernel_3081950/832668025.py", line 4, in get_spectrogram_and_label_id_im * spectrogram = get_im_spectrogram(tf.compat.v1.Session().run(ww)) InvalidArgumentError: Graph execution error: Detected at node 'args_0' defined at (most recent call last): File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/runpy.py", line 194, in _run_module_as_main return _run_code(code, main_globals, None, File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/runpy.py", line 87, in _run_code exec(code, run_globals) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module> app.launch_new_instance() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/traitlets/config/application.py", line 846, in launch_instance app.start() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 677, in start self.io_loop.start() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start self.asyncio_loop.run_forever() File 
"/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/base_events.py", line 570, in run_forever self._run_once() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once handle._run() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/events.py", line 81, in _run self._context.run(self._callback, *self._args) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 457, in dispatch_queue await self.process_one() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 446, in process_one await dispatch(*args) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 353, in dispatch_shell await result File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 648, in execute_request reply_content = await reply_content File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 353, in do_execute res = shell.run_cell(code, store_history=store_history, silent=silent) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 533, in run_cell return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2901, in run_cell result = self._run_cell( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2947, in _run_cell return runner(coro) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner coro.send(None) File 
"/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3172, in run_cell_async has_raised = await self.run_ast_nodes(code_ast.body, cell_name, File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3364, in run_ast_nodes if (await self.run_code(code, result, async_=asy)): File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3444, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "/tmp/ipykernel_3081950/4286887239.py", line 1, in <module> spectrogram_ds_im = waveform_ds.map( Node: 'args_0' You must feed a value for placeholder tensor 'args_0' with dtype float and shape [?] [[{{node args_0}}]] Original stack trace for 'args_0': File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/runpy.py", line 194, in _run_module_as_main return _run_code(code, main_globals, None, File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/runpy.py", line 87, in _run_code exec(code, run_globals) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module> app.launch_new_instance() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/traitlets/config/application.py", line 846, in launch_instance app.start() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 677, in start self.io_loop.start() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start self.asyncio_loop.run_forever() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/base_events.py", line 570, in run_forever self._run_once() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/base_events.py", line 1859, 
in _run_once handle._run() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/events.py", line 81, in _run self._context.run(self._callback, *self._args) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 457, in dispatch_queue await self.process_one() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 446, in process_one await dispatch(*args) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 353, in dispatch_shell await result File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 648, in execute_request reply_content = await reply_content File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 353, in do_execute res = shell.run_cell(code, store_history=store_history, silent=silent) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 533, in run_cell return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2901, in run_cell result = self._run_cell( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2947, in _run_cell return runner(coro) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner coro.send(None) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3172, in run_cell_async has_raised = await self.run_ast_nodes(code_ast.body, cell_name, File 
"/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3364, in run_ast_nodes if (await self.run_code(code, result, async_=asy)): File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3444, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "/tmp/ipykernel_3081950/4286887239.py", line 1, in <module> spectrogram_ds_im = waveform_ds.map( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2018, in map return ParallelMapDataset( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 5234, in __init__ self._map_func = structured_function.StructuredFunctionWrapper( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/structured_function.py", line 271, in __init__ self._function = fn_factory() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3070, in get_concrete_function graph_function = self._get_concrete_function_garbage_collected( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3036, in _get_concrete_function_garbage_collected graph_function, _ = self._maybe_define_function(args, kwargs) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3292, in _maybe_define_function graph_function = self._create_graph_function(args, kwargs) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3130, in _create_graph_function func_graph_module.func_graph_from_py_func( File 
"/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 1075, in func_graph_from_py_func func_args = _get_defun_inputs_from_args( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 1313, in _get_defun_inputs_from_args return _get_defun_inputs( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 1386, in _get_defun_inputs placeholder = graph_placeholder( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/graph_only_ops.py", line 34, in graph_placeholder op = g._create_op_internal( # pylint: disable=protected-access File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 693, in _create_op_internal return super(FuncGraph, self)._create_op_internal( # pylint: disable=protected-access File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 3776, in _create_op_internal ret = Operation( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 2175, in __init__ self._traceback = tf_stack.extract_stack_for_node(self._c_op) Original stack trace for 'args_0': File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/runpy.py", line 194, in _run_module_as_main return _run_code(code, main_globals, None, File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/runpy.py", line 87, in _run_code exec(code, run_globals) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module> app.launch_new_instance() File 
"/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/traitlets/config/application.py", line 846, in launch_instance app.start() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 677, in start self.io_loop.start() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start self.asyncio_loop.run_forever() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/base_events.py", line 570, in run_forever self._run_once() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once handle._run() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/events.py", line 81, in _run self._context.run(self._callback, *self._args) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 457, in dispatch_queue await self.process_one() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 446, in process_one await dispatch(*args) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 353, in dispatch_shell await result File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 648, in execute_request reply_content = await reply_content File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 353, in do_execute res = shell.run_cell(code, store_history=store_history, silent=silent) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 533, in run_cell return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs) File 
"/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2901, in run_cell result = self._run_cell( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2947, in _run_cell return runner(coro) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner coro.send(None) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3172, in run_cell_async has_raised = await self.run_ast_nodes(code_ast.body, cell_name, File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3364, in run_ast_nodes if (await self.run_code(code, result, async_=asy)): File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3444, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "/tmp/ipykernel_3081950/4286887239.py", line 1, in <module> spectrogram_ds_im = waveform_ds.map( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2018, in map return ParallelMapDataset( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 5234, in __init__ self._map_func = structured_function.StructuredFunctionWrapper( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/structured_function.py", line 271, in __init__ self._function = fn_factory() File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3070, in get_concrete_function graph_function = self._get_concrete_function_garbage_collected( File 
"/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3036, in _get_concrete_function_garbage_collected graph_function, _ = self._maybe_define_function(args, kwargs) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3292, in _maybe_define_function graph_function = self._create_graph_function(args, kwargs) File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3130, in _create_graph_function func_graph_module.func_graph_from_py_func( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 1075, in func_graph_from_py_func func_args = _get_defun_inputs_from_args( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 1313, in _get_defun_inputs_from_args return _get_defun_inputs( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 1386, in _get_defun_inputs placeholder = graph_placeholder( File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/graph_only_ops.py", line 34, in graph_placeholder op = g._create_op_internal( # pylint: disable=protected-access File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 693, in _create_op_internal return super(FuncGraph, self)._create_op_internal( # pylint: disable=protected-access File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 3776, in _create_op_internal ret = Operation( File 
"/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 2175, in __init__ self._traceback = tf_stack.extract_stack_for_node(self._c_op)
# Eager workaround: iterate the dataset directly so get_im_spectrogram
# receives concrete (eager) tensor values instead of graph placeholders.
for waveform, label in waveform_ds:
    # NOTE(review): 16000 looks like the sample rate, not the int16 full-scale
    # value — confirm which amplitude range get_im_spectrogram expects.
    ww=waveform*16000
    label = label.numpy().decode('utf-8')
    spectrogram = get_im_spectrogram(ww)
# Echo the (failed) mapped dataset in the notebook.
spectrogram_ds_im
np.__version__
'1.22.3'
Examinez les spectrogrammes pour différents exemples de l'ensemble de données :
# Show a 3x3 grid of example spectrograms titled with their command labels.
rows = 3
cols = 3
n = rows*cols
fig, axes = plt.subplots(rows, cols, figsize=(10, 10))
for i, (spectrogram, label_id) in enumerate(spectrogram_ds.take(n)):
    # Map the flat example index to its (row, column) cell in the grid.
    r = i // cols
    c = i % cols
    ax = axes[r][c]
    plot_spectrogram(spectrogram.numpy(), ax)
    ax.set_title(commands[label_id.numpy()])
    ax.axis('off')
plt.show()
Construire et entraîner le modèle¶
Répétez le prétraitement de l'ensemble d'entraînement sur les ensembles de validation et de test :
def preprocess_dataset(files):
    """Build a (spectrogram, label_id) dataset from a list of audio file paths.

    Applies the same waveform -> spectrogram pipeline used for the training
    set: decode each file, then convert it with get_spectrogram_and_label_id.
    """
    ds = tf.data.Dataset.from_tensor_slices(files)
    ds = ds.map(
        map_func=get_waveform_and_label,
        num_parallel_calls=AUTOTUNE)
    return ds.map(
        map_func=get_spectrogram_and_label_id,
        num_parallel_calls=AUTOTUNE)
# The training set was already preprocessed above; build the validation and
# test sets with the same waveform -> spectrogram pipeline.
train_ds = spectrogram_ds
val_ds = preprocess_dataset(val_files)
test_ds = preprocess_dataset(test_files)
Regroupez les ensembles d'entraînement et de validation pour l'entraînement du modèle :
# Batch the training and validation sets for model training.
batch_size = 64
train_ds = train_ds.batch(batch_size)
val_ds = val_ds.batch(batch_size)
Ajoutez les opérations Dataset.cache et Dataset.prefetch pour réduire la latence de lecture lors de l'entraînement du modèle :
# Cache the preprocessed batches and prefetch so data preparation overlaps
# with training, reducing read latency.
train_ds = train_ds.cache().prefetch(AUTOTUNE)
val_ds = val_ds.cache().prefetch(AUTOTUNE)
Pour le modèle, vous utiliserez un simple réseau de neurones à convolution (CNN), puisque vous avez transformé les fichiers audio en images de spectrogramme.
Votre modèle tf.keras.Sequential utilisera les couches de prétraitement Keras suivantes :
tf.keras.layers.Resizing : pour sous-échantillonner l'entrée afin de permettre au modèle de s'entraîner plus rapidement. tf.keras.layers.Normalization : pour normaliser chaque pixel de l'image en fonction de sa moyenne et de son écart type. Pour la couche de Normalization , sa méthode adapt devrait d'abord être appelée sur les données d'apprentissage afin de calculer des statistiques agrégées (c'est-à-dire la moyenne et l'écart type).
# Take the spectrogram shape from one example to size the model's input layer.
for spectrogram, _ in spectrogram_ds.take(1):
    input_shape = spectrogram.shape
print('Input shape:', input_shape)
num_labels = len(commands)
# Instantiate the `tf.keras.layers.Normalization` layer.
norm_layer = layers.Normalization()
# Fit the state of the layer to the spectrograms
# with `Normalization.adapt`.
norm_layer.adapt(data=spectrogram_ds.map(map_func=lambda spec, label: spec))
# Simple CNN classifier over the spectrogram "images".
model = models.Sequential([
    layers.Input(shape=input_shape),
    # Downsample the input.
    layers.Resizing(32, 32),
    # Normalize.
    norm_layer,
    layers.Conv2D(32, 3, activation='relu'),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    # Dropout for regularisation.
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    # Raw logits over the command vocabulary — the loss uses from_logits=True.
    layers.Dense(num_labels),
])
model.summary()
Input shape: (124, 129, 1)
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
resizing (Resizing) (None, 32, 32, 1) 0
normalization (Normalizatio (None, 32, 32, 1) 3
n)
conv2d (Conv2D) (None, 30, 30, 32) 320
conv2d_1 (Conv2D) (None, 28, 28, 64) 18496
max_pooling2d (MaxPooling2D (None, 14, 14, 64) 0
)
dropout (Dropout) (None, 14, 14, 64) 0
flatten (Flatten) (None, 12544) 0
dense (Dense) (None, 128) 1605760
dropout_1 (Dropout) (None, 128) 0
dense_1 (Dense) (None, 8) 1032
=================================================================
Total params: 1,625,611
Trainable params: 1,625,608
Non-trainable params: 3
_________________________________________________________________
Configurez le modèle Keras avec l'optimiseur Adam et la perte d'entropie croisée :
# Adam optimiser + sparse categorical cross-entropy on the raw logits.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
EPOCHS = 10
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    # Stop early when the monitored metric (val_loss by default) has not
    # improved for 2 consecutive epochs.
    callbacks=tf.keras.callbacks.EarlyStopping(verbose=1, patience=2),
)
Epoch 1/10 100/100 [==============================] - 122s 867ms/step - loss: 1.7344 - accuracy: 0.3734 - val_loss: 1.3217 - val_accuracy: 0.5550 Epoch 2/10 100/100 [==============================] - 3s 31ms/step - loss: 1.1920 - accuracy: 0.5728 - val_loss: 0.9821 - val_accuracy: 0.6762 Epoch 3/10 100/100 [==============================] - 3s 31ms/step - loss: 0.8940 - accuracy: 0.6831 - val_loss: 0.8034 - val_accuracy: 0.7275 Epoch 4/10 100/100 [==============================] - 3s 31ms/step - loss: 0.7548 - accuracy: 0.7280 - val_loss: 0.7289 - val_accuracy: 0.7425 Epoch 5/10 100/100 [==============================] - 3s 31ms/step - loss: 0.6399 - accuracy: 0.7719 - val_loss: 0.6883 - val_accuracy: 0.7588 Epoch 6/10 100/100 [==============================] - 3s 32ms/step - loss: 0.5737 - accuracy: 0.7920 - val_loss: 0.6596 - val_accuracy: 0.7925 Epoch 7/10 100/100 [==============================] - 3s 34ms/step - loss: 0.5105 - accuracy: 0.8189 - val_loss: 0.6418 - val_accuracy: 0.7875 Epoch 8/10 100/100 [==============================] - 3s 31ms/step - loss: 0.4574 - accuracy: 0.8355 - val_loss: 0.6458 - val_accuracy: 0.7950 Epoch 9/10 100/100 [==============================] - 3s 33ms/step - loss: 0.4079 - accuracy: 0.8567 - val_loss: 0.6581 - val_accuracy: 0.7912 Epoch 9: early stopping
# Plot the training and validation loss curves from the training history.
metrics = history.history
# The original call passed metrics['val_loss'] as plot()'s third positional
# argument — the format-string slot — so matplotlib silently plotted it
# against its own indices.  Plot each series against the epoch axis instead.
plt.plot(history.epoch, metrics['loss'])
plt.plot(history.epoch, metrics['val_loss'])
plt.legend(['loss', 'val_loss'])
plt.show()
Évaluer les performances du modèle ¶
# Evaluate on the test set: collect the examples into NumPy arrays, then
# compare argmax predictions against the true label ids.
test_audio = []
test_labels = []
for audio, label in test_ds:
    test_audio.append(audio.numpy())
    test_labels.append(label.numpy())
# NOTE(review): np.array stacking assumes all spectrograms share one shape.
test_audio = np.array(test_audio)
test_labels = np.array(test_labels)
# Predicted class = index of the largest logit for each example.
y_pred = np.argmax(model.predict(test_audio), axis=1)
y_true = test_labels
test_acc = sum(y_pred == y_true) / len(y_true)
print(f'Test set accuracy: {test_acc:.0%}')
Test set accuracy: 85%
## Afficher une matrice de confusion
# Confusion matrix of true vs predicted commands, rendered as a heatmap.
confusion_mtx = tf.math.confusion_matrix(y_true, y_pred)
plt.figure(figsize=(10, 8))
sns.heatmap(confusion_mtx,
            xticklabels=commands,
            yticklabels=commands,
            annot=True, fmt='g')  # 'g': plain numeric format for the counts
plt.xlabel('Prediction')
plt.ylabel('Label')
plt.show()
Exécuter l'inférence sur un fichier audio¶
# Run inference on a single WAV file and show the per-command softmax scores.
sample_file = data_dir/'go/0132a06d_nohash_2.wav'
sample_ds = preprocess_dataset([str(sample_file)])
for spectrogram, label in sample_ds.batch(1):
    prediction = model(spectrogram)
    plt.bar(commands, tf.nn.softmax(prediction[0]))
    plt.title(f'Predictions for "{commands[label[0]]}"')
    plt.show()
Generate a TensorFlow Lite for Microcontrollers Model ¶
Convert the TensorFlow Lite quantized model into a C source file that can be loaded by TensorFlow Lite for Microcontrollers.
# Install xxd if it is not available
#!apt-get update && apt-get -qq install xxd
# Convert to a C source file, i.e, a TensorFlow Lite for Microcontrollers model
# (IPython shell escape — MODEL_TFLITE and MODEL_TFLITE_MICRO are defined in
# an earlier cell not shown here; this is not plain-Python syntax.)
!xxd -i {MODEL_TFLITE} > {MODEL_TFLITE_MICRO}
# Update variable names
# Turn the model path into a C identifier and rename the array to g_model.
REPLACE_TEXT = MODEL_TFLITE.replace('/', '_').replace('.', '_')
!sed -i 's/'{REPLACE_TEXT}'/g_model/g' {MODEL_TFLITE_MICRO}
# Alternative export: use tinymlgen to emit the model as a C array.
# NOTE(review): this rebinds MODEL_TFLITE_MICRO after the xxd cell above
# already used it — confirm which export is the intended one.
MODEL_TFLITE_MICRO='model.cc'
from tinymlgen import port
with open(MODEL_TFLITE_MICRO, 'w') as f: # change path if needed
    f.write(port(model, optimize=False))
2022-03-21 09:26:34.532575: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
INFO:tensorflow:Assets written to: /tmp/tmpuc_tky7e/assets
2022-03-21 09:26:35.258027: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:357] Ignored output_format. WARNING:absl:Buffer deduplication procedure will be skipped when flatbuffer library is not properly loaded 2022-03-21 09:26:35.258075: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:360] Ignored drop_control_dependency.
# Save the full model in TensorFlow SavedModel format.
model.save('sound01.model')
INFO:tensorflow:Assets written to: sound01.model/assets
INFO:tensorflow:Assets written to: sound01.model/assets
# Reload the SavedModel and verify the architecture round-trips.
new_model = tf.keras.models.load_model('sound01.model')
# Check its architecture
new_model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
resizing (Resizing) (None, 32, 32, 1) 0
normalization (Normalizatio (None, 32, 32, 1) 3
n)
conv2d (Conv2D) (None, 30, 30, 32) 320
conv2d_1 (Conv2D) (None, 28, 28, 64) 18496
max_pooling2d (MaxPooling2D (None, 14, 14, 64) 0
)
dropout (Dropout) (None, 14, 14, 64) 0
flatten (Flatten) (None, 12544) 0
dense (Dense) (None, 128) 1605760
dropout_1 (Dropout) (None, 128) 0
dense_1 (Dense) (None, 8) 1032
=================================================================
Total params: 1,625,611
Trainable params: 1,625,608
Non-trainable params: 3
_________________________________________________________________
# Save the entire model to a HDF5 file.
# The '.h5' extension indicates that the model should be saved to HDF5.
model.save('my_model.h5')