In [2]:
#!pip install tinymlgen

The Fast Fourier Transform¶

https://www.dspguide.com/ch12/2.htm

In [5]:
#!c:\users\pcimed\appdata\local\programs\python\python37\python.exe -m pip install --upgrade pip

Use¶

from tinymlgen import port

if name == 'main':

tf_model = create_tf_model()

c_code = port(tf_model)

Configuration¶

You can pass a few parameters to the port function:

optimize (=True): apply optimizers to the exported model. Can either be a list of optimizers or a boolean, in which case OPTIMIZE_FOR_SIZE is applied variable_name (='model_data'): give the exported array a custom name pretty_print (=False): print the array in a nicely formatted arrangement

FFT¶

https://create.arduino.cc/projecthub/abhilashpatel121/approxfft-fastest-fft-function-for-arduino-fd4917?ref=user&ref_id=1593632&offset=0

https://create.arduino.cc/projecthub/abhilashpatel121/easyfft-fast-fourier-transform-fft-for-arduino-9d2677

bad approach¶

https://create.arduino.cc/projecthub/alankrantas/eloquenttinyml-easier-voice-classifier-on-nano-33-ble-sense-ebb81e

Fourier transform This is where the Fourier Transform comes in. This method makes use of te fact that every non-linear function can be represented as a sum of (infinite) sine waves. In the underlying figure this is illustrated, as a step function is simulated by a multitude of sine waves.

image.png

N = number of samples

n = current sample

xn = value of the signal at time n

k = current frequency (0 Hz to N-1 Hz)

Xk = Result of the DFT (amplitude and phase)

Note that a dot product is defined as:

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import copy
import time
import ctypes
from ctypes import *
In [2]:
# from https://www.ritchievink.com/blog/2017/04/23/understanding-the-fourier-transform-by-example/

def DFT(x):
    """
    Compute the discrete Fourier Transform of the 1D array x
    :param x: (array)
    """

    N = x.size
    n = np.arange(N)
    k = n.reshape((N, 1))
    e = np.exp(-2j * np.pi * k * n / N)
    return np.dot(e, x)
In [3]:
ts=time.time_ns()
test=np.int32([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15])
te=time.time_ns()
print(te-ts)
print(test)
56025
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
In [4]:
ts=time.time_ns()
DFT(test)
te=time.time_ns()
print(te-ts)
20256818
In [5]:
isin_data =np.array([
0,  1,   3,   4,   5,   6,   8,   9,   10,  11,  13,  14,  15,  17,  18,  19,  20, 
22,  23,  24,  26,  27,  28,  29,  31,  32,  33,  35,  36,  37,  39,  40,  41,  42, 
44,  45,  46,  48,  49,  50,  52,  53,  54,  56,  57,  59,  60,  61,  63,  64,  65, 
67,  68,  70,  71,  72,  74,  75,  77,  78,  80,  81,  82,  84,  85,  87,  88,  90, 
91,  93,  94,  96,  97,  99,  100, 102, 104, 105, 107, 108, 110, 112, 113, 115, 117, 
118, 120, 122, 124, 125, 127, 129, 131, 133, 134, 136, 138, 140, 142, 144, 146, 148, 
150, 152, 155, 157, 159, 161, 164, 166, 169, 171, 174, 176, 179, 182, 185, 188, 191, 
195, 198, 202, 206, 210, 215, 221, 227, 236],dtype=np.byte)
 
Pow2=np.array([1,2,4,8,16,32,64,128,256,512,1024,2048,4096],dtype=np.uint16)
RSSdata=np.array([7,6,6,5,5,5,4,4,4,4,3,3,3,3,3,3,3,2,2,2],dtype=np.byte)
In [6]:
def  fastRSS(a, b):
    '''ca=np.int32(aa)
    cb=np.int32(bb)
    a=copy.copy(ca)
    b=copy.copy(cb)
    '''
    a=np.int16(a)
    b=np.int16(b)
    if a==0 and b==0:
        return 0
    #int min,max,temp1,temp2;
     #byte clevel;
    min= max=temp1= temp2=np.int16
    clevel=np.byte
    
    #code
    if a<0:
        a=-a
    if b<0:
        b=-b
        
    clevel=0;
    
    if a>b:
        max=a
        min=b
    else:
        max=b
        min=a
    
    if max> (min+min+min):
        return max
    else:
        temp1= min>>3
        if temp1==0:
            temp1=1
        temp2=min
        while temp2<max:
            temp2=temp2+temp1
            clevel=clevel+1
        temp2=RSSdata[clevel]
        temp1=temp1>>1  
        for i in range(temp2):
            max=max+temp1
        return(max)
In [7]:
def fast_sine( Amp,  th):
    #int temp3,m1,m2;
    #byte temp1,temp2, test,quad,accuracy;
    '''cAmp=np.int32(AAmp)
    cth=np.int32(tth)
    Amp=copy.copy(cAmp)
    th=copy.copy(cth)
    '''
    Amp=np.int16(Amp)
    th=np.int16(th)
    temp3= m1= m2=np.int16
    temp1=temp2= test= quad=occuracy=np.byte
    
    accuracy=5      #set it value from 1 to 7, where 7 being most accurate but slowest
                    #accuracy value of 5 recommended for typical applicaiton
    while th>1024:
        th=th-1024  #here 1024 = 2*pi or 360 deg
        
    while th<0:
        th=th+1024
       
    quad=th>>8 #np.right_shift(th,8) #np.uint32(th)>>np.uint32(8);

    if quad==1:
        th= 512-th
    elif quad==2:
        th= th-512
    elif quad==3:
        th= 1024-th

    temp1= 0
    temp2= 128     #2 multiple
    m1=0
    m2=Amp

    temp3=(m1+m2) >> 1   #np.uint32(m1+m2)>>np.uint32(1)
    Amp=temp3
    for i in range(accuracy):
        test=(temp1+temp2) >> 1  # ,1)  #np.uint32(temp1+temp2)>>np.uint32(1)
        temp3=temp3>> 1            #np.uint32(temp3)>>np.uint32(1); 
        if th>isin_data[test]:
            temp1=test
            Amp=Amp+temp3
            m1=Amp
        else:
            if th<isin_data[test]:
                temp2=test
                Amp=Amp-temp3
                m2=Amp
    if quad==2:
        Amp= 0-Amp
    else:
        if quad==3:
            Amp= 0-Amp
    return Amp

def fast_cosine(Amp, th):
    '''cAmp=np.int32(AAmp)
    cth=np.int32(tth)
    Amp=copy.copy(cAmp)
    th=copy.copy(cth)
    '''
    th=256-th #cos th = sin (90-th) formula
    return fast_sine(Amp,th) 
In [8]:
fast_sine( 1024,  0)
Out[8]:
16
In [9]:
'''
int in[],int N,float Frequency
Code to perform High speed and Accurate FFT on arduino,
setup:

1. in[]     : Data array, 
2. N        : Number of sample (recommended sample size 2,4,8,16,32,64,128,256,512...)
3. Frequency: sampling frequency required as input (Hz)

It will by default return frequency with max amplitude,
if you need complex output or magnitudes uncomment required sections

If sample size is not in power of 2 it will be clipped to lower side of number. 
i.e, for 150 number of samples, code will consider first 128 sample, remaining sample  will be omitted.
For Arduino nano, FFT of more than 256 sample not possible due to mamory limitation 
Code by ABHILASH
Contact: abhilashpatel121@gmail.com
Documentation & details: https://www.instructables.com/member/abhilash_patel/instructables/

Update(06/05/21): Correction made for support on Arduino Due
'''

def Approx_FFT( inn, N,Frequency):
    #int a,c1,f,o,x,data_max,data_min=0;
    #long data_avg,data_mag,temp11;         
    #byte scale,check=0;
    
    # parameters
    '''cN=np.int64(NN)
    cFrequency=np.float64(FFrequency)
    cinnn=np.int64(innn)
    
    inn=copy.copy(cinnn)
    N=copy.copy(cN)
    Frequency=copy.copy(cFrequency)
    '''
    #varaibles
    inn=np.int16(inn)
    N=np.int16(N)
    Frequency=np.float32(Frequency)
    a=c1=f=x=o=data_max=data_min=np.int16(0)
    data_avg=data_mag=temp11=np.int32(0)
    scale=check=np.byte(0)
    #code
    
    if not inn.any():
        return inn,inn
    
    data_max=0
    data_avg=0
    data_min=0
    
    for i in range(12): #(int i=0;i<12;i++)                 //calculating the levels
        if Pow2[i]<=N:
            o=i
    a=Pow2[o];  
    out_r=np.zeros(a, dtype=np.int16) # int out_r[a];   //real part of transform
    out_im=np.zeros(a, dtype=np.int16) # [a];  //imaginory part of transform

    for i in range(a): # getting min max and average for scalling
        out_r[i]=0
        out_im[i]=0
        data_avg=data_avg+inn[i]
        if inn[i]>data_max:
            data_max=inn[i]
        if inn[i]<data_min:
            data_min=inn[i] 
    
    # print(data_avg,o,type(data_avg),type(o))
    
    data_avg=data_avg>>o #np.right_shift(data_avg,o)
    scale=0
    data_mag=data_max-data_min;
    temp11=data_mag;
    
    #scalling data  from +512 to -512

    if data_mag>1024:
        while temp11>1024:
            temp11=temp11>> 1 # np.uint64(temp11)>>np.uint32(1)
            scale=scale+1
              
    if data_mag<1024:
        while temp11<1024:
            temp11=temp11<<1   #np.uint64(temp11)<<np.uint32(1)
            scale=scale+1;
               
    if data_mag>1024:
        for i in range(a):
            inn[i]=inn[i]-data_avg
            inn[i]=inn[i]>>scale  #np.uint64(inn[i])>>np.uint32(scale)
        scale=128-scale;
        
    if data_mag<1024:
        scale=scale-1
        for i in range(a):
            inn[i]=inn[i]-data_avg
            inn[i]=inn[i]<<scale  #np.uint64(inn[i])<<np.uint32(scale)
        scale=128+scale;


    x=0;  
    
    for b in range(o): #                      bit reversal order stored in im_out array
        c1=Pow2[b]
        f=Pow2[o]/(c1+c1);
        for j in range(c1): #(int j=0;j<c1;j++)
            x=x+1;
            out_im[x]=out_im[j]+f;
                    

    for i in range(a): #          update input array as per bit reverse order
        out_r[i]=inn[out_im[i]];
        out_im[i]=0


    # int i10,i11,n1,tr,ti;
    #float e;
    #int c,s,temp4;
    i10=i11=n1=tr=ti=np.int16
    e=np.float32
    c=s=temp4=np.int16
    
    for i in range(o):  # fft
        i10=Pow2[i]             # overall values of sine/cosine  
        i11=int(Pow2[o]/Pow2[i+1]);   # loop with similar sine cosine
        e=1024/Pow2[i+1]         #1024 is equivalent to 360 deg
        e=0-e;
        n1=0;

        for j in range(i10): #(int j=0;j<i10;j++)
            c=e*j # c is angle as where 1024 unit is 360 deg
            while c<0:
                c=c+1024
            while c>1024:
                c=c-1024
            
            n1=j
            for k in range(i11):
                temp4=i10+n1
                if c==0:
                    tr=out_r[temp4]
                    ti=out_im[temp4]
                elif c==256:
                    tr= -out_im[temp4]
                    ti=out_r[temp4]
                elif c==512:
                    tr=-out_r[temp4]
                    ti=-out_im[temp4]
                elif c==768:
                    tr=out_im[temp4]
                    ti=-out_r[temp4]
                elif c==1024:
                    tr=out_r[temp4]
                    ti=out_im[temp4]
                else:
                    tr=fast_cosine(out_r[temp4],c)-fast_sine(out_im[temp4],c);            #the fast sine/cosine function gives direct (approx) output for A*sinx
                    ti=fast_sine(out_r[temp4],c)+fast_cosine(out_im[temp4],c);                    
                #endif
                out_r[n1+i10]=out_r[n1]-tr;
                out_r[n1]=out_r[n1]+tr;
                if out_r[n1]>15000 or out_r[n1]<-15000:
                    check=1   # check for int size, it can handle only +31000 to -31000,
          
                out_im[n1+i10]=out_im[n1]-ti;
                out_im[n1]=out_im[n1]+ti;
                if out_im[n1]>15000 or out_im[n1]<-15000:
                    check=1         
                n1=n1+i10+i10;
            #fork
        #forj
        if check==1:
            #scalling the matrics if value higher than 15000 to prevent varible from overflowing
            for m in range(a): 
                out_r[m]=out_r[m]>>1  #np.uint64(out_r[m])>>np.uint64(1);           
                out_im[m]=out_im[m]>>1 #np.uint64(out_im[m])>>np.uint64(1); 
            check=0; 
            scale=scale-1;                 # tracking overall scalling of input data
        #endif
    #fori
        
    
             
    if scale>128:
        scale=scale-128
        for i in range(a):
            out_r[i]=out_r[i]>>scale  #np.uint64(out_r[i])>>np.ubyte(scale)
            out_im[i]=out_im[i]>>scale # np.uint64(out_im[i])>>np.ubyte(scale)
        scale=0
    else:                        # revers all scalling we done till here,
        scale=128-scale          #in case of nnumber getting higher than 32000, we will represent in as multiple of 2^scale
    
    #for i in range(a):
        #print(out_r[i],out_im[i],i,"*2^")
    half=np.int16(N/2)
    return out_r[:half], out_im[:half]
'''
    oo_r=out_r
    oo_i=out_im
    fout=fm=fstp=np.int32;
    
    fstep=np.float64(Frequency/N)
    fstp=fstep;
    fout=0;fm=0;

    for i in range(1,Pow2[o-1]):      #  getting amplitude from compex number
        out_r[i]=fastRSS(out_r[i],out_im[i]);

        #Approx RSS function used to calculated magnitude quickly      
        out_im[i]=out_im[i-1]+fstp;
        if fout<out_r[i]:
            fm=i
            fout=out_r[i]
         
        #un comment to print Amplitudes (1st value (offset) is not printed)
        #Serial.print(out_r[i]); Serial.print("\t"); 
        #Serial.print("*2^");Serial.println(scale); 
        #end for
        print(out_r[i],"*2^")

    #float fa,fb,fc;
    fa=out_r[fm-1];
    fb=out_r[fm]; 
    fc=out_r[fm+1];
    fstep=(fa*(fm-1)+fb*fm+fc*(fm+1))/(fa+fb+fc);

    #return fstep*Frequency/N
    print(fstep*Frequency/N)
    return oo_r, oo_i #out_r, out_im
 
'''  
Out[9]:
'\n    oo_r=out_r\n    oo_i=out_im\n    fout=fm=fstp=np.int32;\n    \n    fstep=np.float64(Frequency/N)\n    fstp=fstep;\n    fout=0;fm=0;\n\n    for i in range(1,Pow2[o-1]):      #  getting amplitude from compex number\n        out_r[i]=fastRSS(out_r[i],out_im[i]);\n\n        #Approx RSS function used to calculated magnitude quickly      \n        out_im[i]=out_im[i-1]+fstp;\n        if fout<out_r[i]:\n            fm=i\n            fout=out_r[i]\n         \n        #un comment to print Amplitudes (1st value (offset) is not printed)\n        #Serial.print(out_r[i]); Serial.print("\t"); \n        #Serial.print("*2^");Serial.println(scale); \n        #end for\n        print(out_r[i],"*2^")\n\n    #float fa,fb,fc;\n    fa=out_r[fm-1];\n    fb=out_r[fm]; \n    fc=out_r[fm+1];\n    fstep=(fa*(fm-1)+fb*fm+fc*(fm+1))/(fa+fb+fc);\n\n    #return fstep*Frequency/N\n    print(fstep*Frequency/N)\n    return oo_r, oo_i #out_r, out_im\n \n'
In [10]:
#test=np.ones(16,dtype=np.int32)
test=np.array([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],dtype=np.int32)
test
Out[10]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
      dtype=int32)
In [11]:
print(Approx_FFT(test,16,1000))
(array([  8,  -7,  -8,  -4,  -8, -13,  -8, -10], dtype=int16), array([ 0, 50, 23, 18,  8,  3,  7,  2], dtype=int16))
In [12]:
np.fft.fft(test)
Out[12]:
array([120. +0.j        ,  -8.+40.21871594j,  -8.+19.3137085j ,
        -8.+11.9728461j ,  -8. +8.j        ,  -8. +5.3454291j ,
        -8. +3.3137085j ,  -8. +1.59129894j,  -8. +0.j        ,
        -8. -1.59129894j,  -8. -3.3137085j ,  -8. -5.3454291j ,
        -8. -8.j        ,  -8.-11.9728461j ,  -8.-19.3137085j ,
        -8.-40.21871594j])
In [13]:
ts=time.time_ns()
Approx_FFT(test,16,1000)
te=time.time_ns()
print(te-ts)
1243869
In [14]:
ts=time.time_ns()
np.fft.fft(test)
te=time.time_ns()
print(te-ts)
print(np.fft.fft(test).real)
print(np.fft.fft(test).imag)
130885
[120.  -8.  -8.  -8.  -8.  -8.  -8.  -8.  -8.  -8.  -8.  -8.  -8.  -8.
  -8.  -8.]
[  0.          40.21871594  19.3137085   11.9728461    8.
   5.3454291    3.3137085    1.59129894   0.          -1.59129894
  -3.3137085   -5.3454291   -8.         -11.9728461  -19.3137085
 -40.21871594]
In [15]:
data_test=np.int64(56)
oo=int(5)
np.right_shift(data_test,oo)
Out[15]:
1
In [16]:
import matplotlib.pyplot as plt
import numpy as np
import copy
import time
from ctypes import *
import ctypes
In [17]:
'''
import ctypes
import numpy
c_float_p = ctypes.POINTER(ctypes.c_float)
data = numpy.array([[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]])
data = data.astype(numpy.float32)
data_p = data.ctypes.data_as(c_float_p)
'''
test=np.arange(16)
#test=np.ones(16,dtype=np.int32);

test=test.astype(np.int32)
test=test.ctypes.data_as(POINTER(ctypes.c_int32))

xx = CDLL('imfft4.so')
xx.imff.argtypes = POINTER(c_int32),c_size_t,POINTER(c_int32)
xx.imff.restype = None
out = (c_int32*32)()
inn = (c_int16*32)()
ts=time.time_ns()
xx.imff(test,16,out)
te=time.time_ns()
print(te-ts)
#print(list(test))
print(list(out))
testnp=np.int32(out)
104206
[8, -8, -8, -9, -8, -8, -8, -9, -8, -9, -8, -8, -8, -9, -8, -8, 0, 40, 19, 11, 8, 5, 3, 1, 0, -2, -4, -6, -8, -12, -20, -41]
In [18]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
/tmp/ipykernel_3251782/2711954765.py in <module>
----> 1 print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

NameError: name 'tf' is not defined
In [19]:
zz=np.zeros(16,dtype=np.int16);
zz=zz.astype(np.int32)
zz=zz.ctypes.data_as(POINTER(c_int32))
print(zz)
<__main__.LP_c_int object at 0x7fffc32b89c0>
In [20]:
ts=time.time_ns()
ts=time.time_ns()
xx.imff(zz,16,out)
te=time.time_ns()
print(te-ts)
47770
In [21]:
testnp
from ctypes import *
In [22]:
def imfft(inn, N):
    N2=np.int32(N*2)
    inn=inn.astype(np.int32)
    inn=inn.ctypes.data_as(POINTER(c_int32))
    out = (ctypes.c_int32*N2)()
    xx.imff(inn,N,out)
    outnp=np.int32(out)
    return np.vectorize(complex)(outnp[0:N], outnp[N:]) #, dtype=complex)
In [23]:
ts=time.time_ns()
test=np.arange(16)
te=time.time_ns()
print(te-ts)
tt=imfft(test,16)
print(tt)
54983
[ 8. +0.j -8.+40.j -8.+19.j -9.+11.j -8. +8.j -8. +5.j -8. +3.j -9. +1.j
 -8. +0.j -9. -2.j -8. -4.j -8. -6.j -8. -8.j -9.-12.j -8.-20.j -8.-41.j]
In [24]:
zz=np.ones(16,dtype=np.int32)
ts=time.time_ns()
r=imfft(zz,16)
te=time.time_ns()
print(te-ts)
print(r)
586168
[0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j
 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j 0.+0.j]
In [25]:
#fun(ctypes.c_void_p(indata.ctypes.data), ctypes.c_int(5), ctypes.c_int(6),
#    ctypes.c_void_p(outdata.ctypes.data))
In [38]:
ts=time.time_ns()
r=np.fft.fft(np.arange(16))
te=time.time_ns()
print(te-ts)
print(r)
1183070
[120. +0.j          -8.+40.21871594j  -8.+19.3137085j   -8.+11.9728461j
  -8. +8.j          -8. +5.3454291j   -8. +3.3137085j   -8. +1.59129894j
  -8. +0.j          -8. -1.59129894j  -8. -3.3137085j   -8. -5.3454291j
  -8. -8.j          -8.-11.9728461j   -8.-19.3137085j   -8.-40.21871594j]
In [39]:
np.fft.fft(test).real
Out[39]:
array([120.,  -8.,  -8.,  -8.,  -8.,  -8.,  -8.,  -8.,  -8.,  -8.,  -8.,
        -8.,  -8.,  -8.,  -8.,  -8.])
In [40]:
np.fft.fft(test).imag
Out[40]:
array([  0.        ,  40.21871594,  19.3137085 ,  11.9728461 ,
         8.        ,   5.3454291 ,   3.3137085 ,   1.59129894,
         0.        ,  -1.59129894,  -3.3137085 ,  -5.3454291 ,
        -8.        , -11.9728461 , -19.3137085 , -40.21871594])
In [62]:
 
Out[62]:
array([496.+0.00000000e+00j, -16.+1.62450726e+02j, -16.+8.04374319e+01j,
       -16.+5.27449313e+01j, -16.+3.86274170e+01j, -16.+2.99338946e+01j,
       -16.+2.39456922e+01j, -16.+1.94960564e+01j, -16.+1.60000000e+01j,
       -16.+1.31308607e+01j, -16.+1.06908582e+01j, -16.+8.55217818e+00j,
       -16.+6.62741700e+00j, -16.+4.85354694e+00j, -16.+3.18259788e+00j,
       -16.+1.57586245e+00j, -16.+4.93916428e-14j, -16.-1.57586245e+00j,
       -16.-3.18259788e+00j, -16.-4.85354694e+00j, -16.-6.62741700e+00j,
       -16.-8.55217818e+00j, -16.-1.06908582e+01j, -16.-1.31308607e+01j,
       -16.-1.60000000e+01j, -16.-1.94960564e+01j, -16.-2.39456922e+01j,
       -16.-2.99338946e+01j, -16.-3.86274170e+01j, -16.-5.27449313e+01j,
       -16.-8.04374319e+01j, -16.-1.62450726e+02j])
In [12]:
n = 128

# definition de a
a = np.zeros(n)
a[1] = 1

# visualisation de a
# on ajoute a droite la valeur de gauche pour la periodicite
plt.subplot(311)
plt.plot( np.append(a, a[0]) )

# calcul de A
A = np.fft.fft(a)

# visualisation de A
# on ajoute a droite la valeur de gauche pour la periodicite
B = np.append(A, A[0])
plt.subplot(312)
plt.plot(np.real(B))
plt.ylabel("partie reelle")

plt.subplot(313)
plt.plot(np.imag(B))
plt.ylabel("partie imaginaire")

plt.show()
No description has been provided for this image
In [13]:
n = 128

# definition de a
a = np.zeros(n)
a[1] = 100

# visualisation de a
# on ajoute a droite la valeur de gauche pour la periodicite
plt.subplot(311)
plt.plot( np.append(a, a[0]) )

# calcul de A
#a=imhann(a)
A = imfft(a,128)

# visualisation de A
# on ajoute a droite la valeur de gauche pour la periodicite
B = np.append(A, A[0])
plt.subplot(312)
plt.plot(np.real(B))
plt.ylabel("partie reelle")

plt.subplot(313)
plt.plot(np.imag(B))
plt.ylabel("partie imaginaire")

plt.show()
No description has been provided for this image

Visualisation des valeurs complexes avec une échelle colorée ¶

In [14]:
# visualisation de a
# on ajoute a droite la valeur de gauche pour la periodicite
plt.subplot(211)
plt.plot( np.append(a, a[0]) )

# calcul de k
k = np.arange(n)


# visualisation de A - Attention au changement de variable
# on ajoute a droite la valeur de gauche pour la periodicite
plt.subplot(212)
x = np.append(k, k[-1]+k[1]-k[0]) # calcul d'une valeur supplementaire
z = np.append(A, A[0])
X = np.array([x,x])

y0 = np.zeros(len(x))
y = np.abs(z)
Y = np.array([y0,y])

Z = np.array([z,z])
C = np.angle(Z)

plt.plot(x,y,'k')

plt.pcolormesh(X, Y, C, shading="gouraud", cmap=plt.cm.hsv, vmin=-np.pi, vmax=np.pi)
plt.colorbar()

plt.show()
No description has been provided for this image

Exploring the FFT ¶

Let’s write some code to find out what an FFT is actually doing.

First we define a simple signal containing an addition of two sine waves. One with a frequency of 40 Hz and one with a frequency of 90 Hz.

In [137]:
t = np.linspace(0, 0.5, 256)
s = np.sin(40 * 2 * np.pi * t) + 0.5 * np.sin(90 * 2 * np.pi * t)*16000

plt.ylabel("Amplitude")
plt.xlabel("Time [s]")
plt.plot(t, s)
plt.show()
No description has been provided for this image

Complex¶

In order to retrieve a spectrum of the frequency of the time signal mentioned above we must take a FFT on that sequence.

In [138]:
fft = np.fft.fft(s)


for i in range(2):
    print("Value at index {}:\t{}".format(i, fft[i + 1]), "\nValue at index {}:\t{}".format(fft.size -1 - i, fft[-1 - i]))
Value at index 0:	(1.9473561461272766-158.67688860630324j) 
Value at index 255:	(1.9473561461295503+158.67688860629698j)
Value at index 1:	(7.800987376524063-317.7769954775117j) 
Value at index 254:	(7.800987376525427+317.77699547750626j)
In [139]:
fft = imfft(s,len(s))


for i in range(2):
    print("Value at index {}:\t{}".format(i, fft[i + 1]), "\nValue at index {}:\t{}".format(fft.size -1 - i, fft[-1 - i]))
Value at index 0:	(-28-17j) 
Value at index 255:	(-18-24j)
Value at index 1:	(-18-18j) 
Value at index 254:	(-10-6j)

Because the second half of the sequence gives us no new information we can already conclude that the half of the FFT sequence is the output we need.¶

The complex output numbers of the FFT contains the following information:

Amplitude of a certain frequency sine wave (energy). Phase offset of a certain frequency sine wave. The amplitude is retrieved by taking the absolute value of the number and the phase offset is obtained by computing the angle of the number.

Spectrum We are interested in the energy of each frequency, so we can determine the absolute value of the FFT’s output. To get a good insight in the spectrum the energy should be plotted against the frequency. Each discrete number output of the FFT corresponds to a certain frequency. The frequency resolution is determined by:

Δf=fs/N

Putting it all together we can plot the frequency spectrum for our simple sine wave function. We plot only half of the spectrum, because that is the only half giving us real information.

In [140]:
fft = np.fft.fft(imhann(s))
T = t[1] - t[0]  # sampling interval 
N = s.size

# 1/T = frequency
f = np.linspace(0, 1 / T, N)

plt.ylabel("Amplitude")
plt.xlabel("Frequency [Hz]")
plt.bar(f[:N // 2], np.abs(fft)[:N // 2] * 1 / N, width=1.5)  # 1 / N is a normalization factor
plt.show()
No description has been provided for this image
In [141]:
fft = imfft(imhann(s),len(s))
T = t[1] - t[0]  # sampling interval 
N = s.size

# 1/T = frequency
f = np.linspace(0, 1 / T, N)

plt.ylabel("Amplitude")
plt.xlabel("Frequency [Hz]")
plt.bar(f[:N // 2], np.abs(fft)[:N // 2] * 1 / N, width=1.5)  # 1 / N is a normalization factor
plt.show()
No description has been provided for this image
In [154]:
def imhann(signal):
    nb=len(signal)
    samplesMinusOne=nb-1
    for i in range(nb):
        ratio = (i / samplesMinusOne)
        weighingFactor = 0.54 - (1- np.cos(2*np.pi * ratio))
        signal[i]=signal[i]*weighingFactor
    return signal
In [143]:
s=imhann(s)
In [144]:
fft = imfft(s,len(s))
T = t[1] - t[0]  # sampling interval 
N = s.size

# 1/T = frequency
f = np.linspace(0, 1 / T, N)

plt.ylabel("Amplitude")
plt.xlabel("Frequency [Hz]")
plt.bar(f[:N // 2], np.abs(fft)[:N // 2] * 1 / N, width=1.5)  # 1 / N is a normalization factor
plt.show()
No description has been provided for this image

As we can see the FFT works! It has given us information about the frequencies of the waves in the time signal.

A FFT is a trade-off between time information and frequency information. By taking a FFT of a time signal, all time information is lost in return for frequency information. To keep information about time and frequencies in one spectrum, we must make a spectrogram. These are DFT’s taken on discrete time windows.

Alright By taking a FFT result of the time signal of Kendrick Lamar’s song, we get the spectrum shown below. The frequency scale is plotted on log scale. As we assumed before the natural frequency of my windows are about 100 Hz. In the figure we can see that the most dominant frequencies occur between 101.5-102.2 Hz (30-158 Hz). My windows natural frequency is right in the middle of the dominant frequencies of the song and thus may resonate due to the high volume.

Now it is too premature to say it wouldn’t be safe to listen to this song on full volume. However if I really want to be sure about my windows I maybe should examine the frequency of another song.

image.pngimage.png

image.png

download.png

image.png

https://towardsdatascience.com/understanding-audio-data-fourier-transform-fft-spectrogram-and-speech-recognition-a4072d228520

https://www.tek.com/en/blog/window-functions-spectrum-analyzers

In [523]:
def spectrogram(samples, sample_rate, stride_ms = 10.0, 
                          window_ms = 20.0, max_freq = None, eps = 1e-14):

    stride_size = int(0.001 * sample_rate * stride_ms)
    window_size = int(0.001 * sample_rate * window_ms)

    # Extract strided windows
    truncate_size = (len(samples) - window_size) % stride_size
    samples = samples[:len(samples) - truncate_size]
    nshape = (window_size, (len(samples) - window_size) // stride_size + 1)
    nstrides = (samples.strides[0], samples.strides[0] * stride_size)
    windows = np.lib.stride_tricks.as_strided(samples, 
                                          shape = nshape, strides = nstrides)
    
    assert np.all(windows[:, 1] == samples[stride_size:(stride_size + window_size)])

    # Window weighting, squared Fast Fourier Transform (fft), scaling
    weighting = np.hanning(window_size)[:, None]
    
    fft = np.fft.rfft(windows * weighting, axis=0)
    fft = np.absolute(fft)
    fft = fft**2
    
    scale = np.sum(weighting**2) * sample_rate
    fft[1:-1, :] *= (2.0 / scale)
    fft[(0, -1), :] /= scale
    
    # Prepare fft frequency list
    freqs = float(sample_rate) / window_size * np.arange(fft.shape[0])
    
    # Compute spectrogram feature
    ind = np.where(freqs <= max_freq)[0][-1] + 1
    specgram = np.log(fft[:ind, :] + eps)
    return specgram

Arduino Code ¶

In [25]:
//---------------------------------lookup data------------------------------------//
byte isin_data[128]=
{0,  1,   3,   4,   5,   6,   8,   9,   10,  11,  13,  14,  15,  17,  18,  19,  20, 
22,  23,  24,  26,  27,  28,  29,  31,  32,  33,  35,  36,  37,  39,  40,  41,  42, 
44,  45,  46,  48,  49,  50,  52,  53,  54,  56,  57,  59,  60,  61,  63,  64,  65, 
67,  68,  70,  71,  72,  74,  75,  77,  78,  80,  81,  82,  84,  85,  87,  88,  90, 
91,  93,  94,  96,  97,  99,  100, 102, 104, 105, 107, 108, 110, 112, 113, 115, 117, 
118, 120, 122, 124, 125, 127, 129, 131, 133, 134, 136, 138, 140, 142, 144, 146, 148, 
150, 152, 155, 157, 159, 161, 164, 166, 169, 171, 174, 176, 179, 182, 185, 188, 191, 
195, 198, 202, 206, 210, 215, 221, 227, 236};
unsigned int Pow2[14]={1,2,4,8,16,32,64,128,256,512,1024,2048,4096};
byte RSSdata[20]={7,6,6,5,5,5,4,4,4,4,3,3,3,3,3,3,3,2,2,2};
//---------------------------------------------------------------------------------//


//int data[256]={};


void setup() 
        {
     Serial.begin(115200);  

     Serial.println("OK");
        }


void loop() {
  Serial.println("!");
int dd[]={0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
Serial.println("before");
float f=Approx_FFT(dd,16,100);
Serial.println("after");
Serial.println(f);
delay(100000);
            }


//-----------------------------FFT Function----------------------------------------------//
/*
Code to perform High speed and Accurate FFT on arduino,
setup:

1. in[]     : Data array, 
2. N        : Number of sample (recommended sample size 2,4,8,16,32,64,128,256,512...)
3. Frequency: sampling frequency required as input (Hz)

It will by default return frequency with max aplitude,
if you need complex output or magnitudes uncomment required sections

If sample size is not in power of 2 it will be clipped to lower side of number. 
i.e, for 150 number of samples, code will consider first 128 sample, remaining sample  will be omitted.
For Arduino nano, FFT of more than 256 sample not possible due to mamory limitation 
Code by ABHILASH
Contact: abhilashpatel121@gmail.com
Documentation & details: https://www.instructables.com/member/abhilash_patel/instructables/
*/

float Approx_FFT(int in[],int N,float Frequency)
{ 
int a,c1,f,o,x,data_max,data_min=0;
long data_avg,data_mag,temp11;         
byte scale,check=0;

data_max=0;
data_avg=0;
data_min=0;

for(int i=0;i<12;i++)                 //calculating the levels
{ 
  if(Pow2[i]<=N)
  {
    o=i;
   }
}
a=Pow2[o];  
int out_r[a];   //real part of transform
int out_im[a];  //imaginory part of transform

for(int i=0;i<a;i++)                //getting min max and average for scalling
{
  out_r[i]=0; out_im[i]=0;
  data_avg=data_avg+in[i];
  if(in[i]>data_max)
  {
    data_max=in[i];
    }
  if(in[i]<data_min)
  {
    data_min=in[i];
   } 
}

data_avg=data_avg>>o;
scale=0;
data_mag=data_max-data_min;
temp11=data_mag;

//scalling data  from +512 to -512

if(data_mag>1024)
{
  while(temp11>1024)
  {
    temp11=temp11>>1;
     scale=scale+1;
   }   
}

if(data_mag<1024)
{
  
  while(temp11<1024)
{
  temp11=temp11<<1;
  scale=scale+1;
  }
}


if(data_mag>1024)
{
      for(int i=0;i<a;i++)
        {
          in[i]=in[i]-data_avg;
          in[i]=in[i]>>scale;
        }
       scale=128-scale;
}

if(data_mag<1024)
{ 
  scale=scale-1;
  for(int i=0;i<a;i++)
    {
         in[i]=in[i]-data_avg;
          in[i]=in[i]<<scale;
     }

     scale=128+scale;
}


x=0;  
for(int b=0;b<o;b++)                     // bit reversal order stored in im_out array
         {
          c1=Pow2[b];
          f=Pow2[o]/(c1+c1);
                for(int j=0;j<c1;j++)
                    { 
                     x=x+1;
                     out_im[x]=out_im[j]+f;
                    }
         }

for(int i=0;i<a;i++)            // update input array as per bit reverse order
         {
          out_r[i]=in[out_im[i]]; 
          out_im[i]=0;
         }


int i10,i11,n1,tr,ti;
float e;
int c,s,temp4;

for(int i=0;i<o;i++)                                    //fft
{
     i10=Pow2[i];              // overall values of sine/cosine  
     i11=Pow2[o]/Pow2[i+1];    // loop with similar sine cosine
     e=1024/Pow2[i+1];  //1024 is equivalent to 360 deg
     e=0-e;
     n1=0;

          for(int j=0;j<i10;j++)
          {
            c=e*j;    //c is angle as where 1024 unit is 360 deg
    while(c<0){c=c+1024;}
    while(c>1024){c=c-1024;}

          n1=j;
          
          for(int k=0;k<i11;k++)
                 {
                   temp4=i10+n1;
       if(c==0)   {tr=out_r[temp4];
                   ti=out_im[temp4];}
    else if(c==256) {tr= -out_im[temp4];
                   ti=out_r[temp4];}
    else if(c==512) {tr=-out_r[temp4];
                  ti=-out_im[temp4];}
    else if(c==768) {tr=out_im[temp4];
                   ti=-out_r[temp4];}
    else if(c==1024){tr=out_r[temp4];
                   ti=out_im[temp4];}
    else{
    tr=fast_cosine(out_r[temp4],c)-fast_sine(out_im[temp4],c);            //the fast sine/cosine function gives direct (approx) output for A*sinx
    ti=fast_sine(out_r[temp4],c)+fast_cosine(out_im[temp4],c);            
      }
          
       out_r[n1+i10]=out_r[n1]-tr;
       out_r[n1]=out_r[n1]+tr;
       if(out_r[n1]>15000 || out_r[n1]<-15000){check=1;}   //check for int size, it can handle only +31000 to -31000,
          
       out_im[n1+i10]=out_im[n1]-ti;
       out_im[n1]=out_im[n1]+ti;
       if(out_im[n1]>15000 || out_im[n1]<-15000){check=1;}          
          
        n1=n1+i10+i10;
        }   // for int k=    
     }// for int j=

     if(check==1)
     {                                             // scalling the matrics if value higher than 15000 to prevent varible from overflowing
                for(int i=0;i<a;i++)
                    {
                     out_r[i]=out_r[i]>>1;           
                     out_im[i]=out_im[i]>>1; 
                    }
                     check=0; 
                     scale=scale-1;                 // tracking overall scalling of input data
     }           

} //for int i=


if(scale>128)
{
  scale=scale-128;
  for(int i=0;i<a;i++)
  {
    out_r[i]=out_r[i]>>scale;
    out_im[i]=out_im[i]>>scale;
   }
  scale=0;
}                                                   // revers all scalling we done till here,
else
{
  scale=128-scale;
  
}                             // in case of nnumber getting higher than 32000, we will represent in as multiple of 2^scale


for(int i=0;i<a;i++)
{
Serial.print(out_r[i]);Serial.print("\t");                    // un comment to print RAW o/p    
Serial.print(out_im[i]); 
Serial.print("i");Serial.print("\t"); 
Serial.print("*2^");Serial.println(scale); 
}


//---> here onward out_r contains amplitude and our_in conntains frequency (Hz)
int fout,fm,fstp;
float fstep;
fstep=Frequency/N;
fstp=fstep;
fout=0;fm=0;

for(int i=1;i<Pow2[o-1];i++)               // getting amplitude from compex number
{ 
out_r[i]=fastRSS(out_r[i],out_im[i]);
   // Approx RSS function used to calculated magnitude quickly       
out_im[i]=out_im[i-1]+fstp;
if (fout<out_r[i])
{
  fm=i; 
  fout=out_r[i];
}
         
         // un comment to print Amplitudes (1st value (offset) is not printed)
         Serial.print(out_r[i]); Serial.print("\t"); 
         Serial.print("*2^");Serial.println(scale); 
}


float fa,fb,fc;
fa=out_r[fm-1];
fb=out_r[fm]; 
fc=out_r[fm+1];
fstep=(fa*(fm-1)+fb*fm+fc*(fm+1))/(fa+fb+fc);

return(fstep*Frequency/N);
}

//---------------------------------fast sine/cosine---------------------------------------//

int fast_sine(int Amp, int th)
{
int temp3,m1,m2;
byte temp1,temp2, test,quad,accuracy;
accuracy=5;    // set it value from 1 to 7, where 7 being most accurate but slowest
               // accuracy value of 5 recommended for typical applicaiton
while(th>1024)
{
  th=th-1024;
}   // here 1024 = 2*pi or 360 deg

while(th<0)
{
  th=th+1024;
}
quad=th>>8;

if(quad==1){th= 512-th;}
else if(quad==2){th= th-512;}
else if(quad==3){th= 1024-th;}

temp1= 0;
temp2= 128;     //2 multiple
m1=0;
m2=Amp;

temp3=(m1+m2)>>1;
Amp=temp3;
for(int i=0;i<accuracy;i++)
{ 
  test=(temp1+temp2)>>1;
  temp3=temp3>>1; 
  if(th>isin_data[test])
  {
      temp1=test; 
      Amp=Amp+temp3; 
      m1=Amp;
   }
   else 
      if(th<isin_data[test])
      {
        temp2=test; 
        Amp=Amp-temp3; 
        m2=Amp;
        }
}

if(quad==2)
{
  Amp= 0-Amp;
}
else 
   if(quad==3)
   {
    Amp= 0-Amp;
   }
   
return Amp;
}

int fast_cosine(int Amp, int th)
  {  
  th=256-th;  //cos th = sin (90-th) formula
  return(fast_sine(Amp,th)); 
  }

//--------------------------------------------------------------------------------//


//--------------------------------Fast RSS----------------------------------------//
int fastRSS(int a, int b)
{ 
  if(a==0 && b==0)
  {
    return(0);
  }
  int min,max,temp1,temp2;
  byte clevel;
  if(a<0)
  {
    a=-a;
  }
  if(b<0)
  {
    b=-b;
  }
  clevel=0;
  
  if(a>b)
  {
    max=a;
    min=b;
  } 
  else
  {
    max=b;
    min=a;
  }

  
  if(max>(min+min+min))
  {
    return max;
  }
  else
    {
     temp1=min>>3; if(temp1==0){temp1=1;}
     temp2=min;
     while(temp2<max)
     {
        temp2=temp2+temp1;
        clevel=clevel+1;
     }
     temp2=RSSdata[clevel];
     temp1=temp1>>1;  
     for(int i=0;i<temp2;i++)
          {
            max=max+temp1;
           }
     return max ;
    }
}
  File "/tmp/ipykernel_2114829/2462532610.py", line 1
    //---------------------------------lookup data------------------------------------//
    ^
SyntaxError: invalid syntax

Python version ¶

In [ ]:
 

Tuto TF off¶

https://www.tensorflow.org/tutorials/audio/simple_audio

In [7]:
#!pip install librosa
#!pip install seaborn
#!pip uninstall tensorflow -y
In [1]:
import os
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import models
from IPython import display

# Set the seed value for experiment reproducibility.
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

print(tf.__version__)
2.8.0
In [2]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
Num GPUs Available:  0
2022-03-29 20:12:17.264737: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:
2022-03-29 20:12:17.264770: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
In [5]:
DATASET_PATH = 'data/mini_speech_commands'

data_dir = pathlib.Path(DATASET_PATH)
if not data_dir.exists():
  tf.keras.utils.get_file(
      'mini_speech_commands.zip',
      origin="http://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zip",
      extract=True,
      cache_dir='.', cache_subdir='data')
In [4]:
DATASET_PATH = 'data/imds'
data_dir = pathlib.Path(DATASET_PATH)
In [6]:
commands = np.array(tf.io.gfile.listdir(str(data_dir)))
commands = commands[commands != 'README.md']
print('Commands:', commands)
Commands: ['right' 'go' 'no' 'left' 'stop' 'up' 'down' 'yes']
In [7]:
filenames = tf.io.gfile.glob(str(data_dir) + '/*/*')
filenames = tf.random.shuffle(filenames)
num_samples = len(filenames)
print('Number of total examples:', num_samples)
print('Number of examples per label:',
      len(tf.io.gfile.listdir(str(data_dir/commands[0]))))
print('Example file tensor:', filenames[0])
Number of total examples: 8000
Number of examples per label: 1000
Example file tensor: tf.Tensor(b'data/mini_speech_commands/up/918a2473_nohash_1.wav', shape=(), dtype=string)
In [9]:
train_files = filenames[:6400]
val_files = filenames[6400: 6400 + 800]
test_files = filenames[-800:]

print('Training set size', len(train_files))
print('Validation set size', len(val_files))
print('Test set size', len(test_files))
Training set size 6400
Validation set size 800
Test set size 800
In [86]:
print(filenames[5])
tf.Tensor(b'data/imds/avance/sd1.raw', shape=(), dtype=string)

Lire les fichiers audio et leurs étiquettes¶

Dans cette section, vous allez prétraiter l'ensemble de données, en créant des tenseurs décodés pour les formes d'onde et les étiquettes correspondantes. Noter que:

Chaque fichier WAV contient des données de séries chronologiques avec un nombre défini d'échantillons par seconde. Chaque échantillon représente l' amplitude du signal audio à ce moment précis. Dans un système 16 bits , comme les fichiers WAV du jeu de données Mini Speech Commands, les valeurs d'amplitude vont de -32 768 à 32 767. Le taux d'échantillonnage pour cet ensemble de données est de 16 kHz. La forme du tenseur renvoyé par tf.audio.decode_wav est [samples, channels] , où channels est 1 pour mono ou 2 pour stéréo. Le jeu de données mini Speech Commands ne contient que des enregistrements mono.

In [13]:
test_file = tf.io.read_file(DATASET_PATH+'/down/0a9f9af7_nohash_0.wav')
test_audio, _ = tf.audio.decode_wav(contents=test_file)
test_audio.shape
Out[13]:
TensorShape([13654, 1])

Définissons maintenant une fonction qui prétraite les fichiers audio WAV bruts de l'ensemble de données en tenseurs audio :¶

In [10]:
def decode_audio(audio_binary):
  # Decode WAV-encoded audio files to `float32` tensors, normalized
  # to the [-1.0, 1.0] range. Return `float32` audio and a sample rate.
  audio, _ = tf.audio.decode_wav(contents=audio_binary)
  # Since all the data is single channel (mono), drop the `channels`
  # axis from the array.
  return tf.squeeze(audio, axis=-1)
In [87]:
def decode_audio(audio_binary):
  # Decode WAV-encoded audio files to `float32` tensors, normalized
  # to the [-1.0, 1.0] range. Return `float32` audio and a sample rate.
  audio=audio_binary
  # Since all the data is single channel (mono), drop the `channels`
  # axis from the array.
  #return tf.squeeze(audio, axis=-1)
  return audio

Définissez une fonction qui crée des étiquettes à l'aide des répertoires parents pour chaque fichier :¶

Divisez les chemins de fichiers en tf.RaggedTensor s (tenseurs aux dimensions irrégulières, avec des tranches pouvant avoir des longueurs différentes).

In [11]:
def get_label(file_path):
  parts = tf.strings.split(
      input=file_path,
      sep=os.path.sep)
  # Note: You'll use indexing here instead of tuple unpacking to enable this
  # to work in a TensorFlow graph.
  return parts[-2]

Définissez une autre fonction d'assistance — get_waveform_and_label — qui rassemble le tout :¶

L'entrée est le nom du fichier audio WAV. La sortie est un tuple contenant l'audio et les tenseurs d'étiquettes prêts pour l'apprentissage supervisé.

In [12]:
def get_waveform_and_label(file_path):
  label = get_label(file_path)
  audio_binary = tf.io.read_file(file_path)
  waveform = decode_audio(audio_binary)
  return waveform, label

Créez l'ensemble d'entraînement pour extraire les paires d'étiquettes audio :¶

Créez un tf.data.Dataset avec Dataset.from_tensor_slices et Dataset.map , en utilisant get_waveform_and_label défini précédemment. Vous créerez les ensembles de validation et de test à l'aide d'une procédure similaire ultérieurement.

In [13]:
AUTOTUNE = tf.data.AUTOTUNE

files_ds = tf.data.Dataset.from_tensor_slices(train_files)

waveform_ds = files_ds.map(
    map_func=get_waveform_and_label,
    num_parallel_calls=AUTOTUNE)
In [15]:
rows = 3
cols = 3
n = rows * cols
fig, axes = plt.subplots(rows, cols, figsize=(10, 12))

for i, (audio, label) in enumerate(waveform_ds.take(n)):
  r = i // cols
  c = i % cols
  ax = axes[r][c]
  ax.plot(audio.numpy())
  ax.set_yticks(np.arange(-1.2, 1.2, 0.2))
  label = label.numpy().decode('utf-8')
  ax.set_title(label)

plt.show()
No description has been provided for this image

Traçons quelques formes d'onde audio :¶

In [16]:
def get_spectrogram(waveform):
  # Zero-padding for an audio waveform with less than 16,000 samples.
  input_len = 16000
  waveform = waveform[:input_len]
  zero_padding = tf.zeros(
      [16000] - tf.shape(waveform),
      dtype=tf.float32)
  # Cast the waveform tensors' dtype to float32.
  waveform = tf.cast(waveform, dtype=tf.float32)
  # Concatenate the waveform with `zero_padding`, which ensures all audio
  # clips are of the same length.
  equal_length = tf.concat([waveform, zero_padding], 0)
  # Convert the waveform to a spectrogram via a STFT.
  spectrogram = tf.signal.stft(
      equal_length, frame_length=255, frame_step=128)
  # Obtain the magnitude of the STFT.
  spectrogram = tf.abs(spectrogram)
  # Add a `channels` dimension, so that the spectrogram can be used
  # as image-like input data with convolution layers (which expect
  # shape (`batch_size`, `height`, `width`, `channels`).
  spectrogram = spectrogram[..., tf.newaxis]
  return spectrogram
In [17]:
for waveform, label in waveform_ds.take(1):
  label = label.numpy().decode('utf-8')
  spectrogram = get_spectrogram(waveform)

print('Label:', label)
print('Waveform shape:', waveform.shape)
print('Spectrogram shape:', spectrogram.shape)
print('Audio playback')
display.display(display.Audio(waveform, rate=16000))
Label: up
Waveform shape: (15153,)
Spectrogram shape: (124, 129, 1)
Audio playback
Your browser does not support the audio element.
In [18]:
def plot_spectrogram(spectrogram, ax):
  if len(spectrogram.shape) > 2:
    assert len(spectrogram.shape) == 3
    spectrogram = np.squeeze(spectrogram, axis=-1)
  # Convert the frequencies to log scale and transpose, so that the time is
  # represented on the x-axis (columns).
  # Add an epsilon to avoid taking a log of zero.
  log_spec = np.log(spectrogram.T + np.finfo(float).eps)
  height = log_spec.shape[0]
  width = log_spec.shape[1]
  X = np.linspace(0, np.size(spectrogram), num=width, dtype=int)
  Y = range(height)
  ax.pcolormesh(X, Y, log_spec)
In [19]:
fig, axes = plt.subplots(2, figsize=(12, 8))
timescale = np.arange(waveform.shape[0])
axes[0].plot(timescale, waveform.numpy())
axes[0].set_title('Waveform')
axes[0].set_xlim([0, 16000])

plot_spectrogram(spectrogram.numpy(), axes[1])
axes[1].set_title('Spectrogram')
plt.show()
No description has been provided for this image
In [20]:
def get_spectrogram_and_label_id(audio, label):
  spectrogram = get_spectrogram(audio)
  label_id = tf.argmax(label == commands)
  return spectrogram, label_id
In [21]:
spectrogram_ds = waveform_ds.map(
  map_func=get_spectrogram_and_label_id,
  num_parallel_calls=AUTOTUNE)
In [25]:
rows = 3
cols = 3
n = rows*cols
fig, axes = plt.subplots(rows, cols, figsize=(10, 10))

for i, (spectrogram, label_id) in enumerate(spectrogram_ds.take(n)):
  r = i // cols
  c = i % cols
  ax = axes[r][c]
  plot_spectrogram(spectrogram.numpy(), ax)
  ax.set_title(commands[label_id.numpy()])
  ax.axis('off')

plt.show()
No description has been provided for this image
In [29]:
#print(spectrogram.numpy()[:10])
In [30]:
print(spectrogram.shape)
(124, 129, 1)

Arduino Spectrom FFT ¶

image.png

In [95]:
def im_stft(signal, frame_length=128, frame_step=64):
    #cast signal to np.int31
    signal=np.int32(signal)
    nbsamples=np.size(signal)
    nbsteps=np.int16((nbsamples-frame_length)/frame_step)
    spec_out=np.ndarray(shape=(nbsteps,np.int16(frame_length/2)), dtype=np.complex64)
    print(nbsamples, nbsteps,frame_length,frame_step)
    for i in np.arange(nbsteps):
        #com=np.fft.fft(signal[i*frame_step:i*frame_step+frame_length] ) #Approx_FFT(signal[i*frame_step:i*frame_step+frame_length],frame_length,1)
        #cc=out_r+1j*out_im
        spec_out[i]=imfft(signal[i*frame_step:i*frame_step+frame_length],frame_length)[:np.int16(frame_length/2)]
    ret=tf.convert_to_tensor (spec_out)
    return ret
        
    
In [96]:
def get_im_spectrogram(waveform):
  # Zero-padding for an audio waveform with less than 16,000 samples.
  input_len = 4000
  waveform = waveform[:input_len]
  zero_padding = tf.zeros(
      [4000] - tf.shape(waveform),
      dtype=tf.float32)
  # Cast the waveform tensors' dtype to float32.
  waveform = tf.cast(waveform, dtype=tf.float32)
  # Concatenate the waveform with `zero_padding`, which ensures all audio
  # clips are of the same length.
  equal_length = tf.concat([waveform, zero_padding], 0)
  # Convert the waveform to a spectrogram via a STFT.
  # A [..., frames, fft_unique_bins] Tensor of complex64/complex128 STFT values where fft_unique_bins is fft_length // 2 + 1 (the unique components of the FFT).
  spectrogram = im_stft(equal_length, frame_length=128, frame_step=64)
  # Obtain the magnitude of the STFT.
  spectrogram = tf.abs(spectrogram)
  # Add a `channels` dimension, so that the spectrogram can be used
  # as image-like input data with convolution layers (which expect
  # shape (`batch_size`, `height`, `width`, `channels`).
  spectrogram = spectrogram[..., tf.newaxis]
  return spectrogram
In [57]:
type(waveform)
Out[57]:
numpy.ndarray
In [65]:
waveform=np.fromfile('data/imds/avance/sd8.raw', dtype='uint8')
In [66]:
ww=waveform
print(ww.shape)
(4000,)
In [98]:
for waveform, label in waveform_ds.take(1):
  label = label.numpy().decode('utf-8')
  spectrogram = get_im_spectrogram(waveform)

print('Label:', label)
print('Waveform shape:', waveform.shape)
print('Spectrogram shape:', spectrogram.shape)
print('Audio playback')
display.display(display.Audio(waveform, rate=4000))
2022-03-28 16:45:16.051429: W tensorflow/core/framework/op_kernel.cc:1745] OP_REQUIRES failed at strided_slice_op.cc:108 : INVALID_ARGUMENT: Index out of range using input dim 0; input has only 0 dims
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
/tmp/ipykernel_3251782/2707579568.py in <module>
      1 for waveform, label in waveform_ds.take(1):
      2   label = label.numpy().decode('utf-8')
----> 3   spectrogram = get_im_spectrogram(waveform)
      4 
      5 print('Label:', label)

/tmp/ipykernel_3251782/2567817311.py in get_im_spectrogram(waveform)
      2   # Zero-padding for an audio waveform with less than 16,000 samples.
      3   input_len = 4000
----> 4   waveform = waveform[:input_len]
      5   zero_padding = tf.zeros(
      6       [4000] - tf.shape(waveform),

~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/util/traceback_utils.py in error_handler(*args, **kwargs)
    151     except Exception as e:
    152       filtered_tb = _process_traceback_frames(e.__traceback__)
--> 153       raise e.with_traceback(filtered_tb) from None
    154     finally:
    155       del filtered_tb

~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/ops.py in raise_from_not_ok_status(e, name)
   7184 def raise_from_not_ok_status(e, name):
   7185   e.message += (" name: " + name if name is not None else "")
-> 7186   raise core._status_to_exception(e) from None  # pylint: disable=protected-access
   7187 
   7188 

InvalidArgumentError: Index out of range using input dim 0; input has only 0 dims [Op:StridedSlice] name: strided_slice/
In [68]:
print(waveform.shape)
(4000,)
In [69]:
print(ww[12000:12100])
[]
In [70]:
def plot_spectrogram(spectrogram, ax):
  if len(spectrogram.shape) > 2:
    assert len(spectrogram.shape) == 3
    spectrogram = np.squeeze(spectrogram, axis=-1)
  # Convert the frequencies to log scale and transpose, so that the time is
  # represented on the x-axis (columns).
  # Add an epsilon to avoid taking a log of zero.
  log_spec = np.log(spectrogram.T + np.finfo(float).eps)
  height = log_spec.shape[0]
  width = log_spec.shape[1]
  X = np.linspace(0, np.size(spectrogram), num=width, dtype=int)
  Y = range(height)
  ax.pcolormesh(X, Y, log_spec)
In [71]:
fig, axes = plt.subplots(2, figsize=(12, 8))
timescale = np.arange(waveform.shape[0])
axes[0].plot(timescale, waveform)
axes[0].set_title('Waveform')
axes[0].set_xlim([0, 4000])

plot_spectrogram(spectrogram.numpy(), axes[1])
axes[1].set_title('Spectrogram')
plt.show()
No description has been provided for this image
In [55]:
#print(spectrogram.numpy()[:10])
#print(spectrogram.shape)

Maintenant, définissez une fonction qui transforme l'ensemble de données de forme d'onde en spectrogrammes et leurs étiquettes correspondantes en identifiants entiers :

In [33]:
def get_spectrogram_and_label_id(audio, label):
  type(audio)
  ww=audio*16000
  spectrogram = get_spectrogram(ww)
  label_id = tf.argmax(label == commands)
  return spectrogram, label_id
In [43]:
def get_spectrogram_and_label_id_im(audio, label):
  type(audio)
  ww=audio*16000
  spectrogram = get_im_spectrogram(tf.compat.v1.Session().run(ww))
  label_id = tf.argmax(label == commands)
  return spectrogram, label_id

get_spectrogram_and_label_id sur les éléments de l'ensemble de données avec Dataset.map :

In [35]:
spectrogram_ds = waveform_ds.map(
  map_func=get_spectrogram_and_label_id,
  num_parallel_calls=AUTOTUNE)
In [36]:
type(spectrogram_ds)
from tensorflow.python.ops.math_ops import reduce_prod
In [44]:
spectrogram_ds_im = waveform_ds.map(
  map_func=get_spectrogram_and_label_id_im,
  num_parallel_calls=AUTOTUNE)
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
/tmp/ipykernel_3081950/4286887239.py in <module>
----> 1 spectrogram_ds_im = waveform_ds.map(
      2   map_func=get_spectrogram_and_label_id_im,
      3   num_parallel_calls=AUTOTUNE)

~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py in map(self, map_func, num_parallel_calls, deterministic, name)
   2016       return MapDataset(self, map_func, preserve_cardinality=True, name=name)
   2017     else:
-> 2018       return ParallelMapDataset(
   2019           self,
   2020           map_func,

~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py in __init__(self, input_dataset, map_func, num_parallel_calls, deterministic, use_inter_op_parallelism, preserve_cardinality, use_legacy_function, name)
   5232     self._input_dataset = input_dataset
   5233     self._use_inter_op_parallelism = use_inter_op_parallelism
-> 5234     self._map_func = structured_function.StructuredFunctionWrapper(
   5235         map_func,
   5236         self._transformation_name(),

~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/structured_function.py in __init__(self, func, transformation_name, dataset, input_classes, input_shapes, input_types, input_structure, add_to_graph, use_legacy_function, defun_kwargs)
    269         fn_factory = trace_tf_function(defun_kwargs)
    270 
--> 271     self._function = fn_factory()
    272     # There is no graph to add in eager mode.
    273     add_to_graph &= not context.executing_eagerly()

~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py in get_concrete_function(self, *args, **kwargs)
   3068          or `tf.Tensor` or `tf.TensorSpec`.
   3069     """
-> 3070     graph_function = self._get_concrete_function_garbage_collected(
   3071         *args, **kwargs)
   3072     graph_function._garbage_collector.release()  # pylint: disable=protected-access

~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _get_concrete_function_garbage_collected(self, *args, **kwargs)
   3034       args, kwargs = None, None
   3035     with self._lock:
-> 3036       graph_function, _ = self._maybe_define_function(args, kwargs)
   3037       seen_names = set()
   3038       captured = object_identity.ObjectIdentitySet(

~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _maybe_define_function(self, args, kwargs)
   3290 
   3291           self._function_cache.add_call_context(cache_key.call_context)
-> 3292           graph_function = self._create_graph_function(args, kwargs)
   3293           self._function_cache.add(cache_key, cache_key_deletion_observer,
   3294                                    graph_function)

~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
   3128     arg_names = base_arg_names + missing_arg_names
   3129     graph_function = ConcreteFunction(
-> 3130         func_graph_module.func_graph_from_py_func(
   3131             self._name,
   3132             self._python_function,

~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes, acd_record_initial_resource_uses)
   1159         _, original_func = tf_decorator.unwrap(python_func)
   1160 
-> 1161       func_outputs = python_func(*func_args, **func_kwargs)
   1162 
   1163       # invariant: `func_outputs` contains only Tensors, CompositeTensors,

~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/structured_function.py in wrapped_fn(*args)
    246           attributes=defun_kwargs)
    247       def wrapped_fn(*args):  # pylint: disable=missing-docstring
--> 248         ret = wrapper_helper(*args)
    249         ret = structure.to_tensor_list(self._output_structure, ret)
    250         return [ops.convert_to_tensor(t) for t in ret]

~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/structured_function.py in wrapper_helper(*args)
    175       if not _should_unpack(nested_args):
    176         nested_args = (nested_args,)
--> 177       ret = autograph.tf_convert(self._func, ag_ctx)(*nested_args)
    178       if _should_pack(ret):
    179         ret = tuple(ret)

~/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
    690       except Exception as e:  # pylint:disable=broad-except
    691         if hasattr(e, 'ag_error_metadata'):
--> 692           raise e.ag_error_metadata.to_exception(e)
    693         else:
    694           raise

InvalidArgumentError: in user code:

    File "/tmp/ipykernel_3081950/832668025.py", line 4, in get_spectrogram_and_label_id_im  *
        spectrogram = get_im_spectrogram(tf.compat.v1.Session().run(ww))

    InvalidArgumentError: Graph execution error:
    
    Detected at node 'args_0' defined at (most recent call last):
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/runpy.py", line 194, in _run_module_as_main
          return _run_code(code, main_globals, None,
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/runpy.py", line 87, in _run_code
          exec(code, run_globals)
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module>
          app.launch_new_instance()
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/traitlets/config/application.py", line 846, in launch_instance
          app.start()
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 677, in start
          self.io_loop.start()
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start
          self.asyncio_loop.run_forever()
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
          self._run_once()
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
          handle._run()
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/events.py", line 81, in _run
          self._context.run(self._callback, *self._args)
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 457, in dispatch_queue
          await self.process_one()
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 446, in process_one
          await dispatch(*args)
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 353, in dispatch_shell
          await result
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 648, in execute_request
          reply_content = await reply_content
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 353, in do_execute
          res = shell.run_cell(code, store_history=store_history, silent=silent)
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
          return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2901, in run_cell
          result = self._run_cell(
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2947, in _run_cell
          return runner(coro)
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
          coro.send(None)
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3172, in run_cell_async
          has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3364, in run_ast_nodes
          if (await self.run_code(code, result,  async_=asy)):
        File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3444, in run_code
          exec(code_obj, self.user_global_ns, self.user_ns)
        File "/tmp/ipykernel_3081950/4286887239.py", line 1, in <module>
          spectrogram_ds_im = waveform_ds.map(
    Node: 'args_0'
    You must feed a value for placeholder tensor 'args_0' with dtype float and shape [?]
    	 [[{{node args_0}}]]
    
    Original stack trace for 'args_0':
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/runpy.py", line 194, in _run_module_as_main
        return _run_code(code, main_globals, None,
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/runpy.py", line 87, in _run_code
        exec(code, run_globals)
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module>
        app.launch_new_instance()
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/traitlets/config/application.py", line 846, in launch_instance
        app.start()
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 677, in start
        self.io_loop.start()
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start
        self.asyncio_loop.run_forever()
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
        self._run_once()
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
        handle._run()
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/events.py", line 81, in _run
        self._context.run(self._callback, *self._args)
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 457, in dispatch_queue
        await self.process_one()
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 446, in process_one
        await dispatch(*args)
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 353, in dispatch_shell
        await result
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 648, in execute_request
        reply_content = await reply_content
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 353, in do_execute
        res = shell.run_cell(code, store_history=store_history, silent=silent)
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
        return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2901, in run_cell
        result = self._run_cell(
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2947, in _run_cell
        return runner(coro)
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
        coro.send(None)
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3172, in run_cell_async
        has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3364, in run_ast_nodes
        if (await self.run_code(code, result,  async_=asy)):
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3444, in run_code
        exec(code_obj, self.user_global_ns, self.user_ns)
      File "/tmp/ipykernel_3081950/4286887239.py", line 1, in <module>
        spectrogram_ds_im = waveform_ds.map(
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2018, in map
        return ParallelMapDataset(
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 5234, in __init__
        self._map_func = structured_function.StructuredFunctionWrapper(
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/structured_function.py", line 271, in __init__
        self._function = fn_factory()
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3070, in get_concrete_function
        graph_function = self._get_concrete_function_garbage_collected(
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3036, in _get_concrete_function_garbage_collected
        graph_function, _ = self._maybe_define_function(args, kwargs)
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3292, in _maybe_define_function
        graph_function = self._create_graph_function(args, kwargs)
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3130, in _create_graph_function
        func_graph_module.func_graph_from_py_func(
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 1075, in func_graph_from_py_func
        func_args = _get_defun_inputs_from_args(
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 1313, in _get_defun_inputs_from_args
        return _get_defun_inputs(
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 1386, in _get_defun_inputs
        placeholder = graph_placeholder(
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/graph_only_ops.py", line 34, in graph_placeholder
        op = g._create_op_internal(  # pylint: disable=protected-access
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 693, in _create_op_internal
        return super(FuncGraph, self)._create_op_internal(  # pylint: disable=protected-access
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 3776, in _create_op_internal
        ret = Operation(
      File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 2175, in __init__
        self._traceback = tf_stack.extract_stack_for_node(self._c_op)
    


Original stack trace for 'args_0':
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/traitlets/config/application.py", line 846, in launch_instance
    app.start()
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 677, in start
    self.io_loop.start()
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
    self._run_once()
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
    handle._run()
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/asyncio/events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 457, in dispatch_queue
    await self.process_one()
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 446, in process_one
    await dispatch(*args)
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 353, in dispatch_shell
    await result
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 648, in execute_request
    reply_content = await reply_content
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 353, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2901, in run_cell
    result = self._run_cell(
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2947, in _run_cell
    return runner(coro)
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3172, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3364, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3444, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_3081950/4286887239.py", line 1, in <module>
    spectrogram_ds_im = waveform_ds.map(
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 2018, in map
    return ParallelMapDataset(
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 5234, in __init__
    self._map_func = structured_function.StructuredFunctionWrapper(
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/data/ops/structured_function.py", line 271, in __init__
    self._function = fn_factory()
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3070, in get_concrete_function
    graph_function = self._get_concrete_function_garbage_collected(
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3036, in _get_concrete_function_garbage_collected
    graph_function, _ = self._maybe_define_function(args, kwargs)
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3292, in _maybe_define_function
    graph_function = self._create_graph_function(args, kwargs)
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3130, in _create_graph_function
    func_graph_module.func_graph_from_py_func(
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 1075, in func_graph_from_py_func
    func_args = _get_defun_inputs_from_args(
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 1313, in _get_defun_inputs_from_args
    return _get_defun_inputs(
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 1386, in _get_defun_inputs
    placeholder = graph_placeholder(
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/eager/graph_only_ops.py", line 34, in graph_placeholder
    op = g._create_op_internal(  # pylint: disable=protected-access
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 693, in _create_op_internal
    return super(FuncGraph, self)._create_op_internal(  # pylint: disable=protected-access
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 3776, in _create_op_internal
    ret = Operation(
  File "/feynman/home/dedip/lis/im267926/.conda/envs/ime/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 2175, in __init__
    self._traceback = tf_stack.extract_stack_for_node(self._c_op)
In [ ]:
for waveform, label in waveform_ds:
  ww=waveform*16000
  label = label.numpy().decode('utf-8')
  spectrogram = get_im_spectrogram(ww)
  spectrogram_ds_im
In [58]:
np.__version__
Out[58]:
'1.22.3'

Examinez les spectrogrammes pour différents exemples de l'ensemble de données :

In [219]:
rows = 3
cols = 3
n = rows*cols
fig, axes = plt.subplots(rows, cols, figsize=(10, 10))

for i, (spectrogram, label_id) in enumerate(spectrogram_ds.take(n)):
  r = i // cols
  c = i % cols
  ax = axes[r][c]
  plot_spectrogram(spectrogram.numpy(), ax)
  ax.set_title(commands[label_id.numpy()])
  ax.axis('off')

plt.show()
No description has been provided for this image

Construire et entraîner le modèle¶

Répétez le prétraitement de l'ensemble d'entraînement sur les ensembles de validation et de test :

In [311]:
def preprocess_dataset(files):
  files_ds = tf.data.Dataset.from_tensor_slices(files)
  output_ds = files_ds.map(
      map_func=get_waveform_and_label,
      num_parallel_calls=AUTOTUNE)
  output_ds = output_ds.map(
      map_func=get_spectrogram_and_label_id,
      num_parallel_calls=AUTOTUNE)
  return output_ds
In [312]:
train_ds = spectrogram_ds
val_ds = preprocess_dataset(val_files)
test_ds = preprocess_dataset(test_files)

Regroupez les ensembles d'entraînement et de validation pour l'entraînement du modèle :

In [313]:
batch_size = 64
train_ds = train_ds.batch(batch_size)
val_ds = val_ds.batch(batch_size)

Ajoutez les opérations Dataset.cache et Dataset.prefetch pour réduire la latence de lecture lors de l'entraînement du modèle :

In [314]:
train_ds = train_ds.cache().prefetch(AUTOTUNE)
val_ds = val_ds.cache().prefetch(AUTOTUNE)

Pour le modèle, vous utiliserez un simple réseau de neurones à convolution (CNN), puisque vous avez transformé les fichiers audio en images de spectrogramme.

Votre modèle tf.keras.Sequential utilisera les couches de prétraitement Keras suivantes :

tf.keras.layers.Resizing : pour sous-échantillonner l'entrée afin de permettre au modèle de s'entraîner plus rapidement. tf.keras.layers.Normalization : pour normaliser chaque pixel de l'image en fonction de sa moyenne et de son écart type. Pour la couche de Normalization , sa méthode adapt devrait d'abord être appelée sur les données d'apprentissage afin de calculer des statistiques agrégées (c'est-à-dire la moyenne et l'écart type).

In [315]:
for spectrogram, _ in spectrogram_ds.take(1):
  input_shape = spectrogram.shape
print('Input shape:', input_shape)
num_labels = len(commands)

# Instantiate the `tf.keras.layers.Normalization` layer.
norm_layer = layers.Normalization()
# Fit the state of the layer to the spectrograms
# with `Normalization.adapt`.
norm_layer.adapt(data=spectrogram_ds.map(map_func=lambda spec, label: spec))

model = models.Sequential([
    layers.Input(shape=input_shape),
    # Downsample the input.
    layers.Resizing(32, 32),
    # Normalize.
    norm_layer,
    layers.Conv2D(32, 3, activation='relu'),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_labels),
])

model.summary()
Input shape: (124, 129, 1)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 resizing (Resizing)         (None, 32, 32, 1)         0         
                                                                 
 normalization (Normalizatio  (None, 32, 32, 1)        3         
 n)                                                              
                                                                 
 conv2d (Conv2D)             (None, 30, 30, 32)        320       
                                                                 
 conv2d_1 (Conv2D)           (None, 28, 28, 64)        18496     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 64)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 14, 14, 64)        0         
                                                                 
 flatten (Flatten)           (None, 12544)             0         
                                                                 
 dense (Dense)               (None, 128)               1605760   
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 8)                 1032      
                                                                 
=================================================================
Total params: 1,625,611
Trainable params: 1,625,608
Non-trainable params: 3
_________________________________________________________________

Configurez le modèle Keras avec l'optimiseur Adam et la perte d'entropie croisée :

In [316]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
In [317]:
EPOCHS = 10
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=tf.keras.callbacks.EarlyStopping(verbose=1, patience=2),
)
Epoch 1/10
100/100 [==============================] - 122s 867ms/step - loss: 1.7344 - accuracy: 0.3734 - val_loss: 1.3217 - val_accuracy: 0.5550
Epoch 2/10
100/100 [==============================] - 3s 31ms/step - loss: 1.1920 - accuracy: 0.5728 - val_loss: 0.9821 - val_accuracy: 0.6762
Epoch 3/10
100/100 [==============================] - 3s 31ms/step - loss: 0.8940 - accuracy: 0.6831 - val_loss: 0.8034 - val_accuracy: 0.7275
Epoch 4/10
100/100 [==============================] - 3s 31ms/step - loss: 0.7548 - accuracy: 0.7280 - val_loss: 0.7289 - val_accuracy: 0.7425
Epoch 5/10
100/100 [==============================] - 3s 31ms/step - loss: 0.6399 - accuracy: 0.7719 - val_loss: 0.6883 - val_accuracy: 0.7588
Epoch 6/10
100/100 [==============================] - 3s 32ms/step - loss: 0.5737 - accuracy: 0.7920 - val_loss: 0.6596 - val_accuracy: 0.7925
Epoch 7/10
100/100 [==============================] - 3s 34ms/step - loss: 0.5105 - accuracy: 0.8189 - val_loss: 0.6418 - val_accuracy: 0.7875
Epoch 8/10
100/100 [==============================] - 3s 31ms/step - loss: 0.4574 - accuracy: 0.8355 - val_loss: 0.6458 - val_accuracy: 0.7950
Epoch 9/10
100/100 [==============================] - 3s 33ms/step - loss: 0.4079 - accuracy: 0.8567 - val_loss: 0.6581 - val_accuracy: 0.7912
Epoch 9: early stopping
In [318]:
metrics = history.history
plt.plot(history.epoch, metrics['loss'], metrics['val_loss'])
plt.legend(['loss', 'val_loss'])
plt.show()
No description has been provided for this image

Évaluer les performances du modèle ¶

In [319]:
test_audio = []
test_labels = []

for audio, label in test_ds:
  test_audio.append(audio.numpy())
  test_labels.append(label.numpy())

test_audio = np.array(test_audio)
test_labels = np.array(test_labels)

y_pred = np.argmax(model.predict(test_audio), axis=1)
y_true = test_labels

test_acc = sum(y_pred == y_true) / len(y_true)
print(f'Test set accuracy: {test_acc:.0%}')
Test set accuracy: 85%
In [28]:
## Afficher une matrice de confusion
In [320]:
confusion_mtx = tf.math.confusion_matrix(y_true, y_pred)
plt.figure(figsize=(10, 8))
sns.heatmap(confusion_mtx,
            xticklabels=commands,
            yticklabels=commands,
            annot=True, fmt='g')
plt.xlabel('Prediction')
plt.ylabel('Label')
plt.show()
No description has been provided for this image

Exécuter l'inférence sur un fichier audio¶

In [321]:
sample_file = data_dir/'go/0132a06d_nohash_2.wav'

sample_ds = preprocess_dataset([str(sample_file)])

for spectrogram, label in sample_ds.batch(1):
  prediction = model(spectrogram)
  plt.bar(commands, tf.nn.softmax(prediction[0]))
  plt.title(f'Predictions for "{commands[label[0]]}"')
  plt.show()
No description has been provided for this image

Generate a TensorFlow Lite for Microcontrollers Model ¶

Convert the TensorFlow Lite quantized model into a C source file that can be loaded by TensorFlow Lite for Microcontrollers.

In [ ]:
# Install xxd if it is not available
#!apt-get update && apt-get -qq install xxd
# Convert to a C source file, i.e, a TensorFlow Lite for Microcontrollers model
!xxd -i {MODEL_TFLITE} > {MODEL_TFLITE_MICRO}
# Update variable names
REPLACE_TEXT = MODEL_TFLITE.replace('/', '_').replace('.', '_')
!sed -i 's/'{REPLACE_TEXT}'/g_model/g' {MODEL_TFLITE_MICRO}
In [35]:
MODEL_TFLITE_MICRO='model.cc'
In [36]:
from tinymlgen import port
with open(MODEL_TFLITE_MICRO, 'w') as f:  # change path if needed
    f.write(port(model, optimize=False))
2022-03-21 09:26:34.532575: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
INFO:tensorflow:Assets written to: /tmp/tmpuc_tky7e/assets
2022-03-21 09:26:35.258027: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:357] Ignored output_format.
WARNING:absl:Buffer deduplication procedure will be skipped when flatbuffer library is not properly loaded
2022-03-21 09:26:35.258075: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:360] Ignored drop_control_dependency.
In [37]:
model.save('sound01.model')
INFO:tensorflow:Assets written to: sound01.model/assets
INFO:tensorflow:Assets written to: sound01.model/assets
In [39]:
new_model = tf.keras.models.load_model('sound01.model')

# Check its architecture
new_model.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 resizing (Resizing)         (None, 32, 32, 1)         0         
                                                                 
 normalization (Normalizatio  (None, 32, 32, 1)        3         
 n)                                                              
                                                                 
 conv2d (Conv2D)             (None, 30, 30, 32)        320       
                                                                 
 conv2d_1 (Conv2D)           (None, 28, 28, 64)        18496     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 64)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 14, 14, 64)        0         
                                                                 
 flatten (Flatten)           (None, 12544)             0         
                                                                 
 dense (Dense)               (None, 128)               1605760   
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 8)                 1032      
                                                                 
=================================================================
Total params: 1,625,611
Trainable params: 1,625,608
Non-trainable params: 3
_________________________________________________________________
In [ ]:
# Save the entire model to a HDF5 file.
# The '.h5' extension indicates that the model should be saved to HDF5.
model.save('my_model.h5')