3.6.1. fft.hpp#

3.6.1.1. Source code#

  1// University of Florida EEL6528
  2// Tan F. Wong
  3// Feb 2, 2021
  4
  5#pragma once
  6
  7#include <complex>
  8#include <fftw3.h>
  9#ifdef USE_VOLK
 10#include <volk/volk.h>
 11#endif
 12
 13// 1-d, single-precision FFT class
 14// All samples in the arrays in, out are contiguous
 15// out_nsteps >= fftsize
 16// Use multi-threading in FFTW3
 17class fft{
 18  private:
 19    fftwf_plan plan; // FFT plan
 20    int nthreads;   // Number of threads
 21    int fftsize;    // FFT size
 22    int nblocks;    // Number of FFT blocks to calculate along a row
 23    int in_nsteps;  // How many samples to next block in array in
 24    int out_nsteps; // How many samples to next block in array out
 25    int U_D;          // # of rows in 2-d array = Up/Down-sampling factor
 26    bool inverse;   // FFT = false, IFFT = true
 27    std::complex<float>* in;      // Pointer to input array
 28    std::complex<float>* out;     // Pointer to output array
 29    std::complex<float>* ptr_in;  // Pointer to hold if want to allocate memory for input array
 30    float one_over_fftsize; // 1/fftsize
 31    std::complex<float> one_over_fftsize_c; // 1/fftsize complex value
 32 
 33  public:
 34    // Constructor for calculating multiple FFTs along a 1-d array
 35    fft(int nt, int n, int nblks, int in_ns, int out_ns, bool inv=false, std::complex<float>* const pin=NULL) {
 36        // Set class variables
 37        nthreads = nt;
 38        fftsize = n;
 39        one_over_fftsize = 1.0f/fftsize;
 40        one_over_fftsize_c =  std::complex<float>(one_over_fftsize, 0.0f);
 41        nblocks = nblks;
 42        in_nsteps = in_ns;
 43        out_nsteps = std::max(n, out_ns);
 44        U_D = 1;
 45        inverse = inv;
 46        ptr_in = pin;
 47        // Allocate input array
 48        if (ptr_in==NULL) 
 49            // a null ptr tells fft to allocate memory to input array
 50            in = (std::complex<float>*) fftwf_malloc(nblocks*std::max(in_nsteps,fftsize)*sizeof(std::complex<float>));
 51        else
 52            // a non-null ptr_in tells fft not to allocate memory to input array
 53            in = ptr_in;
 54        // Allocate output array
 55        out = (std::complex<float>*) fftwf_malloc(nblocks*out_nsteps*sizeof(std::complex<float>));
 56        // Initialize FFTW3 multi-threaded implementation
 57        fftwf_init_threads();
 58        fftwf_plan_with_nthreads(nthreads);
 59        // Instantiate FFT plan using FFTW3's advanced interface 
 60        plan = fftwf_plan_many_dft(1, &fftsize, nblocks, 
 61                                  reinterpret_cast<fftwf_complex *>(in), 
 62                                  NULL, 1, in_nsteps, 
 63                                  reinterpret_cast<fftwf_complex *>(out), 
 64                                  NULL, 1, out_nsteps, 
 65                                  inverse ? FFTW_BACKWARD : FFTW_FORWARD, 
 66                                  FFTW_MEASURE);
 67    }
 68    // Constructor calculating multiple FFTs along each row of a 2-d array
 69    // for use in polyphase filtering
 70    // Assume input array arranged with signals in uord rows 
 71    // in_ns and out_ns correspond to the number of samples along each row
 72    // nblks also indicates the number of FFT block per row
 73    fft(int nt, int n, int nblks, int in_ns, int out_ns, int uord, bool inv=false, std::complex<float>* const pin=NULL) {
 74        // Set class variables
 75        nthreads = nt;
 76        U_D = uord;
 77        fftsize = n;
 78        one_over_fftsize = 1.0f/fftsize;
 79        one_over_fftsize_c =  std::complex<float>(one_over_fftsize, 0.0f);
 80        nblocks = nblks;
 81        in_nsteps = in_ns;
 82        out_nsteps = std::max(n, out_ns);
 83        inverse = inv;
 84        ptr_in = pin;
 85        // Allocate input array
 86        if (ptr_in==NULL) 
 87            // a null ptr tells fft to allocate memory to input array
 88            in = (std::complex<float>*) fftwf_malloc(nblocks*U_D*std::max(in_nsteps,fftsize)*sizeof(std::complex<float>));
 89        else
 90            // a non-null ptr_in tells fft not to allocate memory to input array
 91            in = ptr_in;
 92        // Allocate output array
 93        out = (std::complex<float>*) fftwf_malloc(nblocks*U_D*out_nsteps*sizeof(std::complex<float>));
 94        // Calculate parameters for multi-threaded implementation
 95        // Initialize FFTW3 multi-threaded implementation
 96        fftwf_init_threads();
 97        fftwf_plan_with_nthreads(nthreads);
 98        // InstantiateForward FFT plan using FFTW3's guru interface
 99        // Set up fft and howmany dimensions
100        fftwf_iodim fft_dim = {fftsize, 1, 1};
101        fftwf_iodim howmany_dim[2]; // 3d arrays for input and output
102        // This is to match the 1d array of input
103        howmany_dim[1] = {nblocks, in_nsteps, out_nsteps}; 
104        howmany_dim[0] = {U_D, nblocks*fftsize, nblocks*fftsize};
105        plan = fftwf_plan_guru_dft(1, &fft_dim, 2, howmany_dim,
106                                  reinterpret_cast<fftwf_complex *>(in), 
107                                  reinterpret_cast<fftwf_complex *>(out), 
108                                  inverse ? FFTW_BACKWARD : FFTW_FORWARD, 
109                                  FFTW_MEASURE);
110    }
111    // Destructor
112    ~fft(void) {
113        if (ptr_in==NULL) fftwf_free(in);
114        fftwf_free(out);
115        fftwf_destroy_plan(plan);
116    }
117    // Parameter getters
118    int get_fftsize(void) {
119        return fftsize;
120    }
121    int get_nblocks(void) {
122        return nblocks;
123    }
124    int get_in_nsteps() {
125        return in_nsteps;
126    }
127    int get_out_nsteps(void) {
128        return out_nsteps;
129    }
130    std::complex<float>* get_in(void) {
131        return in;
132    }
133    std::complex<float>* get_out(void) {
134        return out;
135    }
136    bool is_inverse(void) {
137        return inverse;
138    }
139    // Clean up
140    static void cleanup(void) {
141        fftwf_cleanup();
142        fftwf_cleanup_threads();
143    }
144    // Calculate FFT/IFFT
145    void calculate(void) {
146        // Call FFTW3 to Calculate FFT/IFFT
147        fftwf_execute(plan);
148        // If IFFT, divide by fftsize
149        if (inverse) {
150            for (int i=0; i<U_D*nblocks; i++) {
151                unsigned idx = i*out_nsteps;
152#ifdef USE_VOLK
153                volk_32fc_s32fc_multiply_32fc(out+idx, out+idx, one_over_fftsize_c, fftsize);
154#else
155                for (int j=0; j<fftsize; j++) {
156                    out[idx++] *= one_over_fftsize;
157                }
158#endif
159            }
160        }
161    }
162};
  • The fft class defined above uses the FFTW3 advanced interface fftwf_plan_many_dft to create a plan to calculate multiple single-precision, 1-d FFTs along a 1-d array of samples and the guru interface fftwf_plan_guru_dft to create a plan to calculate multiple single-precision, 1-d FFTs along each row of a 2-d array of samples, using potentially multiple threads.

  • Both fftwf_plan_many_dft and fftwf_plan_guru_dft are actually more flexible that the way they are used here. Please refer to [2] for more usage information.

  • Can define the compile flag USE_VOLK to use the VOLK library for SIMD-accelerated arithmetic kernels such that volk_32fc_s32fc_multiply_32fc().

  • The internal multi-threaded implementation of FFTW is used.

3.6.1.2. Usage#

  • 1-d constructor:

    fft(int nthreads, int fftsize, int nblocks, int in_nsteps, int out_nsteps, bool inverse=false, std::complex<float>* const in_ptr=NULL) 
    
  • 2-d constructor:

    fft(int nthreads, int fftsize, int nblocks, int in_nsteps, int out_nsteps, int nrows, bool inverse=false, std::complex<float>* const in_ptr=NULL) 
    
    • nthreads: number of threads to use

    • fftsize : size of FFT/IFFT

    • nblocks: number of FFTs/IFFTs to calculate along a row

    • in_nsteps: number of samples to step to the next block along a row in the input array

    • out_nsteps: number of samples to step to the next block along a row in the output array

    • nrows: number of rows in the 2-d array

    • inverse: FFT = false , IFFT = true (default = false)

    • in_ptr: pointer to the input array if non-NULL, internally allocate memory for the input array if NULL (default = NULL)

  • The restriction that out_nsteps must be at least as large as fftsize is imposed while in_nsteps may be smaller than fftsize.

  • The methods fft::get_in() and fft::get_out() can be used to obtain the pointers to the input and output arrays. The elements of both arrays should be of type std::complex<float>. The numbers of elements allocated for the input and output arrays are nblocks\(\cdot \max(\)fftsize\(,\) in_nsteps\()\) and nblocks\(\cdot \max(\)fftsize\(,\) out_nsteps\()\), respectively.

  • For nice termination, call the method fft::cleanup() before your main() exits.

  • For usage examples of the 1-d constructor, see test_fft.cpp.

  • For usage examples of the 2-d constructor, see filters.cpp.