3.6.1. fft.hpp
#
3.6.1.1. Source code#
1// University of Florida EEL6528
2// Tan F. Wong
3// Feb 2, 2021
4
5#pragma once
6
7#include <complex>
8#include <fftw3.h>
9#ifdef USE_VOLK
10#include <volk/volk.h>
11#endif
12
13// 1-d, single-precision FFT class
14// All samples in the arrays in, out are contiguous
15// out_nsteps >= fftsize
16// Use multi-threading in FFTW3
17class fft{
18 private:
19 fftwf_plan plan; // FFT plan
20 int nthreads; // Number of threads
21 int fftsize; // FFT size
22 int nblocks; // Number of FFT blocks to calculate along a row
23 int in_nsteps; // How many samples to next block in array in
24 int out_nsteps; // How many samples to next block in array out
25 int U_D; // # of rows in 2-d array = Up/Down-sampling factor
26 bool inverse; // FFT = false, IFFT = true
27 std::complex<float>* in; // Pointer to input array
28 std::complex<float>* out; // Pointer to output array
29 std::complex<float>* ptr_in; // Pointer to hold if want to allocate memory for input array
30 float one_over_fftsize; // 1/fftsize
31 std::complex<float> one_over_fftsize_c; // 1/fftsize complex value
32
33 public:
34 // Constructor for calculating multiple FFTs along a 1-d array
35 fft(int nt, int n, int nblks, int in_ns, int out_ns, bool inv=false, std::complex<float>* const pin=NULL) {
36 // Set class variables
37 nthreads = nt;
38 fftsize = n;
39 one_over_fftsize = 1.0f/fftsize;
40 one_over_fftsize_c = std::complex<float>(one_over_fftsize, 0.0f);
41 nblocks = nblks;
42 in_nsteps = in_ns;
43 out_nsteps = std::max(n, out_ns);
44 U_D = 1;
45 inverse = inv;
46 ptr_in = pin;
47 // Allocate input array
48 if (ptr_in==NULL)
49 // a null ptr tells fft to allocate memory to input array
50 in = (std::complex<float>*) fftwf_malloc(nblocks*std::max(in_nsteps,fftsize)*sizeof(std::complex<float>));
51 else
52 // a non-null ptr_in tells fft not to allocate memory to input array
53 in = ptr_in;
54 // Allocate output array
55 out = (std::complex<float>*) fftwf_malloc(nblocks*out_nsteps*sizeof(std::complex<float>));
56 // Initialize FFTW3 multi-threaded implementation
57 fftwf_init_threads();
58 fftwf_plan_with_nthreads(nthreads);
59 // Instantiate FFT plan using FFTW3's advanced interface
60 plan = fftwf_plan_many_dft(1, &fftsize, nblocks,
61 reinterpret_cast<fftwf_complex *>(in),
62 NULL, 1, in_nsteps,
63 reinterpret_cast<fftwf_complex *>(out),
64 NULL, 1, out_nsteps,
65 inverse ? FFTW_BACKWARD : FFTW_FORWARD,
66 FFTW_MEASURE);
67 }
68 // Constructor calculating multiple FFTs along each row of a 2-d array
69 // for use in polyphase filtering
70 // Assume input array arranged with signals in uord rows
71 // in_ns and out_ns correspond to the number of samples along each row
72 // nblks also indicates the number of FFT block per row
73 fft(int nt, int n, int nblks, int in_ns, int out_ns, int uord, bool inv=false, std::complex<float>* const pin=NULL) {
74 // Set class variables
75 nthreads = nt;
76 U_D = uord;
77 fftsize = n;
78 one_over_fftsize = 1.0f/fftsize;
79 one_over_fftsize_c = std::complex<float>(one_over_fftsize, 0.0f);
80 nblocks = nblks;
81 in_nsteps = in_ns;
82 out_nsteps = std::max(n, out_ns);
83 inverse = inv;
84 ptr_in = pin;
85 // Allocate input array
86 if (ptr_in==NULL)
87 // a null ptr tells fft to allocate memory to input array
88 in = (std::complex<float>*) fftwf_malloc(nblocks*U_D*std::max(in_nsteps,fftsize)*sizeof(std::complex<float>));
89 else
90 // a non-null ptr_in tells fft not to allocate memory to input array
91 in = ptr_in;
92 // Allocate output array
93 out = (std::complex<float>*) fftwf_malloc(nblocks*U_D*out_nsteps*sizeof(std::complex<float>));
94 // Calculate parameters for multi-threaded implementation
95 // Initialize FFTW3 multi-threaded implementation
96 fftwf_init_threads();
97 fftwf_plan_with_nthreads(nthreads);
98 // InstantiateForward FFT plan using FFTW3's guru interface
99 // Set up fft and howmany dimensions
100 fftwf_iodim fft_dim = {fftsize, 1, 1};
101 fftwf_iodim howmany_dim[2]; // 3d arrays for input and output
102 // This is to match the 1d array of input
103 howmany_dim[1] = {nblocks, in_nsteps, out_nsteps};
104 howmany_dim[0] = {U_D, nblocks*fftsize, nblocks*fftsize};
105 plan = fftwf_plan_guru_dft(1, &fft_dim, 2, howmany_dim,
106 reinterpret_cast<fftwf_complex *>(in),
107 reinterpret_cast<fftwf_complex *>(out),
108 inverse ? FFTW_BACKWARD : FFTW_FORWARD,
109 FFTW_MEASURE);
110 }
111 // Destructor
112 ~fft(void) {
113 if (ptr_in==NULL) fftwf_free(in);
114 fftwf_free(out);
115 fftwf_destroy_plan(plan);
116 }
117 // Parameter getters
118 int get_fftsize(void) {
119 return fftsize;
120 }
121 int get_nblocks(void) {
122 return nblocks;
123 }
124 int get_in_nsteps() {
125 return in_nsteps;
126 }
127 int get_out_nsteps(void) {
128 return out_nsteps;
129 }
130 std::complex<float>* get_in(void) {
131 return in;
132 }
133 std::complex<float>* get_out(void) {
134 return out;
135 }
136 bool is_inverse(void) {
137 return inverse;
138 }
139 // Clean up
140 static void cleanup(void) {
141 fftwf_cleanup();
142 fftwf_cleanup_threads();
143 }
144 // Calculate FFT/IFFT
145 void calculate(void) {
146 // Call FFTW3 to Calculate FFT/IFFT
147 fftwf_execute(plan);
148 // If IFFT, divide by fftsize
149 if (inverse) {
150 for (int i=0; i<U_D*nblocks; i++) {
151 unsigned idx = i*out_nsteps;
152#ifdef USE_VOLK
153 volk_32fc_s32fc_multiply_32fc(out+idx, out+idx, one_over_fftsize_c, fftsize);
154#else
155 for (int j=0; j<fftsize; j++) {
156 out[idx++] *= one_over_fftsize;
157 }
158#endif
159 }
160 }
161 }
162};
The
fft
class defined above uses the FFTW3 advanced interfacefftwf_plan_many_dft
to create a plan to calculate multiple single-precision, 1-d FFTs along a 1-d array of samples and the guru interfacefftwf_plan_guru_dft
to create a plan to calculate multiple single-precision, 1-d FFTs along each row of a 2-d array of samples, using potentially multiple threads.Both
fftwf_plan_many_dft
andfftwf_plan_guru_dft
are actually more flexible that the way they are used here. Please refer to [2] for more usage information.Can define the compile flag
USE_VOLK
to use the VOLK library for SIMD-accelerated arithmetic kernels such thatvolk_32fc_s32fc_multiply_32fc()
.The internal multi-threaded implementation of FFTW is used.
3.6.1.2. Usage#
1-d constructor:
fft(int nthreads, int fftsize, int nblocks, int in_nsteps, int out_nsteps, bool inverse=false, std::complex<float>* const in_ptr=NULL)
2-d constructor:
fft(int nthreads, int fftsize, int nblocks, int in_nsteps, int out_nsteps, int nrows, bool inverse=false, std::complex<float>* const in_ptr=NULL)
nthreads
: number of threads to usefftsize
: size of FFT/IFFTnblocks
: number of FFTs/IFFTs to calculate along a rowin_nsteps
: number of samples to step to the next block along a row in the input arrayout_nsteps
: number of samples to step to the next block along a row in the output arraynrows
: number of rows in the 2-d arrayinverse
: FFT =false
, IFFT =true
(default =false
)in_ptr
: pointer to the input array if non-NULL
, internally allocate memory for the input array ifNULL
(default =NULL
)
The restriction that
out_nsteps
must be at least as large asfftsize
is imposed whilein_nsteps
may be smaller thanfftsize
.The methods
fft::get_in()
andfft::get_out()
can be used to obtain the pointers to the input and output arrays. The elements of both arrays should be of typestd::complex<float>
. The numbers of elements allocated for the input and output arrays arenblocks
\(\cdot \max(\)fftsize
\(,\)in_nsteps
\()\) andnblocks
\(\cdot \max(\)fftsize
\(,\)out_nsteps
\()\), respectively.For nice termination, call the method
fft::cleanup()
before yourmain()
exits.For usage examples of the 1-d constructor, see
test_fft.cpp
.For usage examples of the 2-d constructor, see
filters.cpp
.