3.6.5. test_filters.cpp#

3.6.5.1. Source code#

  1// University of Florida EEL6528
  2// Tan F. Wong
  3// Feb 2, 2021
  4
  5#include <uhd/utils/safe_main.hpp>
  6#include <uhd/utils/thread.hpp>
  7#include <boost/program_options.hpp>
  8#include <csignal>
  9#include <chrono>
 10#include <thread>
 11#include "filters.hpp"
 12
 13namespace po = boost::program_options;
 14namespace clk = std::chrono;
 15
 16bool stop_signal_called = false;
 17void sig_int_handler(int){
 18    stop_signal_called = true;
 19}
 20
 21int UHD_SAFE_MAIN(int argc, char *argv[]){
 22
 23    //variables to be set by po
 24    int U, D, nthreads, in_len, h_len;
 25    float freq;
 26 
 27    //setup the program options
 28    po::options_description desc("Allowed options");
 29    desc.add_options()
 30        ("help", "help message")
 31        ("freq,f", po::value<float>(&freq)->default_value(1.0/32), "Use a low-freq sinusoid as input instead of random input")
 32        ("D,D", po::value<int>(&D)->default_value(1), "Down-sampling factor")
 33        ("U,U", po::value<int>(&U)->default_value(1), "Up-sampling factor")
 34        ("in_len,i", po::value<int>(&in_len)->default_value(1000000), "input signal length")
 35        ("h_len,h", po::value<int>(&h_len)->default_value(64), "Single-rate filter length (multi-rate/polyphase filter length = 4*U*h_len, where h_len = a power of 2)")
 36        ("nthreads,t", po::value<int>(&nthreads)->default_value(1), "Number of FFT threads to use")
 37        ;
 38 
 39    po::variables_map vm;
 40    po::store(po::parse_command_line(argc, argv, desc), vm);
 41    po::notify(vm);
 42 
 43    //print the help message
 44    if (vm.count("help")){
 45        std::cout << desc << std::endl;
 46        return EXIT_SUCCESS;
 47    }
 48
 49    bool sine = false;
 50    if (vm.count("sine")) sine = true;
 51  
 52    std::signal(SIGINT, &sig_int_handler);
 53
 54    // Define filter1's impulse response
 55    int h1_len = h_len;
 56    std::complex<float> h1[h1_len];
 57    int delay1 = 2;
 58    h1[delay1] = 1.0;
 59
 60    // Filter2 and 3 are the same lowpass filter 
 61    // This filter design doesn't work too well when D is not a power of 2
 62    int maxUD = (U>D)?U:D;
 63    int h2_len = h1_len*U;
 64    std::complex<float> h2[h2_len];
 65    fft invfft(1, h2_len, 1, h2_len, h2_len, true);
 66    std::complex<float>* H2 = invfft.get_in();
 67    H2[0] = 1.0; 
 68    for (int i=1; i<h2_len/2/maxUD; i++) {
 69        H2[i] = H2[0];
 70        H2[h2_len-i] = H2[0];
 71    }
 72    for (int i=h2_len/2/maxUD; i<=h2_len-h2_len/2/maxUD; i++) {
 73        H2[i] = 0.0;
 74    }
 75    invfft.calculate();
 76    std::copy(invfft.get_out(),invfft.get_out()+h2_len/2, h2+h2_len/2);
 77    std::copy(invfft.get_out()+h2_len/2,invfft.get_out()+h2_len, h2);
 78    std::complex<float> h3[h2_len];
 79    for (int i=0; i<h2_len; i++) {
 80        h3[i] = h2[i]*float(D)*float(U);
 81    }
 82    int delay23 = h2_len/U;
 83    // Additional delay of 2*D caused by polyphase implementation in
 84    // filters 4 and 5
 85    int delay45 = h2_len/U + 2*D; 
 86
 87    // Construct single-rate OverlapSave filter object
 88    FilterOverlapSave filter1(in_len, h1_len, h1, nthreads);
 89
 90    // Construct multi-rate OverlapAdd filter objects
 91    FilterOverlapSave filter2(U, D, in_len, h2_len, h2, nthreads);
 92    FilterOverlapSave filter3(D, U, filter2.out_len(), h2_len, h3, nthreads);
 93
 94    // Construct polyphase OverlapSave filter objects
 95    FilterPolyphase filter4(U, D, in_len, h2_len, h2, nthreads);
 96    FilterPolyphase filter5(D, U, filter4.out_len(), h2_len, h3, nthreads);
 97
 98    // Construct in and out arrays
 99    int out1_len = filter1.out_len(); // Output length for filter1
100    int out2_len = filter2.out_len(); // Output length for filter2
101    int out3_len = filter3.out_len(); // Output length for filter3
102    int out4_len = filter4.out_len(); // Output length for filter4
103    int out5_len = filter5.out_len(); // Output length for filter5
104    std::complex<float>* in = new std::complex<float>[in_len]();
105    std::complex<float>* out1 = new std::complex<float>[out1_len]();
106    std::complex<float>* out2 = new std::complex<float>[out2_len]();
107    std::complex<float>* out3 = new std::complex<float>[out3_len]();
108    std::complex<float>* out4 = new std::complex<float>[out4_len]();
109    std::complex<float>* out5 = new std::complex<float>[out5_len]();
110
111    // Initial timing statistics
112    double avg_filter1_time = 0.0;
113    double avg_filter2_time = 0.0;
114    double avg_filter3_time = 0.0;
115    double avg_filter4_time = 0.0;
116    double avg_filter5_time = 0.0;
117    unsigned count = 0;
118    unsigned long icont = 0;
119    while (not stop_signal_called) {
120        for (int i=0; i<in_len; i++) {
121            // complex-valued sinusoid input 
122            in[i] = std::complex<float>(cos(2*M_PI*(i+icont)*freq), sin(2*M_PI*(i+icont)*freq));
123        }
124        
125        // Test single-rate overlap-save algorithm using filter1
126        clk::steady_clock::time_point start_filter1 = clk::steady_clock::now();
127        bool reset = (icont==0);
128        filter1.set_head(reset); // clear head of input to filter1
129        filter1.filter(in, out1); // Do filtering
130        clk::steady_clock::time_point done_filter1 = clk::steady_clock::now();
131        clk::microseconds filter1_time = clk::duration_cast<clk::microseconds>(done_filter1-start_filter1);
132        // Check output
133        float diff = 0.0;
134        for (int i=0; i<out1_len-delay1; i++) {
135            diff += std::norm(in[i]-out1[i+delay1]);
136        }
137        printf("\nSingle-rate overlap-save squared error per input sample = %1.4e\n", diff/in_len);
138
139        // Test multi-rate direct overlap-save algorithm using filter2 and then filter3
140        clk::steady_clock::time_point start_filter2 = clk::steady_clock::now();
141        filter2.set_head(reset);
142        filter2.filter(in, out2);
143        clk::steady_clock::time_point done_filter2 = clk::steady_clock::now();
144        clk::microseconds filter2_time = clk::duration_cast<clk::microseconds>(done_filter2-start_filter2);
145        clk::steady_clock::time_point start_filter3 = clk::steady_clock::now();
146        filter3.set_head(reset);
147        filter3.filter(out2, out3);
148        clk::steady_clock::time_point done_filter3 = clk::steady_clock::now();
149        clk::microseconds filter3_time = clk::duration_cast<clk::microseconds>(done_filter3-start_filter3);
150        // Check output
151        diff = 0.0;
152        int nstart = (icont==0) ? delay23 : 0;
153        for (int i=nstart; i<out3_len-delay23; i++) {
154            diff += std::norm(in[i]-out3[i+delay23]);
155        }
156        printf("Multi-rate overlap-save squared error per input sample = %1.4e\n", diff/(out3_len-nstart-delay23));
157
158        // Test polyphase overlap-add algorithm using filter4 and then filter5
159        clk::steady_clock::time_point start_filter4 = clk::steady_clock::now();
160        filter4.set_head(reset);
161        filter4.filter(in, out4);
162        clk::steady_clock::time_point done_filter4 = clk::steady_clock::now();
163        clk::microseconds filter4_time = clk::duration_cast<clk::microseconds>(done_filter4-start_filter4);
164        clk::steady_clock::time_point start_filter5 = clk::steady_clock::now();
165        filter5.set_head(reset);
166        filter5.filter(out4, out5);
167        clk::steady_clock::time_point done_filter5 = clk::steady_clock::now();
168        clk::microseconds filter5_time = clk::duration_cast<clk::microseconds>(done_filter5-start_filter5);
169
170        // Check output
171        diff = 0.0;
172        nstart = (icont==0) ? delay45 : 0;
173        for (int i=nstart; i<out5_len-delay45; i++) {
174            diff += std::norm(in[i]-out5[i+delay45]);
175        }
176        printf("Polyphase overlap-save squared error per input sample = %1.4e\n", diff/(out5_len-nstart-delay45));
177        // Print calculation times of filters
178        printf("Single-rate overlap-save filter1 processing time = %ld us\n", filter1_time.count());
179        printf("Multi-rate overlap-save filter2 processing time = %ld us\n", filter2_time.count());
180        printf("Multi-rate overlap-save filter3 processing time = %ld us\n", filter3_time.count());
181        printf("Polyphase overlap-save filter4 processing time = %ld us\n", filter4_time.count());
182        printf("Polyphase overlap-save filter5 processing time = %ld us\n", filter5_time.count());
183
184        // Accummulate timing stats
185        count++;
186        avg_filter1_time += filter1_time.count();
187        avg_filter2_time += filter2_time.count();
188        avg_filter3_time += filter3_time.count();
189        avg_filter4_time += filter4_time.count();
190        avg_filter5_time += filter5_time.count();
191
192        icont += in_len;
193    } 
194
195    printf("\n\nSingle-rate overlap-save filter1 average processing time = %1.4e us\n", avg_filter1_time/count);
196    printf("Multi-rate overlap-save filter2 average processing time = %1.4e us\n", avg_filter2_time/count);
197    printf("Multi-rate overlap-save filter3 average processing time = %1.4e us\n", avg_filter3_time/count);
198    printf("Polyphase overlap-save filter4 average processing time = %1.4e us\n", avg_filter4_time/count);
199    printf("Polyphase overlap-save filter5 average processing time = %1.4e us\n", avg_filter5_time/count);
200    
201    delete in;
202    delete out1;
203    delete out2;
204    delete out3;
205    delete out4;
206    delete out5;
207    fft::cleanup();
208    return EXIT_SUCCESS;
209}
  • filter1 implements single-rate frequency-domain filtering using the overlap-save algorithm. The impulse response of filter1 is chosen to delay the input samples by 2 so that we can easily verify if the single-rate filter implementation in FilterOverlapSave is correct or not.

  • filter2 and filter3 implement direct multi-rate frequency-domain filtering using the overlap-save algorithm. The interpolation and decimation rates of filter2 and filter3 are reversed. The filter impulse responses are chosen for interpolation and anti-aliasing purposes, resepctively. The filter design only works well when the single-rate filter length parameter h_len is set to some power of 2. Because filter3 aims to reverse the action of filter2, we can easily verify if the multi-rate filter implementation in FilterOverlapSave is correct or not.

  • filter4 and filter5 are polyphase impoementations of filter2 and filter3, respectively. The only difference is that our polyphase implementation introduces an additional delay of \(U\) samples, where \(U\) is the up-sampling factor. Hence, filter4 adds a delay of \(U\) samples at its output. Recall that filter5 flips the up- and down-sampling factors of filter4. Hence, the \(U\)-sample delay of filter4 is converted to \(D\) samples by filter5, which adds another delay of \(D\) samples to its own output. Thus, the overall delay at the cascade of the two filters are \(2D\) samples at filter5’s output.

3.6.5.2. Build#

  • Example Makefile for building test_fft and test_filters in Unix-based system: Makefile

Experiment

  1. Compile and run the test code.

  2. Try different choices of \(U\) and \(D\) to see if the advantages in computational complexity predicted in the polyphase filtering discussion match up with the implementation in filters.cpp.

  3. Try to increase the number of threads used in the FFT calculations to see if and whichfiltering operations can be sped up.

  4. Implement your own direct multi-rate and polyphase filters by doing convolutions in time-domain. Compare the filtering speeds of your time-domain implementations with the respective frequency-domain implementations in filters.cpp.