/* * Copyright (C) 2008 GEOLAB Ltd. ( http://www.geo-lab.ru ) * * Contents: SRMP 2D prototype for conventional CPUs * */ #include <string.h> #include <stdlib.h> #include <stdio.h> #include <errno.h> #include <math.h> #include <pthread.h> #include <unistd.h> #include <sys/times.h> #include <fftw3.h> #include "srmp_x86.h" pthread_t thread_ids[MAX_THREADS]; float datai [SIZE * NTR]; /* input traces */ float dataf [DOUBLE_SIZE * NTR]; /* FFT traces */ float datao [SIZE * NTR]; /* output traces */ thread_data td [MAX_THREADS]; /* thread data */ pthread_mutex_t op_lock = PTHREAD_MUTEX_INITIALIZER; void * fft_thread(void * data); void * srmp_thread(void * data); /* * Purpose: SRMP 2D prototype for conventional CPUs - main program * * Date: 2008-05-14 * * Modified: * * Author: KuE * */ int main(int argc, char ** argv) { int ret = 0; int status = 0; int i, j; int num_threads = MAX_THREADS; clock_t start_time, stop_time; /* * init parameters from command line */ if ( argc != 2 ) { fprintf(stderr, "srmp_x86: error: must have 1 parameter - number of threads\n"); return 1; } num_threads = atoi(argv[1]); if ( num_threads < 1 || num_threads > MAX_THREADS ) { fprintf(stderr, "srmp_x86: error: number of threads not in [1,%d]\n", MAX_THREADS); return 1; } /* * read input */ for (i = 0; i < NTR; i++) { if ( fread(datai + i * SIZE, BYTE_SIZE, 1, stdin) < 1 ) { fprintf(stderr, "srmp_x86: error: fread failed: %s\n", strerror(errno)); return 1; } } /* * set time mark */ start_time = times(NULL); /* * create threads to execute fft_thread */ for (i = 0; i < num_threads; i++) { /* * initialize thread data */ td[i].n = N; td[i].ntr = NTR; td[i].n0 = i; td[i].dn = num_threads; td[i].datai = datai; td[i].dataf = dataf; td[i].datao = datao; /* * start thread */ if ( pthread_create(&thread_ids[i], 0, &fft_thread, &td[i]) != 0 ) { fprintf(stderr, "srmp_x86: error: pthread_create failed: fft_thread%s\n", strerror(errno)); return 1; } } /* * wait for threads to complete execution */ for (i = 0; i < num_threads; i++) { pthread_join(thread_ids[i], NULL); } /* produce amplitude spectra for (i = 0; i < NTR; i++) { for (j = 0; j < SIZE; j++) { int ire = j * 2; int iim = j * 2 + 1; float re = dataf[i * DOUBLE_SIZE + ire]; float im = dataf[i * DOUBLE_SIZE + iim]; datao[i * SIZE + j] = sqrt(re*re + im*im); } } goto WRITE; */ /* * create threads to execute srmp_thread */ for (i = 0; i < num_threads; i++) { /* * start thread */ if ( pthread_create(&thread_ids[i], 0, &srmp_thread, &td[i]) != 0 ) { fprintf(stderr, "srmp_x86: error: pthread_create failed: srmp_thread%s\n", strerror(errno)); return 1; } } /* * wait for threads to complete execution */ for (i = 0; i < num_threads; i++) { pthread_join(thread_ids[i], NULL); } /* * display program computation time */ stop_time = times(NULL); fprintf(stderr, "%12.3lf\n", (double)(stop_time - start_time) / sysconf(_SC_CLK_TCK)); WRITE: /* * write output */ for (i = 0; i < NTR; i++) { if ( fwrite(datao + i * SIZE, BYTE_SIZE, 1, stdout) < 1 ) { fprintf(stderr, "srmp_x86: error: fwrite failed: %s\n", strerror(errno)); return 1; } } return 0; } /* * Purpose: thread implementing FFT of given data * * Date: 2008-05-14 * * Modified: * * Author: KuE * * Remarks: * */ void * fft_thread(void * data) { thread_data * td; /* thread data */ float d[DOUBLE_SIZE]; /* work array for FFT */ float * di; /* pointer to current input trace */ int i, j; fftwf_plan p; /* FFTW execution plan */ /* * get thread data */ td = (thread_data *)data; /* * create FFTW plan */ pthread_mutex_lock(&op_lock); p = fftwf_plan_dft_1d(SIZE, (fftwf_complex *)d, (fftwf_complex *)td->dataf, FFTW_FORWARD, FFTW_ESTIMATE); pthread_mutex_unlock(&op_lock); /* * loop on input traces */ for (i = td->n0; i < td->ntr; i += td->dn) { /* * initialize input for FFT */ di = td->datai + i * SIZE; for (j = 0; j < SIZE; j++) { d[2*j] = di[j]; d[2*j+1] = 0; } /* * do FFT */ fftwf_execute_dft(p, (fftwf_complex *)d, (fftwf_complex *)(td->dataf + i * DOUBLE_SIZE)); } } void ro_filter_2d (float * in, int n); void * srmp_thread(void * data) { thread_data * td;/* thread data */ float * di1; /* input 1 */ float * di2; /* input 2 */ float * do1; /* output */ float acc[DOUBLE_SIZE]; /* accumulator*/ float d[DOUBLE_SIZE]; /* work array */ int i, j, k, l, n; fftwf_plan p; /* FFTW execution plan */ /* * get thread data */ td = (thread_data *)data; /* * create FFTW plan */ pthread_mutex_lock(&op_lock); p = fftwf_plan_dft_1d(SIZE, (fftwf_complex *)acc, (fftwf_complex *)d, FFTW_BACKWARD, FFTW_ESTIMATE); pthread_mutex_unlock(&op_lock); /* * loop on output SPs */ n = td->n; for (i = td->n0; i < n; i += td->dn) { /* * loop on output RPs */ for (j = 0; j < n; j++) { float norm = 1; /* * clean the accumulator */ memset(acc, 0, DOUBLE_BYTE_SIZE); /* * loop on intermediate positions */ for (k = 0; k < n; k++) { /* * get trace of SP gather */ di1 = td->dataf + (i * n + k) * DOUBLE_SIZE; /* * get trace of RP gather */ di2 = td->dataf + (k * n + j) * DOUBLE_SIZE; /* * complex multiply SP and RP traces */ for (l = 0; l < SIZE; l++) { int ire = 2 * l, iim = 2 * l + 1; acc[ire] += di1[ire] * di2[ire] - di1[iim] * di2[iim]; acc[iim] += di1[iim] * di2[ire] + di1[ire] * di2[iim]; } } if ( k > 1 ) { norm = -1.f / k; } /* * apply compensation filtering */ /*ro_filter_2d (acc, SIZE);*/ /* * apply inverse FFT to accumulator */ fftwf_execute_dft(p, (fftwf_complex *)acc, (fftwf_complex *)d); /* * store real samples to output */ do1 = td->datao + (i * n + j) * SIZE; for (l = 0; l < SIZE; l++) { do1[l] = d[2 * l] * norm; } } /* for j - output RPs */ } /* for i - output SPs */ return 0; } /* * Purpose: compensation (ro) filter in frequency domain * * Date: 2008-05-04 * * Modified: * * Author: KuE * * Remarks: 1. in[] has size 2*n and contains complex numbers stored like this: * - real parts at 2*k; * - imaginary parts at 2*k+1 */ void ro_filter_2d (float * in, int n) { float w = M_PI / (float)n * 0.5; int i, j; for (i = 0, j = n - 1; i < n/2; i++, j--) { float amp = sqrt(i * w * 0.5); int ire = 2 * i; int iim = ire + 1; float a = in[ire]; float b = in[iim]; in[ire] = amp * (a + b); in[iim] = amp * (b - a); ire = 2 * j; iim = ire + 1; a = in[ire]; b = in[iim]; in[ire] = amp * (a + b); in[iim] = amp * (b - a); } }