/*
 * Copyright (C) 2008 GEOLAB Ltd. ( http://www.geo-lab.ru )
 *
 * Contents: SRMP 2D prototype for conventional CPUs
 *
 */

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <math.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/times.h>

#include <fftw3.h>

#include "srmp_x86.h"

pthread_t thread_ids[MAX_THREADS];

float datai [SIZE * NTR];  /* input traces */
float dataf [DOUBLE_SIZE * NTR]; /* FFT traces */
float datao [SIZE * NTR];  /* output traces */
thread_data td [MAX_THREADS]; /* thread data */

pthread_mutex_t op_lock = PTHREAD_MUTEX_INITIALIZER;

void * fft_thread(void * data);
void * srmp_thread(void * data);


/*
 * Purpose: SRMP 2D prototype for conventional CPUs - main program
 *
 * Date: 2008-05-14
 *
 * Modified:
 *
 * Author: KuE
 *
 */

int main(int argc, char ** argv)
{
  int ret = 0;
  int status = 0;
  int i, j;
  int num_threads = MAX_THREADS;
  clock_t start_time, stop_time;

  /*
   *  init parameters from command line
   */
  if ( argc != 2 )
  {
    fprintf(stderr, 
        "srmp_x86: error: must have 1 parameter - number of threads\n");
    return 1;
  }
  num_threads = atoi(argv[1]);

  if ( num_threads < 1 || num_threads > MAX_THREADS )
  {
    fprintf(stderr, "srmp_x86: error: number of threads not in [1,%d]\n",
        MAX_THREADS);
    return 1;
  }
  
  /*
   * read input
   */
  for (i = 0; i < NTR; i++)
  {
    if ( fread(datai + i * SIZE, BYTE_SIZE, 1, stdin) < 1 )
    {
      fprintf(stderr, "srmp_x86: error: fread failed: %s\n", strerror(errno));
      return 1;
    }
  }

  /*
   * set time mark
   */
  start_time = times(NULL);

  /* 
   * create threads to execute fft_thread
   */
  for (i = 0; i < num_threads; i++) {
    /* 
     * initialize thread data
     */
    td[i].n = N;
    td[i].ntr = NTR;
    td[i].n0 = i;
    td[i].dn = num_threads;
    td[i].datai = datai;
    td[i].dataf = dataf;
    td[i].datao = datao;
    /* 
     * start thread
     */
    if ( pthread_create(&thread_ids[i], 0, &fft_thread, &td[i]) != 0 ) {
      fprintf(stderr, "srmp_x86: error: pthread_create failed: fft_thread%s\n",
          strerror(errno));
      return 1;
    }
  }
  /* 
   * wait for threads to complete execution
   */
  for (i = 0; i < num_threads; i++) {
    pthread_join(thread_ids[i], NULL);
  }

/* produce amplitude spectra
  for (i = 0; i < NTR; i++)
  {
    for (j = 0; j < SIZE; j++)
    {
      int ire = j * 2;
      int iim = j * 2 + 1;
      float re = dataf[i * DOUBLE_SIZE + ire];
      float im = dataf[i * DOUBLE_SIZE + iim];
      datao[i * SIZE + j] = sqrt(re*re + im*im);
    }
  }
  goto WRITE;
*/

  /* 
   * create threads to execute srmp_thread
   */
  for (i = 0; i < num_threads; i++) {
    /* 
     * start thread
     */
    if ( pthread_create(&thread_ids[i], 0, &srmp_thread, &td[i]) != 0 ) {
      fprintf(stderr, "srmp_x86: error: pthread_create failed: srmp_thread%s\n",
          strerror(errno));
      return 1;
    }
  }
  /* 
   * wait for threads to complete execution
   */
  for (i = 0; i < num_threads; i++) {
    pthread_join(thread_ids[i], NULL);
  }

  /*
   * display program computation time
   */
  stop_time = times(NULL);
  fprintf(stderr, "%12.3lf\n", 
      (double)(stop_time - start_time) / sysconf(_SC_CLK_TCK));

WRITE:
  /*
   * write output
   */
  for (i = 0; i < NTR; i++)
  {
    if ( fwrite(datao + i * SIZE, BYTE_SIZE, 1, stdout) < 1 )
    {
      fprintf(stderr, "srmp_x86: error: fwrite failed: %s\n", strerror(errno));
      return 1;
    }
  }

  return 0;
}


/*
 * Purpose: thread implementing FFT of given data
 *
 * Date: 2008-05-14
 *
 * Modified:
 *
 * Author: KuE
 *
 * Remarks:
 *
 */

void * fft_thread(void * data)
{
  thread_data * td; /* thread data */
  float d[DOUBLE_SIZE]; /* work array for FFT */
  float * di; /* pointer to current input trace */
  int i, j;
  fftwf_plan p; /* FFTW execution plan */


  /* 
   * get thread data 
   */
  td = (thread_data *)data;

  /* 
   * create FFTW plan
   */
  pthread_mutex_lock(&op_lock);
  p = fftwf_plan_dft_1d(SIZE, (fftwf_complex *)d, 
      (fftwf_complex *)td->dataf, FFTW_FORWARD, FFTW_ESTIMATE);
  pthread_mutex_unlock(&op_lock);

  /* 
   * loop on input traces
   */
  for (i = td->n0; i < td->ntr; i += td->dn) {

    /* 
     * initialize input for FFT
     */
    di = td->datai + i * SIZE;
    for (j = 0; j < SIZE; j++) {
      d[2*j] = di[j];
      d[2*j+1] = 0;
    }

    /* 
     * do FFT
     */
    fftwf_execute_dft(p, (fftwf_complex *)d, 
        (fftwf_complex *)(td->dataf + i * DOUBLE_SIZE));
  }
}

void ro_filter_2d (float * in, int n);

void * srmp_thread(void * data)
{
  thread_data * td;/* thread data */
  float * di1; /* input 1 */
  float * di2; /* input 2 */
  float * do1; /* output */
  float acc[DOUBLE_SIZE]; /* accumulator*/
  float d[DOUBLE_SIZE]; /* work array */
  int i, j, k, l, n;
  fftwf_plan p; /* FFTW execution plan */

  /* 
   * get thread data
   */
  td = (thread_data *)data;

  /* 
   * create FFTW plan
   */
  pthread_mutex_lock(&op_lock);
  p = fftwf_plan_dft_1d(SIZE, (fftwf_complex *)acc, (fftwf_complex *)d, 
      FFTW_BACKWARD, FFTW_ESTIMATE);
  pthread_mutex_unlock(&op_lock);

  /* 
   * loop on output SPs
   */
  n = td->n;
  for (i = td->n0; i < n; i += td->dn) 
  {
    /*
     * loop on output RPs
     */
    for (j = 0; j < n; j++)
    {
      float norm = 1;
      /*
       * clean the accumulator
       */
      memset(acc, 0, DOUBLE_BYTE_SIZE);
      /*
       * loop on intermediate positions
       */
      for (k = 0; k < n; k++)
      {
        /* 
         * get trace of SP gather
         */
        di1 = td->dataf + (i * n + k) * DOUBLE_SIZE;

        /* 
         * get trace of RP gather
         */
        di2 = td->dataf + (k * n + j) * DOUBLE_SIZE;

        /*
         * complex multiply SP and RP traces
         */
        for (l = 0; l < SIZE; l++)
        {
          int ire = 2 * l, iim = 2 * l + 1;
          acc[ire] += di1[ire] * di2[ire] - di1[iim] * di2[iim];
          acc[iim] += di1[iim] * di2[ire] + di1[ire] * di2[iim];
        }
      }
      if ( k > 1 )
      {
        norm = -1.f / k;
      }
      
      /*
       * apply compensation filtering
       */
      /*ro_filter_2d (acc, SIZE);*/

      /*
       * apply inverse FFT to accumulator
       */
      fftwf_execute_dft(p, (fftwf_complex *)acc, (fftwf_complex *)d);

      /*
       * store real samples to output
       */
      do1 = td->datao + (i * n + j) * SIZE;
      for (l = 0; l < SIZE; l++)
      {
        do1[l] = d[2 * l] * norm;
      }
    } /* for j - output RPs */
  } /* for i - output SPs */
  
  return 0;
}

/*
 * Purpose: compensation (ro) filter in frequency domain
 *
 * Date: 2008-05-04
 *
 * Modified:
 *
 * Author: KuE
 *
 * Remarks: 1. in[] has size 2*n and contains complex numbers stored like this:
 *             - real parts at 2*k;
 *             - imaginary parts at 2*k+1
 */

void ro_filter_2d (float * in, int n)
{
  float w = M_PI / (float)n * 0.5;
  int i, j;
  for (i = 0, j = n - 1; i < n/2; i++, j--)
  {
    float amp = sqrt(i * w * 0.5);
    int ire = 2 * i;
    int iim = ire + 1;
    float a = in[ire];
    float b = in[iim];
    in[ire] = amp * (a + b);
    in[iim] = amp * (b - a);
    ire = 2 * j;
    iim = ire + 1;
    a = in[ire];
    b = in[iim];
    in[ire] = amp * (a + b);
    in[iim] = amp * (b - a);
  }
}

Hosted by uCoz