#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <libgen.h>
#include <pthread.h>
/*
* Copyright (C) 2008 GEOLAB Ltd. ( http://www.geo-lab.ru )
*
* This file contains a program for testing multi-CPU computers for their fit
* to migration-type seismic processing. The test is based on the 'kirchslow'
* subroutine from the book "Basic Earth Imaging" by Jon F.Claerbout and
* James L.Black. The subroutine implements a simplified version of 'Kirchhoff'
* migration in the homogenious medium.
*
* The program uses the mechanizm of POSIX threads for running
* parallel computations. Each thread produces migration for its portion of
* output, as the common input is all in memory.
*
* The input is a set of traces with all their samples but one set to zeroes,
* as the only sample is set to 1. Thus the migration output should look like
* a half-round.
*
* The program takes 3 positional command line parameters (see 'main()'):
* 1. Number of traces in input/output.
* 2. Number of samples in input/output.
* 3. Number of computing threads.
*
* Date: Feb-03-2007
*
* Modified: Nov-25-2007 - KuE - mig_one_trace(): introduced optimization
* made by the Intel ICSC CRT Team
*
* Author(s): KuE
*
* Remarks:
* There are also original functions from BEI book by Claerbout and Black.
* They are here for reference and for making modeling if required.
*/
#define max(a,b) ((a)>(b)?(a):(b))
#define min(a,b) ((a)<(b)?(a):(b))
/*
* Purpose: 'slow' implementation of "Kirchhoff" modeling/migration.
* Borrowed from the book "Basic Earth Imaging (Version 2.4)"
* by Jon F.Claerbout and James L.Black pp.65-67.
* Translated from Ratfor to C by KuE.
*/
void kirchslow(
int adj, /* 0 - modeling, 1 - migration */
float velhalf, /* half the interval velocity */
float t0, /* */
float dt, /* sampling rate, sec */
float dx, /* distance between traces, m */
float ** modl, /* input for modeling, output for migration */
int nt, /* number of samples */
int nx, /* number of traces */
float ** data /* input for migration, output for modeling */
)
{
int ix, iy, it, iz, ib, nz;
float x0, y0, dy, z0, dz, t, x, y, z, hs;
x0 = 0; y0 = 0; dy = dx; z0 = t0; dz = dt; nz = nt;
for (ix = 0; ix < nx; ix++) {
x = x0 + dx * ix;
for (iy = 0; iy < nx; iy++) {
y = y0 + dy * iy;
for (iz = 0; iz < nz; iz++) {
z = z0 + dz * iz;
hs = (x - y) / velhalf;
t = sqrt(z*z+hs*hs);
it = 0.5+(t-t0)/dt;
if ( it < nt ) {
if ( adj == 0 )
data[iy][it] += modl[ix][iz];
else
modl[ix][iz] += data[iy][it];
}
}
}
}
}
/*
* Purpose: 'fast' implementation of "Kirchhoff" modeling/migration.
* Borrowed from the book "Basic Earth Imaging (Version 2.4)"
* by Jon F.Claerbout and James L.Black pp.67-68.
* Translated from Ratfor to C by KuE.
*/
void kirchfast(
int adj, /* 0 - modeling, 1 - migration */
float * vrms, /* Vrms law, 'nt' samples, m/sec */
float t0, /* */
float dt, /* sampling rate, sec */
float dx, /* distance between traces, m */
float ** modl, /* input for modeling, output for migration */
int nt, /* number of samples */
int nx, /* number of traces */
float ** data /* input for migration, output for modeling */
)
{
int ix, iz, it, ib;
float amp, t, z, b, x;
for (ib = -nx+1; ib < nx; ib++) {
b = dx*ib;
for (iz = 1; iz < nt; iz++) {
x = b*2./vrms[iz];
z = t0+dt*iz;
t = sqrt(z*z+x*x);
it = 0.5+(t-t0)/dt;
if (it >= nt)
break;
amp = z/t*sqrt(nt*dt/t);
for (ix = max(0, 1-ib); ix < min(nx,nx-ib); ix++)
if ( adj == 0 )
data[ix+ib][it] += modl[ix][iz]*amp;
else
modl[ix][iz] += (data[ix+ib][it]*amp);
}
}
}
/*
* Purpose: a modification of 'kirchslow()' by J.Claerbout to produce
* migration of only 1 trace; the function is designed to test
* performance of SMP machines
*
* Remarks:
* 2007-11-25 - this is a version optimized by the members of
* Intel ICSC CRT Team: Justin Chen, Wang Zhe,
* Qiao Nan, Mao Xiaowei, and others;
* they have introduced two arrays itt[] and ftt[] of
* indices in order to avoid computing the index just
* before its usage in the loop by iz - this makes it
* possible for the Intel icc compiler to make
* straightforward vectorization;
* use extra compiler flags: "-no-prec-div -msse3"
* when building this program with the Intel icc compiler
*
*/
void mig_one_trace(
float ** data, /* input for migration */
int nx, /* number of traces */
int nt, /* number of samples */
float dx, /* distance between traces, m */
float dt, /* sampling rate, sec */
float velhalf, /* half the velocity, m/sec */
float * modl, /* migration output */
int ix /* migration output trace number */
)
{
int iy, it, iz, ib, nz;
float x0, y0, dy, z0, dz, t, x, y, z, hs;
int itt[nt];
float ftt[nt];
x0 = 0; y0 = 0; dy = dx; z0 = 0; dz = dt; nz = nt;
x = x0 + dx * ix;
for (iy = 0; iy < nx; iy++) {
y = y0 + dy * iy;
for (iz = 0; iz < nz; iz++) {
z = z0 + dz * iz;
hs = (x - y) / velhalf;
t = sqrt(z*z+hs*hs);
ftt[iz] = 0.5+t/dt;
}
for (iz = 0; iz < nz; iz++) {
itt[iz] = ftt[iz];
}
for (iz = 0; iz < nz; iz++) {
if ( itt[iz] < nt ) {
modl[iz] += data[iy][itt[iz]];
}
}
}
// for (iz = 0; iz < nz; iz++) {
// printf("%g\n",modl[iz]);
// }
}
/*
* structure to pass data to a computing thread
*/
typedef struct tagTHREAD_DATA {
float ** data; /* input */
int nx; /* number of traces */
int nt; /* number of samples */
float dx; /* distance between traces, m */
float dt; /* sampling rate, sec */
float velhalf; /* half the velocity, m/sec */
float ** modl; /* migration output */
int ix0; /* migrate from this trace */
int dix; /* migrate every dix-th trace */
pthread_t id; /* id of computing thread */
} THREAD_DATA;
/*
* Purpose: computing thread
*/
void * thread(void * _data)
{
THREAD_DATA * data = (THREAD_DATA *)_data;
if ( data != NULL ) {
int ix = 0;
for (ix = data->ix0; ix < data->nx; ix += data->dix ) {
mig_one_trace(data->data, data->nx, data->nt, data->dx, data->dt,
data->velhalf, data->modl[ix], ix);
}
}
return NULL;
}
/*
* Purpose: prints 1D array of floats to stdout
*/
void print_1d_array(const char * what, float * data, int n)
{
int i;
printf("\n");
printf("# %s\n", what);
printf("\n");
for (i = 0; i < n; i++)
printf("%+14.6f\n", data[i]);
}
/*
* Purpose: prints 2D array of floats to stdout
*/
void print_2d_array(const char * what, float ** data, int n1, int n2,
int transpose, int reverse1, int reverse2)
{
int i, j;
printf("\n");
printf("# %s\n", what);
printf("\n");
if ( transpose == 0 ) {
for (i = 0; i < n1; i++) {
int ix = i;
if ( reverse1 ) ix = n1 - i - 1;
for (j = 0; j < n2; j++) {
int jx = j;
if ( reverse2 ) jx = n2 - j - 1;
if ( j > 0) printf(" ");
printf("%+14.6f", data[ix][jx]);
}
if (i < n1 - 1) printf("\n");
}
}
else {
for (j = 0; j < n2; j++) {
int jx = j;
if ( reverse2 ) jx = n2 - j - 1;
for (i = 0; i < n1; i++) {
int ix = i;
if ( reverse1 ) ix = n1 - i - 1;
if ( i > 0) printf(" ");
printf("%+14.6f", data[ix][jx]);
}
if (j < n2 - 1) printf("\n");
}
}
printf("\n");
}
const int MAX_TRACES = 1000000; /* maximum number of traces */
const int MAX_SAMPLES = 100000; /* maximum number of samples */
const int MAX_THREADS = 64; /* maximum number of computing threads */
/*
* Purpose: main function to run multi-CPU performance tests
*/
int main(int argc, char ** argv)
{
int ret = 0; /* return code */
float velhalf = 1000; /* half the velocity */
float dt = 0.004; /* sampling rate */
float dx = 25; /* distance between traces */
float ** modl = NULL; /* migration output */
float ** data = NULL; /* input */
int nx = MAX_TRACES; /* number of traces */
int nt = MAX_SAMPLES; /* number of samples */
int n_threads = MAX_THREADS; /* number of computing threads */
THREAD_DATA * thread_data = NULL; /* data for computing threads */
char * prog = basename(argv[0]); /* program name */
int i = 0;
/*
* get command line parameters
*/
if ( argc != 4 ) {
fprintf(stderr, "%s: error: too few arguments\n", prog);
fprintf(stderr, "Usage: %s <traces> <samples> <threads>\n", prog);
return -1;
}
nx = atoi(argv[1]);
if ( nx < 1 || nx > MAX_TRACES ) {
fprintf(stderr,
"%s: error: wrong number of traces: must be 1-%d\n",
prog, MAX_TRACES);
return -1;
}
nt = atoi(argv[2]);
if ( nt < 1 || nt > MAX_SAMPLES ) {
fprintf(stderr,
"%s: error: wrong number of samples: must be 1-%d\n",
prog, MAX_SAMPLES);
return -1;
}
n_threads = atoi(argv[3]);
if ( n_threads < 1 || n_threads > MAX_THREADS ) {
fprintf(stderr,
"%s: error: wrong number of threads: must be 1-%d\n",
prog, MAX_THREADS);
return -1;
}
/*
* allocate memory
*/
modl = (float **)calloc(nx, sizeof(float *));
if ( modl == NULL ) {
fprintf(stderr, "%s: error: cannot allocate memory: modl\n", prog);
ret = -1;
goto FINISH;
}
for ( i = 0; i < nx; i++ ) {
modl[i] = (float *)calloc(nt, sizeof(float));
if ( modl[i] == NULL ) {
fprintf(stderr, "%s: error: cannot allocate memory: modl[%d]\n",
prog, i);
ret = -1;
goto FINISH;
}
}
data = (float **)calloc(nx, sizeof(float *));
if ( data == NULL ) {
fprintf(stderr, "%s: error: cannot allocate memory: data\n", prog);
ret = -1;
goto FINISH;
}
for ( i = 0; i < nx; i++ ) {
data[i] = (float *)calloc(nt, sizeof(float));
if ( data[i] == NULL ) {
fprintf(stderr, "%s: error: cannot allocate memory: data[%d]\n",
prog, i);
ret = -1;
goto FINISH;
}
}
thread_data = (THREAD_DATA *)calloc(n_threads, sizeof(THREAD_DATA));
/*
* init data for computing threads
*/
for (i = 0; i < n_threads; i++) {
thread_data[i].data = data;
thread_data[i].nx = nx;
thread_data[i].nt = nt;
thread_data[i].dx = dx;
thread_data[i].dt = dt;
thread_data[i].velhalf = velhalf;
thread_data[i].modl = modl;
thread_data[i].ix0 = i;
thread_data[i].dix = n_threads;
thread_data[i].id = 0;
}
/*
* modeling
* do not remove - true modeling with 'kirchslow()' goes here
*
modl[nx/2][nt/2] = 1;
kirchslow(0, velhalf, 0, dt, dx, modl, nt, nx, data);
modl[nx/2][nt/2] = 0;
*/
/*
* init input for migration in case no true modeling is done
*/
data[nx/2][nt/2] = 1;
/*
* migration
*/
/*
* do not remove - original 'kirchslow()' call may be used for debugging
kirchslow(1, velhalf, 0, dt, dx, modl, nt, nx, data);
goto FINISH;
*/
/*
* start computing threads
*/
for (i = 0; i < n_threads; i++)
if ( pthread_create(&thread_data[i].id, 0, &thread,
&thread_data[i] ) != 0 ) {
fprintf(stdout, "main: error: cannot create thread %i\n", i);
ret = -1;
goto FINISH;
}
/*
* wait for computing threads to finish
*/
for ( i = 0; i < n_threads; i++ )
pthread_join(thread_data[i].id, NULL);
FINISH:
/*
* print output to stdout - can be plotted with gnuplot like this:
* 1. run this program (e.g. 301 traces, 1001 samples, 2 threads)
* prog 301 1001 2 > output.txt
* 2. start gnuplot
* gnuplot
* 3. in gnuplot's command line
* set pm3d map
* splot 'output.txt' matrix
*
print_2d_array("modl after", modl, nx, nt, 1, 0, 1);
*/
/*
* release memory
*/
if ( modl != NULL ) {
for (i = 0; i < nx; i++) if ( modl[i] != NULL ) free(modl[i]);
free(modl);
}
if ( data != NULL ) {
for (i = 0; i < nx; i++) if ( data[i] != NULL ) free(data[i]);
free(data);
}
if ( thread_data != NULL ) free(thread_data);
return ret;
}
