#include <math.h> #include <stdlib.h> #include <stdio.h> #include <libgen.h> #include <pthread.h> /* * Copyright (C) 2008 GEOLAB Ltd. ( http://www.geo-lab.ru ) * * This file contains a program for testing multi-CPU computers for their fit * to migration-type seismic processing. The test is based on the 'kirchslow' * subroutine from the book "Basic Earth Imaging" by Jon F.Claerbout and * James L.Black. The subroutine implements a simplified version of 'Kirchhoff' * migration in the homogenious medium. * * The program uses the mechanizm of POSIX threads for running * parallel computations. Each thread produces migration for its portion of * output, as the common input is all in memory. * * The input is a set of traces with all their samples but one set to zeroes, * as the only sample is set to 1. Thus the migration output should look like * a half-round. * * The program takes 3 positional command line parameters (see 'main()'): * 1. Number of traces in input/output. * 2. Number of samples in input/output. * 3. Number of computing threads. * * Date: Feb-03-2007 * * Modified: Nov-25-2007 - KuE - mig_one_trace(): introduced optimization * made by the Intel ICSC CRT Team * * Author(s): KuE * * Remarks: * There are also original functions from BEI book by Claerbout and Black. * They are here for reference and for making modeling if required. */ #define max(a,b) ((a)>(b)?(a):(b)) #define min(a,b) ((a)<(b)?(a):(b)) /* * Purpose: 'slow' implementation of "Kirchhoff" modeling/migration. * Borrowed from the book "Basic Earth Imaging (Version 2.4)" * by Jon F.Claerbout and James L.Black pp.65-67. * Translated from Ratfor to C by KuE. */ void kirchslow( int adj, /* 0 - modeling, 1 - migration */ float velhalf, /* half the interval velocity */ float t0, /* */ float dt, /* sampling rate, sec */ float dx, /* distance between traces, m */ float ** modl, /* input for modeling, output for migration */ int nt, /* number of samples */ int nx, /* number of traces */ float ** data /* input for migration, output for modeling */ ) { int ix, iy, it, iz, ib, nz; float x0, y0, dy, z0, dz, t, x, y, z, hs; x0 = 0; y0 = 0; dy = dx; z0 = t0; dz = dt; nz = nt; for (ix = 0; ix < nx; ix++) { x = x0 + dx * ix; for (iy = 0; iy < nx; iy++) { y = y0 + dy * iy; for (iz = 0; iz < nz; iz++) { z = z0 + dz * iz; hs = (x - y) / velhalf; t = sqrt(z*z+hs*hs); it = 0.5+(t-t0)/dt; if ( it < nt ) { if ( adj == 0 ) data[iy][it] += modl[ix][iz]; else modl[ix][iz] += data[iy][it]; } } } } } /* * Purpose: 'fast' implementation of "Kirchhoff" modeling/migration. * Borrowed from the book "Basic Earth Imaging (Version 2.4)" * by Jon F.Claerbout and James L.Black pp.67-68. * Translated from Ratfor to C by KuE. */ void kirchfast( int adj, /* 0 - modeling, 1 - migration */ float * vrms, /* Vrms law, 'nt' samples, m/sec */ float t0, /* */ float dt, /* sampling rate, sec */ float dx, /* distance between traces, m */ float ** modl, /* input for modeling, output for migration */ int nt, /* number of samples */ int nx, /* number of traces */ float ** data /* input for migration, output for modeling */ ) { int ix, iz, it, ib; float amp, t, z, b, x; for (ib = -nx+1; ib < nx; ib++) { b = dx*ib; for (iz = 1; iz < nt; iz++) { x = b*2./vrms[iz]; z = t0+dt*iz; t = sqrt(z*z+x*x); it = 0.5+(t-t0)/dt; if (it >= nt) break; amp = z/t*sqrt(nt*dt/t); for (ix = max(0, 1-ib); ix < min(nx,nx-ib); ix++) if ( adj == 0 ) data[ix+ib][it] += modl[ix][iz]*amp; else modl[ix][iz] += (data[ix+ib][it]*amp); } } } /* * Purpose: a modification of 'kirchslow()' by J.Claerbout to produce * migration of only 1 trace; the function is designed to test * performance of SMP machines * * Remarks: * 2007-11-25 - this is a version optimized by the members of * Intel ICSC CRT Team: Justin Chen, Wang Zhe, * Qiao Nan, Mao Xiaowei, and others; * they have introduced two arrays itt[] and ftt[] of * indices in order to avoid computing the index just * before its usage in the loop by iz - this makes it * possible for the Intel icc compiler to make * straightforward vectorization; * use extra compiler flags: "-no-prec-div -msse3" * when building this program with the Intel icc compiler * */ void mig_one_trace( float ** data, /* input for migration */ int nx, /* number of traces */ int nt, /* number of samples */ float dx, /* distance between traces, m */ float dt, /* sampling rate, sec */ float velhalf, /* half the velocity, m/sec */ float * modl, /* migration output */ int ix /* migration output trace number */ ) { int iy, it, iz, ib, nz; float x0, y0, dy, z0, dz, t, x, y, z, hs; int itt[nt]; float ftt[nt]; x0 = 0; y0 = 0; dy = dx; z0 = 0; dz = dt; nz = nt; x = x0 + dx * ix; for (iy = 0; iy < nx; iy++) { y = y0 + dy * iy; for (iz = 0; iz < nz; iz++) { z = z0 + dz * iz; hs = (x - y) / velhalf; t = sqrt(z*z+hs*hs); ftt[iz] = 0.5+t/dt; } for (iz = 0; iz < nz; iz++) { itt[iz] = ftt[iz]; } for (iz = 0; iz < nz; iz++) { if ( itt[iz] < nt ) { modl[iz] += data[iy][itt[iz]]; } } } // for (iz = 0; iz < nz; iz++) { // printf("%g\n",modl[iz]); // } } /* * structure to pass data to a computing thread */ typedef struct tagTHREAD_DATA { float ** data; /* input */ int nx; /* number of traces */ int nt; /* number of samples */ float dx; /* distance between traces, m */ float dt; /* sampling rate, sec */ float velhalf; /* half the velocity, m/sec */ float ** modl; /* migration output */ int ix0; /* migrate from this trace */ int dix; /* migrate every dix-th trace */ pthread_t id; /* id of computing thread */ } THREAD_DATA; /* * Purpose: computing thread */ void * thread(void * _data) { THREAD_DATA * data = (THREAD_DATA *)_data; if ( data != NULL ) { int ix = 0; for (ix = data->ix0; ix < data->nx; ix += data->dix ) { mig_one_trace(data->data, data->nx, data->nt, data->dx, data->dt, data->velhalf, data->modl[ix], ix); } } return NULL; } /* * Purpose: prints 1D array of floats to stdout */ void print_1d_array(const char * what, float * data, int n) { int i; printf("\n"); printf("# %s\n", what); printf("\n"); for (i = 0; i < n; i++) printf("%+14.6f\n", data[i]); } /* * Purpose: prints 2D array of floats to stdout */ void print_2d_array(const char * what, float ** data, int n1, int n2, int transpose, int reverse1, int reverse2) { int i, j; printf("\n"); printf("# %s\n", what); printf("\n"); if ( transpose == 0 ) { for (i = 0; i < n1; i++) { int ix = i; if ( reverse1 ) ix = n1 - i - 1; for (j = 0; j < n2; j++) { int jx = j; if ( reverse2 ) jx = n2 - j - 1; if ( j > 0) printf(" "); printf("%+14.6f", data[ix][jx]); } if (i < n1 - 1) printf("\n"); } } else { for (j = 0; j < n2; j++) { int jx = j; if ( reverse2 ) jx = n2 - j - 1; for (i = 0; i < n1; i++) { int ix = i; if ( reverse1 ) ix = n1 - i - 1; if ( i > 0) printf(" "); printf("%+14.6f", data[ix][jx]); } if (j < n2 - 1) printf("\n"); } } printf("\n"); } const int MAX_TRACES = 1000000; /* maximum number of traces */ const int MAX_SAMPLES = 100000; /* maximum number of samples */ const int MAX_THREADS = 64; /* maximum number of computing threads */ /* * Purpose: main function to run multi-CPU performance tests */ int main(int argc, char ** argv) { int ret = 0; /* return code */ float velhalf = 1000; /* half the velocity */ float dt = 0.004; /* sampling rate */ float dx = 25; /* distance between traces */ float ** modl = NULL; /* migration output */ float ** data = NULL; /* input */ int nx = MAX_TRACES; /* number of traces */ int nt = MAX_SAMPLES; /* number of samples */ int n_threads = MAX_THREADS; /* number of computing threads */ THREAD_DATA * thread_data = NULL; /* data for computing threads */ char * prog = basename(argv[0]); /* program name */ int i = 0; /* * get command line parameters */ if ( argc != 4 ) { fprintf(stderr, "%s: error: too few arguments\n", prog); fprintf(stderr, "Usage: %s <traces> <samples> <threads>\n", prog); return -1; } nx = atoi(argv[1]); if ( nx < 1 || nx > MAX_TRACES ) { fprintf(stderr, "%s: error: wrong number of traces: must be 1-%d\n", prog, MAX_TRACES); return -1; } nt = atoi(argv[2]); if ( nt < 1 || nt > MAX_SAMPLES ) { fprintf(stderr, "%s: error: wrong number of samples: must be 1-%d\n", prog, MAX_SAMPLES); return -1; } n_threads = atoi(argv[3]); if ( n_threads < 1 || n_threads > MAX_THREADS ) { fprintf(stderr, "%s: error: wrong number of threads: must be 1-%d\n", prog, MAX_THREADS); return -1; } /* * allocate memory */ modl = (float **)calloc(nx, sizeof(float *)); if ( modl == NULL ) { fprintf(stderr, "%s: error: cannot allocate memory: modl\n", prog); ret = -1; goto FINISH; } for ( i = 0; i < nx; i++ ) { modl[i] = (float *)calloc(nt, sizeof(float)); if ( modl[i] == NULL ) { fprintf(stderr, "%s: error: cannot allocate memory: modl[%d]\n", prog, i); ret = -1; goto FINISH; } } data = (float **)calloc(nx, sizeof(float *)); if ( data == NULL ) { fprintf(stderr, "%s: error: cannot allocate memory: data\n", prog); ret = -1; goto FINISH; } for ( i = 0; i < nx; i++ ) { data[i] = (float *)calloc(nt, sizeof(float)); if ( data[i] == NULL ) { fprintf(stderr, "%s: error: cannot allocate memory: data[%d]\n", prog, i); ret = -1; goto FINISH; } } thread_data = (THREAD_DATA *)calloc(n_threads, sizeof(THREAD_DATA)); /* * init data for computing threads */ for (i = 0; i < n_threads; i++) { thread_data[i].data = data; thread_data[i].nx = nx; thread_data[i].nt = nt; thread_data[i].dx = dx; thread_data[i].dt = dt; thread_data[i].velhalf = velhalf; thread_data[i].modl = modl; thread_data[i].ix0 = i; thread_data[i].dix = n_threads; thread_data[i].id = 0; } /* * modeling * do not remove - true modeling with 'kirchslow()' goes here * modl[nx/2][nt/2] = 1; kirchslow(0, velhalf, 0, dt, dx, modl, nt, nx, data); modl[nx/2][nt/2] = 0; */ /* * init input for migration in case no true modeling is done */ data[nx/2][nt/2] = 1; /* * migration */ /* * do not remove - original 'kirchslow()' call may be used for debugging kirchslow(1, velhalf, 0, dt, dx, modl, nt, nx, data); goto FINISH; */ /* * start computing threads */ for (i = 0; i < n_threads; i++) if ( pthread_create(&thread_data[i].id, 0, &thread, &thread_data[i] ) != 0 ) { fprintf(stdout, "main: error: cannot create thread %i\n", i); ret = -1; goto FINISH; } /* * wait for computing threads to finish */ for ( i = 0; i < n_threads; i++ ) pthread_join(thread_data[i].id, NULL); FINISH: /* * print output to stdout - can be plotted with gnuplot like this: * 1. run this program (e.g. 301 traces, 1001 samples, 2 threads) * prog 301 1001 2 > output.txt * 2. start gnuplot * gnuplot * 3. in gnuplot's command line * set pm3d map * splot 'output.txt' matrix * print_2d_array("modl after", modl, nx, nt, 1, 0, 1); */ /* * release memory */ if ( modl != NULL ) { for (i = 0; i < nx; i++) if ( modl[i] != NULL ) free(modl[i]); free(modl); } if ( data != NULL ) { for (i = 0; i < nx; i++) if ( data[i] != NULL ) free(data[i]); free(data); } if ( thread_data != NULL ) free(thread_data); return ret; }