/* -*- c-file-style: "GNU" -*- */
/*
 * Copyright  CNRS, INRIA, Universit Bordeaux 1
 * See COPYING in top-level directory.
 */

#define _GNU_SOURCE 1
#define _REENTRANT

#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/timeb.h>
#include <dlfcn.h>
#include <string.h>

#include "mpi.h"
#include "mpi_eztrace.h"
#include "mpi_ev_codes.h"
#include "eztrace.h"


/* Process identifier.
 * It corresponds to the global MPI rank unless the process was spawned.
 * In that case, the identifier is the concatenation of the parent process id
 * and the global rank.
 * For example, process id 0_1_3 has a global rank of 3 and has been spawned
 * by process 0_1. Process 0_1 has a global rank of 1 and was spawned by
 * process 0.
 *
 * The string is allocated with asprintf() in __mpi_init_generic() and
 * released in libfinalize().
 */
char *proc_id;


/* Pointers to the actual MPI functions (C bindings).
 * They are assigned through the INTERCEPT() calls in libinit(); a pointer
 * without a matching INTERCEPT() call stays NULL.
 * The wrappers defined in this library forward to the MPI implementation
 * through these pointers (see e.g. MPI_Comm_size()).
 */

/* initialization, finalization and communicator queries */
int ( *libMPI_Init) (int *, char ***);
int ( *libMPI_Init_thread) (int *, char ***, int, int*);
int ( *libMPI_Comm_size) (MPI_Comm, int *);
int ( *libMPI_Comm_rank) (MPI_Comm, int *);
int ( *libMPI_Finalize) (void);
int ( *libMPI_Initialized) (int *);
int ( *libMPI_Abort) (MPI_Comm, int);

/* blocking point-to-point communication */
int ( *libMPI_Send) (void *buf, int count, MPI_Datatype datatype,int dest, int tag,MPI_Comm comm);
int ( *libMPI_Recv) (void *buf, int count, MPI_Datatype datatype,int source, int tag, MPI_Comm comm, MPI_Status *status);

/* other send modes and non-blocking point-to-point communication */
int ( *libMPI_Bsend) (void*, int, MPI_Datatype, int, int, MPI_Comm);
int ( *libMPI_Ssend) (void*, int, MPI_Datatype, int, int, MPI_Comm);
int ( *libMPI_Rsend) (void*, int, MPI_Datatype, int, int, MPI_Comm);
int ( *libMPI_Isend) (void*, int, MPI_Datatype, int, int, MPI_Comm, MPI_Request *);
int ( *libMPI_Ibsend) (void*, int, MPI_Datatype, int, int, MPI_Comm, MPI_Request *);
int ( *libMPI_Issend) (void*, int, MPI_Datatype, int, int, MPI_Comm, MPI_Request *);
int ( *libMPI_Irsend) (void*, int, MPI_Datatype, int, int, MPI_Comm, MPI_Request *);
int ( *libMPI_Irecv) (void*, int, MPI_Datatype, int, int, MPI_Comm, MPI_Request *);

/* combined send/receive */
int ( *libMPI_Sendrecv) (void *, int, MPI_Datatype,int, int, void *, int, MPI_Datatype, int, int, MPI_Comm, MPI_Status *);
int ( *libMPI_Sendrecv_replace) (void*, int, MPI_Datatype, int, int, int, int, MPI_Comm, MPI_Status *);

/* persistent communication requests */
int ( *libMPI_Send_init) (void*, int, MPI_Datatype, int, int, MPI_Comm, MPI_Request *);
int ( *libMPI_Bsend_init) (void*, int, MPI_Datatype, int,int, MPI_Comm, MPI_Request *);
int ( *libMPI_Ssend_init) (void*, int, MPI_Datatype, int,int, MPI_Comm, MPI_Request *);
int ( *libMPI_Rsend_init) (void*, int, MPI_Datatype, int,int, MPI_Comm, MPI_Request *);
int ( *libMPI_Recv_init) (void*, int, MPI_Datatype, int,int, MPI_Comm, MPI_Request *);
int ( *libMPI_Start) (MPI_Request *);
int ( *libMPI_Startall) (int, MPI_Request *);

/* request completion */
int ( *libMPI_Wait) (MPI_Request *, MPI_Status *);
int ( *libMPI_Test) (MPI_Request *, int *, MPI_Status *);
int ( *libMPI_Waitany) (int, MPI_Request *, int *, MPI_Status *);
int ( *libMPI_Testany) (int, MPI_Request *, int *, int *, MPI_Status *);
int ( *libMPI_Waitall) (int, MPI_Request *, MPI_Status *);
int ( *libMPI_Testall) (int, MPI_Request *, int *, MPI_Status *);
int ( *libMPI_Waitsome) (int, MPI_Request *, int *, int *, MPI_Status *);
int ( *libMPI_Testsome) (int, MPI_Request *, int *, int *, MPI_Status *);

/* message probing */
int ( *libMPI_Probe)( int source, int tag, MPI_Comm comm, MPI_Status *status );
int ( *libMPI_Iprobe)( int source, int tag, MPI_Comm comm, int *flag, MPI_Status *status );

/* collective communication */
int ( *libMPI_Barrier) (MPI_Comm );
int ( *libMPI_Bcast) (void*, int, MPI_Datatype, int, MPI_Comm );
int ( *libMPI_Gather) (void* , int, MPI_Datatype, void*, int, MPI_Datatype, int, MPI_Comm);
int ( *libMPI_Gatherv) (void* , int, MPI_Datatype, void*, int *, int *, MPI_Datatype, int, MPI_Comm);
int ( *libMPI_Scatter) (void* , int, MPI_Datatype, void*, int, MPI_Datatype, int, MPI_Comm);
int ( *libMPI_Scatterv) (void* , int *, int *,  MPI_Datatype, void*, int, MPI_Datatype, int, MPI_Comm);
int ( *libMPI_Allgather) (void* , int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm);
int ( *libMPI_Allgatherv) (void* , int, MPI_Datatype, void*, int *, int *, MPI_Datatype, MPI_Comm);
int ( *libMPI_Alltoall) (void* , int, MPI_Datatype, void*, int, MPI_Datatype, MPI_Comm);
int ( *libMPI_Alltoallv) (void* , int *, int *, MPI_Datatype, void*, int *, int *, MPI_Datatype, MPI_Comm);
int ( *libMPI_Reduce) (void* , void*, int, MPI_Datatype, MPI_Op, int, MPI_Comm);
int ( *libMPI_Allreduce) (void* , void*, int, MPI_Datatype, MPI_Op, MPI_Comm);
int ( *libMPI_Reduce_scatter) (void* , void*, int *, MPI_Datatype, MPI_Op, MPI_Comm);
int ( *libMPI_Scan) (void* , void*, int, MPI_Datatype, MPI_Op, MPI_Comm );

/* one-sided communication */
int ( *libMPI_Get) (void *, int, MPI_Datatype, int, MPI_Aint, int, MPI_Datatype,
			   MPI_Win);
int ( *libMPI_Put) (void *, int, MPI_Datatype, int, MPI_Aint, int, MPI_Datatype,
			   MPI_Win);

/* dynamic process creation */
int ( *libMPI_Comm_spawn)(char *command,
			  char *argv[],
			  int maxprocs,
			  MPI_Info info,
			  int root,
			  MPI_Comm comm,
			  MPI_Comm *intercomm,
			  int array_of_errcodes[]);

/* Pointers to the actual MPI functions (Fortran bindings).
 * They are assigned through the INTERCEPT() calls in libinit() using the
 * lower-case, trailing-underscore symbol names (e.g. "mpi_send_").
 * In most of these signatures every argument is a pointer and the last
 * int* is the Fortran error code; exceptions are flagged below.
 */

/* initialization, finalization and communicator queries */
void (*libmpi_init_)(int*e);
void (*libmpi_init_thread_)(int*, int*, int*);
void (*libmpi_finalize_)(int*);
void (*libmpi_barrier_)(MPI_Comm*, int*);
void (*libmpi_comm_size_)(MPI_Comm*, int*, int*);
void (*libmpi_comm_rank_)(MPI_Comm*, int*, int*);

/* blocking point-to-point communication */
/* NOTE(review): libmpi_send_ has one parameter fewer than libmpi_bsend_ /
 * libmpi_ssend_ / libmpi_rsend_, and libmpi_recv_ has no MPI_Comm* parameter
 * unlike libmpi_irecv_ -- confirm against the Fortran wrappers that call
 * them. */
void (*libmpi_send_)(void*, int*, MPI_Datatype*, int*, int*, int*);
void (*libmpi_recv_)(void*, int*, MPI_Datatype*, int*, int *, MPI_Status *, int*);

/* combined send/receive */
/* NOTE(review): unlike the other Fortran bindings, these two take most of
 * their arguments by value rather than by pointer -- confirm this is
 * intended. */
void ( *libmpi_sendrecv_) (void *, int, MPI_Datatype,int, int, void *, int, MPI_Datatype, int, int, MPI_Comm, MPI_Status *, int*);
void ( *libmpi_sendrecv_replace_) (void*, int, MPI_Datatype, int, int, int, int, MPI_Comm, MPI_Status *, int*);

/* other send modes and non-blocking point-to-point communication */
void (*libmpi_bsend_)(void*, int*, MPI_Datatype*, int*, int*, MPI_Comm*, int*);
void (*libmpi_ssend_)(void*, int*, MPI_Datatype*, int*, int*, MPI_Comm*, int*);
void (*libmpi_rsend_)(void*, int*, MPI_Datatype*, int*, int*, MPI_Comm*, int*);
void (*libmpi_isend_)(void*, int*, MPI_Datatype*, int*, int*, MPI_Comm*, MPI_Request*, int*);
void (*libmpi_ibsend_) (void*, int*, MPI_Datatype*, int*, int*, MPI_Comm*, MPI_Request*, int*);
void (*libmpi_issend_)(void*, int*, MPI_Datatype*, int*, int*, MPI_Comm*, MPI_Request *, int*);
void (*libmpi_irsend_)(void*, int*, MPI_Datatype*, int*, int*, MPI_Comm*, MPI_Request *, int*);
void (*libmpi_irecv_)(void*, int*, MPI_Datatype*, int*, int*, MPI_Comm*, MPI_Request *,  int*);

/* request completion */
void (*libmpi_wait_)(MPI_Request*, MPI_Status*, int*);
void (*libmpi_test_)(MPI_Request*, int*, MPI_Status*, int*);
void (*libmpi_waitany_) (int*, MPI_Request *, int *, MPI_Status *, int*);
void (*libmpi_testany_) (int*, MPI_Request *, int *, int *, MPI_Status *, int*);
void (*libmpi_waitall_) (int*, MPI_Request *, MPI_Status *, int*);
void (*libmpi_testall_) (int*, MPI_Request *, int *, MPI_Status *, int*);
void (*libmpi_waitsome_) (int*, MPI_Request *, int *, int *, MPI_Status *, int*);
void (*libmpi_testsome_) (int*, MPI_Request *, int *, int *, MPI_Status *, int*);

/* message probing */
void ( *libmpi_probe_)( int* source, int* tag, MPI_Comm* comm, MPI_Status *status, int* err );
void ( *libmpi_iprobe_)( int* source, int* tag, MPI_Comm* comm, int *flag, MPI_Status *status, int* err );

/* one-sided communication */
void (*libmpi_get_)(void *, int*, MPI_Datatype*, int*, MPI_Aint*, int*, MPI_Datatype*, MPI_Win*, int*);
void (*libmpi_put_)(void *, int*, MPI_Datatype*, int*, MPI_Aint*, int*, MPI_Datatype*, MPI_Win*, int*);

/* collective communication */
/* NOTE(review): libmpi_gatherv_ and libmpi_allgatherv_ end with MPI_Comm*
 * and have no trailing int*, unlike libmpi_gather_ / libmpi_allgather_ --
 * confirm whether the error-code parameter is missing. */
void (*libmpi_bcast_)(void*, int*, MPI_Datatype*, int*, MPI_Comm*, int*);
void (*libmpi_gather_)(void*, int*, MPI_Datatype*, void*, int*, MPI_Datatype*, int*, MPI_Comm*, int*);
void (*libmpi_gatherv_)(void*, int*, MPI_Datatype*, void*, int*, int*, MPI_Datatype*, int*, MPI_Comm*);
void (*libmpi_scatter_)(void*, int*, MPI_Datatype*, void*, int*, MPI_Datatype*, int*, MPI_Comm*, int*);
void (*libmpi_scatterv_)(void*, int*, int*,  MPI_Datatype*, void*, int*, MPI_Datatype*, int*, MPI_Comm*, int*);
void (*libmpi_allgather_)(void*, int*, MPI_Datatype*, void*, int*, MPI_Datatype*, MPI_Comm*, int*);
void (*libmpi_allgatherv_)(void*, int*, MPI_Datatype*, void*, int*, int*, MPI_Datatype*, MPI_Comm*);
void (*libmpi_alltoall_)(void*, int*, MPI_Datatype*, void*, int*, MPI_Datatype*, MPI_Comm*, int*);
void (*libmpi_alltoallv_)(void*, int*, int*, MPI_Datatype*, void*, int*, int*, MPI_Datatype*, MPI_Comm*, int*);
void (*libmpi_reduce_)(void*, void*, int*, MPI_Datatype*, MPI_Op*, int*, MPI_Comm*, int*);
void (*libmpi_allreduce_)(void*, void*, int*, MPI_Datatype*, MPI_Op*, MPI_Comm*, int*);
void (*libmpi_reduce_scatter_)(void*, void*, int*, MPI_Datatype*, MPI_Op*, MPI_Comm*, int*);
void (*libmpi_scan_)(void*, void*, int*, MPI_Datatype*, MPI_Op*, MPI_Comm*, int*);

/* dynamic process creation */
void (*libmpi_comm_spawn_)(char *command, char **argv, int *maxprocs,
			   MPI_Info *info, int *root, MPI_Comm *comm,
			   MPI_Comm *intercomm, int *array_of_errcodes, int*error);

/* persistent communication requests */
void ( *libmpi_send_init_) (void*, int*, MPI_Datatype*, int*, int*, MPI_Comm*, MPI_Request *, int*);
void ( *libmpi_bsend_init_) (void*, int*, MPI_Datatype*, int*, int*, MPI_Comm*, MPI_Request *, int*);
void ( *libmpi_ssend_init_) (void*, int*, MPI_Datatype*, int*, int*, MPI_Comm*, MPI_Request *, int*);
void ( *libmpi_rsend_init_) (void*, int*, MPI_Datatype*, int*, int*, MPI_Comm*, MPI_Request *, int*);
void ( *libmpi_recv_init_) (void*, int*, MPI_Datatype*, int*, int*, MPI_Comm*, MPI_Request *, int*);
void ( *libmpi_start_) (MPI_Request *, int*);
void ( *libmpi_startall_) (int*, MPI_Request *, int*);



/* Functions that intercept MPI calls.
 * Basically, each function creates an event with the arguments
 * passed to the function.
 * It then calls the actual MPI function (using the appropriate
 * callback) with the same arguments.
 */
/* Interposed MPI_Comm_spawn.
 *
 * Instead of spawning 'command argv...', this spawns
 * 'env <LD_PRELOAD_NAME>=<value> command argv...', where <value> is read
 * from this process's environment, so the children get the same preload
 * setting. Once the children are started, proc_id and the pid of this
 * process are sent to each of them over the new intercommunicator; the
 * children read them in __mpi_init_generic().
 *
 * Parameters are those of the real MPI_Comm_spawn. Returns the value of the
 * underlying MPI_Comm_spawn, or MPI_ERR_NO_MEM if the replacement argument
 * vector cannot be allocated (nothing is spawned in that case).
 */
int MPI_Comm_spawn(char *command,
		   char *argv[],
		   int maxprocs,
		   MPI_Info info,
		   int root,
		   MPI_Comm comm,
		   MPI_Comm *intercomm,
		   int array_of_errcodes[])
{
  int ret = MPI_ERR_NO_MEM;
  char *ld_preload_str = NULL;
  char **new_argv = NULL;
  int argc = 0;
  int i;

  /* Retrieve the LD_PRELOAD value set by EZTrace. getenv() returns NULL when
   * the variable is unset; passing NULL to "%s" is undefined, so fall back to
   * an empty value.
   */
  const char *ld_preload = getenv(LD_PRELOAD_NAME);
  if(ld_preload == NULL)
    ld_preload = "";

  if(asprintf(&ld_preload_str, "%s=%s", LD_PRELOAD_NAME, ld_preload) < 0) {
    /* the pointer is unspecified after a failed asprintf */
    ld_preload_str = NULL;
    goto out;
  }

  /* count the number of args */
  if(argv != MPI_ARGV_NULL)
    while(argv[argc] != NULL)
      argc++;

  /* create a new argv array:
   * [0] = "LD_PRELOAD=...", [1] = command, [2..argc+1] = argv, [argc+2] = NULL
   */
  new_argv = malloc(sizeof(*new_argv) * ((size_t)argc + 3));
  if(new_argv == NULL)
    goto out;

  new_argv[0] = ld_preload_str;
  new_argv[1] = command;
  for(i=0; i<argc; i++)
    new_argv[i+2] = argv[i];
  new_argv[argc+2] = NULL;

  ret = libMPI_Comm_spawn("env", new_argv, maxprocs, info, root, comm, intercomm, array_of_errcodes);

  /* *intercomm is only meaningful when the spawn succeeded */
  if(ret == MPI_SUCCESS) {
    /* Now that the processes are launched, tell them our proc_id so that the filenames are not messed up */
    int f_size = 0;		/* number of children actually created */
    int proc_id_len = (int) strlen(proc_id) + 1;
    int my_pid = getpid();
    MPI_Comm_remote_size(*intercomm, &f_size);

    EZTRACE_EVENT2 (FUT_MPI_SPAWN, my_pid, f_size);

    /* NOTE(review): every process that calls this function sends, whereas
     * __mpi_init_generic() only receives from rank 0 of the parent
     * communicator -- confirm the behaviour when comm holds several processes.
     * NOTE(review): these messages go through MPI_Send rather than
     * libMPI_Send -- confirm this is intended.
     */
    for(i=0; i<f_size; i++) {
      MPI_Send(&proc_id_len, 1 , MPI_INTEGER, i, 0, *intercomm);
      MPI_Send(proc_id, proc_id_len , MPI_CHAR, i, 0, *intercomm);
      MPI_Send(&my_pid, 1 , MPI_INTEGER, i, 0, *intercomm);
    }
  }

 out:
  /* Here, we shall not free ld_preload, since it may modify the environment of the process ! (man getenv) */
  free(new_argv);
  free(ld_preload_str);

  FUNCTION_ENTRY;
  return ret;
}


/* Forward MPI_Comm_size to the underlying MPI library and hand back its
 * return code unchanged; no event is recorded here.
 */
int MPI_Comm_size(MPI_Comm c, int *s)
{
  int status = libMPI_Comm_size(c, s);

  return status;
}

/* Forward MPI_Comm_rank to the underlying MPI library and hand back its
 * return code unchanged; no event is recorded here.
 */
int MPI_Comm_rank(MPI_Comm c, int *r)
{
  int status = libMPI_Comm_rank(c, r);

  return status;
}

/* Interposed MPI_Finalize: runs FUNCTION_ENTRY, then forwards to the
 * underlying MPI library and returns its result.
 */
int MPI_Finalize()
{
  int status;

  FUNCTION_ENTRY;
  status = libMPI_Finalize();

  return status;
}

/* internal function
 * This function is used by the various MPI_Init* functions (C 
 * and Fortran versions)
 * This function add informations to the trace (rank, etc.)
 * and set the trace filename.
 */
void __mpi_init_generic()
{
  int rank = -1;
  int size = -1;
  int ret;
  static int __mpi_initialized = 0;

  MPI_Comm parentcomm;
  MPI_Comm_get_parent( &parentcomm );

  libMPI_Comm_size(MPI_COMM_WORLD, &size);
  libMPI_Comm_rank(MPI_COMM_WORLD, &rank);

  char* filename=NULL;
  if(parentcomm == MPI_COMM_NULL) {
    /* This process is a 'normal' process (ie. it wasn't spawned) */
    ret = asprintf(&proc_id, "%d", rank);
  } else {
    /* This process was spawned.
     * We have to get the parent process information
     */
    char *father_proc_id;
    int father_proc_id_len = -1;
    int ppid = -1;

    /* Get the parent process id */
    libMPI_Recv(&father_proc_id_len, 1, MPI_INTEGER, 0, 0, parentcomm, MPI_STATUS_IGNORE);
    father_proc_id = (char*) malloc(sizeof(char) * father_proc_id_len);
    libMPI_Recv(father_proc_id, father_proc_id_len, MPI_CHAR, 0, 0, parentcomm, MPI_STATUS_IGNORE);

    libMPI_Recv(&ppid, 1, MPI_INTEGER, 0, 0, parentcomm, MPI_STATUS_IGNORE);

    if(!__mpi_initialized)
      EZTRACE_EVENT2 (FUT_MPI_SPAWNED, ppid, rank);

    ret = asprintf(&proc_id, "%s_%d", father_proc_id, rank);
    free(father_proc_id);
  }

  ret = asprintf(&filename, "eztrace_log_rank_%s", proc_id);
  eztrace_set_filename(filename);

  libMPI_Barrier(MPI_COMM_WORLD);
  if(!__mpi_initialized)
    EZTRACE_EVENT5 (FUT_MPI_INIT, rank, size, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_REQUEST_NULL);

  __mpi_initialized = 1;
}

/* Interposed MPI_Init_thread: initializes MPI through the underlying
 * library, then runs the common EZTrace MPI setup (__mpi_init_generic) and
 * FUNCTION_ENTRY. The return code of the underlying call is passed back.
 */
int MPI_Init_thread (int *argc, char ***argv, int required, int *provided)
{
  int status;

  status = libMPI_Init_thread(argc, argv, required, provided);
  __mpi_init_generic();
  FUNCTION_ENTRY;

  return status;
}


/* Interposed MPI_Init: initializes MPI through the underlying library, then
 * runs the common EZTrace MPI setup (__mpi_init_generic) and FUNCTION_ENTRY.
 * The return code of the underlying call is passed back.
 */
int MPI_Init(int * argc, char***argv)
{
  int status;

  status = libMPI_Init(argc, argv);
  __mpi_init_generic();
  FUNCTION_ENTRY;

  return status;
}



void libinit(void) __attribute__ ((constructor));
void libinit(void)
{

  INTERCEPT("MPI_Init_thread", libMPI_Init_thread);
  INTERCEPT("MPI_Init", libMPI_Init);
  INTERCEPT("MPI_Finalize", libMPI_Finalize);
  INTERCEPT("MPI_Barrier", libMPI_Barrier);
  INTERCEPT("MPI_Comm_size", libMPI_Comm_size);
  INTERCEPT("MPI_Comm_rank", libMPI_Comm_rank);

  INTERCEPT("MPI_Send", libMPI_Send);
  INTERCEPT("MPI_Recv", libMPI_Recv);

  INTERCEPT("MPI_Sendrecv", libMPI_Sendrecv);
  INTERCEPT("MPI_Sendrecv_replace", libMPI_Sendrecv_replace);

  INTERCEPT("MPI_Bsend", libMPI_Bsend);
  INTERCEPT("MPI_Ssend", libMPI_Ssend);
  INTERCEPT("MPI_Rsend", libMPI_Rsend);
  INTERCEPT("MPI_Isend", libMPI_Isend);
  INTERCEPT("MPI_Ibsend", libMPI_Ibsend);
  INTERCEPT("MPI_Issend", libMPI_Issend);
  INTERCEPT("MPI_Irsend", libMPI_Irsend);
  INTERCEPT("MPI_Irecv", libMPI_Irecv);

  INTERCEPT("MPI_Wait", libMPI_Wait);
  INTERCEPT("MPI_Waitall", libMPI_Waitall);
  INTERCEPT("MPI_Waitany", libMPI_Waitany);
  INTERCEPT("MPI_Waitsome", libMPI_Waitsome);
  INTERCEPT("MPI_Test", libMPI_Test);
  INTERCEPT("MPI_Testall", libMPI_Testall);
  INTERCEPT("MPI_Testany", libMPI_Testany);
  INTERCEPT("MPI_Testsome", libMPI_Testsome);

  INTERCEPT("MPI_Iprobe", libMPI_Iprobe);
  INTERCEPT("MPI_Probe", libMPI_Probe);

  INTERCEPT("MPI_Get", libMPI_Get);
  INTERCEPT("MPI_Put", libMPI_Put);

  INTERCEPT("MPI_Bcast", libMPI_Bcast);
  INTERCEPT("MPI_Gather", libMPI_Gather);
  INTERCEPT("MPI_Gatherv", libMPI_Gatherv);
  INTERCEPT("MPI_Scatter", libMPI_Scatter);
  INTERCEPT("MPI_Scatterv", libMPI_Scatterv);
  INTERCEPT("MPI_Allgather", libMPI_Allgather);
  INTERCEPT("MPI_Allgatherv", libMPI_Allgatherv);
  INTERCEPT("MPI_Alltoall", libMPI_Alltoall);
  INTERCEPT("MPI_Alltoallv", libMPI_Alltoallv);
  INTERCEPT("MPI_Reduce", libMPI_Reduce);
  INTERCEPT("MPI_Allreduce", libMPI_Allreduce);
  INTERCEPT("MPI_Reduce_scatter", libMPI_Reduce_scatter);
  INTERCEPT("MPI_Scan", libMPI_Scan);

  INTERCEPT("MPI_Comm_spawn", libMPI_Comm_spawn);

  INTERCEPT("MPI_Send_init", libMPI_Send_init);
  INTERCEPT("MPI_Bsend_init", libMPI_Bsend_init);
  INTERCEPT("MPI_Ssend_init", libMPI_Ssend_init);
  INTERCEPT("MPI_Rsend_init", libMPI_Rsend_init);
  INTERCEPT("MPI_Recv_init", libMPI_Recv_init);
  INTERCEPT("MPI_Start", libMPI_Start);
  INTERCEPT("MPI_Startall", libMPI_Startall);

  /* fortran binding */
  INTERCEPT("mpi_init_", libmpi_init_);
  INTERCEPT("mpi_init_thread_", libmpi_init_thread_);
  INTERCEPT("mpi_init_", libmpi_init_);
  INTERCEPT("mpi_finalize_", libmpi_finalize_);
  INTERCEPT("mpi_barrier_", libmpi_barrier_);
  INTERCEPT("mpi_comm_size_", libmpi_comm_size_);
  INTERCEPT("mpi_comm_rank_", libmpi_comm_rank_);

  INTERCEPT("mpi_send_", libmpi_send_);
  INTERCEPT("mpi_recv_", libmpi_recv_);

  INTERCEPT("mpi_sendrecv_", libmpi_sendrecv_);
  INTERCEPT("mpi_sendrecv_replace_", libmpi_sendrecv_replace_);
  INTERCEPT("mpi_bsend_", libmpi_bsend_);
  INTERCEPT("mpi_ssend_", libmpi_ssend_);
  INTERCEPT("mpi_rsend_", libmpi_rsend_);
  INTERCEPT("mpi_isend_", libmpi_isend_);
  INTERCEPT("mpi_ibsend_", libmpi_ibsend_);
  INTERCEPT("mpi_issend_", libmpi_issend_);
  INTERCEPT("mpi_irsend_", libmpi_irsend_);
  INTERCEPT("mpi_irecv_", libmpi_irecv_);

  INTERCEPT("mpi_wait_", libmpi_wait_);
  INTERCEPT("mpi_waitall_", libmpi_waitall_);
  INTERCEPT("mpi_waitany_", libmpi_waitany_);
  INTERCEPT("mpi_waitsome_", libmpi_waitsome_);
  INTERCEPT("mpi_test_", libmpi_test_);
  INTERCEPT("mpi_testall_", libmpi_testall_);
  INTERCEPT("mpi_testany_", libmpi_testany_);
  INTERCEPT("mpi_testsome_", libmpi_testsome_);

  INTERCEPT("mpi_probe_", libmpi_probe_);
  INTERCEPT("mpi_iprobe_", libmpi_iprobe_);

  INTERCEPT("mpi_get_", libmpi_get_);
  INTERCEPT("mpi_put_", libmpi_put_);

  INTERCEPT("mpi_bcast_", libmpi_bcast_);
  INTERCEPT("mpi_gather_", libmpi_gather_);
  INTERCEPT("mpi_gatherv_", libmpi_gatherv_);
  INTERCEPT("mpi_scatter_", libmpi_scatter_);
  INTERCEPT("mpi_scatterv_", libmpi_scatterv_);
  INTERCEPT("mpi_allgather_", libmpi_allgather_);
  INTERCEPT("mpi_allgatherv_", libmpi_allgatherv_);
  INTERCEPT("mpi_alltoall_", libmpi_alltoall_);
  INTERCEPT("mpi_alltoallv_", libmpi_alltoallv_);
  INTERCEPT("mpi_reduce_", libmpi_reduce_);
  INTERCEPT("mpi_allreduce_", libmpi_allreduce_);
  INTERCEPT("mpi_reduce_scatter_", libmpi_reduce_scatter_);
  INTERCEPT("mpi_scan_", libmpi_scan_);

  INTERCEPT("mpi_comm_spawn_", libmpi_comm_spawn_);


  INTERCEPT("mpi_send_init_", libmpi_send_init_);
  INTERCEPT("mpi_bsend_init_", libmpi_bsend_init_);
  INTERCEPT("mpi_ssend_init_", libmpi_ssend_init_);
  INTERCEPT("mpi_rsend_init_", libmpi_rsend_init_);
  INTERCEPT("mpi_recv_init_", libmpi_recv_init_);
  INTERCEPT("mpi_start_", libmpi_start_);
  INTERCEPT("mpi_startall_", libmpi_startall_);


#ifdef EZTRACE_AUTOSTART
  eztrace_start ();
#else
  /* when the application calls eztrace_start(),
   * we need to execute mpi_init_generic
   */
  eztrace_register_init_routine(&__mpi_init_generic);
#endif
}

/* Library destructor: stops tracing and releases the process identifier
 * allocated in __mpi_init_generic().
 */
void libfinalize(void) __attribute__ ((destructor));
void libfinalize(void)
{
  eztrace_stop ();
  free(proc_id);
  /* do not leave the global pointing at freed memory */
  proc_id = NULL;
}
