//********************************************************************************
//
// IterSolvers: A collection of Iterative Solvers
// Written by James Sandham
// 3 March 2015
//
//********************************************************************************

//********************************************************************************
//
// IterSolvers is free software; you can redistribute it and/or modify it under the
// terms of the GNU Lesser General Public License (as published by the Free
// Software Foundation) version 2.1 dated February 1999.
//
//********************************************************************************

#include<stdlib.h>
#include<stdio.h>
#include"pPCG.h"
#include"pSLAF.h"
#include"math.h"
#include<mpi.h>


//****************************************************************************
//
// Preconditioned Conjugate Gradient 
//
//****************************************************************************

#define DEBUG 1


//-------------------------------------------------------------------------------
// allocate a zero-initialised array of count elements; at least one element is
// always requested so that a NULL return can only mean allocation failure
//-------------------------------------------------------------------------------
static void *ppcg_alloc(const int count, const size_t elem_size)
{
  return calloc(count > 0 ? (size_t)count : 1, elem_size);
}


//-------------------------------------------------------------------------------
// preconditioned conjugate gradient
//
// Solves A*x = b with the conjugate gradient method using a Jacobi (diagonal)
// preconditioner. A is given in compressed row form (r, c, v) and each process
// holds m rows of it.
//
//   r, c, v  - row pointers, column indices and values of the local rows of A
//   x        - initial guess on entry, updated in place (entries 0..m-1 are
//              written by this routine)
//   b        - right hand side (entries 0..m-1 are read by this routine)
//   m        - number of rows of A sent to this process
//   n        - total number of rows in the full A (passed on to the helpers)
//   tol      - the iteration stops once the value returned by error() is <= tol
//   max_iter - maximum number of iterations
//   id, np   - id of this process and total number of processes
//
// Returns the number of iterations performed, 1 if the initial error is already
// below tol, or -1 if a work array could not be allocated.
//
// NOTE(review): the helper routines presumably communicate between processes,
// so a caller receiving -1 should probably abort the whole job rather than
// carry on - confirm against pSLAF.
//-------------------------------------------------------------------------------
int ppcg(const int r[], const int c[], const double v[], double x[], const double b[], 
         const int m, const int n, const double tol, const int max_iter, const int id, 
         const int np)
{
  // every resource is declared up front so that the single cleanup path below
  // can release whatever has been acquired so far
  int status = -1;
  int *owners = NULL, *proc_map = NULL;
  double *diag = NULL, *res = NULL, *z = NULL, *p = NULL;
  int map_size = 0, index = 0;
  int iter = 0, inner_iter = 0;
  double err = 0.0, rz = 0.0;

  //find out which processes need to send/recv data to/from the current process id
  owners = ppcg_alloc(np, sizeof *owners);
  if(owners==NULL){goto cleanup;}
  for(int i=0;i<m;i++){
    for(int j=r[i];j<r[i+1];j++){
      //assumes m is the same across all processes (fix this later); a column
      //that does not map to a valid process is ignored instead of writing
      //outside of the flag array
      const int owner = c[j]/m;
      if(owner>=0 && owner<np){owners[owner] = 1;}
    }
  }
  for(int i=0;i<np;i++){if(owners[i]>0){map_size++;}}
  proc_map = ppcg_alloc(map_size, sizeof *proc_map);
  if(proc_map==NULL){goto cleanup;}
  for(int i=0;i<np;i++){
    if(owners[i]>0){
      proc_map[index] = i;
      index++;
    }
  }

  //determine diagonal entries of A matrix for Jacobi preconditioner
  diag = ppcg_alloc(m, sizeof *diag);
  if(diag==NULL){goto cleanup;}
  for(int i=0;i<m;i++){
    //fall back to 1 (no scaling of this row) when the diagonal entry is not
    //stored or is zero, so that the preconditioner never divides by zero or
    //by an uninitialised value
    diag[i] = 1.0;
    for(int j=r[i];j<r[i+1];j++){
      if(c[j]==(i+id*m)){  //assumes m is the same across all processes (fix this later)
        if(v[j]!=0.0){diag[i] = v[j];}
        break;
      }
    }
  }

  //res = b-A*x and initial error
  res = ppcg_alloc(m, sizeof *res);
  if(res==NULL){goto cleanup;}
  pmatrixVectorProduct2(r,c,v,x,res,m,n,id,np,proc_map,map_size);
  err = error(r,c,v,x,b,m,n,id,np);
  for(int i=0;i<m;i++){res[i] = b[i] - res[i];}
  if(err<tol){
    status = 1;  //value historically returned when x is already good enough
    goto cleanup;
  }

  //create z and p vector
  z = ppcg_alloc(m, sizeof *z);
  p = ppcg_alloc(m, sizeof *p);
  if(z==NULL || p==NULL){goto cleanup;}

  //z = (M^-1)*r and p = z
  for(int i=0;i<m;i++){
    z[i] = res[i]/diag[i];
    p[i] = z[i];
  }

  //rz = (z,r); carried from one iteration to the next so that it is only
  //evaluated once per iteration
  rz = pdotProduct(z,res,m,n,id,np);

  while(iter<max_iter && err>tol){
    //z = A*p and alpha = (z,r)/(Ap,p)
    pmatrixVectorProduct2(r,c,v,p,z,m,n,id,np,proc_map,map_size);
    const double pAp = pdotProduct(z,p,m,n,id,np);

    //the error is only refreshed every 100 iterations, so the residual can hit
    //exactly zero while err still looks too large; stop here rather than
    //compute 0/0 and fill x with NaN
    //NOTE(review): relies on pdotProduct returning the same value on every
    //process so that all of them leave the loop together - confirm
    if(rz==0.0 || pAp==0.0){break;}
    const double alpha = rz/pAp;

    //update x and res
    for(int i=0;i<m;i++){
      x[i] += alpha*p[i];
      res[i] -= alpha*z[i];
    }

    //z = (M^-1)*r
    for(int i=0;i<m;i++){
      z[i] = res[i]/diag[i];
    }

    //find beta = (z_new,r_new)/(z_old,r_old)
    const double rz_new = pdotProduct(z,res,m,n,id,np);
    const double beta = rz_new/rz;

    //update p
    for(int i=0;i<m;i++){
      p[i] = z[i] + beta*p[i];
    }
    rz = rz_new;

    //calculate error
    if(inner_iter==100){
      err = error(r,c,v,x,b,m,n,id,np);
      inner_iter = 0;
    }
    iter++;
    inner_iter++;    
  }
  status = iter;

cleanup:
  if(status<0){
    fprintf(stderr,"ppcg: memory allocation failed on process %d\n",id);
  }
  free(owners);
  free(proc_map);
  free(diag);
  free(res);
  free(z);
  free(p);
  return status;
}

