/*
 * FILE: chak.c
 */

#include "pretg.h"
#include "cpxdefs.inc"

#define ALPHA 1
#define BETA 1

/*
 * Round an LP solution value to the nearest integer (halves away from
 * zero).  A plain (int) cast truncates toward zero, so a value that the
 * solver reports as 2.9999999 would otherwise be turned into 2.
 */
static int chak_round_rvalue(x)
double x;
{
  return (x >= 0.0) ? (int)(x + 0.5) : (int)(x - 0.5);
}

/*
 * Minimize the latency and register count
 * in the given edge weight graph.
 * Edge weights may be updated.
 *
 * wt_graph     per-vertex lists of outgoing arcs (EDGEPTR objects
 *              carrying a target 'vertex' and a weight 'wt').
 * no_vertices  number of vertices in wt_graph on entry.
 *
 * Outline:
 *   1. record the out-degree and in-degree of every original vertex;
 *   2. obtain 'count' from dfs_wt_graph();
 *   3. for every vertex with more than one out-arc, add one dummy
 *      vertex and an arc from each successor to that dummy vertex;
 *   4. build the per-vertex 'cost' vector and call call_cplex();
 *   5. apply the returned values to the arc weights, report register
 *      counts on stdout and write "<Circuit_Basename>.stats";
 *   6. delete the arcs added in step 3 and free all work arrays.
 *
 * NOTE(review): call_cplex() walks wt_graph[i] for the dummy indices as
 * well, so wt_graph presumably has (empty) slots beyond the original
 * vertex count -- confirm against the code that allocates it.
 */

void minimize_latency(wt_graph, no_vertices)
WT_GRAPHPTR wt_graph;
int no_vertices;
{
  int i, *indeg, *count = (int *)NULL;
  int Orig_no_vertices, *Orig_outdeg;
  int max_wt, fanout, new_wt, reg_count, name_len;
  char outfilebuf[MAXSTRLEN];
  FILE *outfile;
  LISTPTR l, next_l;
  EDGEPTR arc;
  double  *cost, *rvalues;

  extern double *call_cplex();

  assert(wt_graph);
  assert(N2V(0) == no_vertices);

  if (Debug == 1)
    print_wt_graph(wt_graph);

  Orig_no_vertices = no_vertices;

  /* compute outdeg before fanout modifications, need this
     to compute the cost of each vertex for reg_min */

  Orig_outdeg = (int *)SR_MALLOC(Orig_no_vertices*sizeof(int));
  for (i = 0; i < Orig_no_vertices; i++)
    Orig_outdeg[i] = l_len(wt_graph[i]);

  /* determine the sources, need this to compute count.
     indeg is sized for the original vertices plus at most one
     dummy vertex per original vertex. */

  indeg = (int *)SR_MALLOC(2*Orig_no_vertices*sizeof(int));

  for (i = 0; i < 2*Orig_no_vertices; i++)
    indeg[i] = 0;
  for (i = 0; i < Orig_no_vertices; i++)
    for (l = wt_graph[i]; l != LISTNULL; l = l->next) {
      arc = (EDGEPTR)l->o;
      indeg[arc->vertex]++;
    }

  /* 'count' is released at the end of this function */
  count = dfs_wt_graph(wt_graph, indeg, no_vertices);

  /* model fanout appropriately for reg_min */

  for (i = 0; i < Orig_no_vertices; i++) {
    /* largest weight and number of out-arcs that end on an
       original vertex (arcs to dummy vertices are ignored) */
    max_wt = fanout = 0;
    for (l = wt_graph[i]; l != LISTNULL; l = l->next) {
      arc = (EDGEPTR)l->o;
      if (arc->vertex < Orig_no_vertices) {
        max_wt = MAX(max_wt, arc->wt);
        fanout++;
      }
    }
    assert(fanout == Orig_outdeg[i]);
    if (fanout > 1) {
      /* the dummy vertex gets index 'no_vertices'; every successor
         of i receives an arc to it weighted (max_wt - arc weight) */
      for (l = wt_graph[i]; l != LISTNULL; l = l->next) {
        arc = (EDGEPTR)l->o;
        if (arc->vertex < Orig_no_vertices) {
          new_wt = max_wt - arc->wt;
          /* NOTE(review): the list head is passed by value here while
             l_delete() below takes its address -- confirm l_append()
             copes with an empty successor list. */
          l_append(wt_graph[arc->vertex],
                   (int)edgecreate(new_wt, no_vertices));
        }
      }
      indeg[no_vertices] = Orig_outdeg[i];
      no_vertices++;
    }
  }

  /* count number of registers in the ckt before retiming:
     sum over original vertices of the largest out-arc weight */

  reg_count = 0;
  for (i = 0; i < Orig_no_vertices; i++) {
    max_wt = 0;
    for (l = wt_graph[i]; l != LISTNULL; l = l->next) {
      arc = (EDGEPTR) l->o;
      if (arc->vertex < Orig_no_vertices)
        max_wt = MAX(max_wt, arc->wt);
    }
    reg_count += max_wt;
  }
  printf("\tRegister count before %d\n", reg_count);

  /* compute the effective indeg & outdeg for reg_min */

  cost = (double *)SR_MALLOC(no_vertices*sizeof(double));

  for (i = 0; i < no_vertices; i++)
    cost[i] = 0.0;
  for (i = 0; i < Orig_no_vertices; i++) {
    /* +1 for every vertex that has at least one out-arc */
    if (Orig_outdeg[i]) cost[i] += 1.0;
    /* -1 on the successor of a vertex with exactly one out-arc */
    for (l = wt_graph[i]; l != LISTNULL; l = l->next) {
      arc = (EDGEPTR)l->o;
      if (arc->vertex < Orig_no_vertices && Orig_outdeg[i] == 1)
        cost[arc->vertex] += -1.0;
    }
  }

  /* -1 on every dummy vertex */
  for (i = Orig_no_vertices; i < no_vertices; i++)
    cost[i] += -1.0;

  rvalues = call_cplex(wt_graph, no_vertices, count, cost);

  /* Compute new arc weights using the rvalues[].  The difference is
     rounded to the nearest integer instead of being truncated so that
     floating-point noise in the LP solution cannot shift a weight. */
  for (i = 0; i < Orig_no_vertices; i++)
    for (l = wt_graph[i]; l != LISTNULL; l = l->next) {
      arc = (EDGEPTR)l->o;
      assert(arc->wt >= 0);
      arc->wt = arc->wt
                + chak_round_rvalue(rvalues[arc->vertex] - rvalues[i]);
      assert(arc->wt >= 0);
    }

  /* count number of registers in the ckt after retiming */
  reg_count = 0;
  for (i = 0; i < Orig_no_vertices; i++) {
    max_wt = 0;
    for (l = wt_graph[i]; l != LISTNULL; l = l->next) {
      arc = (EDGEPTR) l->o;
      if (arc->vertex < Orig_no_vertices)
        max_wt = MAX(max_wt, arc->wt);
    }
    reg_count += max_wt;
  }
  printf("\tRegister count after %d\n", reg_count);

  /* print rvalues of input/output nodes to file */

  /* bounded formatting: never write past the end of outfilebuf */
  name_len = snprintf(outfilebuf, sizeof(outfilebuf), "%s.stats",
                      Circuit_Basename);
  if (name_len < 0 || name_len >= (int)sizeof(outfilebuf))
    fprintf(stderr, "Warning: stats file name was truncated\n");
  outfile = sr_fopen(outfilebuf, "w");

  fprintf(outfile, "Inputs \n");
  for (l = Inputs; l != LISTNULL; l = l->next)
    fprintf(outfile, "\t%s \t%d\n", Nodetable[l->o]->name,
            chak_round_rvalue(rvalues[N2V(l->o)]));
  fprintf(outfile, "Outputs \n");
  for (l = Outputs; l != LISTNULL; l = l->next)
    fprintf(outfile, "\t%s \t%d\n", Nodetable[l->o]->name,
            chak_round_rvalue(rvalues[N2V(l->o)]));

  outfile = sr_fclose(outfile);

  /* remove the fanout modifications: drop every arc whose target is
     a dummy vertex.  next_l is saved first because l is unlinked. */
  no_vertices = Orig_no_vertices;
  for (i = 0; i < no_vertices; i++)
    for (l = wt_graph[i]; l != LISTNULL; l = next_l) {
      next_l = l->next;
      arc = (EDGEPTR)l->o;
      if (arc->vertex > no_vertices - 1) {
        l_delete(&(wt_graph[i]), l);
        arc = edgefree(arc);
      }
    }

  if (Debug == 1) {
    printf("Final Weight graph:\n");
    print_wt_graph(wt_graph);
  }

  /* free memory. 'rvalues' was allocated in call_cplex() */
  rvalues = (double *) sr_free((char *) rvalues);
  indeg = (int *)sr_free((char *) indeg);
  Orig_outdeg = (int *)sr_free((char *) Orig_outdeg);
  count = (int *) sr_free((char *) count);
  cost = (double *) sr_free((char *) cost);

  return;
}

/* Solves an LP of the form 
   Max      cx 
   sub to   Ar <= w
   whose dual is a network flow problem.
   The netopt option in cplex can be used to
   solve the dual efficiently. The constraint
   matrix A and the wt vector w are specified
   by wt_graph. The objective function c has 
   two components specified by count & cost.

   wt_graph     per-vertex lists of outgoing arcs
   no_vertices  number of vertices (rows of the flow problem)
   count, cost  per-vertex arrays of length no_vertices; row i gets
                the right-hand side ALPHA*cost[i] + BETA*count[i]

   The flow problem has one column per arc and one row per vertex.
   Returns the array of no_vertices dual values filled in by
   solution(); it is allocated here with SR_MALLOC and must be
   released by the caller.  Every other work array is freed before
   returning.  Failures are reported through sr_error(). */

double *call_cplex(wt_graph, no_vertices, count, cost)
WT_GRAPHPTR wt_graph;
int no_vertices, *count;
double *cost;
{
  int i, j, no_arcs = 0;
  LISTPTR l;
  EDGEPTR arc;

  /* variables used by cplex package */
  char *_cplexlp = "_cplex.lp";
  struct cpxlp *lp;
  char *probname = "retminl", *senx;
  int mac = 0, mar = 0, *matbeg, *matcnt, *matind, status, lpstat;
  double *objx, *rhsx, *matval, *bdl, *bdu, *lpx, *lpdualx, lpobj;

  extern struct cpxlp *loadprob();

  extern void print_solution();

  /* NETWORK FLOW SPECIFICATION  */

  assert(count);
  assert(cost);

  /* Specify number of rows and columns of the constraint matrix:
     one column per arc (mac), one row per vertex (mar) */
  for (i = 0; i < no_vertices; i++)
    for (l = wt_graph[i]; l != LISTNULL; l = l->next)
      no_arcs++;
  mac = no_arcs;
  mar = no_vertices;

  /*Set up obj function. For each arc, cost= arc weight.
    Arcs are numbered in the order they are met when walking
    wt_graph[0], wt_graph[1], ...; the constraint matrix below
    uses the same numbering. */
  objx = (double *)SR_MALLOC(mac*sizeof(double));
  j = 0;
  for (i = 0; i < no_vertices; i++)
    for (l = wt_graph[i]; l != LISTNULL; l = l->next) {
      arc = (EDGEPTR)l->o;
      assert(arc->wt >= 0);
      objx[j++] = (double)(arc->wt);
    }

  /* Set up Rhs term for each constraint in the constraint matrix;
     every row is an equality ('E') */
  rhsx = (double *)SR_MALLOC(mar*sizeof(double));
  senx = (char *)SR_MALLOC(mar*sizeof(char));
  for (i = 0; i < no_vertices; i++) {
    rhsx[i] = (double) ALPHA*cost[i] + BETA*count[i];
    senx[i] = 'E';
  }

  /* Set up bounds on each edge: 0 <= flow <= INFBOUND */
  bdl = (double *)SR_MALLOC(mac*sizeof(double));
  bdu = (double *)SR_MALLOC(mac*sizeof(double));
  for (j = 0; j < no_arcs; j++) {
    bdl[j] = (double) 0.0;
    bdu[j] = (double)(INFBOUND);
  }

  /* Set up constraint matrix: column j holds exactly two entries,
     +1 in the row of the arc's source vertex and -1 in the row of
     its target vertex */
  matbeg = (int *)SR_MALLOC(mac*sizeof(int));
  matcnt = (int *)SR_MALLOC(mac*sizeof(int));
  matval = (double *)SR_MALLOC(2*mac*sizeof(double));
  matind = (int *)SR_MALLOC(2*mac*sizeof(int));

  j = 0;
  for (i = 0; i < no_vertices; i++)
    for (l = wt_graph[i]; l != LISTNULL; l = l->next) {
      matbeg[j] = 2*j;
      matcnt[j] = 2;
      matval[2*j] = (double) 1.0;
      matval[2*j+1] = (double) -1.0;
      matind[2*j] = i;
      matind[2*j+1] = ((EDGEPTR)l->o)->vertex;
      j++;
    }

  /* Load the flow problem */
  fprintf(stdout, "\tCPLEX (r=%d,c=%d)..",
               mar, mac); fflush(stdout);
  lp = loadprob(probname, mac, mar, 0, 1, objx, rhsx, senx, matbeg, matcnt,
                matind, matval, bdl, bdu, (double *)NULL, (int *)NULL,
                (int *)NULL, (int *)NULL, (int *)NULL, (int *)NULL,
                (double *)NULL, (char *)NULL, (char *)NULL, (char *)NULL,
                (char *)NULL, (char *)NULL, (char **)NULL, (char *)NULL,
                (char **)NULL, (char *)NULL, (char **)NULL, (char *)NULL, mac,
                mar, 2*mac, 0, 0, (unsigned)0, (unsigned)0, (unsigned)0);

  if (!lp)
    sr_error("ERROR! CPLEX is out of memory..");

  /* Dump the problem to _cplex.lp.  The file is written exactly once
     on every call; a write failure is treated as an error only when
     Debug is set. */
  assert(_cplexlp);
  status = lpwrite(lp, _cplexlp);
  if (status && Debug == 1)
    sr_error("Could not write the lp to file _cplex.lp!");

  /* Solve the optimization problem.  Any netopt() result other than
     -1 is reported as a failure; optimize() must return 0. */
  status = netopt(lp);
  if (status != -1) {
    fprintf(stdout, "Status = %d\n", status);
    sr_error("CPLEX. NETOPT cannot find optimal solution. ");
  }
  status = optimize(lp);
  if (status)
    sr_error("CPLEX. Could not find a solution.");

  /* Obtain the solution of the problem: primal values per arc in
     lpx, dual values per vertex in lpdualx */
  lpx = (double *)SR_MALLOC(mac*sizeof(double));
  lpdualx = (double *)SR_MALLOC(mar*sizeof(double));
  status = solution(lp, &lpstat, &lpobj, lpx, lpdualx,
                    (double *)NULL, (double *)NULL);

  if (status)
    sr_error("CPLEX. Solution() returns non-zero value.");
  if (lpstat != 1)
    sr_error("CPLEX. Optimal solution not found!");

  fprintf(stdout, "..Done.\n"); fflush(stdout); 

  assert(mar == no_vertices);
  assert(mac == no_arcs);

  if (Debug == 1)
    print_solution(lpobj, lpx, mac, lpdualx, mar);

  /* Clean up all memory used by cplex; only lpdualx survives because
     it is the return value */
  freeprob(&lp);
  objx = (double *)sr_free((char *)objx);
  rhsx = (double *)sr_free((char *)rhsx);
  senx = (char *)sr_free((char *)senx);
  bdl = (double *)sr_free((char *)bdl);
  bdu = (double *)sr_free((char *)bdu);
  matbeg = (int *)sr_free((char *)matbeg);
  matcnt = (int *)sr_free((char *)matcnt);
  matval = (double *)sr_free((char *)matval);
  matind = (int *)sr_free((char *)matind);
  lpx = (double *)sr_free((char *)lpx);

  return(lpdualx);
}

/*
 * Write the result of the flow problem to stdout.
 *
 * lpobj    optimal objective value
 * lpx      primal values, one per arc (mac entries); printed only
 *          when the global Verbose flag is non-zero
 * lpdualx  dual values, one per vertex (mar entries); always printed
 */
void print_solution(lpobj, lpx, mac, lpdualx, mar)
double lpobj, *lpx;
int mac;
double *lpdualx;
int mar;
{
  int arc_no, vertex_no;

  printf("Solution to the flow problem:\n");
  printf("Optimal Value: %8.2f\n", lpobj);

  if (Verbose) {
    /* all primal values go on a single line */
    printf("Primal Values for the arcs:\n");
    arc_no = 0;
    while (arc_no < mac) {
      printf("%8.2f ", lpx[arc_no]);
      arc_no++;
    }
    printf("\n");
  }

  /* one line per vertex */
  printf("Dual Values for vertices:\n");
  vertex_no = 0;
  while (vertex_no < mar) {
    printf("Vertex %d: %8.2f\n", vertex_no, lpdualx[vertex_no]);
    vertex_no++;
  }

  return;
}



