/********************************************************************/
/*             FILE: sched_area.C                                   */
/********************************************************************/
#include "scheduler.h"

/********************************************************************/
/*This is the main area optimizing routine.
 *
 *For every candidate number of control steps, from compute_max_csteps()
 *down to compute_min_csteps(), the routine restores the Dfg from a private
 *copy of the input, maps the operations with fastest_map(), runs
 *asap_schedule() and, when the resulting schedule fits into the candidate
 *number of control steps, builds an initial allocation and runs
 *iterative_improvement() with the AREA objective. The candidate with the
 *smallest compute_areacost() is remembered as the best solution.
 *
 *  flowgraph             - Dfg to synthesize; on success it is overwritten
 *                          with the best solution found
 *  lib                   - module library, must not be NULL
 *  sample_constraint     - sample period to use; when it is 0.0 the sample
 *                          period is laxity * compute_min_sample_period()
 *  laxity                - only used when sample_constraint == 0.0, in which
 *                          case it must be >= 1.0
 *  max_clocks_to_explore - when > 0, stop after this many candidates have
 *                          been synthesized; 0 means no limit
 *
 *Returns the best Datapath (allocated here with new), or NULL when no
 *candidate number of control steps could meet the sample period.
 *
 *On success the file names <flowgraph.name>_gen.dfg and
 *<flowgraph.name>_dfg.vcg are handed to write_genesis_dfg() and
 *Dfg::print_vcg() respectively.
 */
Datapath *Scheduler::areaschedule(Dfg &flowgraph, library *lib,
				  const float sample_constraint, const float laxity, const int max_clocks_to_explore)
{
  int num_csteps, max_csteps, min_csteps, lastcstep;
  float vdd;
  float current_clk;
  float fastest_sample_period, sample_period;
  int num_explorations = 0;
  Datapath *cur_dp, *best_dp;
  Dfg *best_flowgraph = NULL;
  Dfg *initial_flowgraph = new Dfg;

  assert_force(lib);
  //The stricter check on the (sample_constraint, laxity) pair below is
  //disabled in the original code; only the laxity >= 1.0 check further down
  //is active.
  //assert_force((sample_constraint > 0.0 && laxity == 0.0) ||
//	       (sample_constraint == 0.0 && laxity >= 1.0));
  //CHAINING IS CURRENTLY NOT IMPLEMENTED
  assert_force(MAX_CHAINING_FACTOR == 1);
  assert(max_clocks_to_explore >= 0);

  vdd = 5.0;
  cout<<"sample_constraint = "<<sample_constraint<<endl;
  cout<<"laxity = "<<laxity<<endl;	
  if(sample_constraint == 0.0) {
    //no explicit constraint: derive the sample period from the fastest one
    assert(laxity >= 1.0);
    fastest_sample_period = compute_min_sample_period(flowgraph, lib);
    sample_period = fastest_sample_period*laxity;
    cout<<"Set the sample_period as the "<<laxity<<" times fastest_sample_period "<< fastest_sample_period<<endl;

  } else {
    sample_period = sample_constraint;
  }

  //keep a pristine copy so that every candidate starts from the same Dfg
  initial_flowgraph->copy(flowgraph);

  cout << "*" << endl;
  cur_dp = new Datapath;
  best_dp = NULL;

  max_csteps = compute_max_csteps(flowgraph, lib, sample_period, vdd);
  min_csteps = compute_min_csteps(flowgraph, lib, sample_period,
				  vdd, MAX_CHAINING_FACTOR);
  cout<<"max csteps = "<<max_csteps<<" min csteps = "<<min_csteps<<endl;
  for(num_csteps = max_csteps; num_csteps >= min_csteps; num_csteps--) {
    //clock period for this candidate; the (int) cast truncates the quotient
    //to a whole number
    current_clk = (int)(sample_period/(float)num_csteps);

    //reset the Dfg & Datapath
    //copy initial_flowgraph into flowgraph
    flowgraph.copy(*initial_flowgraph);

    flowgraph.reset_scheduling_info();
    flowgraph.reset_allocation_info();
    delete cur_dp;
    cur_dp = new Datapath;

    cur_dp->set_vdd(vdd);
    cur_dp->set_sample_period(sample_period);
    cur_dp->set_csteps(num_csteps);
#ifdef PHYSICAL    
    cur_dp->set_bitwidth(flowgraph.get_bitwidth());
#endif    
    //map each operation to the fastest module that can perform it
    fastest_map(flowgraph, lib);
	
    //perform an asap schedule - returns the number of csteps needed
    lastcstep = asap_schedule(flowgraph, lib, vdd, current_clk);

    //perform iterative improvement based synthesis only if the initial
    //solution meets the sample_period
    if(lastcstep <= num_csteps) {
      num_explorations++;
      cout << "Iterative improvement synthesis No.: " << num_explorations
	<< ", Vdd: " << vdd << ", Contol Steps: " << num_csteps << endl;
      //perform an one-to-one-allocation to create the Datapath
      initial_allocation(flowgraph, cur_dp, lib);
#ifdef PHYSICAL
      cout<<"Fully parallel result: "<<endl;
      cur_dp->do_floorplan(lib, scm);
#endif      
      //perform an iterative improvement to reduce the area
      iterative_improvement(flowgraph, lib, cur_dp, vdd, current_clk, AREA, scm);
#ifdef PHYSICAL
      iterative_reg_improvement(flowgraph, lib, cur_dp, vdd, current_clk, AREA, scm);
#endif      
    } else {
      cout << "OOPS - cannot meet sample period with Vdd: " << vdd << " control steps:"
	<< num_csteps << " best csteps possible: " << lastcstep << endl;
      continue;  /*WATCH OUT - A CONTINUE IS BEING USED*/
    }

    //remember this candidate if it is the first one or cheaper than the best
    if(!best_dp) {
      assert_force(best_flowgraph == NULL);
      best_flowgraph = new Dfg;
      best_dp = new Datapath;
      copy_flowgraph_and_dp(flowgraph, *cur_dp, *best_flowgraph, *best_dp, lib);
    } else if(compute_areacost(flowgraph, cur_dp, lib) <
	     compute_areacost(*best_flowgraph, best_dp, lib)) {
	      assert(best_flowgraph != NULL);
	      copy_flowgraph_and_dp(flowgraph, *cur_dp, *best_flowgraph, *best_dp, lib);
    }

    if(max_clocks_to_explore > 0) {
      if(num_explorations == max_clocks_to_explore) {
	cout << "Reached limit on max. clock periods to explore" << endl;
	break; /*BREAK FROM THE FOR EACH CLOCK LOOP*/
      }
    }

  } /*END FOR EACH CANDIDATE csteps*/

  //No candidate was feasible (or max_csteps < min_csteps so the loop never
  //ran): best_flowgraph and best_dp are still NULL and must not be
  //dereferenced below. Release what was allocated here and report failure.
  if(!best_dp) {
    cout << "OOPS - no candidate number of control steps meets the sample period "
      << sample_period << " (max csteps = " << max_csteps
      << ", min csteps = " << min_csteps << ")" << endl;
    delete cur_dp;
    delete initial_flowgraph;
    return(NULL);
  }

  //hand the best solution back to the caller through flowgraph
  copy_flowgraph_and_dp(*best_flowgraph, *best_dp, flowgraph, *cur_dp, lib);
  delete initial_flowgraph;
  //NOTE(review): best_flowgraph is never deleted. It is left alone here
  //because it is not visible from this file whether best_dp keeps pointers
  //into it - confirm before freeing it.

  char tmpfilename[MAXSTRLEN];
  //both suffixes appended below are 8 characters long; make sure the
  //name plus suffix plus terminating NUL fits into tmpfilename
  assert_force(strlen(flowgraph.name) + strlen("_gen.dfg") < MAXSTRLEN);
  //write out Genesis CDFG file
  strcpy(tmpfilename, flowgraph.name);
  strcat(tmpfilename, "_gen.dfg");
  write_genesis_dfg(flowgraph, *cur_dp, tmpfilename);
  strcpy(tmpfilename, flowgraph.name);
  strcat(tmpfilename, "_dfg.vcg");
  flowgraph.print_vcg(tmpfilename);

  //lzhong
  //
  //cur_dp->write_bdnet(flowgraph.name, laxity);

  delete cur_dp;
  //lzhong commented out
#if 0
  //compute the Vdd scaling possible for the obtained area-optimized implementation
  cout << "Supply voltage can be scaled to "
    << compute_vdd(5.0, flowgraph.get_numcsteps(),best_dp->get_csteps())
    << "V " << "(" << flowgraph.get_numcsteps() << " out of "
    << best_dp->get_csteps() << " csteps used)" <<  endl;
#endif
  return(best_dp);
}
/********************************************************************/
/*Expected AREA gain of a class A move, i.e. the expected reduction in the
 *cost function if the move is performed. The functional unit and register
 *allocation is taken to be unchanged by the move, so the gain is just the
 *area of the library element being replaced (move.old_libelement) minus
 *the area of the library element replacing it (move.new_libelement).
 *A negative value means the move increases the area.
 */
float Scheduler::compute_areagain(Datapath *dp, library *lib, Class_a_move &move)
{
  assert(dp && lib);
  assert(move.fu);

  float gain = 0.0;

  //area that is given up ...
  libelement *const replaced = move.old_libelement;
  assert(replaced);
  gain += replaced->get_area();

  //... minus the area that is taken on
  libelement *const replacement = move.new_libelement;
  assert(replacement);
  gain -= replacement->get_area();

  return gain;
}
/********************************************************************/
/*This routine computes the expected AREA gain for a move of class B
 *functional unit move. CURRENTLY, this does not include multiplexer
 *or wiring area estimates.
 *
 *A move is either a sharing move (move.splitting is false; fu1 and fu2 are
 *both set and split_operations is empty) or a splitting move
 *(move.splitting is true; only fu1 is set and split_operations is not
 *empty). The gain is +area of fu1's library element for a sharing move and
 *-area for a splitting move.
 */
float Scheduler::compute_areagain(Datapath *dp, library *lib, Class_b_fu_move &move)
{
  libelement *libel;
  float retval = 0.0;

  assert(dp && lib);
  assert( (!move.splitting && move.fu1 && move.fu2 && move.split_operations.is_empty()) ||
	 (move.splitting && move.fu1 && !move.fu2 && !move.split_operations.is_empty()) );
  //fu2 is NULL for a splitting move (see the assertion above), so the two
  //units can only be compared for a sharing move
  assert(move.splitting ||
	 move.fu1->get_libelement() == move.fu2->get_libelement());

  libel = move.fu1->get_libelement();
  assert(libel);

  //In a splitting move, an instance of libel is replaced by two instances
  //hence, the gain in area is equal to -(area of libel)
  if(move.splitting) {
    retval = -(libel->get_area());
  } else {
    //In a sharing move, two instances of libel are replaced with just one.
    //Hence, the gain in functional unit area is equal to the area of libel.
    retval = libel->get_area();
  }

  //need to add estimated gain in multiplexer area as well

  return(retval);
}
/********************************************************************/
/*Expected AREA gain of a class B storage unit (register) move.
 *CURRENTLY, this does not include multiplexer or wiring area estimates.
 *
 *The area of one register is taken as
 *lib->get_reg_bit_area() * flowgraph.get_bitwidth(). A splitting move
 *(only su1 set, split_variables not empty) replaces one register by two,
 *so its gain is minus that area; a sharing move (su1 and su2 set,
 *split_variables empty) replaces two registers by one, so its gain is
 *plus that area.
 */
float Scheduler::compute_areagain(Dfg &flowgraph, Datapath *dp, library *lib,
				  Class_b_reg_move &move)
{
  assert(dp && lib);
  assert(move.su1);

  if(move.splitting) {
    //one register becomes two: the area grows by one register
    assert(!move.su2 && !move.split_variables.is_empty());
    return(-(lib->get_reg_bit_area()*flowgraph.get_bitwidth()));
  }

  //two registers become one: the area shrinks by one register
  assert(move.su2 && move.split_variables.is_empty());

  //need to add estimated gain in multiplexer area as well
  return(lib->get_reg_bit_area()*flowgraph.get_bitwidth());
}
/********************************************************************/
/*This routine computes the value of the cost function for a Datapath
 *that implements the given Dfg. The cost estimation is based on the
 *area estimates in the library. The total is the sum of
 *  - the library element area of every functional unit in dp,
 *  - lib->get_reg_bit_area() * flowgraph.get_bitwidth() for every
 *    storage unit in dp (each one must be a "Register"),
 *  - (n-1) * lib->get_mux_bit_area() * flowgraph.get_bitwidth() for every
 *    interconnect unit in dp (each one must be a "Mux"), where n is the
 *    number of nets of the mux minus one.
 *WIRING, CLOCK NETWORK AND CONTROLLER ESTIMATES ARE NOT INCLUDED YET.
 */

float Scheduler::compute_areacost(Dfg &flowgraph, Datapath *dp, library *lib)
{
  FUPTR fu;
  Storage_unit *su;
  Mux *mux;
  int num_inputs;
  float total = 0.0;
  List_iterator<FUPTR> fuscan;
  List_iterator<Storage_unit *> regscan;
  List_iterator<Interconnect_unit *>intconscan;

  assert(lib && dp);
  //functional unit area
  FOR_EACH_LISTNODE(dp->get_functional_units(), fuscan) {
    fu = fuscan.get_item();
    assert(fu);
    assert(fu->get_libelement());
    total += fu->get_libelement()->get_area();
  }

  //register area
  FOR_EACH_LISTNODE(dp->get_storage_units(), regscan) {
    su = regscan.get_item();
    //check for NULL before the item is dereferenced for its id
    assert(su);
    assert(!strcmp(su->get_id(),"Register"));
    total += lib->get_reg_bit_area()*flowgraph.get_bitwidth();
  }

  //Total multiplexer/bus area
  //NOTE: The area of an n-to-1 mux is estimated as (n-1)*(area of 2-to-1 mux)
  FOR_EACH_LISTNODE(dp->get_interconnect_units(), intconscan) {
    mux = (Mux *)intconscan.get_item();
    assert(mux && !strcmp(mux->get_id(), "Mux"));
    //one net is not counted as an input - presumably the output net (TODO confirm)
    num_inputs = mux->get_number_of_nets() - 1;
    assert(num_inputs >= 1);
    total += (num_inputs-1)*lib->get_mux_bit_area()*flowgraph.get_bitwidth();
  }

  //estimate of wiring area

  //estimate of clock network area

  return(total);
}
/********************************************************************/
