#include "iscalp.h"
#define GAIN_THRESHOLD 8.0
/**
 * Area-driven synthesis over a range of control-step counts.
 *
 * For every candidate number of control steps, from compute_max_csteps() down
 * to compute_min_csteps(), the flowgraph is restored to its state on entry,
 * mapped with fastest_map(), ASAP-scheduled, and - when the schedule fits in
 * the candidate number of steps - allocated one-to-one, floorplanned and
 * refined with iterative_improvement() / iterative_reg_improvement() using
 * the AREA objective. The candidate with the smallest
 * get_area_after_floorplan() is remembered.
 *
 * @param flowgraph              Graph to synthesize; on return it holds a copy
 *                               of the best solution found.
 * @param lib                    Module library (must be non-NULL).
 * @param sample_constraint      Sample period to meet; when it is 0.0 the
 *                               period is laxity * compute_min_sample_period().
 * @param laxity                 Multiplier used only when sample_constraint is
 *                               0.0 (asserted >= 1.0 in that case).
 * @param max_clocks_to_explore  When > 0, stop after this many feasible
 *                               candidates have been explored.
 * @return The best Datapath found. Aborts through assert_force() when no
 *         candidate is feasible.
 *
 * NOTE(review): `scm` is not declared in this function; presumably it is a
 * class member - confirm.
 */
Datapath *iScheduler::areaschedule(Dfg &flowgraph, library *lib,
                                  const float sample_constraint, const float laxity, const int max_clocks_to_explore)
{
  int num_csteps, max_csteps, min_csteps, lastcstep;
  float vdd;
  float current_clk;
  float sample_period;
  int num_explorations = 0;
  Datapath *cur_dp, *best_dp;
  Dfg *best_flowgraph = NULL;
  Dfg *initial_flowgraph = new Dfg;
  assert_force(lib);
  //CHAINING IS CURRENTLY NOT IMPLEMENTED
  assert_force(MAX_CHAINING_FACTOR == 1);
  assert(max_clocks_to_explore >= 0);

  vdd = 5.0;
  if(sample_constraint == 0.0) {
    //no explicit constraint: relax the fastest achievable period by laxity
    assert(laxity >= 1.0);
    const float fastest_sample_period = compute_min_sample_period(flowgraph, lib);
    sample_period = fastest_sample_period*laxity;
  } else {
    sample_period = sample_constraint;
  }

  //snapshot of the input graph; every candidate starts from this copy
  initial_flowgraph->copy(flowgraph);

  cout << "*" << endl;
  cur_dp = new Datapath;
  best_dp = NULL;

  max_csteps = compute_max_csteps(flowgraph, lib, sample_period, vdd);
  min_csteps = compute_min_csteps(flowgraph, lib, sample_period,
                                  vdd, MAX_CHAINING_FACTOR);
  for(num_csteps = max_csteps; num_csteps >= min_csteps; num_csteps--) {
    //the clock period is truncated to a whole number
    current_clk = (int)(sample_period/(float)num_csteps);

    //reset the Dfg & Datapath: copy initial_flowgraph into flowgraph
    flowgraph.copy(*initial_flowgraph);

    flowgraph.reset_scheduling_info();
    flowgraph.reset_allocation_info();
    delete cur_dp;
    cur_dp = new Datapath;

    cur_dp->set_vdd(vdd);
    cur_dp->set_sample_period(sample_period);
    cur_dp->set_csteps(num_csteps);
    cur_dp->set_bitwidth(flowgraph.get_bitwidth());
    //map each operation to the fastest module that can perform it
    fastest_map(flowgraph, lib);

    //perform an asap schedule - returns the number of csteps needed
    lastcstep = asap_schedule(flowgraph, lib, vdd, current_clk);

    //iterative improvement is attempted only if the initial solution
    //meets the sample_period; otherwise move on to the next candidate
    if(lastcstep > num_csteps) {
      cout << "OOPS - cannot meet sample period with Vdd: " << vdd << " control steps:"
        << num_csteps << " best csteps possible: " << lastcstep << endl;
      continue;  //WATCH OUT - A CONTINUE IS BEING USED
    }

    num_explorations++;
    cout << "Iterative improvement synthesis No.: " << num_explorations
      << ", Vdd: " << vdd << ", Contol Steps: " << num_csteps << endl;
    //perform an one-to-one-allocation to create the Datapath
    initial_allocation(flowgraph, cur_dp, lib);
    cout<<"Fully parallel result:"<<endl;
    cur_dp->do_floorplan(lib, scm);
    //perform an iterative improvement to reduce the area
    iterative_improvement(flowgraph, lib, cur_dp, vdd, current_clk, AREA, scm);
    iterative_reg_improvement(flowgraph, lib, cur_dp, vdd, current_clk, AREA, scm);

    if(!best_dp) {
      //first feasible candidate becomes the incumbent
      assert_force(best_flowgraph == NULL);
      best_flowgraph = new Dfg;
      best_dp = new Datapath;
      copy_flowgraph_and_dp(flowgraph, *cur_dp, *best_flowgraph, *best_dp, lib);
    } else if(cur_dp->get_area_after_floorplan() < best_dp->get_area_after_floorplan()) {
      assert(best_flowgraph != NULL);
      copy_flowgraph_and_dp(flowgraph, *cur_dp, *best_flowgraph, *best_dp, lib);
    }

    if(max_clocks_to_explore > 0) {
      if(num_explorations == max_clocks_to_explore) {
        cout << "Reached limit on max. clock periods to explore" << endl;
        break; //BREAK FROM THE FOR EACH CLOCK LOOP
      }
    }

  } //END FOR EACH CANDIDATE csteps

  //No candidate produced a feasible schedule (or the candidate range was
  //empty): stop here instead of dereferencing the NULL best_* pointers below.
  assert_force(best_dp != NULL && best_flowgraph != NULL);

  //hand the best solution back to the caller through flowgraph
  copy_flowgraph_and_dp(*best_flowgraph, *best_dp, flowgraph, *cur_dp, lib);
  delete initial_flowgraph;

  char tmpfilename[MAXSTRLEN];
  //write out Genesis CDFG file
  strcpy(tmpfilename, flowgraph.name);
  strcat(tmpfilename, "_gen.dfg");
  write_genesis_dfg(flowgraph, *cur_dp, tmpfilename);
  strcpy(tmpfilename, flowgraph.name);
  strcat(tmpfilename, "_dfg.vcg");
  flowgraph.print_vcg(tmpfilename);

  //lzhong
  //
  //cur_dp->write_bdnet(flowgraph.name, laxity);
  delete cur_dp;

  //NOTE(review): best_flowgraph is never freed. best_dp was filled together
  //with it by copy_flowgraph_and_dp(), so it is unclear whether the returned
  //datapath still depends on it - confirm before adding a delete.
  return(best_dp);
}

/********************************************************************/
/**
 * Main low-power scheduling routine.
 *
 * With Vdd fixed at 5.0 (the Vdd sweep is currently disabled), every
 * candidate number of control steps from compute_max_csteps() down to
 * compute_min_csteps() is tried: the flowgraph is restored to its state on
 * entry, mapped with fastest_map() and ASAP-scheduled. When the schedule does
 * not fit and use_retiming is set, retiming/pipelining is attempted through
 * Retpipe. Feasible candidates are allocated one-to-one, floorplanned and
 * refined with iterative_improvement() / iterative_reg_improvement() using
 * the POWER objective; the candidate with the lowest get_sc_after_floorplan()
 * is kept.
 *
 * @param flowgraph                Graph to synthesize; on return it holds the
 *                                 best solution found.
 * @param lib                      Module library (must be non-NULL).
 * @param sample_constraint        Sample period to meet (> 0.0 with
 *                                 laxity == 0.0), or 0.0 to derive it from
 *                                 laxity.
 * @param laxity                   Multiplier (>= 1.0) applied to
 *                                 compute_min_sample_period() when
 *                                 sample_constraint is 0.0.
 * @param scms                     Switched-capacitance matrices (non-NULL);
 *                                 re-extracted after a successful retiming.
 * @param use_retiming             Enables the retiming/pipelining attempt.
 * @param vec_filename             Passed to scms->extract_scmatrices().
 * @param max_synthesis_iterations Only validated (>= 0); the limit that used
 *                                 it is currently disabled.
 * @return A newly allocated Datapath holding the best solution. Aborts
 *         through assert_force() when no candidate is feasible.
 */
Datapath *iScheduler::lpschedule(Dfg &flowgraph, library *lib,
                                const float sample_constraint, const float laxity,
                                Scm *scms, const Boolean use_retiming,
                                char *vec_filename, const int max_synthesis_iterations)
{
  float current_vdd, cur_cost;
  float best_cost = 0.0;  //only meaningful once best_dp is non-NULL
  int num_csteps, max_csteps, min_csteps, lastcstep;
  float current_clk;
  float fastest_sample_period, sample_period;
  //never incremented (the increment below is disabled), so the progress
  //message always reports 0
  int num_explorations = 0;
  Datapath *cur_dp, *best_dp;
  Dfg *best_flowgraph = NULL;
  Dfg *initial_flowgraph = new Dfg;
  Retpipe rp;
  char tmpfilename[MAXSTRLEN];

  assert_force(lib);
  assert_force(scms);
  assert_force((sample_constraint > 0.0 && laxity == 0.0) ||
               (sample_constraint == 0.0 && laxity >= 1.0));
  //CHAINING IS CURRENTLY NOT IMPLEMENTED
  assert_force(MAX_CHAINING_FACTOR == 1);
  assert(max_synthesis_iterations >= 0);

  cout<<"sample_constraint = "<<sample_constraint<<endl;
  cout<<"laxity = "<<laxity<<endl;	
  fastest_sample_period = compute_min_sample_period(flowgraph, lib);
  if(sample_constraint == 0.0) {
    sample_period = fastest_sample_period*laxity;
    cout<<"Set the sample_period as the "<<laxity<<" times fastest_sample_period "
        << fastest_sample_period<<endl;
  } else {
    sample_period = sample_constraint;
  }

  cout << "*" << endl;
  cur_dp = best_dp = NULL;

  //snapshot of the input graph; every candidate starts from this copy
  initial_flowgraph->copy(flowgraph);

  //A sweep over Vdd used to wrap the csteps loop; only 5.0 is explored now.
  current_vdd = 5.0;

  max_csteps = compute_max_csteps(*initial_flowgraph, lib, sample_period, current_vdd);
  min_csteps = compute_min_csteps(*initial_flowgraph, lib, sample_period,
                                  current_vdd, MAX_CHAINING_FACTOR);
  for(num_csteps = max_csteps; num_csteps >= min_csteps; num_csteps--) {
    //the clock period is truncated to a whole number
    current_clk = (int)(sample_period/(float)num_csteps);

    //each time we start off with the initial_flowgraph
    flowgraph.copy(*initial_flowgraph);

    //reset the Dfg & Datapath
    flowgraph.reset_scheduling_info();
    flowgraph.reset_allocation_info();
    delete cur_dp;  //deleting NULL on the first iteration is a no-op
    cur_dp = new Datapath;
    cur_dp->set_vdd(current_vdd);
    cur_dp->set_sample_period(sample_period);
    cur_dp->set_csteps(num_csteps);
    cur_dp->set_bitwidth(flowgraph.get_bitwidth());

    //map each operation to the fastest module that can perform it
    fastest_map(flowgraph, lib);

    //perform an asap schedule
    lastcstep = asap_schedule(flowgraph, lib, current_vdd, current_clk);

    //If sample period constraint is not satisfied, attempt pipelining to
    //meet the sample period constraint
    //NOTE: flowgraph is updated to reflect the new positions of delays
    if(use_retiming && lastcstep > num_csteps) {
      if(rp.retpipe_flowgraph(flowgraph, num_csteps)) {
        //re-run the ASAP schedule on the retimed graph
        lastcstep = asap_schedule(flowgraph, lib, current_vdd, current_clk);
        //CDFG structure has changed ==> need to recompute SCMs
        if(lastcstep <= num_csteps) {
          scms->extract_scmatrices(flowgraph, lib, vec_filename);
        }
      }
    }

    //iterative improvement is attempted only if the initial solution
    //meets the sample_period; otherwise move on to the next candidate
    if(lastcstep > num_csteps) {
      cout << "OOPS - cannot meet constraint with Vdd: " 
           << current_vdd << " control steps:"
           << num_csteps << " best csteps possible: " << lastcstep << endl;
      continue;  /*WATCH OUT - A CONTINUE IS BEING USED*/
    }

    //num_explorations++;
    cout << "Iterative improvement synthesis No.: " << num_explorations
         << ", Vdd: " << current_vdd << ", Contol Steps: " << num_csteps << endl;
    //perform an one-to-one-allocation to create the Datapath
    initial_allocation(flowgraph, cur_dp, lib);
    cout<<"Fully parallel result:"<<endl;
    cur_dp->do_floorplan(lib, scms);
    cur_dp->compute_sccost(lib, scms);

    //perform an iterative improvement to reduce the switched capacitance
    iterative_improvement(flowgraph, lib, cur_dp, current_vdd,
                          current_clk, POWER, scms);
    iterative_reg_improvement(flowgraph, lib, cur_dp, current_vdd,
                              current_clk, POWER, scms);

    //re-evaluate the refined solution; the cost compared below is the one
    //reported by get_sc_after_floorplan()
    cur_dp->do_floorplan(lib, scms);
    cur_dp->compute_sccost(lib,scms);
    cur_cost = cur_dp->get_sc_after_floorplan();
    cout << "Cost (Vdd = " << current_vdd << ", csteps = " << num_csteps
         << ") = " << cur_cost << endl;
    if(!best_dp) {
      //first feasible candidate becomes the incumbent
      assert(!best_flowgraph);
      best_flowgraph = new Dfg;
      best_dp = new Datapath;
      copy_flowgraph_and_dp(flowgraph, *cur_dp, *best_flowgraph, *best_dp, lib);
      check_dfg_and_datapath(*best_flowgraph, *best_dp);
      best_cost = cur_cost;
    } else if(cur_cost < best_cost) {
      cout<<"Updated best cost is "<<cur_cost<<endl;
      copy_flowgraph_and_dp(flowgraph, *cur_dp, *best_flowgraph, *best_dp, lib);
      check_dfg_and_datapath(*best_flowgraph, *best_dp);
      best_cost = cur_cost;
    }
    //The max_synthesis_iterations early exit is disabled here.
  } /*END FOR EACH CANDIDATE csteps*/

  //No candidate produced a feasible schedule (or the candidate range was
  //empty): stop here instead of dereferencing NULL pointers below.
  assert_force(best_dp != NULL && best_flowgraph != NULL);
  assert_force(cur_dp != NULL);

  //load the best solution into flowgraph / cur_dp, which is what is returned
  copy_flowgraph_and_dp(*best_flowgraph, *best_dp, flowgraph, *cur_dp, lib);
  check_dfg_and_datapath(flowgraph, *cur_dp);

  //write out Genesis CDFG file
  strcpy(tmpfilename, flowgraph.name);
  strcat(tmpfilename, "_gen.dfg");
  write_genesis_dfg(flowgraph, *cur_dp, tmpfilename);
  strcpy(tmpfilename, flowgraph.name);
  strcat(tmpfilename, "_dfg.vcg");
  flowgraph.print_vcg(tmpfilename);

  delete best_dp;
  delete best_flowgraph;
  delete initial_flowgraph;

  return(cur_dp);
}
/**********************************************************************************/
/**
 * Iteratively improves the functional-unit binding of a datapath.
 *
 * Each pass copies the best solution seen so far (flowgraph, *dp) into a
 * working pair, applies up to MAX_NUM_MOVES_PER_PASS moves produced by
 * generate_move(), floorplans the result and evaluates it. If the working
 * solution is cheaper than the cost recorded at the start of the pass it is
 * copied back into flowgraph / *dp and another pass is run. The loop ends
 * after a pass without improvement or after MAX_NUM_PASSES passes.
 *
 * @param flowgraph    Best flowgraph so far; updated in place on improvement.
 * @param lib          Module library (asserted non-NULL).
 * @param dp           Best datapath so far; *dp is overwritten on improvement,
 *                     the pointer itself is never reseated here.
 * @param current_vdd  Only checked to be > 0.0.
 * @param current_clk  Only checked to be > 0.0.
 * @param obj          AREA or POWER; selects the cost that is compared.
 * @param scms         Switched-capacitance matrices (asserted non-NULL).
 */
void iScheduler::iterative_improvement(Dfg &flowgraph, library *lib, Datapath * &dp,
                             const float current_vdd, const float current_clk,
                             const objective &obj, Scm *scms)
{
  Boolean moves_left;
  Boolean improvement = T;
  int pass = 0;
  Datapath *cur_dp;
  Dfg *cur_flowgraph;
  Schalloc_info cur_dfg_info(flowgraph.numnodes(),flowgraph.numedges());
  Array<Boolean> nodes_visited(flowgraph.numnodes());
  Array<Boolean> edges_visited(flowgraph.numedges());
  float best_cost,cur_cost;

  assert(lib && dp && current_vdd > 0.0 && current_clk > 0.0);
  assert((obj == AREA || obj == POWER) && scms);

  //flowgraph and dp store the best solution
  //cur_flowgraph and cur_dp store the current solution
  cur_flowgraph = new Dfg;
  cur_dp = new Datapath;

  while(improvement && pass < MAX_NUM_PASSES){
    pass++;
    improvement = F;

    //each pass starts with the best solution seen thus far
    copy_flowgraph_and_dp(flowgraph, *dp, *cur_flowgraph, *cur_dp, lib);
    check_dfg_and_datapath(*cur_flowgraph, *cur_dp);

    //fresh visited markers sized for the working graph
    nodes_visited.resize(cur_flowgraph->numnodes());
    nodes_visited.reset(F);
    edges_visited.resize(cur_flowgraph->numedges());
    edges_visited.reset(F);

    cur_dfg_info.extract_info(*cur_flowgraph);

    //cost of the solution this pass starts from
    if(obj == POWER){
      best_cost = cur_dp->get_sc_after_floorplan();
    } else {
      best_cost = cur_dp->get_area_after_floorplan();
    }

    //`register` was dropped from the index: the specifier no longer exists
    //in C++17 and never affected behavior.
    for(int i = 0; i < MAX_NUM_MOVES_PER_PASS; i++) {
      //One move is generated and implemented on the copy of cur_dp.
      moves_left = iScheduler::generate_move(*cur_flowgraph, lib, cur_dp,
                      cur_dfg_info, nodes_visited, edges_visited, scms, obj);
      if(!moves_left) {
        break;    //BREAK FROM THE FOR LOOP
      }
    }

    //Cost is evaluated once per pass, after all moves:
    //do RTL floorplanning to incorporate the physical level information
    cur_dp->do_floorplan(lib, scms);

    if(obj == POWER){
      cur_cost = cur_dp->compute_sccost(lib, scms);
    } else {
      assert(obj == AREA);
      cur_cost = cur_dp->get_area_after_floorplan();
    }

    //if the current solution is better than the previous best,
    //update the best solution
    if(cur_cost < best_cost) {
      cur_flowgraph->copy_schalloc_info(cur_dfg_info);
      copy_flowgraph_and_dp(*cur_flowgraph, *cur_dp,
                            flowgraph, *dp, lib);
      check_dfg_and_datapath(flowgraph, *dp);
      best_cost = cur_cost;
      improvement = T;
    }
  } /*END WHILE*/

  delete cur_flowgraph;
  delete cur_dp;

  return;
}
/**
 * Finds the best class-B functional-unit move on the given solution and, if
 * one exists, applies it.
 *
 * The search is delegated to find_best_class_b_fu_move(), starting from a
 * gain of -HUGEFLOAT. A selected move is first passed to reschedule() with
 * the last argument T (the result is enforced with assert_force) and then
 * applied with implement_move().
 *
 * @return The value returned by find_best_class_b_fu_move(): true-like when a
 *         move was found and applied, false-like when no move is left.
 */
Boolean iScheduler::generate_move(Dfg &flowgraph, library *lib, Datapath *dp,
                      Schalloc_info &cur_dfg_info, Array<Boolean> &nodes_visited,
                      Array<Boolean> &edges_visited,
                      Scm *scms, const objective obj)
{
  Class_b_fu_move chosen_move;
  float top_gain = -HUGEFLOAT;

  assert(lib && dp);

  Boolean move_found = iScheduler::find_best_class_b_fu_move(
      flowgraph, lib, dp, cur_dfg_info, nodes_visited,
      scms, obj, chosen_move, top_gain);

  //nothing left to try in this pass
  if(!move_found) {
    return(move_found);
  }

  //the search only selects moves it could reschedule, so this must succeed
  Boolean rescheduled = reschedule(flowgraph, lib, dp, cur_dfg_info, chosen_move, T);
  assert_force(rescheduled);
  cout<<"Fu Merged2"<<endl;
  implement_move(chosen_move, dp, flowgraph, lib,
                 cur_dfg_info, nodes_visited);

  return(move_found);
}

/**********************************************************************************/
/**
 * Iteratively improves the register (storage-unit) binding of a datapath.
 *
 * Each pass copies the best solution seen so far (flowgraph, *dp) into a
 * working pair, applies up to MAX_NUM_MOVES_PER_PASS moves produced by
 * generate_reg_move(), floorplans the result and evaluates it. If the working
 * solution is cheaper than the cost recorded at the start of the pass it is
 * copied back into flowgraph / *dp and another pass is run. The loop ends
 * after a pass without improvement or after MAX_NUM_PASSES passes.
 *
 * @param flowgraph    Best flowgraph so far; updated in place on improvement.
 * @param lib          Module library (asserted non-NULL).
 * @param dp           Best datapath so far; *dp is overwritten on improvement,
 *                     the pointer itself is never reseated here.
 * @param current_vdd  Only checked to be > 0.0.
 * @param current_clk  Only checked to be > 0.0.
 * @param obj          AREA or POWER; selects the cost that is compared.
 * @param scms         Switched-capacitance matrices (asserted non-NULL).
 */
void iScheduler::iterative_reg_improvement(Dfg &flowgraph, library *lib, Datapath * &dp,
                             const float current_vdd, const float current_clk,
                             const objective &obj, Scm *scms)
{
  Boolean moves_left;
  Boolean improvement = T;
  int pass = 0;
  Datapath *cur_dp;
  Dfg *cur_flowgraph;
  Schalloc_info cur_dfg_info(flowgraph.numnodes(),flowgraph.numedges());
  Array<Boolean> nodes_visited(flowgraph.numnodes());
  Array<Boolean> edges_visited(flowgraph.numedges());
  float best_cost,cur_cost;

  assert(lib && dp && current_vdd > 0.0 && current_clk > 0.0);
  assert((obj == AREA || obj == POWER) && scms);

  //flowgraph and dp store the best solution
  //cur_flowgraph and cur_dp store the current solution
  cur_flowgraph = new Dfg;
  cur_dp = new Datapath;

  while(improvement && pass < MAX_NUM_PASSES){
    pass++;
    improvement = F;

    //each pass starts with the best solution seen thus far
    copy_flowgraph_and_dp(flowgraph, *dp, *cur_flowgraph, *cur_dp, lib);
    check_dfg_and_datapath(*cur_flowgraph, *cur_dp);

    //fresh visited markers sized for the working graph
    nodes_visited.resize(cur_flowgraph->numnodes());
    nodes_visited.reset(F);
    edges_visited.resize(cur_flowgraph->numedges());
    edges_visited.reset(F);

    cur_dfg_info.extract_info(*cur_flowgraph);

    //cost of the solution this pass starts from
    if(obj == POWER){
      best_cost = cur_dp->get_sc_after_floorplan();
    } else {
      best_cost = cur_dp->get_area_after_floorplan();
    }

    //`register` was dropped from the index: the specifier no longer exists
    //in C++17 and never affected behavior.
    for(int i = 0; i < MAX_NUM_MOVES_PER_PASS; i++) {
      //One move is generated and implemented on the copy of cur_dp.
      moves_left = iScheduler::generate_reg_move(*cur_flowgraph, lib, cur_dp,
                      cur_dfg_info, nodes_visited, edges_visited, scms, obj);
      if(!moves_left) {
        break;    //BREAK FROM THE FOR LOOP
      }
    }

    //cost is evaluated once per pass, after floorplanning the moved solution
    cur_dp->do_floorplan(lib, scms);
    if(obj == POWER){
      cur_cost = cur_dp->compute_sccost(lib, scms);
    } else {
      assert(obj == AREA);
      cur_cost = cur_dp->get_area_after_floorplan();
    }

    //keep the working solution only when it beats the previous best
    if(cur_cost < best_cost) {
      cur_flowgraph->copy_schalloc_info(cur_dfg_info);
      copy_flowgraph_and_dp(*cur_flowgraph, *cur_dp,flowgraph, *dp, lib);
      check_dfg_and_datapath(flowgraph, *dp);
      best_cost = cur_cost;
      improvement = T;
    }
  } /*END WHILE*/

  delete cur_flowgraph;
  delete cur_dp;

  return;
}
/**
 * Finds the best class-B register move on the given solution and, if one
 * exists, applies it with implement_move().
 *
 * The search is delegated to find_best_class_b_reg_move(), starting from a
 * gain of -HUGEFLOAT.
 *
 * @return The value returned by find_best_class_b_reg_move(): true-like when
 *         a move was found and applied, false-like when no move is left.
 */
Boolean iScheduler::generate_reg_move(Dfg &flowgraph, library *lib, Datapath *dp,
                      Schalloc_info &cur_dfg_info, Array<Boolean> &nodes_visited,
                      Array<Boolean> &edges_visited,
                      Scm *scms, const objective obj)
{
  Class_b_reg_move chosen_move;
  float top_gain = -HUGEFLOAT;

  assert(lib && dp);

  Boolean move_found = iScheduler::find_best_class_b_reg_move(
      flowgraph, lib, dp, cur_dfg_info, edges_visited,
      scms, obj, chosen_move, top_gain);

  //nothing left to try in this pass
  if(!move_found) {
    return(move_found);
  }

  implement_move(chosen_move, dp, flowgraph, lib, cur_dfg_info, edges_visited);
  return(move_found);
}

///////////////////////////////////////////////////////
// 03312002 Communication sensitive binding
/*
float iScheduler::communication_gain(Datapath *dp, Class_b_fu_move &move, Scm *scms, library *lib)
{	float retval(0.0);
	List_iterator<FUPTR> fuscan;
	//gain from wire (communication cost)
	int index1(-1), index2(-1), index(0);
	assert(move.fu1!=move.fu2);
	FOR_EACH_LISTNODE(dp->get_functional_units(), fuscan) {
		if(move.fu1==fuscan.get_item()){
			index1 = index;
		} else if(move.fu2==fuscan.get_item()) {
			index2 = index;
		}
		index++;
	}
	assert(index1>=0&&index2>=0);
	Register reg_temp1(NULL), reg_temp2(NULL);
	vector<int> reg_list1, reg_list2;
	List_iterator<NODEPTR> nodescan, nodescan2;
	NODEPTR node;
	List_iterator<EDGEPTR> edgescan;
	Boolean mark = F;
	FOR_EACH_LISTNODE(move.fu1->get_operations(), nodescan) {
		node = nodescan.get_item();
		FOR_EACH_LISTNODE(move.fu2->get_operations(), nodescan2) {
			FOR_EACH_LISTNODE(
			nodescan2.get_item()->get_output_edges(), edgescan) {
				if(node->get_input_edges().find(
				edgescan.get_item())){
						mark = T;
						break;
				}
			}
			if(mark==T)
				break;
		}
		if(mark==T)
			break;
	}
	if(mark==T){
		dp->get_fu_output(reg_temp1, reg_list1, move.fu1);
		retval = scms->get_total_wiresw(reg_temp1.get_variables());
	}
	mark = F;
        FOR_EACH_LISTNODE(move.fu2->get_operations(), nodescan) {
                node = nodescan.get_item();
                FOR_EACH_LISTNODE(move.fu1->get_operations(), nodescan2) {
                        FOR_EACH_LISTNODE(
                        nodescan2.get_item()->get_output_edges(), edgescan) {
                                if(node->get_input_edges().find(
                                edgescan.get_item())){
                                                mark = T;
                                                break;
                                }
                        }
                        if(mark==T)
                                break;
                }
                if(mark==T)
                        break;
        }
        if(mark==T){
                dp->get_fu_output(reg_temp2, reg_list2, move.fu2);
                retval += scms->get_total_wiresw(reg_temp2.get_variables());
        }
	float area1 = move.fu1->get_libelement()->get_area();
	float area2 = move.fu2->get_libelement()->get_area();
	retval = retval*((float)sqrt(area1)+(float)sqrt(area2))*
		TECH_SCALE * lib->get_lamda_scale_down()*
		//There was a bug 07252002
		//0.5*WIRE_LENGTH_SCALE*lib->get_wire_scale_down()/lib->get_gate_scale_down()/LIB_CLK/FROM_35_to_18;	
		0.5*WIRE_LENGTH_SCALE*lib->get_wire_scale_down()/lib->get_gate_scale_down()/LIB_CLK;	
	//cout<<" Wire gain = "<<retval<<"  Gate gain = "<<Scheduler::compute_scgain(dp, move, scms)<<endl;
	
	return retval;

}
*/
/**
 * Communication (wire) gain of a class-B move on functional units
 * move.fu1 and move.fu2.
 *
 * The lists returned by get_fu_input() / get_fu_output() are compared
 * pairwise: every entry of one unit's input list that equals an entry of the
 * other unit's output list counts as one link (presumably these entries
 * identify registers - confirm). The summed wire switching of both units'
 * output registers is multiplied by that link count and then scaled by the
 * square roots of the two module areas and the technology/library factors.
 *
 * @return The scaled wire gain; 0 when the two units share no link.
 */
float iScheduler::communication_gain(Datapath *dp, Class_b_fu_move &move, Scm *scms, library *lib)
{
  assert(move.fu1!=move.fu2);

  //Locate both units in the datapath; the positions are only used to assert
  //that each unit is actually present.
  int pos_fu1 = -1;
  int pos_fu2 = -1;
  int pos = 0;
  List_iterator<FUPTR> fuscan;
  FOR_EACH_LISTNODE(dp->get_functional_units(), fuscan) {
    if(move.fu1==fuscan.get_item()) {
      pos_fu1 = pos;
    } else if(move.fu2==fuscan.get_item()) {
      pos_fu2 = pos;
    }
    ++pos;
  }
  assert(pos_fu1>=0&&pos_fu2>=0);

  //gather the input/output lists of both units
  Register out_reg1(NULL), out_reg2(NULL);
  vector<int> fu1_inputs, fu1_outputs, fu2_inputs, fu2_outputs;
  dp->get_fu_input(fu1_inputs, move.fu1);
  dp->get_fu_input(fu2_inputs, move.fu2);
  dp->get_fu_output(out_reg1, fu1_outputs, move.fu1);
  dp->get_fu_output(out_reg2, fu2_outputs, move.fu2);

  //count the links between the units, in both directions
  int shared_links = 0;
  vector<int>::const_iterator src, dst;
  for(src = fu1_inputs.begin(); src != fu1_inputs.end(); ++src) {
    for(dst = fu2_outputs.begin(); dst != fu2_outputs.end(); ++dst) {
      if(*src == *dst) {
        ++shared_links;
      }
    }
  }
  for(src = fu2_inputs.begin(); src != fu2_inputs.end(); ++src) {
    for(dst = fu1_outputs.begin(); dst != fu1_outputs.end(); ++dst) {
      if(*src == *dst) {
        ++shared_links;
      }
    }
  }

  //wire switching on both output registers, weighted by the link count
  float wire_gain(0.0);
  wire_gain = scms->get_total_wiresw(out_reg1.get_variables());
  wire_gain += scms->get_total_wiresw(out_reg2.get_variables());
  wire_gain *= (float)shared_links;

  //scale by the module dimensions and the technology/library factors
  float area1 = move.fu1->get_libelement()->get_area();
  float area2 = move.fu2->get_libelement()->get_area();
  wire_gain = wire_gain*((float)sqrt(area1)+(float)sqrt(area2))*
          TECH_SCALE * lib->get_lamda_scale_down()*
          0.5*WIRE_LENGTH_SCALE*lib->get_wire_scale_down()/lib->get_gate_scale_down()/LIB_CLK;
  return wire_gain;
}
//communication_gain for registers
/**
 * Communication (wire) gain of a class-B move on storage units
 * move.su1 and move.su2.
 *
 * The lists returned by get_reg_input() / get_reg_output() are compared
 * pairwise: every entry of one unit's input list that equals an entry of the
 * other unit's output list counts as one link (presumably these entries
 * identify functional units - confirm). The summed wire switching of the two
 * registers filled in by get_reg_output() is multiplied by that link count
 * and then scaled by the square root of the register area
 * (REG_AREA_PER_BIT * bitwidth) and the technology/library factors.
 *
 * @return The scaled wire gain; 0 when the two units share no link.
 */
float iScheduler::communication_gain(Datapath *dp, Class_b_reg_move &move, Scm *scms, library *lib)
{
  assert(move.su1!=move.su2);

  //Locate both units in the datapath; the positions are only used to assert
  //that each unit is actually present.
  int pos_su1 = -1;
  int pos_su2 = -1;
  int pos = 0;
  List_iterator<Storage_unit *> suscan;
  FOR_EACH_LISTNODE(dp->get_storage_units(), suscan) {
    if(move.su1==suscan.get_item()) {
      pos_su1 = pos;
    } else if(move.su2==suscan.get_item()) {
      pos_su2 = pos;
    }
    ++pos;
  }
  assert(pos_su1>=0&&pos_su2>=0);

  //gather the input/output lists of both units
  Register out_reg1(NULL), out_reg2(NULL);
  vector<int> su1_inputs, su1_outputs, su2_inputs, su2_outputs;
  dp->get_reg_input(su1_inputs, move.su1);
  dp->get_reg_input(su2_inputs, move.su2);
  dp->get_reg_output(out_reg1, su1_outputs, move.su1);
  dp->get_reg_output(out_reg2, su2_outputs, move.su2);

  //count the links between the units, in both directions
  int shared_links = 0;
  vector<int>::const_iterator src, dst;
  for(src = su1_inputs.begin(); src != su1_inputs.end(); ++src) {
    for(dst = su2_outputs.begin(); dst != su2_outputs.end(); ++dst) {
      if(*src == *dst) {
        ++shared_links;
      }
    }
  }
  for(src = su2_inputs.begin(); src != su2_inputs.end(); ++src) {
    for(dst = su1_outputs.begin(); dst != su1_outputs.end(); ++dst) {
      if(*src == *dst) {
        ++shared_links;
      }
    }
  }

  //wire switching on both registers, weighted by the link count
  float wire_gain(0.0);
  wire_gain = scms->get_total_wiresw(out_reg1.get_variables());
  wire_gain += scms->get_total_wiresw(out_reg2.get_variables());
  wire_gain *= (float)shared_links;

  //scale by the register dimension and the technology/library factors
  //(an earlier revision also divided by FROM_35_to_18; that was a bug)
  wire_gain = wire_gain*((float)sqrt(REG_AREA_PER_BIT*dp->get_bitwidth()))
          *TECH_SCALE * lib->get_lamda_scale_down()*
          0.5*WIRE_LENGTH_SCALE*lib->get_wire_scale_down()/lib->get_gate_scale_down()/LIB_CLK;

  return wire_gain;
}
//Neighborhood sensitive binding
/**
 * Neighborhood-crowd gain of a class-B move on functional units move.fu1 and
 * move.fu2.
 *
 * After refreshing each unit with compute_nc(), the neighbors common to both
 * units are collected. For each common neighbor the ratio of the square roots
 * of the areas is added in both directions (self/neighbor + neighbor/self),
 * where "self" is the area of move.fu1's library element.
 *
 * @param dp   Unused.
 * @param move Move whose two functional units are compared.
 * @param lib  Unused.
 * @return Sum of the ratios over all common neighbors; 0 when there are none.
 */
float iScheduler::neighborhood_crowd_gain(Datapath *dp, Class_b_fu_move &move, library *lib)
{
	set<FUPTR, fu_set_ltstr> neighbors;
	set<FUPTR, fu_set_ltstr>::iterator  iter;
	//each neighbor set is copied right after its own compute_nc() call
	move.fu1->compute_nc();
	const set<FUPTR, fu_set_ltstr> n1 = move.fu1->get_neighbors();
	move.fu2->compute_nc();
	const set<FUPTR, fu_set_ltstr> n2 = move.fu2->get_neighbors();
	//set_intersection requires both ranges to be sorted by the comparator it
	//uses. The sets are ordered by fu_set_ltstr, so that comparator is passed
	//explicitly instead of falling back to operator< on FUPTR.
	set_intersection(n1.begin(), n1.end(), n2.begin(), n2.end(),
				inserter(neighbors, neighbors.begin()),
				neighbors.key_comp());

	float self_area = move.fu1->get_libelement()->get_area();
	self_area = (float)sqrt(self_area);

	float retval = 0.0;

	for(iter = neighbors.begin(); iter != neighbors.end(); ++iter) {
		float area = (*iter)->get_libelement()->get_area();
		area = (float)sqrt(area);
		retval+=(self_area/area);
		retval+=(area/self_area);
	}
	return retval;
}
/**
 * Neighborhood-crowd gain of a class-B move on storage units move.su1 and
 * move.su2.
 *
 * The register area is REG_AREA_PER_BIT * dp->get_bitwidth(). After
 * refreshing each unit with compute_nc(area), the neighbors common to both
 * units are collected. For each common neighbor the ratio of the square roots
 * of the areas is added in both directions (self/neighbor + neighbor/self).
 *
 * @param dp   Supplies the bit width used for the register area.
 * @param move Move whose two storage units are compared.
 * @param lib  Unused.
 * @return Sum of the ratios over all common neighbors; 0 when there are none.
 */
float iScheduler::neighborhood_crowd_gain(Datapath *dp, Class_b_reg_move &move, library *lib)
{
        set<FUPTR, fu_set_ltstr> neighbors;
        set<FUPTR, fu_set_ltstr>::iterator  iter;
        float self_area =  REG_AREA_PER_BIT * dp->get_bitwidth();
        //each neighbor set is copied right after its own compute_nc() call
        move.su1->compute_nc(self_area);
        const set<FUPTR, fu_set_ltstr> n1 = move.su1->get_neighbors();
        move.su2->compute_nc(self_area);
        const set<FUPTR, fu_set_ltstr> n2 = move.su2->get_neighbors();
        //set_intersection requires both ranges to be sorted by the comparator
        //it uses. The sets are ordered by fu_set_ltstr, so that comparator is
        //passed explicitly instead of falling back to operator< on FUPTR.
        set_intersection(n1.begin(), n1.end(), n2.begin(), n2.end(),
                                inserter(neighbors, neighbors.begin()),
                                neighbors.key_comp());

        self_area = (float)sqrt(self_area);

        float retval = 0.0;

        for(iter = neighbors.begin(); iter != neighbors.end(); ++iter) {
                float area = (*iter)->get_libelement()->get_area();
                area = (float)sqrt(area);
                retval+=(self_area/area);
                retval +=(area/self_area);
        }
        return retval;
}

///////////////////////////////////////////////////
#define NC_WEIGHT 0.002 
#define COMM_WEIGHT 150.0
/**
 * Total gain of a class-B functional-unit move: the base gain from
 * Scheduler::compute_scgain(), plus COMM_WEIGHT times communication_gain(),
 * plus NC_WEIGHT times neighborhood_crowd_gain().
 */
float iScheduler::compute_scgain(Datapath *dp, Class_b_fu_move &move, Scm *scms, library *lib)
{
	//Switching reduction in the two fus.
	float total_gain = Scheduler::compute_scgain(dp, move, scms);

	//Switching reduction in the communication between the two fus.
	const float wire_gain = communication_gain(dp, move, scms, lib);
	total_gain += (COMM_WEIGHT*wire_gain);

	//Neighborhood crowd reduction due to the move.
	const float crowd_gain = neighborhood_crowd_gain(dp, move, lib);
	total_gain += (NC_WEIGHT*crowd_gain);

	return total_gain;
}
/**
 * Total gain of a class-B register move: the base gain from
 * Scheduler::compute_scgain(), plus COMM_WEIGHT times communication_gain(),
 * plus NC_WEIGHT times neighborhood_crowd_gain().
 */
float iScheduler::compute_scgain(Datapath *dp, Class_b_reg_move &move, Scm *scms, library *lib)
{
        //Switching reduction in the two storage units.
        float total_gain = Scheduler::compute_scgain(dp, move, scms);

        //Switching reduction in the communication between the two units.
        const float wire_gain = communication_gain(dp, move, scms, lib);
        total_gain += (COMM_WEIGHT*wire_gain);

        //Neighborhood crowd reduction due to the move.
        const float crowd_gain = neighborhood_crowd_gain(dp, move, lib);
        total_gain += (NC_WEIGHT*crowd_gain);

        return total_gain;
}

////////////////////////////////////////////////////
/**
 * Searches all ordered pairs of functional units for the class-B move with
 * the highest gain.
 *
 * A pair (fu1, fu2) is considered only when the two units differ, each has at
 * least one operation not yet marked in nodes_visited, and both use the same
 * library element. The gain comes from compute_areagain() for AREA or
 * compute_scgain() for POWER. A candidate replaces the incumbent only when
 * its gain exceeds best_gain and a trial reschedule() (last argument F)
 * succeeds.
 *
 * @param best_move  Receives a copy of the best move found.
 * @param best_gain  In: gain to beat. Out: gain of the best move found.
 * @return T when at least one move was accepted, otherwise F.
 */
Boolean iScheduler::find_best_class_b_fu_move(Dfg &flowgraph, library *lib, Datapath *dp,
                       Schalloc_info &cur_dfg_info, Array<Boolean> &nodes_visited,
                       Scm *scms, const objective obj, Class_b_fu_move &best_move,
                       float &best_gain)
{
  List_iterator<FUPTR> outer_scan, inner_scan;
  List_iterator<NODEPTR> op_scan;
  FUPTR first_fu, second_fu;
  NODEPTR op;
  Boolean has_unvisited;
  Boolean any_found = F;
  Class_b_fu_move candidate;
  float candidate_gain;

  assert(lib && dp);

  FOR_EACH_LISTNODE(dp->get_functional_units(), outer_scan) {
    first_fu = outer_scan.get_item();
    assert(first_fu);

    //the first unit needs at least one operation that is still unvisited
    has_unvisited = F;
    FOR_EACH_LISTNODE(first_fu->get_operations(), op_scan) {
      op = op_scan.get_item();
      assert(op);
      if(!nodes_visited[op->get_address()]) {
        has_unvisited = T;
      }
    }
    if(has_unvisited == F) {
      continue; //next candidate for the first unit (outer loop)
    }

    FOR_EACH_LISTNODE_REVERSE(dp->get_functional_units(), inner_scan) {
      second_fu = inner_scan.get_item();
      assert(second_fu);

      //a unit cannot be paired with itself
      if(first_fu == second_fu) {
        continue; //next candidate for the second unit (inner loop)
      }

      //the second unit also needs an unvisited operation
      has_unvisited = F;
      FOR_EACH_LISTNODE(second_fu->get_operations(), op_scan) {
        op = op_scan.get_item();
        assert(op);
        if(!nodes_visited[op->get_address()]) {
          has_unvisited = T;
        }
      }
      if(has_unvisited == F) {
        continue; //next candidate for the second unit (inner loop)
      }

      //A class B move should not involve a change in the library template
      if(first_fu->get_libelement() != second_fu->get_libelement()) {
        continue;
      }

      //describe the candidate move
      candidate.splitting = F;
      candidate.fu1 = first_fu;
      candidate.fu2 = second_fu;
      candidate.split_operations.clear();

      //evaluate it under the requested objective
      if(obj == AREA) {
        candidate_gain = compute_areagain(dp, lib, candidate);
      } else if(obj == POWER) {
        candidate_gain = compute_scgain(dp, candidate, scms, lib);
      } else {
        assert_force(0);
      }

      //accept only a strictly better move that can also be rescheduled
      if(candidate_gain > best_gain &&
         reschedule(flowgraph, lib, dp, cur_dfg_info, candidate, F)) {
        best_gain = candidate_gain;
        best_move.copy(candidate);
        any_found = T;
      }
    } //END FOR EACH SECOND FUNCTIONAL UNIT
  } //END FOR EACH FIRST FUNCTIONAL UNIT

  return(any_found);
}
/**
 * Searches all ordered pairs of storage units for the class-B move with the
 * highest gain.
 *
 * A pair (su1, su2) is considered only when the two units differ and each
 * holds at least one variable not yet marked in visited_edges. The gain comes
 * from compute_areagain() for AREA or iScheduler::compute_scgain() for POWER.
 * A candidate replaces the incumbent only when its gain exceeds best_gain and
 * a trial reschedule() (last argument F) succeeds.
 *
 * @param best_move  Receives a copy of the best move found.
 * @param best_gain  In: gain to beat. Out: gain of the best move found.
 * @return T when at least one move was accepted, otherwise F.
 */
Boolean iScheduler::find_best_class_b_reg_move(Dfg &flowgraph, library *lib,
                             Datapath *dp, Schalloc_info &cur_dfg_info,
                             Array<Boolean> & visited_edges, Scm *scms,
                             const objective obj, Class_b_reg_move &best_move,
                             float &best_gain)
{
  List_iterator<Storage_unit *> outer_scan, inner_scan;
  List_iterator<EDGEPTR> var_scan;
  STORPTR first_su, second_su;
  EDGEPTR var;
  Boolean has_unvisited;
  Boolean any_found = F;
  Class_b_reg_move candidate;
  float candidate_gain;

  assert(lib && dp);

  FOR_EACH_LISTNODE(dp->get_storage_units(), outer_scan) {
    first_su = (STORPTR) outer_scan.get_item();
    assert(first_su);

    //the first unit needs at least one variable that is still unvisited
    has_unvisited = F;
    FOR_EACH_LISTNODE(first_su->get_variables(), var_scan) {
      var = var_scan.get_item();
      assert(var);
      if(!visited_edges[var->get_address()]) {
        has_unvisited = T;
      }
    }
    if(has_unvisited == F) {
      continue; /*next candidate for the first unit (outer loop)*/
    }

    FOR_EACH_LISTNODE_REVERSE(dp->get_storage_units(), inner_scan) {
      second_su = (STORPTR) inner_scan.get_item();
      assert(second_su);

      //a unit cannot be paired with itself
      if(first_su == second_su) {
        continue; /*next candidate for the second unit (inner loop)*/
      }

      //the second unit also needs an unvisited variable
      has_unvisited = F;
      FOR_EACH_LISTNODE(second_su->get_variables(), var_scan) {
        var = var_scan.get_item();
        assert(var);
        if(!visited_edges[var->get_address()]) {
          has_unvisited = T;
        }
      }
      if(has_unvisited == F) {
        continue; /*next candidate for the second unit (inner loop)*/
      }

      //describe the candidate move
      candidate.splitting = F;
      candidate.su1 = first_su;
      candidate.su2 = second_su;

      //evaluate it under the requested objective
      if(obj == AREA) {
        candidate_gain = compute_areagain(flowgraph, dp, lib, candidate);
      } else if(obj == POWER) {
        candidate_gain = iScheduler::compute_scgain(dp, candidate, scms, lib);
      } else {
        assert_force(0);
      }

      //accept only a strictly better move that can also be rescheduled
      if(candidate_gain > best_gain &&
         reschedule(flowgraph, lib, dp, cur_dfg_info, candidate, F)) {
        best_gain = candidate_gain;
        best_move.copy(candidate);
        any_found = T;
      }
    } /*END FOR EACH SECOND STORAGE UNIT*/
  } /*END FOR EACH FIRST STORAGE UNIT*/

  return(any_found);
}

