/********************************************************************/
/*         FILE: sched_resched.C                                    */
/********************************************************************/
#include "scheduler.h"
#include "stack.h"

/********************************************************************/
/*Rescheduling check for a class A move, i.e. re-binding the functional
 *unit move.fu from move.old_libelement to move.new_libelement.
 *
 *The module type of every operation executed on move.fu is switched to the
 *new library element, hardware sharing precedence constraints are built,
 *and a precedence-constrained ASAP schedule is computed. The routine
 *returns T if that schedule fits in the sample period of the Datapath and
 *F otherwise.
 *
 *change_dfg == F : this is only a feasibility query. A move that does not
 *                  increase the number of csteps of the unit is accepted
 *                  immediately, and the module types in info are restored
 *                  before returning.
 *change_dfg != F : the module types stay changed, the schedule is always
 *                  recomputed (precedence_constrained_schedule() stores it
 *                  in info), and a schedule that exceeds the sample period
 *                  trips assert_force.
 *
 *Assumes that no CONDITIONALS are present and that the operation list of
 *each functional unit is sorted in execution order.
 *NOTE(review): earlier documentation also stated that register sharing
 *constraints are ignored, but the class A generate_precedence_constraints()
 *in this file does build edge constraints for every storage unit - confirm.
 */
Boolean Scheduler::reschedule(Dfg &flowgraph, library *lib, Datapath *dp,
                              Schalloc_info &info, Class_a_move &move, Boolean change_dfg)
{
  static Array<PRECON> node_precedence_constraints;
  static Array<PRECON> edge_precedence_constraints;
  List_iterator<NODEPTR> opscan;

  assert(lib && dp);
  assert(move.old_libelement && move.new_libelement);

  libelement *from_type = move.old_libelement;
  libelement *to_type = move.new_libelement;

  //clock period, truncated to a whole number of time units
  float clock_period = (int) (dp->get_sample_period()/(float)dp->get_csteps());

  //a pure query for a unit that does not get slower always succeeds
  if(!change_dfg) {
    if(to_type->get_total_csteps(dp->get_vdd(), clock_period) <=
       from_type->get_total_csteps(dp->get_vdd(), clock_period)) {
      return T;
    }
  }

  FUPTR target_fu = move.fu;
  assert(target_fu && target_fu->get_libelement() == move.old_libelement);

  //hardware sharing precedence constraints that the new schedule must obey
  generate_precedence_constraints(flowgraph, dp, info, move,
                                  node_precedence_constraints,
                                  edge_precedence_constraints);

  //bind every operation of the unit to the new module type
  FOR_EACH_LISTNODE(target_fu->get_operations(), opscan) {
    NODEPTR op = opscan.get_item();
    assert(op);
    assert(info.moduletypes[op->get_address()] == move.old_libelement->get_address());
    info.moduletypes[op->get_address()] = move.new_libelement->get_address();
  }

  //ASAP schedule under the precedence constraints; yields the last cstep used
  int last_cstep = precedence_constrained_schedule(flowgraph, lib, dp->get_vdd(),
                                                   clock_period, info, change_dfg,
                                                   node_precedence_constraints,
                                                   edge_precedence_constraints);

  //a move that is actually committed MUST meet the sample period
  assert_force(!change_dfg || last_cstep*((int)clock_period) <= dp->get_sample_period());

  //a pure query leaves info exactly as it found it
  if(!change_dfg) {
    FOR_EACH_LISTNODE(target_fu->get_operations(), opscan) {
      NODEPTR op = opscan.get_item();
      assert(op);
      assert(info.moduletypes[op->get_address()] == move.new_libelement->get_address());
      info.moduletypes[op->get_address()] = move.old_libelement->get_address();
    }
  }

  //T when the schedule fits in the sample period, F otherwise
  return (last_cstep*clock_period <= dp->get_sample_period()) ? T : F;
}
/********************************************************************/
/*Rescheduling check for a class B functional unit move on the Datapath.
 *
 *The move is either a split (move.splitting == T, move.fu2 unset,
 *move.split_operations non-empty) or a merge of move.fu1 and move.fu2
 *(move.splitting == F, move.fu2 set, move.split_operations empty); this
 *is asserted on entry.
 *
 *A split needs no change in the schedule, so when the DFG is not to be
 *updated (change_dfg == F) the routine returns T immediately for a split.
 *In every other case hardware sharing precedence constraints are generated
 *and a precedence-constrained schedule is computed. The routine returns T
 *if that schedule fits in the sample period of the Datapath, F otherwise.
 *
 *When change_dfg is set, precedence_constrained_schedule() writes the new
 *birth and death times into info, and a schedule that exceeds the sample
 *period trips assert_force.
 *
 *Assumes that no CONDITIONALS are present and that the operation list of
 *each functional unit is sorted in execution order.
 */
Boolean Scheduler::reschedule(Dfg &flowgraph, library *lib, Datapath *dp,
			      Schalloc_info &info, Class_b_fu_move &move, Boolean change_dfg)
{
  float cur_clock;
  int maxcstep;
  static Array<PRECON> node_precedence_constraints;
  static Array<PRECON> edge_precedence_constraints;

  assert(lib && dp);
  assert(move.fu1);
  assert( (move.splitting == T && !move.fu2 && !move.split_operations.is_empty()) ||
	 (move.splitting == F && move.fu2 && move.split_operations.is_empty()) );

  //clock period, truncated to a whole number of time units
  cur_clock = (int) (dp->get_sample_period()/(float)dp->get_csteps());

  //A hardware splitting move requires no change in the schedule
  //Hence, if not required to update the dfg, we can return
  if(!change_dfg && move.splitting == T) {
    return T;
  }

  //generate hardware sharing precedence constraints for rescheduling
  generate_precedence_constraints(flowgraph, dp, info, move, node_precedence_constraints,
				  edge_precedence_constraints);

  //perform precedence-constrained scheduling on the DFG
  maxcstep = precedence_constrained_schedule(flowgraph, lib, dp->get_vdd(), cur_clock,
					     info, change_dfg, node_precedence_constraints,
					     edge_precedence_constraints);

  //when we are changing the DFG, we better be SURE that after the move
  //the sample period is satisfied
  assert_force(!change_dfg || maxcstep*((int)cur_clock) <= dp->get_sample_period());

  //return T if successful, F otherwise
  if(maxcstep*cur_clock <= dp->get_sample_period()) {
    return T;
  } else {
    return F;
  }
}
/********************************************************************/
/*Rescheduling check for a class B register move on the Datapath.
 *
 *A register split needs no change in the schedule, so when the DFG is not
 *to be updated (change_dfg == F) the routine returns T immediately for a
 *split. Otherwise the move is first checked with valid_register_merge();
 *an invalid merge returns F without scheduling. For a valid move, hardware
 *sharing precedence constraints are generated and a precedence-constrained
 *schedule is computed, exactly as for class B functional unit moves. The
 *routine returns T if that schedule fits in the sample period of the
 *Datapath, F otherwise.
 *
 *When change_dfg is set, precedence_constrained_schedule() writes the new
 *birth and death times into info, and a schedule that exceeds the sample
 *period trips assert_force.
 *
 *Assumes that no CONDITIONALS are present and that the operation list of
 *each functional unit is sorted in execution order.
 */
Boolean Scheduler::reschedule(Dfg &flowgraph, library *lib, Datapath *dp,
			      Schalloc_info &info, Class_b_reg_move &move, Boolean change_dfg)
{
  float cur_clock;
  int maxcstep;
  static Array<PRECON> node_precedence_constraints;
  static Array<PRECON> edge_precedence_constraints;

  assert(lib && dp);
  assert(move.su1);

  //clock period, truncated to a whole number of time units
  cur_clock = (int) (dp->get_sample_period()/(float)dp->get_csteps());

  //A hardware splitting move requires no change in the schedule
  //Hence, if not required to update the dfg, we can return
  if(move.splitting && !change_dfg) {
    return T;
  }

  //Check the validity of the register move.
  //Two registers cannot be merged if one register contains an output
  //which outlives a primary output of the other, or a variable whose
  //birth precedes a primary input in the other register.
  if (!valid_register_merge(move, info)) {
    return F;
  }

  //generate hardware sharing precedence constraints for rescheduling
  generate_precedence_constraints(flowgraph, dp, info, move, node_precedence_constraints,
				  edge_precedence_constraints);

  //From here on everything is exactly the same as in the case of
  //rescheduling for class B fu moves.
  //
  //perform precedence-constrained scheduling on the DFG
  maxcstep = precedence_constrained_schedule(flowgraph, lib, dp->get_vdd(), cur_clock,
					     info, change_dfg, node_precedence_constraints,
					     edge_precedence_constraints);

  //when we are changing the DFG, we better be SURE that after the move
  //the sample period is satisfied
  assert_force(!change_dfg || maxcstep*((int)cur_clock) <= dp->get_sample_period());

  //return T if successful, F otherwise
  if(maxcstep*cur_clock <= dp->get_sample_period()) {
    return T;
  } else {
    return F;
  }
}
/********************************************************************/
/*Precedence-Constrained ASAP scheduling that determines, given a set of
 *precedence constraints for operations (imposed by hardware sharing), a
 *minimum-cycle schedule such that all precedence constraints for operations
 *are satisfied. The above problem is indeed polynomial time solvable due to
 *the following simplifications:
 *(i)   The ORDER of execution of operations for each functional unit is known.
 *      While this is inherently true for class A moves, in the case of class B
 *      moves, we have to determine a good order of execution a priori.
 *(ii)  Acyclicity and no conditionals (actually, the problem remains
 *      polynomial-time solvable even when there are cycles, but no
 *      conditionals).
 *(iii) Register-sharing precedence constraints for variables are not
 *      considered. This last simplification often helps find good moves that
 *      would not have been feasible if the simplification was not made
 *      (however, not assuming this simplification still leaves the problem
 *      poly-time solvable).
 *
 *Method: every node gets an indegree equal to the number of its fanin edges
 *that are not DFG inputs plus the number of its node- and edge-precedence
 *predecessors. Level 0 nodes without precedence predecessors are scheduled
 *first and pushed on a stack. Each time a node is popped, the indegree of
 *each fanout node and of each precedence successor is decremented; a node
 *whose indegree reaches 0 is scheduled by
 *precedence_constrained_schedule_process_node() and pushed. Finally the
 *lifetime of every edge is derived with compute_edge_lifetime().
 *
 *Returns the largest death cycle over all edges of the DFG. When change_dfg
 *is set, the computed node/edge birth and death arrays are copied into info;
 *otherwise info is left untouched by this routine.
 */
int Scheduler::precedence_constrained_schedule(Dfg &flowgraph, library *lib, float vdd,
			     float clk, Schalloc_info &info, Boolean change_dfg,
			     Array<PRECON> &node_precedence_constraints,
			     Array<PRECON> &edge_precedence_constraints)
{
  int level, index, num_pi_fanins;
  int maxcycle;
  NODEPTR node, outnode;
  EDGEPTR edge, outedge;
  node_iterator nodescan;
  edge_iterator edgescan;
  //scratch arrays are static so that they are reused across calls
  static Array<int> nodebirths, nodedeaths, edgebirths, edgedeaths, indegrees;
  Stack<NODEPTR> dfsstack(flowgraph.numnodes());
  
  assert(lib);
  assert(vdd > 0.0 && clk > 0.0);
  assert(flowgraph.levellists.get_size() == flowgraph.numnodes() &&
	 flowgraph.maxlevel < flowgraph.levellists.get_size());
  
  //create and copy the Arrays that store the scheduling information
  nodebirths.resize(flowgraph.numnodes());
  nodedeaths.resize(flowgraph.numnodes());
  edgebirths.resize(flowgraph.numedges());
  edgedeaths.resize(flowgraph.numedges());
  indegrees.resize(flowgraph.numnodes());

  //-1 marks "not scheduled yet"
  nodebirths.reset(-1);
  nodedeaths.reset(-1);
  edgebirths.reset(-1);
  edgedeaths.reset(-1);

  //Start off with level 0 nodes that have no precedence constraints
  FOR_EACH_LISTNODE(flowgraph.levellists[0], nodescan) {
    node = nodescan.get_item();
    assert(node);
    index = node->get_address();
    if(node_precedence_constraints[index].predecessors.is_empty() &&
       edge_precedence_constraints[index].predecessors.is_empty()) {
      indegrees[index] = 0;
      precedence_constrained_schedule_process_node(info, nodebirths, nodedeaths,
						   lib, vdd, clk, node,
						   node_precedence_constraints,
						   edge_precedence_constraints);
      dfsstack.push(node);
    } else {
      //a constrained level 0 node waits for its precedence predecessors only
      assert(node_precedence_constraints[index].predecessors.get_size() <= 1);
      indegrees[index] = node_precedence_constraints[index].predecessors.get_size() +
	                 edge_precedence_constraints[index].predecessors.get_size();
    }
  }

  //set up the indegrees array for the remaining nodes
  for(level = 1; level <= flowgraph.maxlevel; level++) {
    FOR_EACH_LISTNODE(flowgraph.levellists[level], nodescan) {
      node = nodescan.get_item();
      assert(node);
      index = node->get_address();
      //fanin edges that are DFG inputs never release the node, so they
      //are excluded from its indegree
      num_pi_fanins = 0;
      FOR_EACH_FANIN_EDGE(node, edgescan) {
	edge = edgescan.get_item();
	assert(edge);
	if(edge->is_dfginput()) {
	  num_pi_fanins++;
	}
      }
      indegrees[index] = node->number_input_edges() - num_pi_fanins;
      assert(node_precedence_constraints[index].predecessors.get_size() <= 1);
      indegrees[index] += ( node_precedence_constraints[index].predecessors.get_size() +
	                  edge_precedence_constraints[index].predecessors.get_size() );
      assert(indegrees[index] > 0);
    }
  }

  while(dfsstack.is_empty() == F) {
    node = dfsstack.pop();
    assert(nodebirths[node->get_address()] >= 0 &&
	   nodedeaths[node->get_address()] > nodebirths[node->get_address()]);
    outedge = node->get_output_edge();
    assert(outedge);

    /*Process the data-dependent successors of this node*/
    FOR_EACH_FANOUT_NODE(outedge, nodescan) {
      outnode = nodescan.get_item();
      assert(outnode);
      index = outnode->get_address();
      assert(nodebirths[index] == -1 && nodedeaths[index] == -1);
      indegrees[index]--;

      if(indegrees[index] == 0) { //outnode is ready to schedule
	precedence_constrained_schedule_process_node(info, nodebirths, nodedeaths,
						     lib, vdd, clk, outnode,
						     node_precedence_constraints,
						     edge_precedence_constraints);
	dfsstack.push(outnode);
      }
    }


    /*Process the successor of this node in the node precedence constraints list*/
    assert(node_precedence_constraints[node->get_address()].successors.get_size() <= 1);
    FOR_EACH_LISTNODE(node_precedence_constraints[node->get_address()].successors, nodescan) {
      outnode = nodescan.get_item();
      index = outnode->get_address();
      assert(node_precedence_constraints[index].predecessors.get_size() == 1 &&
	     node_precedence_constraints[index].predecessors[0] == node);
      assert(nodebirths[index] == -1 && nodedeaths[index] == -1);
      indegrees[index]--;

      if(indegrees[index] == 0) { //outnode is ready to schedule
	precedence_constrained_schedule_process_node(info, nodebirths, nodedeaths,
						     lib, vdd, clk, outnode,
						     node_precedence_constraints,
						     edge_precedence_constraints);
	dfsstack.push(outnode);
      }
    }

    /*Process the successor of this node in the edge precedence constraints list*/
    FOR_EACH_LISTNODE(edge_precedence_constraints[node->get_address()].successors, nodescan) {
      outnode = nodescan.get_item();
      index = outnode->get_address();
      assert(edge_precedence_constraints[index].predecessors.find(node));
      assert(nodebirths[index] == -1 && nodedeaths[index] == -1);
      indegrees[index]--;

      if(indegrees[index] == 0) { //outnode is ready to schedule
	precedence_constrained_schedule_process_node(info, nodebirths, nodedeaths,
						     lib, vdd, clk, outnode,
						     node_precedence_constraints,
						     edge_precedence_constraints);
	dfsstack.push(outnode);
      }
    }

  } /*END WHILE STACK NOT EMPTY*/
  
  /*assign lifetimes to the edges in the DFG*/
  FOR_EACH_EDGEINDEX(flowgraph,index) {
    edge = flowgraph.get_nthedge(index);
    assert(edgebirths[index] == -1 && edgedeaths[index] == -1);
    //03282002 temp: every node fed by this edge must have been scheduled
    FOR_EACH_FANOUT_NODE(edge, nodescan) {
	    node = nodescan.get_item();
	    assert(nodebirths[node->get_address()]>=0);
    }
    compute_edge_lifetime(edge, flowgraph, info, nodebirths, nodedeaths, edgebirths,
			  edgedeaths, lib, vdd, clk);
  }

  /*the schedule length is the latest death cycle of any edge*/
  maxcycle = 0;
  FOR_EACH_EDGEINDEX(flowgraph, index) {
    edge = flowgraph.get_nthedge(index);
    assert(edge);
    maxcycle = MAX(maxcycle, edgedeaths[index]);
  }

  /*commit the new schedule only when asked to*/
  if(change_dfg) {
    info.nodebirths = nodebirths;
    info.nodedeaths = nodedeaths;
    info.edgebirths = edgebirths;
    info.edgedeaths = edgedeaths;
  }

  return(maxcycle);
}
/********************************************************************/
/*Routine to process a node that is ready to schedule during precedence
 *constrained scheduling. This routine is responsible for calculating
 *the birth and death of the given node and setting the appropriate
 *entries in the nodebirths and nodedeaths arrays.
 *
 *The birth cycle is the largest of
 *  - 0,
 *  - the death cycle of every node that drives a fanin edge of the node,
 *  - birth + stage_csteps of its node-precedence predecessor (if any), and
 *  - birth + stage_csteps of each edge-precedence predecessor, minus the
 *    number of csteps the node itself takes.
 *The death cycle is the birth cycle plus the total csteps of the library
 *element given by info.moduletypes for the node.
 *
 *All predecessors must already have entries in nodebirths/nodedeaths.
 */
void Scheduler::precedence_constrained_schedule_process_node(Schalloc_info &info,
				    Array<int> &nodebirths, Array<int> &nodedeaths,
				    library *lib, float vdd, float clk, NODEPTR node,
				    Array<PRECON> &node_precedence_constraints,
				    Array<PRECON> &edge_precedence_constraints)
{
  //NOTE: maxdeath ends up holding the earliest cycle at which the node
  //may be born; the name comes from the data dependency case.
  int maxdeath, candidate_death, index, node_lifetime;
  NODEPTR innode;
  EDGEPTR edge;
  edge_iterator edgescan;
  node_iterator nodescan;
  LIBELPTR libel;

  assert(node && lib);

  index = node->get_address();
  libel = lib->get_nthelement(info.moduletypes[index]);
  assert(libel);
  node_lifetime = libel->get_total_csteps(vdd, clk);
  assert(node_lifetime > 0);

  //Satisfy data dependencies: the node cannot start before every node
  //that produces one of its inputs has died. Fanin edges without an
  //input node contribute nothing.
  maxdeath = 0;
  FOR_EACH_FANIN_EDGE(node, edgescan) {
    edge = edgescan.get_item();
    innode = edge->input_node();
    if(innode) {
      assert(nodebirths[innode->get_address()] >= 0 &&
	     nodedeaths[innode->get_address()] > nodebirths[innode->get_address()]);
      maxdeath = MAX(maxdeath, nodedeaths[innode->get_address()]);
    }
  }

  //NOTE: If there is a node_precedence_constraint n1 --> n2,
  //it is NOT necessary for birth(n2) to be after death(n1)
  //in the case when the FU that performs n1 is pipelined.
  //The condition to be satisfied is:
  //      birth(n2) >= birth(n1) + stage_csteps(n1)
  if(!node_precedence_constraints[index].predecessors.is_empty()) {
    assert(node_precedence_constraints[index].predecessors.get_size() == 1);
    innode = node_precedence_constraints[index].predecessors[0];
    assert(nodebirths[innode->get_address()] >= 0 &&
	   nodedeaths[innode->get_address()] > nodebirths[innode->get_address()]);
    libel = lib->get_nthelement(info.moduletypes[innode->get_address()]);
    assert(libel);
    candidate_death = nodebirths[innode->get_address()] + libel->get_stage_csteps(vdd,clk);
    maxdeath = MAX(maxdeath, candidate_death);
  }

  //Satisfy edge precedence constraints
  //NOTE: If there is an edge precedence constraint between n1 --> n2,
  //it means that death(n2) >= birth(n1) + stage_csteps(n1)
  //Since death(n2) = birth(n2) + total_csteps(n2), the above eqn. can be
  //simplified to birth(n2) >= birth(n1) + stage_csteps(n1) - total_csteps(n2);
  FOR_EACH_LISTNODE(edge_precedence_constraints[index].predecessors, nodescan) {
    innode = nodescan.get_item();
    assert(edge_precedence_constraints[innode->get_address()].successors.find(node));
    //as for the other two kinds of predecessor, n1 must be scheduled
    //already; otherwise its -1 "unscheduled" marker would be used as a birth
    assert(nodebirths[innode->get_address()] >= 0);
    libel = lib->get_nthelement(info.moduletypes[innode->get_address()]);
    assert(libel);
    candidate_death = nodebirths[innode->get_address()] + libel->get_stage_csteps(vdd,clk);
    candidate_death -= node_lifetime;
    //may be negative; MAX() keeps the birth at the bound found so far
    maxdeath = MAX(maxdeath, candidate_death);
  }
  //03282002 temp
  assert(maxdeath>=0);
  nodebirths[index] = maxdeath;
  /*The death cycle of node is computed as its birth plus the number of cycles
   *taken for it to execute.
   */
  nodedeaths[index] = nodebirths[index] + node_lifetime;

  return;
}
/********************************************************************/
/*This routine is identical to the routine with the same name in file
 *sched_initial.C, except that here all lifetime info is read from /
 *written into arrays that are given as arguments.
 *
 *It fills in edgebirths[] and edgedeaths[] for the given edge:
 *  birth - 0 for a DFG input edge, otherwise the death cycle of the node
 *          that drives the edge;
 *  death - birth + 1 for an edge that feeds no node, otherwise the latest
 *          value of birth(n) + stage_csteps(n) over all nodes n that the
 *          edge feeds.
 *nodebirths[] and nodedeaths[] must already hold the schedule of every
 *node attached to the edge.
 */
void Scheduler::compute_edge_lifetime(EDGEPTR edge, Dfg &flowgraph, Schalloc_info &info,
				      Array<int> &nodebirths, Array<int> &nodedeaths,
				      Array<int> &edgebirths, Array<int> &edgedeaths,
				      library *lib, float vdd, float clk)
{
  NODEPTR node;
  int maxcycle, candidate_death, index;
  List_iterator<NODEPTR> nodescan;
  LIBELPTR libel;

  assert(edge);
  index = edge->get_address();
  assert(flowgraph.get_nthedge(index) == edge);
  assert(lib && vdd > 0.0 && clk > 0.0);

  /*All primary inputs and loopins are born at cycle 0.
   *Other edges are born at the death cycle of the
   *nodes that feed them.
   */
  if(edge->is_dfginput()) {
    assert(edge->number_source_nodes() == 0);
    edgebirths[index] = 0;
  } else {
    assert(edge->number_source_nodes() == 1);
    node = edge->input_node();
    edgebirths[index] = nodedeaths[node->get_address()];
  }

  /*For edges that do not feed any nodes, the death cycle is simply the
   *birth cycle + 1. For other edges, the death cycle is the max. of the
   *latest cycle till which the edge needs to be alive for each node it feeds
   */
  if(edge->number_sink_nodes() == 0) {
    assert(edge->is_dfgoutput());
    edgedeaths[index] =  edgebirths[index]+1;
  } else {
    maxcycle = edgebirths[index];
    FOR_EACH_FANOUT_NODE(edge, nodescan) {
      node = nodescan.get_item();
      //03282002 temp lzhong
      //Dump the offending node and edge BEFORE asserting, so that the
      //diagnostic is also printed in builds where the assert aborts.
      if(nodebirths[node->get_address()]<0) {
		cout<<"Problem Node Name: "<<node->get_name();
		cout<<" death "<<node->get_death()<<" Birth "<<node->get_birth()<<endl;
		cout<<" input edge name : "<<edge->get_name();
		cout<<" death "<<edge->get_death()<<" Birth "<<edge->get_birth()<<endl;
      }
      assert(nodebirths[node->get_address()] >= 0);
      assert(nodedeaths[node->get_address()] > nodebirths[node->get_address()]);
      assert(nodebirths[node->get_address()] >= edgebirths[index]);
      libel = lib->get_nthelement(info.moduletypes[node->get_address()]);
      assert(libel);
      /*If the module is not pipelined, i.e, has the same latency and delay,
       *its edge needs to be alive until the cycle of its death. Otherwise,
       *i.e., if the module is pipelined, the modules latency is used to
       *calculate the cycle till which its input edge must be alive
       */
      candidate_death = nodebirths[node->get_address()] + libel->get_stage_csteps(vdd,clk);
      maxcycle = MAX(maxcycle, candidate_death);
    }
    /*assertion not valid for the case of an edge between
     *CHAINED OPERATIONS.
     */
    assert(maxcycle > edgebirths[index]);
    edgedeaths[index] = maxcycle;
  }

  return;
}
/********************************************************************/
//This routine generates hardware sharing precedence constraints for
//rescheduling after a class A move.
//  (i) Generate precedence constraints between CONSECUTIVE operations in
//      each fus sorted list of operations. For example, if the list is
//      op1, op2, op3 precedence constraints are made for op1->op2 and
//      op2->op3 in the node_precedence_constraints array.
//  (ii) Generate precedence constraints for CONSECUTIVE variables in each
//       storage units sorted list of variables. For a pair of variables
//       (e1, e2), a precedence constraint is added from each fanout node
//       of e1 to the fanin node of e2 in the edge_precedence_constraints
//       array.
//NOTE that the entries in the node_precedence_constraints and
//edge_precedence_constraints arrays have different effects during rescheduling.
//
//Both arrays are resized to one entry per node of the flowgraph and cleared
//before any constraint is added. The move itself is only checked for
//consistency here; a class A move does not change which operations share
//a unit, so every functional unit and storage unit is treated alike.
void Scheduler::generate_precedence_constraints(Dfg &flowgraph, Datapath *dp,
						Schalloc_info &info, Class_a_move &move,
						Array<PRECON> &node_precedence_constraints,
						Array<PRECON> &edge_precedence_constraints)
{
  int i;
  List_iterator<FUPTR> fuscan;
  List_iterator<Storage_unit *> suscan;
  STORPTR su;
  FUPTR fu;

  assert(dp);
  assert(move.fu && move.old_libelement && move.new_libelement);

  node_precedence_constraints.resize(flowgraph.numnodes());
  edge_precedence_constraints.resize(flowgraph.numnodes());

  for(i = 0; i < flowgraph.numnodes(); i++) {
    node_precedence_constraints[i].clear();
  }
  //bound the loop by the array that is actually being indexed
  for(i = 0; i < edge_precedence_constraints.get_size(); i++) {
    edge_precedence_constraints[i].clear();
  }

  //create precedence constraints for functional units
  FOR_EACH_LISTNODE(dp->get_functional_units(), fuscan) {
    fu = fuscan.get_item();
    assert(fu);

    generate_node_precedence_constraints(fu->get_operations(), info, node_precedence_constraints);
  }

  //create precedence constraints for registers
  FOR_EACH_LISTNODE(dp->get_storage_units(), suscan) {
    su = (STORPTR) suscan.get_item();
    assert(su);
    generate_edge_precedence_constraints(su->get_variables(), info, edge_precedence_constraints);
  }

  return;
}
/********************************************************************/
//Generate hardware sharing precedence constraints for rescheduling after a
//class B functional unit move.
//We generate precedence constraints between CONSECUTIVE operations in each
//fus sorted list of operations. This routine ASSUMES the absence of conditionals.
//
//Functional units not involved in the move get constraints from their own
//operation lists. For the units involved in the move:
//  - split: the operations that stay on move.fu1 and the operations in
//    move.split_operations are chained separately, each in list order;
//  - merge: the operation lists of move.fu1 and move.fu2 are merged into
//    one list ordered by current birth time and chained as a whole.
//Edge precedence constraints are generated for every storage unit.
//
//IMPORTANT NOTE: The operations involved in the current move are sorted in
//  increasing order of the CURRENT BIRTH TIMES (ties resolved arbitrarily) and the
//  resultant order is used in generating the precedence constraints
//  This ENSURES that the precedence_constraints generated CAN NEVER CONFLICT
//  with the data dependency constraints.
void Scheduler::generate_precedence_constraints(Dfg &flowgraph, Datapath *dp,
						Schalloc_info &info, Class_b_fu_move &move,
						Array<PRECON> &node_precedence_constraints,
						Array<PRECON> &edge_precedence_constraints)
{
  Boolean first;
  int i;
  NODEPTR node, node1, node2;
  LIBELPTR libel;
  float cur_clock;
  int stage_csteps;
  List_iterator<NODEPTR> nodescan, node1scan, node2scan;
  List_iterator<FUPTR> fuscan;
  List_iterator<Storage_unit *> suscan;
  List_ar<NODEPTR> tmplist;
  FUPTR fu;
  STORPTR su;

  assert(dp);

  node_precedence_constraints.resize(flowgraph.numnodes());
  edge_precedence_constraints.resize(flowgraph.numnodes());

  for(i = 0; i < flowgraph.numnodes(); i++) {
    node_precedence_constraints[i].clear();
  }
  //bound the loop by the array that is actually being indexed
  for(i = 0; i < edge_precedence_constraints.get_size(); i++) {
    edge_precedence_constraints[i].clear();
  }

  //clock period, truncated to a whole number of time units
  cur_clock = (int)(dp->get_sample_period()/(float)dp->get_csteps());

  //First, generate node precedence constraints for functional units not
  //involved in the move.
  FOR_EACH_LISTNODE(dp->get_functional_units(), fuscan) {
    fu = fuscan.get_item();
    assert(fu);
    if(fu == move.fu1 || fu == move.fu2) continue;

    //Need to check that the FUs operations list is indeed ordered
    generate_node_precedence_constraints(fu->get_operations(), info, node_precedence_constraints);
  }

  //Next, generate precedence constraints for functional units involved in the class B move
  if(move.splitting) {

    libel = move.fu1->get_libelement();
    assert(libel);
    stage_csteps = libel->get_stage_csteps(dp->get_vdd(), cur_clock);

    //In both loops below, node is the previous operation of the chain.
    //It is only read once first == F, i.e. after it has been assigned.
    first = T;
    //generate precedence constraints for operations assigned to move.fu1 that are
    //not in move.split_operations
    //IMPORTANT: this assumes that the operations of move.fu1 that are not in
    //move.split_operations will still be executed in the same order after the move
    FOR_EACH_LISTNODE(move.fu1->get_operations(), nodescan) {
      node1 = nodescan.get_item();
      assert(node1);

      if(move.split_operations.find(node1)) {
	continue;
      }
      if(!first) {
	assert(node != node1);
	//the current schedule must already keep the two operations at
	//least one pipeline stage apart
	assert_force( (info.nodebirths[node->get_address()] <
		       info.nodedeaths[node->get_address()]) &&
		      (info.nodebirths[node->get_address()] + stage_csteps <=
		       info.nodebirths[node1->get_address()]) );
	assert(node_precedence_constraints[node1->get_address()].predecessors.is_empty());
	node_precedence_constraints[node1->get_address()].predecessors.append(node);
	assert(node_precedence_constraints[node->get_address()].successors.is_empty());
	node_precedence_constraints[node->get_address()].successors.append(node1);
      } else {
	first = F;
      }
      node = node1;
    }

    //generate precedence constraints for operations in move.split_operations
    //IMPORTANT: this assumes that the split operations will be executed in the
    //order in which they appear in the list
    first = T;
    FOR_EACH_LISTNODE(move.split_operations, nodescan) {
      node1 = nodescan.get_item();
      assert(node1);

      if(!first) {
	assert(node != node1);
	assert_force( (info.nodebirths[node->get_address()] <
		       info.nodedeaths[node->get_address()]) &&
		      (info.nodebirths[node->get_address()] + stage_csteps <=
		       info.nodebirths[node1->get_address()]) );
	assert(node_precedence_constraints[node1->get_address()].predecessors.is_empty());
	node_precedence_constraints[node1->get_address()].predecessors.append(node);
	assert(node_precedence_constraints[node->get_address()].successors.is_empty());
	node_precedence_constraints[node->get_address()].successors.append(node1);
      } else {
	first = F;
      }
      node = node1;
    }
  } else {
    //order the operations of move.fu1 and move.fu2 into a single list, based on their
    //current birth times
    node1scan.start(move.fu1->get_operations());
    node2scan.start(move.fu2->get_operations());
    while(node1scan.not_done() || node2scan.not_done()) {
      if(!node1scan.not_done()) {
	//list 1 is exhausted: drain list 2
	assert(node2scan.not_done());
	node2 = node2scan.get_item();
	assert(node2);
	tmplist.append(node2);
	node2scan.increment();
      } else if(!node2scan.not_done()) {
	//list 2 is exhausted: drain list 1
	assert(node1scan.not_done());
	node1 = node1scan.get_item();
	assert(node1);
	tmplist.append(node1);
	node1scan.increment();
      } else {
	//both lists have operations left: take the one born earlier
	node1 = node1scan.get_item();
	node2 = node2scan.get_item();

	if(info.nodebirths[node1->get_address()] < info.nodebirths[node2->get_address()]) {
	  tmplist.append(node1);
	  node1scan.increment();
	} else if (info.nodebirths[node2->get_address()] < info.nodebirths[node1->get_address()]) {
	  tmplist.append(node2);
	  node2scan.increment();
	} else { //both birth cycles are equal - arbitrarily add node1 and then node2
	  tmplist.append(node1);
	  tmplist.append(node2);
	  node1scan.increment();
	  node2scan.increment();
	}
      }
    }

    //create the precedence constraints using the single list
    assert_force(tmplist.get_size() == move.fu1->get_operations().get_size() + move.fu2->get_operations().get_size() );

    generate_node_precedence_constraints(tmplist, info, node_precedence_constraints);
  }

  //generate edge precedence constraints for the registers
  FOR_EACH_LISTNODE(dp->get_storage_units(), suscan) {
    su = (STORPTR) suscan.get_item();
    assert(su);
    generate_edge_precedence_constraints(su->get_variables(), info, edge_precedence_constraints);
  }

  return;
}
/********************************************************************/
//Generate hardware sharing precedence constraints for rescheduling around a
//class B register move.
//We generate precedence constraints between CONSECUTIVE operations in each
//fu's list of operations, and between CONSECUTIVE variables in each register's
//list of variables. This routine ASSUMES the absence of conditionals.
//
//Both constraint arrays are resized to flowgraph.numnodes() and cleared before
//any constraint is added.
//
//Only register sharing (move.splitting false) is handled; a splitting move
//trips assert_force(0).
//
//IMPORTANT NOTE: The variables involved in the current move are sorted in
//  increasing order of the CURRENT BIRTH TIMES (ties resolved arbitrarily) and the
//  resultant order is used in generating the precedence constraints
//  This ENSURES that the precedence_constraints generated CAN NEVER CONFLICT
//  with the data dependency constraints.
void Scheduler::generate_precedence_constraints(Dfg &flowgraph, Datapath *dp,
						Schalloc_info &info, Class_b_reg_move &move,
						Array<PRECON> &node_precedence_constraints,
						Array<PRECON> &edge_precedence_constraints)
{
  int i;
  EDGEPTR edge1, edge2;
  List_iterator<EDGEPTR> edge1scan, edge2scan;
  List_iterator<FUPTR> fuscan;
  List_iterator<Storage_unit *> suscan;
  List_ar<EDGEPTR> tmplist;
  FUPTR fu;
  STORPTR su;

  assert(dp);

  node_precedence_constraints.resize(flowgraph.numnodes());
  edge_precedence_constraints.resize(flowgraph.numnodes());

  for(i = 0; i < flowgraph.numnodes(); i++) {
    node_precedence_constraints[i].clear();
  }
  //Bound the loop by the array that is actually being cleared, so that it can
  //neither run past its end nor leave stale entries behind.
  for(i = 0; i < edge_precedence_constraints.get_size(); i++) {
    edge_precedence_constraints[i].clear();
  }

  //First, generate node precedence constraints for all functional units.
  FOR_EACH_LISTNODE(dp->get_functional_units(), fuscan) {
    fu = fuscan.get_item();
    assert(fu);
    //Need to check that the FUs operations list is indeed ordered
    generate_node_precedence_constraints(fu->get_operations(), info, node_precedence_constraints);
  }

  //Generate edge precedence constraints for all storage units NOT involved in the
  //move
  FOR_EACH_LISTNODE(dp->get_storage_units(), suscan) {
    su = (Register *)suscan.get_item();
    assert(su);
    if ((move.su1 == su)||(move.su2 == su))
      continue;
    //Need to check that the variables list is indeed ordered
    generate_edge_precedence_constraints(((Register *)su)->get_variables(), info, edge_precedence_constraints);
  }

  //order the variables of move.su1 and move.su2 into a single list, based on their
  //current birth times
  if(move.splitting) {
    //register splitting is not supported by this routine
    assert_force(0);
  } else {
    assert(move.su1 && move.su2);

    //two-way merge of the two variable lists, keyed on info.edgebirths
    edge1scan.start(((Register *)(move.su1))->get_variables());
    edge2scan.start(((Register *)(move.su2))->get_variables());
    while(edge1scan.not_done() || edge2scan.not_done()) {
      if(!edge1scan.not_done()) {
        //su1 is exhausted - drain su2
        assert(edge2scan.not_done());
        edge2 = edge2scan.get_item();
        assert(edge2);
        tmplist.append(edge2);
        edge2scan.increment();
      } else if(!edge2scan.not_done()) {
        //su2 is exhausted - drain su1
        assert(edge1scan.not_done());
        edge1 = edge1scan.get_item();
        assert(edge1);
        tmplist.append(edge1);
        edge1scan.increment();
      } else {
        edge1 = edge1scan.get_item();
        edge2 = edge2scan.get_item();
        if(info.edgebirths[edge1->get_address()] < info.edgebirths[edge2->get_address()]) {
          tmplist.append(edge1);
          edge1scan.increment();
        } else if (info.edgebirths[edge1->get_address()] > info.edgebirths[edge2->get_address()]) {
          tmplist.append(edge2);
          edge2scan.increment();
        } else { //both birth cycles are equal - arbitrarily add edge1 and then edge2
          tmplist.append(edge1);
          tmplist.append(edge2);
          edge1scan.increment();
          edge2scan.increment();
        }
      }
    }

    //create the precedence constraints using the single list
    assert_force(tmplist.get_size() == (move.su1->get_variables().get_size() + move.su2->get_variables().get_size()));

    generate_edge_precedence_constraints(tmplist, info, edge_precedence_constraints);
  }

  return;
}
/********************************************************************/
//Given an ordered list of nodes and a node precedence constraints array,
//this routine generates precedence constraints between each consecutive
//pair of nodes in the given list.
void Scheduler::generate_node_precedence_constraints(const List_ar<NODEPTR> &nodelist,
					     Schalloc_info &info,
					     Array<PRECON> &node_precedence_constraints)
{
  NODEPTR node, node1;
  Boolean first;
  List_iterator<NODEPTR> nodescan;

  assert(!nodelist.is_empty());

  first = T;
  node = NULL;
  FOR_EACH_LISTNODE(nodelist, nodescan) {
    node1 = nodescan.get_item();
    assert(node1);
    if(!first) {
      assert(node != node1);
      assert_force(info.nodebirths[node->get_address()] < info.nodedeaths[node->get_address()] &&
	info.nodebirths[node->get_address()]  <= info.nodebirths[node1->get_address()]);

      assert(node_precedence_constraints[node1->get_address()].predecessors.is_empty());
      node_precedence_constraints[node1->get_address()].add_predecessor(node);
      assert(node_precedence_constraints[node->get_address()].successors.is_empty());
      node_precedence_constraints[node->get_address()].add_successor(node1);
    } else {
      first = F;
    }
    node = node1;
  }

  return;
}
/********************************************************************/
//Given an ordered list of edges (variables) and an edge precedence constraints
//array, this routine generates precedence constraints for each consecutive
//pair of edges (edge, edge1) in the list: every fanout node of edge is made a
//predecessor of the node driving edge1 (edge1->input_node()), and that driving
//node is made a successor of each such fanout node.
//The constraints array is indexed by NODE address, not edge address.
//An empty or single-element list produces no constraints.
void Scheduler::generate_edge_precedence_constraints(const List_ar<EDGEPTR> &edgelist,
						Schalloc_info &info,
						Array<PRECON> &edge_precedence_constraints)
{
  //NOTE: the obsolete `register` storage class (removed in C++17) has been
  //dropped from these declarations; it has no effect on behaviour.
  NODEPTR node, node1;
  EDGEPTR edge, edge1;
  Boolean first;
  List_iterator<EDGEPTR> edgescan;
  List_iterator<NODEPTR> nodescan;

  first = T;
  edge = NULL;
  FOR_EACH_LISTNODE(edgelist, edgescan) {
    edge1 = edgescan.get_item();
    assert(edge1);
    if(!first) {
      assert(edge != edge1);
      //IMPORTANT NOTE: For now, we cannot have any predecessors to a PI/LOOPIN/CONSTANT,
      //or any successors of a PO/LOOPOUT
      assert(!edge->is_dfgoutput());
      assert(!edge1->is_dfginput());
      //the earlier edge must have a non-empty lifetime and must not be born
      //after the edge that follows it
      assert_force(info.edgebirths[edge->get_address()] < info.edgedeaths[edge->get_address()] &&
		   info.edgebirths[edge->get_address()] <= info.edgebirths[edge1->get_address()]);

      //node1 is the node that drives the later edge
      node1 = edge1->input_node();
      assert(node1);

      FOR_EACH_FANOUT_NODE(edge, nodescan) {
	node = nodescan.get_item();
	assert(node);
	//Check for the special case of two edges e1 and e2, s.t. e1 feeds a node n,
	//and e2 is driven by the SAME node n - in this case, there is NO NEED to add
	//any precedence constraint
	if(node != node1) {
	  edge_precedence_constraints[node1->get_address()].add_predecessor(node);
	  edge_precedence_constraints[node->get_address()].add_successor(node1);
	}
      }
    } else {
      first = F;
    }
    edge = edge1;
  }

  return;
}
/********************************************************************/
//Decides whether the two registers of a class B register move may be merged.
//Returns T for a splitting move (nothing to check), otherwise returns T only
//if ALL of the following hold, using the birth/death times stored in info:
// (1) no variable of either register is born at or before the latest primary
//     input birth, or at or after the earliest primary output birth, found in
//     the two registers. The original intent was that the primary inputs occur
//     together at the beginning of the merged list and the POs at the end,
//     where the order is determined by the birth-time.
//     NOTE(review): because the tests use <= and >= against extremes taken
//     over these same variables, any variable that is itself a DFG input or
//     output causes rejection - confirm this strictness is intended.
// (2) no variable of su1 has a lifetime overlapping a variable of su2.
// (3) the DFS_edge() reachability test between the boundary variables of the
//     two registers succeeds.
Boolean Scheduler::valid_register_merge(Class_b_reg_move &move, Schalloc_info &info)
{
  List_iterator<EDGEPTR> edgescan, edge1scan, edge2scan;
  EDGEPTR edge, edge1, edge2;
  int minPObirth1, maxPIbirth1, minsu1, minsu2;
  minPObirth1 = minsu1 = minsu2 = HUGEINT;
  maxPIbirth1 = -1;

  if(move.splitting) {
    return T;
  }

  //Collect the earliest PO birth and latest PI birth over both registers, and
  //the earliest birth of each register. All values are read from info so that
  //the value stored is the same one that was compared (previously the
  //comparison used info.edgebirths but the stored value came from
  //edge->get_birth()).
  FOR_EACH_LISTNODE(move.su1->get_variables(), edgescan) {
    edge = edgescan.get_item();
    assert(edge);
    if (edge->is_dfgoutput() && info.edgebirths[edge->get_address()] < minPObirth1) {
      minPObirth1 = info.edgebirths[edge->get_address()];
    }
    if (info.edgebirths[edge->get_address()] < minsu1)
      minsu1 = info.edgebirths[edge->get_address()];
    if (edge->is_dfginput() && info.edgebirths[edge->get_address()] > maxPIbirth1) {
      maxPIbirth1 = info.edgebirths[edge->get_address()];
    }
  }
  FOR_EACH_LISTNODE(move.su2->get_variables(), edgescan) {
    edge = edgescan.get_item();
    assert(edge);
    if (edge->is_dfgoutput() && info.edgebirths[edge->get_address()] < minPObirth1) {
      minPObirth1 = info.edgebirths[edge->get_address()];
    }
    if (info.edgebirths[edge->get_address()] < minsu2)
      minsu2 = info.edgebirths[edge->get_address()];
    if (edge->is_dfginput() && info.edgebirths[edge->get_address()] > maxPIbirth1) {
      maxPIbirth1 = info.edgebirths[edge->get_address()];
    }
  }

  //condition (1): reject variables born outside the open window
  //(maxPIbirth1, minPObirth1)
  FOR_EACH_LISTNODE(move.su1->get_variables(), edgescan) {
    edge = edgescan.get_item();
    assert(edge);
    if ( (info.edgebirths[edge->get_address()] <= maxPIbirth1)
	|| (info.edgebirths[edge->get_address()] >= minPObirth1) ) {
      return F;
    }
  }
  FOR_EACH_LISTNODE(move.su2->get_variables(), edgescan) {
    edge = edgescan.get_item();
    assert(edge);
    if ( (info.edgebirths[edge->get_address()] <= maxPIbirth1)
	|| (info.edgebirths[edge->get_address()] >= minPObirth1) ) {
      return F;
    }
  }

  //condition (2):
  //sharing edges with overlapping lifetimes may create cyclic precedence
  //constraints if the target operations share a functional unit
  //Therefore, we do not allow such sharing to take place.
  FOR_EACH_LISTNODE(move.su1->get_variables(), edge1scan)
    {
      edge1 = edge1scan.get_item();
      FOR_EACH_LISTNODE(move.su2->get_variables(), edge2scan)
	{
	  edge2 = edge2scan.get_item();
	  //edge2 dies before edge1 is born - no overlap
	  if (info.edgedeaths[edge2->get_address()] < info.edgebirths[edge1->get_address()])
	    continue;
	  //edge1 dies before edge2 is born - no overlap
	  if (info.edgedeaths[edge1->get_address()] < info.edgebirths[edge2->get_address()])
	    continue;
	  return F;
	}
    }

  //condition (3):
  //Even sharing between independent edges may invalidate future class B fu
  //move. They should be prevented.
  //The first and last variable of each register are indexed below, so both
  //variable lists must be non-empty.
  assert(!move.su1->get_variables().is_empty());
  assert(!move.su2->get_variables().is_empty());

  if (minsu1 < minsu2)
    {
      //su1 starts first: edge1 is the tail variable of su1,
      //and edge2 is the head variable of su2
      edge1 = move.su1->get_variables()[move.su1->get_variables().get_size()-1];
      edge2 = move.su2->get_variables()[0];
      if (DFS_edge(edge1, edge2))
	return T;
    }
  else
    {
      //su2 starts first (or ties): edge1 is the tail variable of su2,
      //and edge2 is the head variable of su1
      edge1 = move.su2->get_variables()[move.su2->get_variables().get_size()-1];
      edge2 = move.su1->get_variables()[0];
      //NOTE(review): this searches from the head of su1 towards the tail of
      //su2, which is not the mirror image of the branch above (that would be
      //DFS_edge(edge1, edge2)). Behaviour is kept as it was - confirm intent.
      if (DFS_edge(edge2, edge1))
	return T;
    }
  return F;
}
/********************************************************************/
//Recursive depth-first search over the flowgraph: returns T if edge e2 can be
//reached from edge e1 by repeatedly stepping from an edge to one of its fanout
//nodes and then to one of that node's fanout edges. Two edges with the same
//address are considered reachable from each other (zero-length path).
//NOTE(review): no visited set is kept, so shared sub-graphs are re-explored on
//every path that reaches them; presumably the graph walked here is acyclic -
//confirm.
Boolean Scheduler::DFS_edge(EDGEPTR e1, EDGEPTR e2)
{
  List_iterator<NODEPTR> sinkscan;
  List_iterator<EDGEPTR> outscan;
  NODEPTR sink;

  if (e1->get_address() != e2->get_address())
    {
      //not there yet - try every edge driven by every node that e1 feeds
      FOR_EACH_FANOUT_NODE(e1, sinkscan)
	{
	  sink = sinkscan.get_item();
	  FOR_EACH_FANOUT_EDGE(sink, outscan)
	    {
	      if (DFS_edge(outscan.get_item(), e2))
		{
		  return T;
		}
	    }
	}
      return F;
    }

  //e1 and e2 are the same edge
  return T;
}
/********************************************************************/
//Repeatedly merges pairs of storage units of cur_dp for as long as
//can_share_registers() accepts some pair. Each pass scans all ordered pairs of
//distinct storage units; the first acceptable pair is merged through a
//non-splitting class B register move (implement_move), after which the scan is
//abandoned and restarted from scratch. The routine returns once a full pass
//finds no pair to merge.
void Scheduler::reg_share(Dfg& flowgraph, Datapath *cur_dp, Schalloc_info& cur_dfg_info, library *lib)
{
  List_iterator<Storage_unit *> outer, inner;
  STORPTR first_su, second_su;
  Class_b_reg_move move;
  Boolean merged;
  //implement_move requires an array of booleans (one per edge); it is reset
  //to all-F before the first pass and again after every pass
  Array<Boolean> visited(flowgraph.numedges());
  visited.reset(F);

  //check_dfg_and_datapath(flowgraph, *cur_dp);
  do
    {
      merged = F;
      FOR_EACH_LISTNODE(cur_dp->get_storage_units(), outer)
	{
	  first_su = (STORPTR) outer.get_item();
	  FOR_EACH_LISTNODE(cur_dp->get_storage_units(), inner)
	    {
	      second_su = (STORPTR) inner.get_item();
	      if (first_su != second_su &&
		  can_share_registers(first_su->get_variables(), second_su->get_variables()))
		{
		  //Implement a class B register sharing move
		  move.splitting = F;
		  move.su1 = first_su;
		  move.su2 = second_su;
		  implement_move(move, cur_dp, flowgraph, lib, cur_dfg_info, visited);
		  //check_dfg_and_datapath(flowgraph, cur_dfg_info, *cur_dp);
		  merged = T;
		  //stop scanning as soon as a move has been implemented
		  break;
		}
	    }
	  if (merged)
	    {
	      break;
	    }
	}
      visited.reset(F);
    }
  while (merged);
}
/******************************************************************************/
//Returns T if the variables of the two lists could live in one register, i.e.
//if no variable of edgelist1 has a [birth, death] interval that
//interval::intersect() reports as intersecting the interval of any variable of
//edgelist2. Returns F at the first intersecting pair found. The times used are
//the ones stored on the edges themselves (get_birth()/get_death()).
Boolean Scheduler::can_share_registers(const List_ar<EDGEPTR> &edgelist1, const List_ar<EDGEPTR> &edgelist2)
{
  List_iterator<EDGEPTR> scan_a, scan_b;
  EDGEPTR var_a, var_b;

  FOR_EACH_LISTNODE(edgelist1, scan_a)
    {
      var_a = scan_a.get_item();
      interval life_a(var_a->get_birth(), var_a->get_death());

      FOR_EACH_LISTNODE(edgelist2, scan_b)
	{
	  var_b = scan_b.get_item();
	  interval life_b(var_b->get_birth(), var_b->get_death());
	  if (life_b.intersect(life_a))
	    {
	      return F;
	    }
	}
    }

  //no pair of lifetimes intersects
  return T;
}
/******************************************************************************/
