/********************************************************************/
/*              FILE: sched_initial.C                               */
/********************************************************************/
#include "scheduler.h"

/********************************************************************/
/*This routine takes an unscheduled and unallocated Dfg, and performs
 *an initial map by mapping each Dfgnode (variable and operation) to
 *a dedicated functional unit, that is the fastest functional unit in
 *the library that can implement the Dfgnode.
 */
void Scheduler::fastest_map(Dfg &flowgraph, library *lib)
{
  /*Parameters:
   *  flowgraph - Dfg whose nodes are annotated, through set_moduletype(),
   *              with the address of the library element chosen for them.
   *  lib       - library that is searched for candidates; must be non-NULL.
   *
   *For every node, the candidate with the smallest get_total_delay() wins;
   *on a tie the candidate encountered first in the list is kept.
   *The program is terminated with exit(-1) if no library element can
   *perform a node's function at flowgraph.bitwidth.
   */
  int i;
  NODEPTR node;
  libelement *libel;
  List_ar<libelement *> *elemlistptr;
  List_iterator<libelement *> elemscan;
  float best_delay;
  int best_delay_map;

  assert(lib);

  //allocate each operation to the fastest library element that can
  //implement it
  for(i = 0; i < flowgraph.numnodes(); i++) {
    node = flowgraph.get_nthnode(i);
    assert(node && node->get_address() == i);

    //the candidate list is deleted at the end of this iteration
    elemlistptr = lib->get_choices(node->get_func(), flowgraph.bitwidth);
    assert(elemlistptr);
    if(elemlistptr->get_size() == 0) {
      cerr << "ERROR: No module in library can perform function "
           << node->get_func() << " at bitwidth " << flowgraph.bitwidth << endl;
      exit(-1);
    }

    //linear scan for the minimum-delay candidate
    best_delay = HUGEFLOAT;
    best_delay_map = -1;
    FOR_EACH_LISTNODE(*elemlistptr, elemscan) {
      libel = elemscan.get_item();
      assert(libel && libel->can_perform(node->get_func(),flowgraph.bitwidth));
      assert(libel->get_total_delay() > 0.0);
      if(libel->get_total_delay() < best_delay ) {
        best_delay = libel->get_total_delay();
        best_delay_map = libel->get_address();
      }
    }

    //the chosen address is later used as an index into the library
    //(lib->get_nthelement), so it must be strictly below numelements()
    assert(best_delay > 0.0 && best_delay_map >= 0 &&
           best_delay_map < lib->numelements());
    node->set_moduletype(best_delay_map);
    delete elemlistptr;
  }

  return;
}
/********************************************************************/
/*This routine takes a Dfg in which each Dfgnode has been mapped to
 *a library element, performs an As-Soon-As-Possible schedule
 *on the Dfg, and annotates the Dfg nodes with information about
 *their lifetimes. The clock period and supply voltage are assumed
 *to be known. The number of clock cycles taken by an operation is
 *estimated using the formula, ceiling(rr_delay/clock period), where
 *rr_delay is the register-to-register delay for the library element
 *that the operation is mapped to. Chaining is not performed.
 *However, multicycling is performed. The routine returns the number
 *of control steps used in the schedule.
 */
int Scheduler::asap_schedule(Dfg &flowgraph, library *lib, float vdd, float clk)
{
  /*Parameters:
   *  flowgraph - Dfg whose nodes already carry a module type; node and edge
   *              births/deaths are written by this routine.
   *  lib       - library used to look up each node's element; non-NULL.
   *  vdd, clk  - supply voltage and clock period; both must be positive.
   *Returns the largest death cycle found over all edges of the Dfg.
   */
  int level, i;
  int maxcycle;
  NODEPTR node, innode;
  EDGEPTR edge;
  libelement *libel;
  List_iterator<NODEPTR> node_liter;
  edge_iterator edgescan;

  assert(lib);
  assert(vdd > 0.0 && clk > 0.0);
  assert(flowgraph.maxlevel < flowgraph.levellists.get_size());

  /*Nodes are visited level by level in increasing order; the assertion on
   *each fanin below checks that its lifetime has already been assigned.
   */
  for(level = 0; level <= flowgraph.maxlevel; level++) {
    FOR_EACH_LISTNODE(flowgraph.levellists[level], node_liter) {
      node = node_liter.get_item();
      assert(node);

      /*node's birth is computed as the max. of the deaths of its fanins*/
      maxcycle = 0;
      FOR_EACH_FANIN_EDGE(node, edgescan) {
        edge = edgescan.get_item();
        assert(edge);
        innode = edge->input_node();
        /*an edge is either a Dfg input (no driving node) or driven by a node*/
        assert((edge->is_dfginput() == T && !innode) ||
               (edge->is_dfginput() == F && innode));
        if(innode) {
          assert(innode->get_birth() >= 0 && innode->get_death() > innode->get_birth());
          maxcycle = MAX(maxcycle, innode->get_death());
        }
      }
      node->set_birth(maxcycle);

      /*node's death is computed as its birth plus the number of cycles
       *taken for it
       */
      libel = lib->get_nthelement(node->get_moduletype());
      assert(libel);
      /*ANAND 4/5/94*/
      //CURRENTLY IGNORES THE REGISTER, MUX/BUS, AND WIRING DELAY ESTIMATES
      node->set_death(node->get_birth() + libel->get_total_csteps(vdd,clk));
    }
  }

  /*assign lifetimes to the Dfgedges and, in the same pass, record the
   *latest edge death, which is the length of the schedule
   */
  maxcycle = 0;
  FOR_EACH_EDGEINDEX(flowgraph, i) {
    edge = flowgraph.get_nthedge(i);
    compute_edge_lifetime(edge, flowgraph, lib, vdd, clk);
    maxcycle = MAX(maxcycle, edge->get_death());
  }

  assert(maxcycle > 0);

  return(maxcycle);
}
/********************************************************************/
/*This routine can be used to compute the lifetime of an edge, after
 *lifetimes have been computed for ALL nodes, using the following rules:
 *
 * -->The birth cycle of an edge is equal to the death cycle of the node that
 * feeds it. For edges that are NOT fed by nodes (PI/CONSTANT/LOOPIN edges),
 * the birth cycle is ASSUMED TO BE 0.
 *
 * -->The death cycle of an edge is determined by the maximum of the constraints
 * imposed by its fanout nodes. If an edge E has a fanout node N, and N is
 * mapped to a non-pipelined functional unit, then E must be alive at least until
 * the death cycle of N. If N is mapped to a pipelined functional unit, then E
 * must be alive at least until the (birth of N) + (# of csteps taken by 1 pipeline
 * stage). For edges that do not feed any nodes (PO/LOOPOUT edges), the death
 * of the edge is ASSUMED TO BE the birth + 1.
 *
 *NOTE: This routine does not handle DFGs with chained operations.
 */
void Scheduler::compute_edge_lifetime(EDGEPTR edge, Dfg &flowgraph, library *lib,
                                      float vdd, float clk)
{
  /*Parameters:
   *  edge      - edge whose birth and death are (re)written; non-NULL.
   *  flowgraph - not referenced by this routine.
   *  lib       - library used to look up the element of each fanout node.
   *  vdd, clk  - supply voltage and clock period; both must be positive.
   */
  NODEPTR node;
  int maxcycle, candidate_death;
  List_iterator<NODEPTR> nodescan;
  LIBELPTR libel;

  assert(edge);
  assert(lib);
  assert(vdd > 0.0 && clk > 0.0);

  /*All primary inputs and loopins are born at cycle 0.
   *Other edges are born at the death cycle of the
   *nodes that feed them.
   */
  if(edge->is_dfginput()) {
    assert(edge->number_source_nodes() == 0);
    edge->set_birth(0);
  } else {
    assert(edge->number_source_nodes() == 1);
    node = edge->input_node();
    assert(node);
    edge->set_birth(node->get_death());
  }

  /*For edges that do not feed any nodes, the death cycle is simply the
   *birth cycle + 1. For other edges, the death cycle is the max. of the
   *latest cycle till which the edge needs to be alive for each node it feeds
   */
  if(edge->number_sink_nodes() == 0) {
    /*name the offending edge before the assertion below fires*/
    if(!edge->is_dfgoutput())
      cout<<"Name "<<edge->get_name()<<endl;
    assert(edge->is_dfgoutput());
    edge->set_death(edge->get_birth()+1);
  } else {
    maxcycle = edge->get_birth();
    FOR_EACH_FANOUT_NODE(edge, nodescan) {
      node = nodescan.get_item();
      assert(node);
      assert(node->get_birth() >= 0 && node->get_death() > node->get_birth());
      assert(node->get_birth() >= edge->get_birth());
      libel = lib->get_nthelement(node->get_moduletype());
      assert(libel);
      /*The edge must stay alive until the fanout node's birth plus the
       *csteps of one stage of its module. The same call is used for
       *pipelined and non-pipelined modules.
       *NOTE(review): for a non-pipelined module get_stage_csteps() is
       *presumably equal to the total cstep count - confirm in the library.
       */
      candidate_death = node->get_birth() + libel->get_stage_csteps(vdd,clk);
      maxcycle = MAX(maxcycle, candidate_death);
    }

    /*assertion not valid for the case of an edge between
     *chained operations.
     */
    assert(maxcycle > edge->get_birth());
    edge->set_death(maxcycle);
  }

  return;
}
/********************************************************************/
void Scheduler::alap_schedule(Dfg &flowgraph, library *lib, float vdd, float clk)
{
  /*Placeholder for an As-Late-As-Possible scheduler. The arguments are
   *sanity-checked, after which the routine reports that the feature is
   *missing on cerr and terminates the program with exit(-1); it never
   *returns to the caller.
   */
  assert(flowgraph.numnodes() && flowgraph.numedges());
  assert(lib);
  assert(vdd > 0 && vdd <= 5.0);
  assert(clk > 0);

  const char *message = "ALAP schedule not implemented";
  cerr << message << endl;
  exit(-1);
}
/********************************************************************/
/*This routine takes a Dfg on which module selection is assumed to have
 *been performed, i.e, each operation is assumed to be bound to a
 *library element type. A one-to-one allocation is performed for
 *both registers and modules to generate the Datapath. The Datapath
 *is assumed to be initially empty.
 */
void Scheduler::initial_allocation(Dfg &flowgraph,Datapath *dp,library *lib)
{
  /*Parameters:
   *  flowgraph - Dfg whose nodes carry a valid module type and are not yet
   *              bound to a functional unit.
   *  dp        - Datapath that receives the new functional units, storage
   *              units, nets and interconnect; must be non-NULL.
   *  lib       - library supplying the element, register and mux types;
   *              must be non-NULL.
   *Every unit and net is created with new and handed to dp via its
   *add_* methods.
   */
  int i;
  NODEPTR node;
  EDGEPTR edge;
  FUPTR fu;
  STORPTR su;
  List_iterator<EDGEPTR> edgescan;
  List_iterator<Functional_unit *> fuscan;
  List_iterator<Storage_unit *> suscan;
  Net *tmpnet;

  assert(dp && lib);
  /*Initially, the Datapath should be empty (check currently disabled)*/
  //assert(dp->get_fucount() == 0 && dp->get_regcount() == 0);

  /*Each node is already annotated with the module type that it should
   *be mapped to. Create a new module of the appropriate type for each
   *node and allocate the node to it
   */
  FOR_EACH_NODEINDEX(flowgraph, i) {
    node = flowgraph.get_nthnode(i);
    assert(node && node->get_functional_unit() == NULL);
    assert(node->get_moduletype() >= 0 &&
           node->get_moduletype() < lib->numelements());
    mem_ok( fu = new FU(lib->get_nthelement(node->get_moduletype())) );
    dp->add_functional_unit(fu);
    fu->add_operation(node);
    node->set_functional_unit(fu);
  }

  /*Create a separate register for each edge and map the edge to it.
   */
  FOR_EACH_EDGEINDEX(flowgraph, i) {
    edge = flowgraph.get_nthedge(i);
    assert(edge);
    mem_ok( su = new STOR(lib->get_register(flowgraph.get_bitwidth())) );
    dp->add_storage_unit(su);
    su->add_variable(edge);
    edge->set_storage_unit(su);
  }

  //create nets for each functional and storage unit output port
  FOR_EACH_NODEINDEX(flowgraph, i) {
    node = flowgraph.get_nthnode(i);
    assert(node);
    fu = node->get_functional_unit();
    assert(fu);

    mem_ok( tmpnet = new Net );
    fu->connect_to_net(tmpnet, OUT1);
    dp->add_net(tmpnet);
  }

  FOR_EACH_EDGEINDEX(flowgraph, i) {
    edge = flowgraph.get_nthedge(i);
    assert(edge);
    su = edge->get_storage_unit();
    assert(su);

    mem_ok( tmpnet = new Net );
    su->connect_to_net(tmpnet, REGOUT);
    dp->add_net(tmpnet);
  }

  //add interconnect units to connect the functional units and registers
  //all interconnect units are initially 1-input since no sharing has been done
  //NOTE: The code below AUTOMATICALLY marks input nets to the datapath
  FOR_EACH_LISTNODE(dp->get_functional_units(), fuscan) {
    fu = fuscan.get_item();
    assert(fu);
    dp->generate_interconnect_network(fu, lib->get_mux(flowgraph.get_bitwidth()));
  }

  FOR_EACH_LISTNODE(dp->get_storage_units(), suscan) {
    su = (STORPTR) suscan.get_item();
    assert(su);
    dp->generate_interconnect_network(su, lib->get_mux(flowgraph.get_bitwidth()));
  }

  //mark output nets of the datapath: the REGOUT net of the register that
  //holds each Dfg output edge becomes a primary output
  FOR_EACH_LISTNODE(flowgraph.outputs, edgescan) {
    edge = edgescan.get_item();
    assert(edge);
    su = (STORPTR) edge->get_storage_unit();
    assert(su);
    tmpnet = su->get_net(REGOUT);
    assert(tmpnet);
    dp->mark_po(tmpnet);
  }

  return;
}
/********************************************************************/

