/* * This file is part of libFirm. * Copyright (C) 2012 University of Karlsruhe. */ /** * @file * @brief Block-scheduling strategies. * @author Matthias Braun, Christoph Mallon * @date 27.09.2006 * * The goals of the greedy (and ILP) algorithm here works by assuming that * we want to change as many jumps to fallthroughs as possible (executed jumps * actually, we have to look at the execution frequencies). The algorithms * do this by collecting execution frequencies of all branches (which is easily * possible when all critical edges are split) then removes critical edges where * possible as we don't need and want them anymore now. The algorithms then try * to change as many edges to fallthroughs as possible, this is done by setting * a next and prev pointers on blocks. The greedy algorithm sorts the edges by * execution frequencies and tries to transform them to fallthroughs in this order */ #include "beblocksched.h" #include "bearch.h" #include "beirg.h" #include "bemodule.h" #include "besched.h" #include "debug.h" #include "execfreq.h" #include "iredges_t.h" #include "irgmod.h" #include "irgwalk.h" #include "irnode_t.h" #include "pdeq.h" #include "util.h" DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;) static bool blocks_removed; /** * Post-block-walker: Find blocks containing only one jump and * remove them. */ static void remove_empty_block(ir_node *block) { if (irn_visited_else_mark(block)) return; if (get_Block_n_cfgpreds(block) != 1) goto check_preds; ir_node *jump = NULL; sched_foreach(block, node) { if (!(arch_get_irn_flags(node) & arch_irn_flag_simple_jump)) goto check_preds; /* we should never have 2 jumps in a block */ if (jump) panic("found 2 jumps in a block"); jump = node; } if (jump == NULL) goto check_preds; ir_entity *entity = get_Block_entity(block); ir_node *pred = get_Block_cfgpred(block, 0); ir_node *succ_block = NULL; foreach_out_edge_safe(jump, edge) { assert(succ_block == NULL); succ_block = get_edge_src_irn(edge); if (get_Block_entity(succ_block) != NULL && entity != NULL) { /* Currently we can add only one label for a block. Therefore we * cannot combine them if both block already have one. :-( */ goto check_preds; } int const pos = get_edge_src_pos(edge); set_irn_n(succ_block, pos, pred); } /* move the label to the successor block */ set_Block_entity(succ_block, entity); /* there can be some non-scheduled Pin nodes left in the block, move them * to the succ block (Pin) or pred block (Sync) */ foreach_out_edge_safe(block, edge) { ir_node *const node = get_edge_src_irn(edge); if (node == jump) { continue; } else if (is_Pin(node)) { /* we simply kill Pins, because there are some strange interactions * between jump threading, which produce PhiMs with Pins, we simply * kill the pins here, everything is scheduled anyway */ exchange(node, get_Pin_op(node)); } else if (is_Sync(node)) { set_nodes_block(node, get_nodes_block(pred)); } else if (is_End(node)) { /* End-keep, reroute it to the successor */ int pos = get_edge_src_pos(edge); set_irn_n(node, pos, succ_block); } else { panic("unexpected node %+F in block %+F with empty schedule", node, block); } } ir_graph *irg = get_irn_irg(block); set_Block_cfgpred(block, 0, new_r_Bad(irg, mode_X)); kill_node(jump); blocks_removed = true; /* check predecessor */ remove_empty_block(get_nodes_block(pred)); return; check_preds: for (int i = 0, arity = get_Block_n_cfgpreds(block); i < arity; ++i) { ir_node *pred = get_Block_cfgpred_block(block, i); remove_empty_block(pred); } } /* removes basic blocks that just contain a jump instruction */ static void remove_empty_blocks(ir_graph *irg) { blocks_removed = false; ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED); inc_irg_visited(irg); remove_empty_block(get_irg_end_block(irg)); foreach_irn_in(get_irg_end(irg), i, pred) { if (is_Block(pred)) remove_empty_block(pred); } ir_free_resources(irg, IR_RESOURCE_IRN_VISITED); if (blocks_removed) clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE); } typedef struct blocksched_entry_t blocksched_entry_t; struct blocksched_entry_t { ir_node *block; blocksched_entry_t *next; blocksched_entry_t *prev; }; typedef struct edge_t edge_t; struct edge_t { ir_node *block; /**< source block */ int pos; /**< number of cfg predecessor (target) */ double execfreq; /**< the frequency */ /** for edges leaving the loop this is the penality when we make them a * fallthrough. */ double outedge_penalty_freq; /** flag that indicates whether this edge is the edge with the highest * execfreq pointing away from this block */ int highest_execfreq; }; typedef struct blocksched_env_t blocksched_env_t; struct blocksched_env_t { ir_graph *irg; struct obstack obst; edge_t *edges; deq_t worklist; unsigned blockcount; }; static blocksched_entry_t* get_blocksched_entry(const ir_node *block) { return (blocksched_entry_t*)get_irn_link(block); } /** * Collect cfg frequencies of all edges between blocks. * Also determines edge with highest frequency. */ static void collect_egde_frequency(ir_node *block, void *data) { blocksched_env_t *env = (blocksched_env_t*)data; /* Exclude the end block from the block schedule. */ if (block == get_irg_end_block(env->irg)) return; edge_t edge; memset(&edge, 0, sizeof(edge)); blocksched_entry_t *entry = OALLOCZ(&env->obst, blocksched_entry_t); entry->block = block; set_irn_link(block, entry); int arity = get_Block_n_cfgpreds(block); if (arity == 0) { /* must be the start block, everything else is dead code and should be * removed by now */ assert(block == get_irg_start_block(env->irg)); /* nothing to do here */ return; } else if (arity == 1) { ir_loop *loop = get_irn_loop(block); ir_node *pred_block = get_Block_cfgpred_block(block, 0); ir_loop *pred_loop = get_irn_loop(pred_block); float freq = (float)get_block_execfreq(block); /* is it an edge leaving a loop */ if (get_loop_depth(pred_loop) > get_loop_depth(loop)) { float pred_freq = (float)get_block_execfreq(pred_block); edge.outedge_penalty_freq = -(pred_freq - freq); } edge.block = block; edge.pos = 0; edge.execfreq = freq; edge.highest_execfreq = 1; ARR_APP1(edge_t, env->edges, edge); } else { double highest_execfreq = -1.0; int highest_edge_num = -1; edge.block = block; for (int i = 0; i < arity; ++i) { ir_node *const pred_block = get_Block_cfgpred_block(block, i); double const execfreq = get_block_execfreq(pred_block); edge.pos = i; edge.execfreq = execfreq; edge.highest_execfreq = 0; ARR_APP1(edge_t, env->edges, edge); if (highest_execfreq < execfreq) { highest_execfreq = execfreq; highest_edge_num = ARR_LEN(env->edges) - 1; } } if (highest_edge_num >= 0) { env->edges[highest_edge_num].highest_execfreq = 1; } } } static int cmp_edges_base(const edge_t *e1, const edge_t *e2) { long nr1 = get_irn_node_nr(e1->block); long nr2 = get_irn_node_nr(e2->block); if (nr1 < nr2) { return 1; } else if (nr1 > nr2) { return -1; } else if (e1->pos < e2->pos) { return 1; } else if (e1->pos > e2->pos) { return -1; } else { return 0; } } static int cmp_edges(const void *d1, const void *d2) { const edge_t *e1 = (const edge_t*)d1; const edge_t *e2 = (const edge_t*)d2; double freq1 = e1->execfreq; double freq2 = e2->execfreq; if (freq1 < freq2) { return 1; } else if (freq1 > freq2) { return -1; } else { return cmp_edges_base(e1, e2); } } static int cmp_edges_outedge_penalty(const void *d1, const void *d2) { const edge_t *e1 = (const edge_t*)d1; const edge_t *e2 = (const edge_t*)d2; double pen1 = e1->outedge_penalty_freq; double pen2 = e2->outedge_penalty_freq; if (pen1 > pen2) { return 1; } else if (pen1 < pen2) { return -1; } else { return cmp_edges_base(e1, e2); } } static void clear_loop_links(ir_loop *loop) { set_loop_link(loop, NULL); for (int i = 0, n = get_loop_n_elements(loop); i < n; ++i) { loop_element elem = get_loop_element(loop, i); if (*elem.kind == k_ir_loop) { clear_loop_links(elem.son); } } } static void coalesce_blocks(blocksched_env_t *env) { /* sort interblock edges by execution frequency */ edge_t *edges = env->edges; QSORT_ARR(edges, cmp_edges); /* run1: only look at jumps */ size_t edge_count = ARR_LEN(edges); for (size_t i = 0; i < edge_count; ++i) { edge_t const *const edge = &edges[i]; /* only check edge with highest frequency */ if (!edge->highest_execfreq) continue; /* the block might have been removed already... */ ir_node *const block = edge->block; if (is_Bad(get_Block_cfgpred(block, 0))) continue; ir_node *pred_block = get_Block_cfgpred_block(block, edge->pos); blocksched_entry_t *entry = get_blocksched_entry(block); blocksched_entry_t *pred_entry = get_blocksched_entry(pred_block); /* is one of the blocks already attached to another block? */ if (pred_entry->next != NULL || entry->prev != NULL) continue; /* only coalesce jumps */ if (get_block_succ_next(pred_block, get_block_succ_first(pred_block)) != NULL) continue; /* schedule the two blocks behind each other */ DB((dbg, LEVEL_1, "Coalesce (Jump) %+F -> %+F (%.3g)\n", pred_entry->block, entry->block, edge->execfreq)); pred_entry->next = entry; entry->prev = pred_entry; } /* run2: pick loop fallthroughs */ clear_loop_links(get_irg_loop(env->irg)); QSORT_ARR(edges, cmp_edges_outedge_penalty); for (size_t i = 0; i < edge_count; ++i) { edge_t const *const edge = &edges[i]; /* already seen all loop outedges? */ if (edge->outedge_penalty_freq == 0) break; ir_node *const block = edge->block; /* the block might have been removed already... */ ir_node *const pred = get_Block_cfgpred(block, edge->pos); if (is_Bad(pred)) continue; ir_node *const pred_block = get_nodes_block(pred); blocksched_entry_t *const entry = get_blocksched_entry(block); blocksched_entry_t *const pred_entry = get_blocksched_entry(pred_block); if (pred_entry->next != NULL || entry->prev != NULL) continue; /* we want at most 1 outedge fallthrough per loop */ ir_loop *loop = get_irn_loop(pred_block); if (get_loop_link(loop) != NULL) continue; /* schedule the 2 blocks behind each other */ DB((dbg, LEVEL_1, "Coalesce (Loop Outedge) %+F -> %+F (%.3g)\n", pred_entry->block, entry->block, edge->execfreq)); pred_entry->next = entry; entry->prev = pred_entry; /* all loops left have an outedge now */ ir_loop *outer_loop = get_irn_loop(block); do { /* we set loop link to loop to mark it */ set_loop_link(loop, loop); loop = get_loop_outer_loop(loop); } while (loop != outer_loop); } /* sort interblock edges by execution frequency */ QSORT_ARR(edges, cmp_edges); /* run3: remaining edges */ for (size_t i = 0; i < edge_count; ++i) { const edge_t *edge = &edges[i]; ir_node *block = edge->block; /* the block might have been removed already... */ ir_node *const pred = get_Block_cfgpred(block, edge->pos); if (is_Bad(pred)) continue; ir_node *const pred_block = get_nodes_block(pred); blocksched_entry_t *const entry = get_blocksched_entry(block); blocksched_entry_t *const pred_entry = get_blocksched_entry(pred_block); /* is one of the blocks already attached to another block? */ if (pred_entry->next != NULL || entry->prev != NULL) continue; /* schedule the two blocks behind each other */ DB((dbg, LEVEL_1, "Coalesce (CondJump) %+F -> %+F (%.3g)\n", pred_entry->block, entry->block, edge->execfreq)); pred_entry->next = entry; entry->prev = pred_entry; } } static void pick_block_successor(blocksched_entry_t *entry, blocksched_env_t *env) { ir_node *const block = entry->block; if (irn_visited_else_mark(block)) return; env->blockcount++; DB((dbg, LEVEL_1, "Pick succ of %+F\n", block)); /* put all successors into the worklist */ foreach_block_succ(block, edge) { ir_node *succ_block = get_edge_src_irn(edge); if (irn_visited(succ_block)) continue; /* we only need to put the first of a series of already connected * blocks into the worklist */ blocksched_entry_t *succ_entry = get_blocksched_entry(succ_block); while (succ_entry->prev != NULL) { /* break cycles... */ if (succ_entry->prev->block == succ_block) { succ_entry->prev->next = NULL; succ_entry->prev = NULL; break; } succ_entry = succ_entry->prev; } if (irn_visited(succ_entry->block)) continue; DB((dbg, LEVEL_1, "Put %+F into worklist\n", succ_entry->block)); deq_push_pointer_right(&env->worklist, succ_entry->block); } if (entry->next != NULL) { pick_block_successor(entry->next, env); return; } DB((dbg, LEVEL_1, "deciding...\n")); double best_succ_execfreq = -1; /* no successor yet: pick the successor block with the highest execution * frequency which has no predecessor yet */ ir_node *succ = NULL; foreach_block_succ(block, edge) { ir_node *const succ_block = get_edge_src_irn(edge); if (irn_visited(succ_block)) continue; blocksched_entry_t *const succ_entry = get_blocksched_entry(succ_block); if (succ_entry->prev != NULL) continue; double execfreq = get_block_execfreq(succ_block); if (best_succ_execfreq < execfreq) { best_succ_execfreq = execfreq; succ = succ_block; } } if (succ == NULL) { DB((dbg, LEVEL_1, "pick from worklist\n")); do { if (deq_empty(&env->worklist)) { DB((dbg, LEVEL_1, "worklist empty\n")); return; } succ = deq_pop_pointer_left(ir_node, &env->worklist); } while (irn_visited(succ)); } blocksched_entry_t *const succ_entry = get_blocksched_entry(succ); entry->next = succ_entry; succ_entry->prev = entry; pick_block_successor(succ_entry, env); } static blocksched_entry_t *finish_block_schedule(blocksched_env_t *env) { ir_graph *const irg = env->irg; ir_reserve_resources(irg, IR_RESOURCE_IRN_VISITED); inc_irg_visited(irg); /* Exclude the end block from the block schedule. */ mark_irn_visited(get_irg_end_block(irg)); deq_init(&env->worklist); ir_node *const startblock = get_irg_start_block(irg); blocksched_entry_t *const entry = get_blocksched_entry(startblock); pick_block_successor(entry, env); assert(deq_empty(&env->worklist)); deq_free(&env->worklist); ir_free_resources(irg, IR_RESOURCE_IRN_VISITED); return entry; } static ir_node **create_blocksched_array(blocksched_env_t *const env) { DB((dbg, LEVEL_1, "Blockschedule:\n")); unsigned i = 0; blocksched_entry_t *const first = finish_block_schedule(env); unsigned const count = env->blockcount; struct obstack *const obst = be_get_be_obst(env->irg); ir_node **const block_list = NEW_ARR_D(ir_node*, obst, count); for (blocksched_entry_t const *entry = first; entry; entry = entry->next) { assert(i < count); block_list[i++] = entry->block; DB((dbg, LEVEL_1, "\t%+F\n", entry->block)); } assert(i == count); return block_list; } ir_node **be_create_block_schedule(ir_graph *irg) { blocksched_env_t env = { .irg = irg, .edges = NEW_ARR_F(edge_t, 0), .blockcount = 0, }; obstack_init(&env.obst); assure_loopinfo(irg); // collect edge execution frequencies ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK); irg_block_walk_graph(irg, collect_egde_frequency, NULL, &env); remove_empty_blocks(irg); coalesce_blocks(&env); ir_node **const block_list = create_blocksched_array(&env); ir_free_resources(irg, IR_RESOURCE_IRN_LINK); DEL_ARR_F(env.edges); obstack_free(&env.obst, NULL); return block_list; } BE_REGISTER_MODULE_CONSTRUCTOR(be_init_blocksched) void be_init_blocksched(void) { FIRM_DBG_REGISTER(dbg, "firm.be.blocksched"); }