/*
 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief   This is the main ia32 firm backend driver.
 * @author  Christian Wuerdig
 */
#include "ia32_bearch_t.h"

#include "beflags.h"
#include "begnuas.h"
#include "bemodule.h"
#include "bera.h"
#include "besched.h"
#include "bespillslots.h"
#include "bestack.h"
#include "betranshlp.h"
#include "beutil.h"
#include "bevarargs.h"
#include "gen_ia32_regalloc_if.h"
#include "ia32_architecture.h"
#include "ia32_emitter.h"
#include "ia32_encode.h"
#include "ia32_new_nodes.h"
#include "ia32_optimize.h"
#include "ia32_transform.h"
#include "ident_t.h"
#include "instrument.h"
#include "ircons.h"
#include "iredges_t.h"
#include "irgmod.h"
#include "irgopt.h"
#include "irgwalk.h"
#include "iropt_t.h"
#include "irtools.h"
#include "isas.h"
#include "lc_opts_enum.h"
#include "lower_alloc.h"
#include "lower_builtins.h"
#include "lower_calls.h"
#include "lower_mode_b.h"
#include "lower_softfloat.h"
#include "lowering.h"
#include "panic.h"
#include "platform_t.h"
#include "target_t.h"
#include "x86_x87.h"

pmap *ia32_tv_ent; /**< A map of entities that store const tarvals */

ir_mode *ia32_mode_fpcw;
ir_mode *ia32_mode_flags;
ir_mode *ia32_mode_gp;
ir_mode *ia32_mode_float64;
ir_mode *ia32_mode_float32;

typedef ir_node *(*create_const_node_func)(dbg_info *dbgi, ir_node *block);

/**
 * Used to create per-graph unique pseudo nodes.
 */
static inline ir_node *create_const(ir_graph *irg, ir_node **place,
                                    create_const_node_func func,
                                    const arch_register_t *reg)
{
	if (*place != NULL)
		return *place;

	ir_node *block = get_irg_start_block(irg);
	ir_node *res   = func(NULL, block);
	arch_set_irn_register(res, reg);
	*place = res;
	/* We need a keep edge on our cached nodes, so that following firm
	 * irgwalks will not miss them. */
	keep_alive(res);
	return res;
}

ir_node *ia32_new_NoReg_gp(ir_graph *irg)
{
	ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
	return create_const(irg, &irg_data->noreg_gp, new_bd_ia32_NoReg_GP,
	                    &ia32_registers[REG_GP_NOREG]);
}

ir_node *ia32_new_NoReg_fp(ir_graph *irg)
{
	ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
	return create_const(irg, &irg_data->noreg_fp, new_bd_ia32_NoReg_FP,
	                    &ia32_registers[REG_FP_NOREG]);
}

ir_node *ia32_new_NoReg_xmm(ir_graph *irg)
{
	ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
	return create_const(irg, &irg_data->noreg_xmm, new_bd_ia32_NoReg_XMM,
	                    &ia32_registers[REG_XMM_NOREG]);
}

ir_node *ia32_new_Fpu_truncate(ir_graph *irg)
{
	ia32_irg_data_t *irg_data = ia32_get_irg_data(irg);
	return create_const(irg, &irg_data->fpu_trunc_mode, new_bd_ia32_ChangeCW,
	                    &ia32_registers[REG_FPCW]);
}
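/* Note on the constructors above: create_const() implements a simple
 * per-graph cache, so e.g.
 *     ir_node *noreg = ia32_new_NoReg_gp(irg);
 * builds the NoReg_GP pseudo node once per graph and afterwards always
 * returns the cached, kept-alive node. (Illustrative usage sketch.) */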
/**
 * Returns the admissible noreg register node for input register pos of node irn.
 */
static ir_node *ia32_get_admissible_noreg(ir_node *irn, int pos)
{
	ir_graph                  *irg = get_irn_irg(irn);
	const arch_register_req_t *req = arch_get_irn_register_req_in(irn, pos);
	if (req->cls == &ia32_reg_classes[CLASS_ia32_gp])
		return ia32_new_NoReg_gp(irg);

	if (ia32_cg_config.use_sse2) {
		return ia32_new_NoReg_xmm(irg);
	} else {
		return ia32_new_NoReg_fp(irg);
	}
}

static void ia32_set_frame_entity(ir_node *node, ir_entity *entity,
                                  unsigned size, unsigned po2align)
{
	ia32_attr_t *const attr = get_ia32_attr(node);
	attr->addr.immediate = (x86_imm32_t) {
		.kind   = X86_IMM_FRAMEENT,
		.entity = entity,
		.offset = attr->addr.immediate.offset,
	};
	assert(get_ia32_frame_use(node) != IA32_FRAME_USE_NONE);

	/* set ls_mode based on the entity unless we explicitly requested
	 * a certain mode */
	if (get_ia32_frame_use(node) != IA32_FRAME_USE_AUTO
	 || is_ia32_Cmp(node) || is_ia32_Conv_I2I(node))
		return;
	(void)po2align;
	attr->size = size == 12 ? X86_SIZE_80 : x86_size_from_bytes(size);
	/* 8bit stores have a special register requirement, so we can't simply
	 * change the size to 8bit here. The "hack" in
	 * ia32_collect_frame_entity_nodes() should take care that it never
	 * happens. */
	assert(!is_ia32_Store(node) || attr->size > X86_SIZE_8);
}

static void ia32_determine_frameoffset_addr(ir_node *const node,
                                            x86_addr_t *const addr,
                                            int const sp_offset)
{
	if (addr->immediate.kind == X86_IMM_FRAMEENT) {
#ifndef NDEBUG
		if (is_ia32_irn(node))
			get_ia32_attr(node)->old_frame_ent = addr->immediate.entity;
#endif
		addr->immediate.offset += get_entity_offset(addr->immediate.entity);
		addr->immediate.entity  = NULL;
		addr->immediate.kind    = X86_IMM_FRAMEOFFSET;
	}

	if (addr->immediate.kind == X86_IMM_FRAMEOFFSET) {
		assert(x86_addr_variant_has_base(addr->variant));
		arch_register_t const *const base
			= arch_get_irn_register_in(node, addr->base_input);
		if (base == &ia32_registers[REG_ESP]) {
			addr->immediate.offset += sp_offset;
		} else {
			assert(base == &ia32_registers[REG_EBP]);
			/* we calculate offsets relative to the SP value at function begin,
			 * but EBP points after the saved old frame pointer */
			addr->immediate.offset += IA32_REGISTER_SIZE;
		}
		addr->immediate.kind = X86_IMM_VALUE;
	}
}

static void ia32_determine_frameoffset(ir_node *node, int sp_offset)
{
	if (is_ia32_irn(node)) {
		ia32_attr_t *const attr = get_ia32_attr(node);
		ia32_determine_frameoffset_addr(node, &attr->addr, sp_offset);
	} else if (be_is_Asm(node)) {
		be_asm_attr_t const *const attr = get_be_asm_attr_const(node);
		x86_asm_operand_t   *const ops  = (x86_asm_operand_t*)attr->operands;
		for (size_t i = 0, n = ARR_LEN(ops); i != n; ++i) {
			x86_asm_operand_t *const op = &ops[i];
			if (op->op.kind == BE_ASM_OPERAND_MEMORY)
				ia32_determine_frameoffset_addr(node, &op->u.addr, sp_offset);
		}
	}
}

static void ia32_sp_sim(ir_node *const node, stack_pointer_state_t *state)
{
	/* Pop nodes modify the stack pointer before calculating the destination
	 * address, so do this first */
	if (is_ia32_Pop(node) || is_ia32_PopMem(node)) {
		ia32_attr_t const *const attr = get_ia32_attr_const(node);
		state->offset -= x86_bytes_from_size(attr->size);
	}

	if (!state->no_change)
		ia32_determine_frameoffset(node, state->offset);

	if (is_ia32_Call(node)) {
		state->offset -= get_ia32_call_attr_const(node)->pop;
	} else if (is_ia32_Push(node)) {
		ia32_attr_t const *const attr = get_ia32_attr_const(node);
		state->offset += x86_bytes_from_size(attr->size);
	} else if (is_ia32_Leave(node) || is_ia32_CopyEbpEsp(node)) {
		state->offset        = 0;
		state->align_padding = 0;
	} else if (is_ia32_SubSP(node)) {
		state->align_padding = 0;
	}
}
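/* Worked example for ia32_sp_sim() (illustrative): simulating a 4 byte
 * ia32_Push increases state->offset by 4, an ia32_Pop of the same size
 * decreases it by 4, and Leave/CopyEbpEsp reset it to 0.
 * ia32_get_sp_change() below runs this simulation for a single node against
 * the arbitrary baseline 160 and returns the difference, i.e. a negative
 * value for nodes that let the stack grow. */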
int ia32_get_sp_change(ir_node *const node)
{
	if (be_is_IncSP(node))
		return -be_get_IncSP_offset(node);
	stack_pointer_state_t state = {
		.offset    = 160,
		.no_change = true,
	};
	ia32_sp_sim(node, &state);
	int res = 160 - state.offset;
	assert(-16 <= res && res <= 16);
	return res;
}

/**
 * Get the estimated cycle count for @p irn.
 *
 * @param irn  The node.
 * @return     The estimated cycle count for this operation
 */
static unsigned ia32_get_op_estimated_cost(ir_node const *const irn)
{
	if (!is_ia32_irn(irn))
		return 1;

	if (is_ia32_CopyB_i(irn)) {
		unsigned const size = get_ia32_copyb_size(irn);
		return 20 + size * 4 / 3;
	}

	unsigned cost = get_ia32_latency(irn);

	/* in case of address mode operations add additional cycles */
	if (get_ia32_op_type(irn) != ia32_Normal) {
		if (get_ia32_frame_use(irn) != IA32_FRAME_USE_NONE
		 || (is_ia32_NoReg_GP(get_irn_n(irn, n_ia32_base))
		  && is_ia32_NoReg_GP(get_irn_n(irn, n_ia32_index)))) {
			/* Stack access, assume it is cached. */
			cost += 5;
		} else {
			/* Access probably elsewhere. */
			cost += 20;
		}
	}

	return cost;
}

/**
 * Check if irn can load its operand at position i from memory (source address mode).
 *
 * @param irn  The irn to be checked
 * @param i    The operand's position
 * @return     whether the operand can be loaded
 */
static bool ia32_possible_memory_operand(const ir_node *irn, unsigned int i)
{
	if (!is_ia32_irn(irn)                    /* must be an ia32 irn */
	 || get_ia32_op_type(irn) != ia32_Normal /* must not already be an address mode irn */
	 || get_ia32_frame_use(irn) != IA32_FRAME_USE_NONE) /* must not already use the frame */
		return false;

	switch (get_ia32_am_support(irn)) {
	case ia32_am_none:
		return false;

	case ia32_am_unary:
		if (i != n_ia32_unary_op)
			return false;
		break;

	case ia32_am_binary:
		switch (i) {
		case n_ia32_binary_left: {
			if (!is_ia32_commutative(irn))
				return false;

			/* we can't swap left/right for limited registers
			 * (as this (currently) breaks the constraint handling copies) */
			arch_register_req_t const *const req
				= arch_get_irn_register_req_in(irn, n_ia32_binary_left);
			if (req->limited != NULL)
				return false;
			break;
		}

		case n_ia32_binary_right:
			break;

		default:
			return false;
		}
		break;

	default:
		panic("unknown AM type");
	}

	/* HACK: must not already use "real" memory.
	 * This can happen for Call and Div. While we should be able to use Sync,
	 * this currently confuses the spillslot coalescing code. */
	if (!is_NoMem(get_irn_n(irn, n_ia32_mem)))
		return false;

	ir_node       *const op   = get_irn_n(irn, i);
	ir_node const *const load = get_Proj_pred(op);
	ia32_attr_t const *const attr = get_ia32_attr_const(load);
	if (attr->size > X86_SIZE_64)
		return false;
	/* Don't do reload folding for x87 nodes for now, as we can't predict yet
	 * whether the spillslot must be widened to 80bit, for which no AM
	 * operations exist. */
	if (is_ia32_fld(load))
		return false;

	return true;
}
static void ia32_perform_memory_operand(ir_node *irn, unsigned int i)
{
	if (!ia32_possible_memory_operand(irn, i))
		return;

	ir_node        *const op           = get_irn_n(irn, i);
	ir_node        *const load         = get_Proj_pred(op);
	x86_insn_size_t const load_size    = get_ia32_attr_const(load)->size;
	ir_node        *const spill        = get_irn_n(load, n_ia32_mem);
	ia32_attr_t    *const attr         = get_ia32_attr(irn);
	x86_insn_size_t const dest_op_size = attr->size;
	if (load_size <= dest_op_size)
		attr->size = load_size;
	set_ia32_op_type(irn, ia32_AddrModeS);
	set_ia32_frame_use(irn, IA32_FRAME_USE_AUTO);

	if (i == n_ia32_binary_left
	 && get_ia32_am_support(irn) == ia32_am_binary
	    /* immediates are only allowed on the right side */
	 && !is_ia32_Immediate(get_irn_n(irn, n_ia32_binary_right))) {
		ia32_swap_left_right(irn);
		i = n_ia32_binary_right;
	}

	assert(is_NoMem(get_irn_n(irn, n_ia32_mem)));

	set_irn_n(irn, n_ia32_base, get_irg_frame(get_irn_irg(irn)));
	set_irn_n(irn, n_ia32_mem,  spill);
	set_irn_n(irn, i,           ia32_get_admissible_noreg(irn, i));
	attr->addr.variant = X86_ADDR_BASE;
	set_ia32_is_reload(irn);

	/* kill the reload */
	assert(get_irn_n_edges(op) == 0);
	assert(get_irn_n_edges(load) == 1);
	sched_remove(load);
	kill_node(op);
	kill_node(load);
}

static bool gprof;
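/* ia32_turn_back_dest_am() splits a destination address mode
 * (read-modify-write) instruction back into an explicit load/op/store
 * sequence; roughly,
 *     addl %eax, <slot>(%esp)
 * becomes
 *     movl <slot>(%esp), %ecx
 *     addl %eax, %ecx
 *     movl %ecx, <slot>(%esp)
 * (illustrative assembly; register assignment is not decided here). */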
static ir_node *ia32_turn_back_dest_am(ir_node *node)
{
	typedef ir_node *construct_binop_func(
		dbg_info *db, ir_node *block, ir_node *base, ir_node *index,
		ir_node *mem, ir_node *op1, ir_node *op2, x86_insn_size_t size);

	x86_insn_size_t const size    = get_ia32_attr_const(node)->size;
	bool            const is_8bit = size == X86_SIZE_8;

	construct_binop_func *func;
	switch (get_ia32_irn_opcode(node)) {
	case iro_ia32_AddMem: func = is_8bit ? new_bd_ia32_Add_8bit : new_bd_ia32_Add; break;
	case iro_ia32_AndMem: func = is_8bit ? new_bd_ia32_And_8bit : new_bd_ia32_And; break;
	case iro_ia32_OrMem:  func = is_8bit ? new_bd_ia32_Or_8bit  : new_bd_ia32_Or;  break;
	case iro_ia32_SubMem: func = is_8bit ? new_bd_ia32_Sub_8bit : new_bd_ia32_Sub; break;
	case iro_ia32_XorMem: func = is_8bit ? new_bd_ia32_Xor_8bit : new_bd_ia32_Xor; break;
	default: panic("cannot turn back DestAM for %+F", node);
	}

	dbg_info *const dbgi  = get_irn_dbg_info(node);
	ir_node  *const block = get_nodes_block(node);
	ir_node  *const base  = get_irn_n(node, n_ia32_base);
	ir_node  *const idx   = get_irn_n(node, n_ia32_index);
	ir_node  *const mem   = get_irn_n(node, n_ia32_mem);
	ir_node  *const load  = new_bd_ia32_Load(dbgi, block, base, idx, mem, size,
	                                         false);
	ia32_copy_am_attrs(load, node);
	if (is_ia32_is_reload(node))
		set_ia32_is_reload(load);
	sched_add_before(node, load);
	ir_node *const load_res = be_new_Proj(load, pn_ia32_Load_res);
	ir_node *const load_mem = be_new_Proj(load, pn_ia32_Load_M);

	ir_graph *const irg      = get_irn_irg(node);
	ir_node  *const noreg    = ia32_new_NoReg_gp(irg);
	ir_node  *const nomem    = get_irg_no_mem(irg);
	ir_node  *const operand  = get_irn_n(node, n_ia32_binary_left);
	ir_node  *const new_node = func(dbgi, block, noreg, noreg, nomem,
	                                load_res, operand, size);
	set_irn_mode(new_node, mode_T);

	arch_set_irn_register_out(new_node, pn_ia32_flags,
	                          &ia32_registers[REG_EFLAGS]);

	ir_node *const res_proj = be_new_Proj(new_node, pn_ia32_res);
	ir_node *const store    = is_8bit
		? new_bd_ia32_Store_8bit(dbgi, block, base, idx, load_mem, res_proj, size)
		: new_bd_ia32_Store(dbgi, block, base, idx, load_mem, res_proj, size);
	ia32_copy_am_attrs(store, node);
	set_ia32_op_type(store, ia32_AddrModeD);
	sched_add_after(node, store);

	ir_node *const mem_proj = get_Proj_for_pn(node, pn_ia32_M);
	set_Proj_pred(mem_proj, store);
	set_Proj_num(mem_proj, pn_ia32_Store_M);

	sched_replace(node, new_node);
	exchange(node, new_node);
	return new_node;
}

ir_node *ia32_turn_back_am(ir_node *node)
{
	ia32_attr_t *const attr = get_ia32_attr(node);
	dbg_info *dbgi     = get_irn_dbg_info(node);
	ir_graph *irg      = get_irn_irg(node);
	ir_node  *block    = get_nodes_block(node);
	ir_node  *base     = get_irn_n(node, n_ia32_base);
	ir_node  *idx      = get_irn_n(node, n_ia32_index);
	ir_node  *mem      = get_irn_n(node, n_ia32_mem);
	ir_node  *load     = new_bd_ia32_Load(dbgi, block, base, idx, mem,
	                                      attr->size, attr->sign_extend);
	ir_node  *load_res = be_new_Proj(load, pn_ia32_Load_res);

	ia32_copy_am_attrs(load, node);
	if (is_ia32_is_reload(node))
		set_ia32_is_reload(load);
	set_irn_n(node, n_ia32_mem, get_irg_no_mem(irg));

	switch (get_ia32_am_support(node)) {
	case ia32_am_unary:
		set_irn_n(node, n_ia32_unary_op, load_res);
		break;

	case ia32_am_binary:
		if (is_ia32_Immediate(get_irn_n(node, n_ia32_binary_right))) {
			set_irn_n(node, n_ia32_binary_left, load_res);
		} else {
			set_irn_n(node, n_ia32_binary_right, load_res);
		}
		break;

	default:
		panic("unknown AM type");
	}
	ir_node *noreg = ia32_new_NoReg_gp(irg);
	set_irn_n(node, n_ia32_base,  noreg);
	set_irn_n(node, n_ia32_index, noreg);
	attr->addr.variant   = X86_ADDR_REG;
	attr->addr.immediate = (x86_imm32_t) {
		.kind   = X86_IMM_VALUE,
		.offset = 0,
	};
	attr->addr.log_scale = 0;
	attr->frame_use      = IA32_FRAME_USE_NONE;

	/* rewire mem-proj */
	if (get_irn_mode(node) == mode_T) {
		foreach_out_edge(node, edge) {
			ir_node *out = get_edge_src_irn(edge);
			if (get_irn_mode(out) == mode_M) {
				set_Proj_pred(out, load);
				set_Proj_num(out, pn_ia32_Load_M);
				break;
			}
		}
	}

	set_ia32_op_type(node, ia32_Normal);
	if (sched_is_scheduled(node))
		sched_add_before(node, load);

	return load_res;
}

static ir_node *flags_remat(ir_node *node, ir_node *after)
{
	/* we should turn back address modes when rematerializing nodes */
	ir_node *const block = get_block(after);

	ia32_op_type_t type = get_ia32_op_type(node);
	switch (type) {
	case ia32_AddrModeS:
		ia32_turn_back_am(node);
		break;

	case ia32_AddrModeD:
		node = ia32_turn_back_dest_am(node);
		break;

	default:
		assert(type == ia32_Normal);
		break;
	}

	ir_node *copy = exact_copy(node);
	set_nodes_block(copy, block);
	sched_add_after(after, copy);
	return copy;
}
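/* ia32_try_replace_flags() below exploits that comparing with swapped
 * operands yields the same information under reversed condition codes:
 * e.g. (illustrative) a consumer of "cmpl %ebx, %eax" testing "less" can
 * reuse the flags of an existing "cmpl %eax, %ebx" if its condition code is
 * turned into "greater". */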
COMPILETIME_ASSERT((int)(n_ia32_Sub_minuend)    == (int)(n_ia32_Cmp_left)
                && (int)(n_ia32_Sub_subtrahend) == (int)(n_ia32_Cmp_right),
                   Cmp_and_Sub_operand_numbers_equal)

static bool ia32_try_replace_flags(ir_node *consumers, ir_node *flags,
                                   ir_node *available)
{
	if (!is_ia32_Sub(flags) && !is_ia32_Cmp(flags))
		return false;

	unsigned pn;
	if (is_ia32_Sub(available)) {
		pn = pn_ia32_Sub_flags;
	} else if (is_ia32_Cmp(available)) {
		pn = pn_ia32_Cmp_eflags;
	} else {
		return false;
	}

	/* Assuming CSE would have found the more obvious case */
	ir_node *const flags_left  = get_irn_n(flags,     n_ia32_binary_left);
	ir_node *const avail_right = get_irn_n(available, n_ia32_binary_right);
	if (flags_left != avail_right)
		return false;

	ir_node *const avail_left  = get_irn_n(available, n_ia32_binary_left);
	ir_node *const flags_right = get_irn_n(flags,     n_ia32_binary_right);
	if (avail_left != flags_right)
		return false;

	/* We can use available if we reverse the consumers' condition codes. */
	arch_set_irn_register_out(available, pn, &ia32_registers[REG_EFLAGS]);
	ir_node *const proj = get_irn_mode(available) == mode_T
		? be_get_or_make_Proj_for_pn(available, pn)
		: available;
	for (ir_node *c = consumers; c != NULL; c = get_irn_link(c)) {
		x86_condition_code_t cc = get_ia32_condcode(c);
		set_ia32_condcode(c, x86_invert_condition_code(cc));

		int const pos = be_get_input_pos_for_req(c, &ia32_class_reg_req_flags);
		assert(pos >= 0);
		set_irn_n(c, pos, proj);
	}
	return true;
}

static void remat_simplifier(ir_node *node, void *env)
{
	(void)env;

	/* A Sub with unused result is a Cmp. */
	if (is_ia32_Sub(node) && get_irn_mode(node) == mode_T) {
		ir_node *projs[] = { [pn_ia32_Sub_M] = NULL };
		foreach_out_edge(node, out) {
			ir_node *const proj = get_edge_src_irn(out);
			unsigned const num  = get_Proj_num(proj);
			assert(num < ARRAY_SIZE(projs));
			assert(!projs[num] && "duplicate Proj");
			projs[num] = proj;
		}

		ir_node       *res_keep = NULL;
		ir_node *const sub_res  = projs[pn_ia32_Sub_res];
		if (sub_res) {
			foreach_out_edge(sub_res, out) {
				ir_node *const user = get_edge_src_irn(out);
				if (be_is_Keep(user)) {
					assert(!res_keep && "Proj has two be_Keep");
					res_keep = user;
				} else {
					return;
				}
			}
		}

		dbg_info *const dbgi  = get_irn_dbg_info(node);
		ir_node  *const block = get_nodes_block(node);
		ir_node  *const base  = get_irn_n(node, n_ia32_Sub_base);
		ir_node  *const idx   = get_irn_n(node, n_ia32_Sub_index);
		ir_node  *const mem   = get_irn_n(node, n_ia32_Sub_mem);
		ir_node  *const minu  = get_irn_n(node, n_ia32_Sub_minuend);
		ir_node  *const subt  = get_irn_n(node, n_ia32_Sub_subtrahend);
		x86_insn_size_t const size = get_ia32_attr_const(node)->size;
		bool     const is_8bit = size == X86_SIZE_8;
		ir_node *cmp = is_8bit
			? new_bd_ia32_Cmp_8bit(dbgi, block, base, idx, mem, minu, subt,
			                       size, false)
			: new_bd_ia32_Cmp(dbgi, block, base, idx, mem, minu, subt,
			                  size, false);
		arch_set_irn_register(cmp, &ia32_registers[REG_EFLAGS]);
		ia32_copy_am_attrs(cmp, node);

		sched_replace(node, cmp);

		if (get_ia32_op_type(node) == ia32_AddrModeS) {
			set_ia32_op_type(cmp, ia32_AddrModeS);
			set_irn_mode(cmp, mode_T);

			ir_node *const sub_mem = projs[pn_ia32_Sub_M];
			if (sub_mem) {
				ir_node *const proj_M = be_new_Proj(cmp, pn_ia32_Cmp_M);
				exchange(sub_mem, proj_M);
			}

			cmp = be_new_Proj(cmp, pn_ia32_Cmp_eflags);
		} else {
			assert(get_ia32_op_type(node) == ia32_Normal);
		}

		exchange(projs[pn_ia32_Sub_flags], cmp);

		if (res_keep) {
			sched_remove(res_keep);
			remove_keep_alive(res_keep);
			kill_node(res_keep);
		}
		kill_node(node);
	}
}

static void simplify_remat_nodes(ir_graph *irg)
{
	irg_walk_graph(irg, remat_simplifier, NULL, NULL);
	remove_End_Bads_and_doublets(get_irg_end(irg));
}
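/* Spill/reload code selection (sketch): gp values are spilled with a 32 bit
 * (or, for known narrower loads, smaller) Store, x87 values with an 80 bit
 * fst, and SSE values conservatively with a full 128 bit xxStore;
 * ia32_new_reload() below mirrors these choices. */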
static ir_node *ia32_new_spill(ir_node *value, ir_node *after)
{
	ir_graph *irg   = get_irn_irg(value);
	ir_node  *block = get_block(after);
	ir_node  *frame = get_irg_frame(irg);
	ir_node  *noreg = ia32_new_NoReg_gp(irg);
	ir_node  *nomem = get_irg_no_mem(irg);

	// FIXME: Find a way to not duplicate logic with ia32_new_reload()
	arch_register_req_t   const *const req  = arch_get_irn_register_req(value);
	arch_register_class_t const *const cls  = req->cls;
	ir_node               const *const skip = skip_Proj_const(value);
	ir_node *res;
	ir_node *store;
	if (cls == &ia32_reg_classes[CLASS_ia32_gp]) {
		x86_insn_size_t size = X86_SIZE_32;
		if (is_ia32_Load(skip))
			size = get_ia32_attr_const(skip)->size;

		store = size == X86_SIZE_8
			? new_bd_ia32_Store_8bit(NULL, block, frame, noreg, nomem, value, size)
			: new_bd_ia32_Store(NULL, block, frame, noreg, nomem, value, size);
		res = be_new_Proj(store, pn_ia32_Store_M);
	} else if (cls == &ia32_reg_classes[CLASS_ia32_fp]) {
		x86_insn_size_t size = X86_SIZE_80;
		if (is_ia32_fld(skip))
			size = get_ia32_attr_const(skip)->size;

		store = new_bd_ia32_fst(NULL, block, frame, noreg, nomem, value, size);
		res   = be_new_Proj(store, pn_ia32_fst_M);
	} else {
		assert(cls == &ia32_reg_classes[CLASS_ia32_xmm]);
		// TODO: find out when we can use xStore and only store 64bit
		store = new_bd_ia32_xxStore(NULL, block, frame, noreg, nomem, value,
		                            X86_SIZE_128);
		res   = be_new_Proj(store, pn_ia32_xxStore_M);
	}

	ia32_attr_t *const attr = get_ia32_attr(store);
	attr->addr.variant = X86_ADDR_BASE;
	set_ia32_op_type(store, ia32_AddrModeD);
	set_ia32_frame_use(store, IA32_FRAME_USE_AUTO);
	set_ia32_is_spill(store);
	sched_add_after(after, store);
	return res;
}

static ir_node *ia32_new_reload(ir_node *value, ir_node *spill,
                                ir_node *before)
{
	ir_graph *const irg   = get_irn_irg(before);
	ir_node  *const block = get_block(before);
	ir_node  *const noreg = ia32_new_NoReg_gp(irg);
	ir_node  *const frame = get_irg_frame(irg);

	// FIXME: Find a way to not duplicate logic with ia32_new_spill()
	arch_register_req_t   const *const req  = arch_get_irn_register_req(value);
	arch_register_class_t const *const cls  = req->cls;
	ir_node               const *const skip = skip_Proj_const(value);
	ir_node *load;
	if (cls == &ia32_reg_classes[CLASS_ia32_gp]) {
		x86_insn_size_t size = X86_SIZE_32;
		if (is_ia32_Load(skip))
			size = get_ia32_attr_const(skip)->size;
		load = new_bd_ia32_Load(NULL, block, frame, noreg, spill, size, false);
	} else if (cls == &ia32_reg_classes[CLASS_ia32_fp]) {
		x86_insn_size_t size = X86_SIZE_80;
		if (is_ia32_fld(skip))
			size = get_ia32_attr_const(skip)->size;
		load = new_bd_ia32_fld(NULL, block, frame, noreg, spill, size);
	} else {
		assert(cls == &ia32_reg_classes[CLASS_ia32_xmm]);
		// TODO: find out when we can use xLoad and only load 64bit
		/* Reload 128 bit SSE registers */
		load = new_bd_ia32_xxLoad(NULL, block, frame, noreg, spill,
		                          X86_SIZE_128);
	}
	ia32_attr_t *const attr = get_ia32_attr(load);
	attr->addr.variant = X86_ADDR_BASE;
	set_ia32_op_type(load, ia32_AddrModeS);
	set_ia32_frame_use(load, IA32_FRAME_USE_AUTO);
	set_ia32_is_reload(load);
	arch_add_irn_flags(load, arch_irn_flag_reload);
	sched_add_before(before, load);
	return be_new_Proj(load, pn_ia32_res);
}
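/* Helpers for transform_MemPerm() below: create_push() and create_pop()
 * build Push/PopMem nodes that address a spill slot entity on the frame,
 * and create_spproj() projects the updated stack pointer out of them. */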
static ir_node *create_push(ir_node *node, ir_node *schedpoint, ir_node *sp,
                            ir_node *mem, ir_entity *ent,
                            x86_insn_size_t const size)
{
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *block = get_nodes_block(node);
	ir_graph *irg   = get_irn_irg(node);
	ir_node  *noreg = ia32_new_NoReg_gp(irg);
	ir_node  *frame = get_irg_frame(irg);

	ir_node *const push = new_bd_ia32_Push(dbgi, block, frame, noreg, mem,
	                                       noreg, sp, size);
	ia32_attr_t *const attr = get_ia32_attr(push);
	attr->addr = (x86_addr_t) {
		.immediate = (x86_imm32_t) {
			.kind   = X86_IMM_FRAMEENT,
			.entity = ent,
		},
		.variant = X86_ADDR_BASE,
	};
	set_ia32_frame_use(push, IA32_FRAME_USE_AUTO);
	set_ia32_op_type(push, ia32_AddrModeS);
	set_ia32_is_spill(push);

	sched_add_before(schedpoint, push);
	return push;
}

static ir_node *create_pop(ir_node *node, ir_node *schedpoint, ir_node *sp,
                           ir_entity *ent, x86_insn_size_t size)
{
	dbg_info *dbgi  = get_irn_dbg_info(node);
	ir_node  *block = get_nodes_block(node);
	ir_graph *irg   = get_irn_irg(node);
	ir_node  *noreg = ia32_new_NoReg_gp(irg);
	ir_node  *frame = get_irg_frame(irg);

	ir_node *pop = new_bd_ia32_PopMem(dbgi, block, frame, noreg,
	                                  get_irg_no_mem(irg), sp, size);
	ia32_attr_t *const attr = get_ia32_attr(pop);
	attr->addr = (x86_addr_t) {
		.immediate = (x86_imm32_t) {
			.kind   = X86_IMM_FRAMEENT,
			.entity = ent,
		},
		.variant = X86_ADDR_BASE,
	};
	set_ia32_frame_use(pop, IA32_FRAME_USE_AUTO);
	set_ia32_op_type(pop, ia32_AddrModeD);
	set_ia32_is_reload(pop);
	sched_add_before(schedpoint, pop);
	return pop;
}

static ir_node *create_spproj(ir_node *const pred, unsigned const pos)
{
	return be_new_Proj_reg(pred, pos, &ia32_registers[REG_ESP]);
}

static x86_insn_size_t entsize2insnsize(unsigned const entsize)
{
	return entsize % 2 == 1 ? X86_SIZE_8
	     : entsize % 4 == 2 ? X86_SIZE_16
	     : (assert(entsize % 4 == 0), X86_SIZE_32);
}

/**
 * Transform a MemPerm. Currently we do this the ugly way and produce
 * push/pop cascades into/from memory. This is possible without using
 * any registers.
 */
static void transform_MemPerm(ir_node *node)
{
	ir_graph *irg   = get_irn_irg(node);
	ir_node  *sp    = be_get_Start_proj(irg, &ia32_registers[REG_ESP]);
	int       arity = be_get_MemPerm_entity_arity(node);
	ir_node **pops  = ALLOCAN(ir_node*, arity);

	/* create Pushs */
	for (int i = 0; i < arity; ++i) {
		ir_entity *inent  = be_get_MemPerm_in_entity(node, i);
		ir_entity *outent = be_get_MemPerm_out_entity(node, i);
		assert(inent->kind == IR_ENTITY_SPILLSLOT);
		assert(outent->kind == IR_ENTITY_SPILLSLOT);
		unsigned entsize  = inent->attr.spillslot.size;
		unsigned entsize2 = outent->attr.spillslot.size;
		ir_node *mem      = get_irn_n(node, i);

		/* work around cases where entities have different sizes */
		if (entsize2 < entsize)
			entsize = entsize2;

		int offset = 0;
		do {
			x86_insn_size_t const size = entsize2insnsize(entsize);
			ir_node        *const push = create_push(node, node, sp, mem,
			                                         inent, size);
			sp = create_spproj(push, pn_ia32_Push_stack);
			ia32_attr_t *const attr = get_ia32_attr(push);
			attr->addr.immediate.offset = offset;

			unsigned size_bytes = x86_bytes_from_size(size);
			offset  += size_bytes;
			entsize -= size_bytes;
		} while (entsize > 0);
		set_irn_n(node, i, new_r_Bad(irg, mode_X));
	}

	/* create pops */
	for (int i = arity; i-- > 0; ) {
		ir_entity *inent  = be_get_MemPerm_in_entity(node, i);
		ir_entity *outent = be_get_MemPerm_out_entity(node, i);
		assert(inent->kind == IR_ENTITY_SPILLSLOT);
		assert(outent->kind == IR_ENTITY_SPILLSLOT);
		unsigned entsize  = outent->attr.spillslot.size;
		unsigned entsize2 = inent->attr.spillslot.size;

		/* work around cases where entities have different sizes */
		if (entsize2 < entsize)
			entsize = entsize2;

		int      offset = entsize;
		ir_node *pop;
		do {
			x86_insn_size_t const size = entsize2insnsize(entsize);
			pop = create_pop(node, node, sp, outent, size);
			sp  = create_spproj(pop, pn_ia32_PopMem_stack);

			unsigned size_bytes = x86_bytes_from_size(size);
			offset  -= size_bytes;
			entsize -= size_bytes;
			ia32_attr_t *const attr = get_ia32_attr(pop);
			attr->addr.immediate.offset = offset;
		} while (entsize > 0);
		pops[i] = pop;
	}

	ir_node *const keep = be_new_Keep_one(sp);
	sched_replace(node, keep);

	/* exchange memprojs */
	foreach_out_edge_safe(node, edge) {
		ir_node *proj = get_edge_src_irn(edge);
		unsigned p    = get_Proj_num(proj);

		assert(p < (unsigned)arity);

		set_Proj_pred(proj, pops[p]);
		set_Proj_num(proj, pn_ia32_PopMem_M);
	}

	/* remove memperm */
	kill_node(node);
}
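/* Example (illustrative): a MemPerm swapping two 4 byte spill slots A and B
 * becomes the cascade
 *     pushl <A> ; pushl <B> ; popl <A> ; popl <B>
 * i.e. all source slots are pushed, then popped into the destination slots
 * in reverse order, which performs the permutation without needing a free
 * register. */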
/**
 * Block-walker: transforms MemPerm nodes after register allocation.
 */
static void ia32_after_ra_walker(ir_node *block, void *env)
{
	(void)env;

	/* beware: the schedule is changed here */
	sched_foreach_reverse_safe(block, node) {
		if (be_is_MemPerm(node))
			transform_MemPerm(node);
	}
}

/**
 * Collects nodes that need frame entities assigned.
 */
static void ia32_collect_frame_entity_nodes(ir_node *node, void *data)
{
	if (!is_ia32_irn(node) || get_ia32_op_type(node) != ia32_AddrModeS)
		return;
	ia32_attr_t const *const attr = get_ia32_attr_const(node);
	if (attr->addr.immediate.kind != X86_IMM_FRAMEENT) {
		assert(get_ia32_frame_use(node) == IA32_FRAME_USE_NONE);
		return;
	}
	if (attr->addr.immediate.entity != NULL)
		return;

	unsigned size;
	unsigned po2align;
	switch (get_ia32_frame_use(node)) {
	case IA32_FRAME_USE_NONE:
		panic("X86_IMM_FRAMEENT but IA32_FRAME_USE_NONE");
	case IA32_FRAME_USE_32BIT:
		size     = 4;
		po2align = 2;
		goto request_entity;
	case IA32_FRAME_USE_64BIT:
		size     = 8;
		po2align = 3;
		goto request_entity;
	case IA32_FRAME_USE_AUTO: {
		x86_insn_size_t const insn_size = get_ia32_attr_const(node)->size;
		size = x86_bytes_from_size(insn_size);
		if (size == 10) {
			size     = 12;
			po2align = 2;
		} else if (size == 1) {
			/* stupid hack: in some situations (like reloads folded into
			 * ConvI2I with 8bit mode) an 8bit entity and reload+spill would
			 * suffice, but an 8bit store has special register requirements
			 * on ia32 which we may not be able to fulfill anymore at this
			 * point, so extend the spillslot size to 16bit :-( */
			size     = 2;
			po2align = 1;
		} else {
			po2align = log2_floor(size);
		}
		goto request_entity;
	}
	}
	panic("invalid frame use type");
request_entity:;
	be_fec_env_t *env = (be_fec_env_t*)data;
	be_load_needs_frame_entity(env, node, size, po2align);
}

static void introduce_epilogue(ir_node *const ret, bool const omit_fp)
{
	ir_node        *curr_sp;
	ir_node  *const first_sp = get_irn_n(ret, n_ia32_Ret_stack);
	ir_node  *const block    = get_nodes_block(ret);
	ir_graph *const irg      = get_irn_irg(ret);
	if (!omit_fp) {
		arch_register_t const *const sp = &ia32_registers[REG_ESP];
		arch_register_t const *const bp = &ia32_registers[REG_EBP];
		int const n_ebp
			= be_get_input_pos_for_req(ret, &ia32_single_reg_req_gp_ebp);
		assert(n_ebp >= 0);

		ir_node *restore;
		ir_node *curr_bp  = get_irn_n(ret, n_ebp);
		ir_node *curr_mem = get_irn_n(ret, n_ia32_Ret_mem);
		if (ia32_cg_config.use_leave) {
			restore  = new_bd_ia32_Leave(NULL, block, curr_mem, curr_bp);
			curr_bp  = be_new_Proj_reg(restore, pn_ia32_Leave_frame, bp);
			curr_sp  = be_new_Proj_reg(restore, pn_ia32_Leave_stack, sp);
			curr_mem = be_new_Proj(restore, pn_ia32_Leave_M);
		} else {
			/* Copy ebp to esp. */
			curr_sp = new_bd_ia32_CopyEbpEsp(NULL, block, curr_bp);
			arch_set_irn_register(curr_sp, sp);
			sched_add_before(ret, curr_sp);

			/* Pop ebp. */
			restore  = new_bd_ia32_Pop_ebp(NULL, block, curr_mem, curr_sp,
			                               X86_SIZE_32);
			curr_bp  = be_new_Proj_reg(restore, pn_ia32_Pop_res, bp);
			curr_sp  = be_new_Proj_reg(restore, pn_ia32_Pop_stack, sp);
			curr_mem = be_new_Proj(restore, pn_ia32_Pop_M);
		}
		sched_add_before(ret, restore);
		set_irn_n(ret, n_ia32_Ret_mem, curr_mem);
		set_irn_n(ret, n_ebp,          curr_bp);
	} else {
		ir_type *const frame_type = get_irg_frame_type(irg);
		unsigned const frame_size = get_type_size(frame_type);
		curr_sp = ia32_new_IncSP(block, first_sp, -(int)frame_size, true);
		sched_add_before(ret, curr_sp);
	}
	set_irn_n(ret, n_ia32_Ret_stack, curr_sp);

	/* Keep verifier happy. */
	if (get_irn_n_edges(first_sp) == 0 && is_Proj(first_sp))
		kill_node(first_sp);
}
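/* introduce_prologue() below emits the standard frame setup, roughly
 *     pushl %ebp
 *     movl  %esp, %ebp
 *     subl  $framesize, %esp
 * when a frame pointer is used; with an omitted frame pointer only the
 * IncSP (the subl) remains. introduce_epilogue() above undoes this via
 * leave or an explicit copy/pop pair. (Illustrative assembly.) */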
static void introduce_prologue(ir_graph *const irg, bool omit_fp)
{
	const arch_register_t *sp         = &ia32_registers[REG_ESP];
	const arch_register_t *bp         = &ia32_registers[REG_EBP];
	ir_node               *start      = get_irg_start(irg);
	ir_node               *block      = get_nodes_block(start);
	ir_type               *frame_type = get_irg_frame_type(irg);
	unsigned               frame_size = get_type_size(frame_type);
	ir_node               *initial_sp = be_get_Start_proj(irg, sp);

	if (!omit_fp) {
		/* push ebp */
		ir_node *const mem        = get_irg_initial_mem(irg);
		ir_node *const noreg      = ia32_new_NoReg_gp(irg);
		ir_node *const initial_bp = be_get_Start_proj(irg, bp);
		ir_node *const push       = new_bd_ia32_Push(NULL, block, noreg, noreg,
		                                             mem, initial_bp,
		                                             initial_sp, X86_SIZE_32);
		sched_add_after(start, push);
		ir_node *const curr_mem = be_new_Proj(push, pn_ia32_Push_M);
		edges_reroute_except(mem, curr_mem, push);
		ir_node *const curr_sp = be_new_Proj_reg(push, pn_ia32_Push_stack, sp);

		/* move esp to ebp */
		ir_node *const curr_bp = be_new_Copy(block, curr_sp);
		sched_add_after(push, curr_bp);
		arch_copy_irn_out_info(curr_bp, 0, initial_bp);
		edges_reroute_except(initial_bp, curr_bp, push);

		ir_node *incsp = ia32_new_IncSP(block, curr_sp, frame_size, false);
		edges_reroute_except(initial_sp, incsp, push);
		sched_add_after(curr_bp, incsp);

		/* make sure the initial IncSP is really used by someone */
		be_keep_if_unused(incsp);
	} else {
		ir_node *const incsp = ia32_new_IncSP(block, initial_sp, frame_size,
		                                      false);
		edges_reroute_except(initial_sp, incsp, incsp);
		sched_add_after(start, incsp);
	}
}

/**
 * Put the prologue code at the beginning, epilogue code before each return.
 */
static void introduce_prologue_epilogue(ir_graph *const irg, bool omit_fp)
{
	/* introduce epilogue for every return node */
	foreach_irn_in(get_irg_end_block(irg), i, ret) {
		assert(is_ia32_Ret(ret));
		introduce_epilogue(ret, omit_fp);
	}
	introduce_prologue(irg, omit_fp);
}

static x87_attr_t *ia32_get_x87_attr(ir_node *const node)
{
	ia32_x87_attr_t *const attr = get_ia32_x87_attr(node);
	return &attr->x87;
}
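/* Frame layout note: the entity layout in ia32_before_emit() below is biased
 * by misalign (IA32_REGISTER_SIZE) for the return address pushed by the
 * call; with a frame pointer it additionally starts at -IA32_REGISTER_SIZE
 * to account for the saved old %ebp. */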
/**
 * Last touchups for the graph before emit: x87 simulation to replace the
 * virtual with real x87 instructions, creating a block schedule and
 * peephole optimizations.
 */
static void ia32_before_emit(ir_graph *irg)
{
	ia32_irg_data_t const *const irg_data = ia32_get_irg_data(irg);
	bool                   const omit_fp  = irg_data->omit_fp;

	/* create and coalesce frame entities */
	be_fec_env_t *fec_env = be_new_frame_entity_coalescer(irg);
	irg_walk_graph(irg, NULL, ia32_collect_frame_entity_nodes, fec_env);
	be_assign_entities(fec_env, ia32_set_frame_entity, omit_fp);
	be_free_frame_entity_coalescer(fec_env);

	ir_type *const frame = get_irg_frame_type(irg);
	be_sort_frame_entities(frame, omit_fp);
	unsigned const misalign = IA32_REGISTER_SIZE; /* return address on stack */
	int      const begin    = omit_fp ? 0 : -IA32_REGISTER_SIZE;
	be_layout_frame_type(frame, begin, misalign);

	irg_block_walk_graph(irg, NULL, ia32_after_ra_walker, NULL);

	introduce_prologue_epilogue(irg, omit_fp);

	/* fix stack entity offsets */
	be_fix_stack_nodes(irg, &ia32_registers[REG_ESP]);
	be_birg_from_irg(irg)->non_ssa_regs = NULL;
	unsigned const p2align = ir_platform.ia32_po2_stackalign;
	be_sim_stack_pointer(irg, misalign, p2align, ia32_sp_sim);

	/* fix 2-address code constraints */
	ia32_finish_irg(irg);
	be_dump(DUMP_RA, irg, "2addr");

	/* we might have to rewrite x87 virtual registers */
	if (ia32_get_irg_data(irg)->do_x87_sim) {
		x86_prepare_x87_callbacks_ia32();
		const x87_simulator_config_t config = {
			.regclass      = &ia32_reg_classes[CLASS_ia32_fp],
			.new_bd_fdup   = new_bd_ia32_fdup,
			.new_bd_fxch   = new_bd_ia32_fxch,
			.new_bd_fpop   = new_bd_ia32_fpop,
			.new_bd_ffreep = ia32_cg_config.use_ffreep ? new_bd_ia32_ffreep
			                                           : NULL,
			.get_x87_attr  = ia32_get_x87_attr,
		};
		x86_x87_simulate_graph(irg, &config);
	}
	be_dump(DUMP_RA, irg, "x87");

	/* do peephole optimizations */
	ia32_peephole_optimization(irg);

	be_remove_dead_nodes_from_schedule(irg);
}

/**
 * Prepare a graph and perform code selection.
 */
static void ia32_select_instructions(ir_graph *irg)
{
	if (gprof) {
		/* The Linux gprof implementation needs a base pointer */
		be_options.omit_fp = 0;

		static ir_entity *mcount = NULL;
		if (mcount == NULL) {
			ir_type *tp = new_type_method(0, 0, false, cc_cdecl_set,
			                              mtp_no_property);
			ident   *id = new_id_from_str("mcount");
			mcount = new_global_entity(get_glob_type(), id, tp,
			                           ir_visibility_external,
			                           IR_LINKAGE_DEFAULT);
		}
		instrument_initcall(irg, mcount);
	}

	ia32_adjust_pic(irg);

	be_timer_push(T_CODEGEN);
	ia32_transform_graph(irg);
	be_timer_pop(T_CODEGEN);

	be_dump(DUMP_BE, irg, "code-selection");

	/* do local optimizations (mainly CSE) */
	optimize_graph_df(irg);

	/* optimize address mode */
	ia32_optimize_graph(irg);

	be_dump(DUMP_BE, irg, "opt");

	/* do code placement, to optimize the position of constants */
	place_code(irg);

	/* backend code expects that outedges are always enabled */
	assure_edges(irg);

	be_dump(DUMP_BE, irg, "place");
}
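/* The following mux_is_* predicates classify Mux shapes the backend can
 * lower efficiently (SSE min/max, set sequences, float constant selection,
 * unsigned "difference or zero"); ia32_is_mux_allowed() combines them into
 * the if-conversion callback. */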
/**
 * Check if Mux(sel, mux_true, mux_false) would represent a Max or Min operation.
 */
static bool mux_is_float_min_max(ir_node const *const sel,
                                 ir_node const *const mux_true,
                                 ir_node const *const mux_false)
{
	if (!is_Cmp(sel))
		return false;

	ir_node *cmp_l = get_Cmp_left(sel);
	ir_node *cmp_r = get_Cmp_right(sel);
	if (!mode_is_float(get_irn_mode(cmp_l)))
		return false;

	/* check for min/max. They're defined as (C semantics):
	 *    min(a, b) = a < b ? a : b
	 * or min(a, b) = a <= b ? a : b
	 *    max(a, b) = a > b ? a : b
	 * or max(a, b) = a >= b ? a : b
	 * (Note we only handle float min/max here) */
	ir_relation relation = get_Cmp_relation(sel);
	switch (relation) {
	case ir_relation_greater_equal:
	case ir_relation_greater:
		/* this is a max */
		if (cmp_l == mux_true && cmp_r == mux_false)
			return true;
		break;

	case ir_relation_less_equal:
	case ir_relation_less:
		/* this is a min */
		if (cmp_l == mux_true && cmp_r == mux_false)
			return true;
		break;

	case ir_relation_unordered_greater_equal:
	case ir_relation_unordered_greater:
		/* this is a min */
		if (cmp_l == mux_false && cmp_r == mux_true)
			return true;
		break;

	case ir_relation_unordered_less_equal:
	case ir_relation_unordered_less:
		/* this is a max */
		if (cmp_l == mux_false && cmp_r == mux_true)
			return true;
		break;

	default:
		break;
	}

	return false;
}

static bool mux_is_set(ir_node const *const sel, ir_node const *const mux_true,
                       ir_node const *const mux_false)
{
	(void)sel;
	ir_mode *mode = get_irn_mode(mux_true);
	if (!be_mode_needs_gp_reg(mode) && mode != mode_b)
		return false;

	/* we can create a set plus up to 3 instructions for any combination
	 * of constants */
	if (is_Const(mux_true) && is_Const(mux_false))
		return true;

	return false;
}

static bool mux_is_float_const_const(ir_node const *const sel,
                                     ir_node const *const mux_true,
                                     ir_node const *const mux_false)
{
	(void)sel;
	if (!mode_is_float(get_irn_mode(mux_true)))
		return false;

	return is_Const(mux_true) && is_Const(mux_false);
}

static bool mux_is_doz(ir_node const *const sel, ir_node const *mux_true,
                       ir_node const *mux_false)
{
	if (!is_Cmp(sel))
		return false;

	ir_mode *mode = get_irn_mode(mux_true);
	if (mode_is_signed(mode) || mode_is_float(mode))
		return false;

	ir_relation relation  = get_Cmp_relation(sel);
	ir_node    *cmp_left  = get_Cmp_left(sel);
	ir_node    *cmp_right = get_Cmp_right(sel);

	/* "move" the zero constant to the false input */
	if (is_irn_null(mux_true)) {
		ir_node const *tmp = mux_false;
		mux_false = mux_true;
		mux_true  = tmp;
		relation  = get_negated_relation(relation);
	}
	if (!is_irn_null(mux_false))
		return false;
	if (!is_Sub(mux_true))
		return false;
	ir_node *sub_left  = get_Sub_left(mux_true);
	ir_node *sub_right = get_Sub_right(mux_true);

	/* Mux(a >=u b, 0, a-b) */
	if ((relation & ir_relation_greater)
	 && sub_left == cmp_left && sub_right == cmp_right)
		return true;
	/* Mux(a <=u b, 0, b-a) */
	if ((relation & ir_relation_less)
	 && sub_left == cmp_right && sub_right == cmp_left)
		return true;

	return false;
}
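/* Example: mux_is_doz() above recognizes the "difference or zero" pattern,
 * e.g. in C (illustrative):
 *     unsigned doz(unsigned a, unsigned b) { return a >= b ? a - b : 0; }
 * which the middleend turns into Mux(a >=u b, 0, a - b). */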
static int ia32_is_mux_allowed(ir_node const *const sel,
                               ir_node const *const mux_false,
                               ir_node const *const mux_true)
{
	/* middleend can handle some things */
	if (ir_is_optimizable_mux(sel, mux_false, mux_true))
		return true;
	/* we can handle Set for all modes and compares */
	if (mux_is_set(sel, mux_true, mux_false))
		return true;
	/* SSE has its own min/max operations */
	if (ia32_cg_config.use_sse2
	 && mux_is_float_min_max(sel, mux_true, mux_false))
		return true;
	/* we can handle Mux(?, Const[f], Const[f]) */
	if (mux_is_float_const_const(sel, mux_true, mux_false))
		return true;

	/* no support for 64bit inputs to cmov */
	ir_mode *mode = get_irn_mode(mux_true);
	if (get_mode_size_bits(mode) > 32)
		return false;
	/* we can handle Abs for all modes and compares (except 64bit) */
	if (ir_mux_is_abs(sel, mux_false, mux_true) != 0)
		return true;
	/* we can't handle MuxF yet */
	if (mode_is_float(mode))
		return false;

	if (mux_is_doz(sel, mux_true, mux_false))
		return true;

	/* Check the Cmp before the node */
	if (is_Cmp(sel)) {
		ir_mode *cmp_mode = get_irn_mode(get_Cmp_left(sel));
		/* we can't handle 64bit compares */
		if (get_mode_size_bits(cmp_mode) > 32)
			return false;
		/* we can't handle float compares */
		if (mode_is_float(cmp_mode))
			return false;
	}

	/* can we use cmov instructions? */
	return ia32_cg_config.use_cmov;
}

/**
 * Initializes the backend ISA.
 */
static void ia32_init(void)
{
	ia32_setup_cg_config();

	x86_set_be_asm_constraint_support(&ia32_asm_constraints);

	ia32_mode_fpcw    = new_non_arithmetic_mode("fpcw",  16);
	ia32_mode_flags   = new_non_arithmetic_mode("flags", 32);
	ia32_mode_gp      = new_int_mode("gp", 32, 0, 32);
	ia32_mode_float64 = new_float_mode("F64", irma_ieee754, 11, 52,
	                                   ir_overflow_indefinite);
	ia32_mode_float32 = new_float_mode("F32", irma_ieee754, 8, 23,
	                                   ir_overflow_indefinite);

	ir_target.fast_unaligned_memaccess = true;
	ir_target.allow_ifconv             = ia32_is_mux_allowed;
	ir_target.float_int_overflow       = ir_overflow_indefinite;
	ir_platform_set_va_list_type_pointer();

	if (!ia32_cg_config.use_sse2 && !ia32_cg_config.use_softfloat) {
		ir_type *const type_f80 = x86_init_x87_type();
		ir_target.mode_float_arithmetic = get_type_mode(type_f80);
	}

	ia32_register_init();
	obstack_init(&opcodes_obst);
	ia32_create_opcodes();
	ia32_cconv_init();
}

static void ia32_finish(void)
{
	ia32_free_opcodes();
	obstack_free(&opcodes_obst, NULL);
}

static void ia32_mark_remat(ir_node *node)
{
	if (is_ia32_irn(node))
		set_ia32_is_remat(node);
}

static const regalloc_if_t ia32_regalloc_if = {
	.spill_cost             = 7,
	.reload_cost            = 5,
	.mark_remat             = ia32_mark_remat,
	.new_spill              = ia32_new_spill,
	.new_reload             = ia32_new_reload,
	.perform_memory_operand = ia32_perform_memory_operand,
};

static bool lower_for_emit(ir_graph *const irg,
                           const unsigned *const sp_is_non_ssa)
{
	if (!be_step_first(irg))
		return false;

	struct obstack *obst = be_get_be_obst(irg);
	be_birg_from_irg(irg)->isa_link     = OALLOCZ(obst, ia32_irg_data_t);
	be_birg_from_irg(irg)->non_ssa_regs = sp_is_non_ssa;
	ia32_select_instructions(irg);

	be_step_schedule(irg);

	be_timer_push(T_RA_PREPARATION);
	ia32_setup_fpu_mode(irg);
	be_sched_fix_flags(irg, &ia32_reg_classes[CLASS_ia32_flags],
	                   &flags_remat, NULL, &ia32_try_replace_flags);
	simplify_remat_nodes(irg);
	be_timer_pop(T_RA_PREPARATION);

	be_step_regalloc(irg, &ia32_regalloc_if);

	ia32_before_emit(irg);
	return true;
}

static void ia32_generate_code(FILE *output, const char *cup_name)
{
	ia32_tv_ent = pmap_create();
	be_begin(output, cup_name);
	unsigned *const sp_is_non_ssa = rbitset_alloca(N_IA32_REGISTERS);
	rbitset_set(sp_is_non_ssa, REG_ESP);

	foreach_irp_irg(i, irg) {
		if (!lower_for_emit(irg, sp_is_non_ssa))
			continue;

		be_timer_push(T_EMIT);
		ia32_emit_function(irg);
		be_timer_pop(T_EMIT);

		be_step_last(irg);
	}

	ia32_emit_thunks();
	be_finish();
	pmap_destroy(ia32_tv_ent);
}

static ir_jit_function_t *ia32_jit_compile(ir_jit_segment_t *const segment,
                                           ir_graph *const irg)
{
	unsigned *const sp_is_non_ssa = rbitset_alloca(N_IA32_REGISTERS);
	rbitset_set(sp_is_non_ssa, REG_ESP);

	if (!lower_for_emit(irg, sp_is_non_ssa))
		return NULL;

	be_timer_push(T_EMIT);
	ir_jit_function_t *const res = ia32_emit_jit(segment, irg);
	be_timer_pop(T_EMIT);

	be_step_last(irg);
	return res;
}

static bool is_float(ir_type const *const type)
{
	return is_atomic_type(type) && mode_is_float(get_type_mode(type));
}

/* modes filled in below */
static aggregate_spec_t iu4_iu4_spec = { .length = 2, };
static aggregate_spec_t iu4_spec     = { .length = 1, };
static aggregate_spec_t iu2_spec     = { .length = 1, };
static aggregate_spec_t iu1_spec     = { .length = 1, };
static aggregate_spec_t float_spec   = { .length = 1, };
static aggregate_spec_t double_spec  = { .length = 1, };
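/* Compound return lowering (sketch): decide_compound_ret() below picks one
 * of these specs, e.g. an 8 byte array of two floats (a "complex float") is
 * returned in two 32 bit registers via iu4_iu4_spec, while aggregates that
 * fit no spec are returned through a pointer (mode_P). */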
static void init_aggregate_specs(void)
{
	iu4_iu4_spec.modes[0] = mode_Iu;
	iu4_iu4_spec.modes[1] = mode_Iu;
	iu4_spec.modes[0]     = mode_Iu;
	iu2_spec.modes[0]     = mode_Hu;
	iu1_spec.modes[0]     = mode_Bu;
	float_spec.modes[0]   = mode_F;
	double_spec.modes[0]  = mode_D;
}

static aggregate_spec_t decide_compound_ret(void *env, ir_type const *type)
{
	(void)env;
	unsigned size = get_type_size(type);

	if (is_Array_type(type)) {
		/* This is used for returning complex float numbers */
		if (size == 8 && get_array_size(type) == 2
		 && is_float(get_array_element_type(type))) {
			return iu4_iu4_spec;
		}
		return (aggregate_spec_t) {
			.length = 1,
			.modes  = { mode_P },
		};
	}

	if (is_atomic_type(type)) {
		switch (size) {
		case 1: return iu1_spec;
		case 2: return iu2_spec;
		case 4: return iu4_spec;
		}
	}

	/* return_small_struct_in_regs is used on OS X */
	if (ir_platform.ia32_struct_in_regs && size <= 8) {
		if (get_compound_n_members(type) == 1) {
			ir_entity *const member      = get_compound_member(type, 0);
			ir_type   *const member_type = get_entity_type(member);
			if (is_float(member_type)) {
				unsigned member_size = get_type_size(member_type);
				if (member_size == 4)
					return float_spec;
				if (member_size == 8)
					return double_spec;
			}
		}

		switch (size) {
		case 1: return iu1_spec;
		case 2: return iu2_spec;
		case 4: return iu4_spec;
		case 8: return iu4_iu4_spec;
		}
	}

	return (aggregate_spec_t) {
		.length = 1,
		.modes  = { mode_P },
	};
}

static void ia32_lower_va_arg(ir_node *node)
{
	be_default_lower_va_arg(node, false, 4);
}

static const ir_settings_arch_dep_t ia32_arch_dep = {
	.replace_muls         = true,
	.replace_divs         = true,
	.replace_mods         = true,
	.allow_mulhs          = true,
	.allow_mulhu          = true,
	.also_use_subs        = true,
	.maximum_shifts       = 4,
	.highest_shift_amount = 63,
	.evaluate             = ia32_evaluate_insn,
	.max_bits_for_mulh    = 32,
};
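/* ia32_lower_for_target() below runs the target-specific lowering passes in
 * a fixed order: arch-dependent arithmetic, compound call lowering, optional
 * softfloat lowering, builtins, switch splitting, 64 bit lowering, mode_b
 * and Alloc lowering, and finally CopyB lowering. */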
static void ia32_lower_for_target(void)
{
	ir_arch_lower(&ia32_arch_dep);
	be_after_irp_transform("lower-arch-dep");

	ir_mode *mode_gp = ia32_reg_classes[CLASS_ia32_gp].mode;

	/* lower compound param handling
	 * Note: we lower compound arguments ourselves, since on ia32 we don't
	 * have hidden parameters but know where to find the structs on the
	 * stack. (This also forces us to always allocate space for the compound
	 * arguments on the callframe and we can't just use an arbitrary position
	 * on the stackframe.) */
	init_aggregate_specs();
	lower_calls_with_compounds(LF_RETURN_HIDDEN, dont_lower_aggregates, NULL,
	                           decide_compound_ret, NULL,
	                           reset_stateless_abi);
	be_after_irp_transform("lower-calls");

	/* replace floating point operations by function calls */
	if (ia32_cg_config.use_softfloat) {
		lower_floating_point();
		be_after_irp_transform("lower-fp");
	}

	ir_builtin_kind supported[32];
	size_t          s = 0;
	supported[s++] = ir_bk_trap;
	supported[s++] = ir_bk_debugbreak;
	supported[s++] = ir_bk_return_address;
	supported[s++] = ir_bk_frame_address;
	supported[s++] = ir_bk_prefetch;
	supported[s++] = ir_bk_ffs;
	supported[s++] = ir_bk_clz;
	supported[s++] = ir_bk_ctz;
	supported[s++] = ir_bk_parity;
	supported[s++] = ir_bk_bswap;
	supported[s++] = ir_bk_outport;
	supported[s++] = ir_bk_inport;
	supported[s++] = ir_bk_saturating_increment;
	supported[s++] = ir_bk_va_start;
	if (ia32_cg_config.use_popcnt)
		supported[s++] = ir_bk_popcount;
	if (ia32_cg_config.use_cmpxchg)
		supported[s++] = ir_bk_compare_swap;
	assert(s < ARRAY_SIZE(supported));
	lower_builtins(s, supported, ia32_lower_va_arg);
	be_after_irp_transform("lower-builtins");

	foreach_irp_irg(i, irg) {
		/* break up switches with wide ranges */
		lower_switch(irg, 4, 256, mode_gp);
		be_after_transform(irg, "lower-switch");
	}

	ia32_lower64();
	be_after_irp_transform("lower-64");

	foreach_irp_irg(i, irg) {
		/* lower for mode_b stuff */
		ir_lower_mode_b(irg, ia32_mode_gp);
		be_after_transform(irg, "lower-modeb");
		lower_alloc(irg, ir_platform.ia32_po2_stackalign);
		be_after_transform(irg, "lower-alloc");
	}

	foreach_irp_irg(i, irg) {
		/* Turn all small CopyBs into loads/stores, keep medium-sized CopyBs,
		 * so we can generate rep movs later, and turn all big CopyBs into
		 * memcpy calls. */
		lower_CopyB(irg, 64, 8193, true);
		be_after_transform(irg, "lower-copyb");
	}
}

static const lc_opt_table_entry_t ia32_options[] = {
	LC_OPT_ENT_BOOL("gprof", "Create gprof profiling code", &gprof),
	LC_OPT_LAST
};

/** we don't have a concept of aliasing registers, so enumerate them
 * manually for the asm nodes. */
static be_register_name_t const ia32_additional_reg_names[] = {
	{ "al", REG_EAX }, { "ah", REG_EAX }, { "ax", REG_EAX },
	{ "bl", REG_EBX }, { "bh", REG_EBX }, { "bx", REG_EBX },
	{ "cl", REG_ECX }, { "ch", REG_ECX }, { "cx", REG_ECX },
	{ "dl", REG_EDX }, { "dh", REG_EDX }, { "dx", REG_EDX },
	{ "si", REG_ESI },
	{ "di", REG_EDI },
	{ "sp", REG_ESP },
	{ "bp", REG_EBP },
	{ NULL, ~0u }
};

arch_isa_if_t const ia32_isa_if = {
	.name                  = "ia32",
	.pointer_size          = 4,
	.modulo_shift          = 32,
	.big_endian            = false,
	.po2_biggest_alignment = 4,
	.pic_supported         = true,
	.n_registers           = N_IA32_REGISTERS,
	.registers             = ia32_registers,
	.n_register_classes    = N_IA32_CLASSES,
	.register_classes      = ia32_reg_classes,
	.init                  = ia32_init,
	.finish                = ia32_finish,
	.generate_code         = ia32_generate_code,
	.jit_compile           = ia32_jit_compile,
	.emit_function         = ia32_emit_jit_function,
	.lower_for_target      = ia32_lower_for_target,
	.additional_reg_names  = ia32_additional_reg_names,
	.get_op_estimated_cost = ia32_get_op_estimated_cost,
};

BE_REGISTER_MODULE_CONSTRUCTOR(be_init_arch_ia32)
void be_init_arch_ia32(void)
{
	lc_opt_entry_t *be_grp   = lc_opt_get_grp(firm_opt_get_root(), "be");
	lc_opt_entry_t *ia32_grp = lc_opt_get_grp(be_grp, "ia32");
	lc_opt_add_table(ia32_grp, ia32_options);

	ia32_init_emitter();
	ia32_init_optimize();
	ia32_init_transform();
	x86_init_x87();
	ia32_init_architecture();
}