// SPDX-License-Identifier: LGPL-2.1-or-later /* * Copyright (C) 2014-2021 Paul Cercueil */ #include "config.h" #include "disassembler.h" #include "lightrec.h" #include "memmanager.h" #include "optimizer.h" #include "regcache.h" #include #include #include #include #define IF_OPT(opt, ptr) ((opt) ? (ptr) : NULL) struct optimizer_list { void (**optimizers)(struct opcode *); unsigned int nb_optimizers; }; bool is_unconditional_jump(union code c) { switch (c.i.op) { case OP_SPECIAL: return c.r.op == OP_SPECIAL_JR || c.r.op == OP_SPECIAL_JALR; case OP_J: case OP_JAL: return true; case OP_BEQ: case OP_BLEZ: return c.i.rs == c.i.rt; case OP_REGIMM: return (c.r.rt == OP_REGIMM_BGEZ || c.r.rt == OP_REGIMM_BGEZAL) && c.i.rs == 0; default: return false; } } bool is_syscall(union code c) { return (c.i.op == OP_SPECIAL && c.r.op == OP_SPECIAL_SYSCALL) || (c.i.op == OP_CP0 && (c.r.rs == OP_CP0_MTC0 || c.r.rs == OP_CP0_CTC0) && (c.r.rd == 12 || c.r.rd == 13)); } static u64 opcode_read_mask(union code op) { switch (op.i.op) { case OP_SPECIAL: switch (op.r.op) { case OP_SPECIAL_SYSCALL: case OP_SPECIAL_BREAK: return 0; case OP_SPECIAL_JR: case OP_SPECIAL_JALR: case OP_SPECIAL_MTHI: case OP_SPECIAL_MTLO: return BIT(op.r.rs); case OP_SPECIAL_MFHI: return BIT(REG_HI); case OP_SPECIAL_MFLO: return BIT(REG_LO); case OP_SPECIAL_SLL: case OP_SPECIAL_SRL: case OP_SPECIAL_SRA: return BIT(op.r.rt); default: return BIT(op.r.rs) | BIT(op.r.rt); } case OP_CP0: switch (op.r.rs) { case OP_CP0_MTC0: case OP_CP0_CTC0: return BIT(op.r.rt); default: return 0; } case OP_CP2: if (op.r.op == OP_CP2_BASIC) { switch (op.r.rs) { case OP_CP2_BASIC_MTC2: case OP_CP2_BASIC_CTC2: return BIT(op.r.rt); default: break; } } return 0; case OP_J: case OP_JAL: case OP_LUI: return 0; case OP_BEQ: case OP_BNE: case OP_LWL: case OP_LWR: case OP_SB: case OP_SH: case OP_SWL: case OP_SW: case OP_SWR: return BIT(op.i.rs) | BIT(op.i.rt); default: return BIT(op.i.rs); } } static u64 opcode_write_mask(union code op) { u64 flags; switch (op.i.op) { case OP_SPECIAL: switch (op.r.op) { case OP_SPECIAL_JR: case OP_SPECIAL_SYSCALL: case OP_SPECIAL_BREAK: return 0; case OP_SPECIAL_MULT: case OP_SPECIAL_MULTU: case OP_SPECIAL_DIV: case OP_SPECIAL_DIVU: if (!OPT_FLAG_MULT_DIV) return BIT(REG_LO) | BIT(REG_HI); if (op.r.rd) flags = BIT(op.r.rd); else flags = BIT(REG_LO); if (op.r.imm) flags |= BIT(op.r.imm); else flags |= BIT(REG_HI); return flags; case OP_SPECIAL_MTHI: return BIT(REG_HI); case OP_SPECIAL_MTLO: return BIT(REG_LO); default: return BIT(op.r.rd); } case OP_ADDI: case OP_ADDIU: case OP_SLTI: case OP_SLTIU: case OP_ANDI: case OP_ORI: case OP_XORI: case OP_LUI: case OP_LB: case OP_LH: case OP_LWL: case OP_LW: case OP_LBU: case OP_LHU: case OP_LWR: return BIT(op.i.rt); case OP_JAL: return BIT(31); case OP_CP0: switch (op.r.rs) { case OP_CP0_MFC0: case OP_CP0_CFC0: return BIT(op.i.rt); default: return 0; } case OP_CP2: if (op.r.op == OP_CP2_BASIC) { switch (op.r.rs) { case OP_CP2_BASIC_MFC2: case OP_CP2_BASIC_CFC2: return BIT(op.i.rt); default: break; } } return 0; case OP_REGIMM: switch (op.r.rt) { case OP_REGIMM_BLTZAL: case OP_REGIMM_BGEZAL: return BIT(31); default: return 0; } case OP_META_MOV: return BIT(op.r.rd); default: return 0; } } bool opcode_reads_register(union code op, u8 reg) { return opcode_read_mask(op) & BIT(reg); } bool opcode_writes_register(union code op, u8 reg) { return opcode_write_mask(op) & BIT(reg); } static bool opcode_is_load(union code op) { switch (op.i.op) { case OP_LB: case OP_LH: case OP_LWL: case OP_LW: case OP_LBU: case OP_LHU: case OP_LWR: case OP_LWC2: return true; default: return false; } } static bool opcode_is_store(union code op) { switch (op.i.op) { case OP_SB: case OP_SH: case OP_SW: case OP_SWL: case OP_SWR: case OP_SWC2: return true; default: return false; } } bool opcode_is_io(union code op) { return opcode_is_load(op) || opcode_is_store(op); } /* TODO: Complete */ static bool is_nop(union code op) { if (opcode_writes_register(op, 0)) { switch (op.i.op) { case OP_CP0: return op.r.rs != OP_CP0_MFC0; case OP_LB: case OP_LH: case OP_LWL: case OP_LW: case OP_LBU: case OP_LHU: case OP_LWR: return false; default: return true; } } switch (op.i.op) { case OP_SPECIAL: switch (op.r.op) { case OP_SPECIAL_AND: return op.r.rd == op.r.rt && op.r.rd == op.r.rs; case OP_SPECIAL_ADD: case OP_SPECIAL_ADDU: return (op.r.rd == op.r.rt && op.r.rs == 0) || (op.r.rd == op.r.rs && op.r.rt == 0); case OP_SPECIAL_SUB: case OP_SPECIAL_SUBU: return op.r.rd == op.r.rs && op.r.rt == 0; case OP_SPECIAL_OR: if (op.r.rd == op.r.rt) return op.r.rd == op.r.rs || op.r.rs == 0; else return (op.r.rd == op.r.rs) && op.r.rt == 0; case OP_SPECIAL_SLL: case OP_SPECIAL_SRA: case OP_SPECIAL_SRL: return op.r.rd == op.r.rt && op.r.imm == 0; case OP_SPECIAL_MFHI: case OP_SPECIAL_MFLO: return op.r.rd == 0; default: return false; } case OP_ORI: case OP_ADDI: case OP_ADDIU: return op.i.rt == op.i.rs && op.i.imm == 0; case OP_BGTZ: return (op.i.rs == 0 || op.i.imm == 1); case OP_REGIMM: return (op.i.op == OP_REGIMM_BLTZ || op.i.op == OP_REGIMM_BLTZAL) && (op.i.rs == 0 || op.i.imm == 1); case OP_BNE: return (op.i.rs == op.i.rt || op.i.imm == 1); default: return false; } } bool load_in_delay_slot(union code op) { switch (op.i.op) { case OP_CP0: switch (op.r.rs) { case OP_CP0_MFC0: case OP_CP0_CFC0: return true; default: break; } break; case OP_CP2: if (op.r.op == OP_CP2_BASIC) { switch (op.r.rs) { case OP_CP2_BASIC_MFC2: case OP_CP2_BASIC_CFC2: return true; default: break; } } break; case OP_LB: case OP_LH: case OP_LW: case OP_LWL: case OP_LWR: case OP_LBU: case OP_LHU: return true; default: break; } return false; } static u32 lightrec_propagate_consts(union code c, u32 known, u32 *v) { switch (c.i.op) { case OP_SPECIAL: switch (c.r.op) { case OP_SPECIAL_SLL: if (known & BIT(c.r.rt)) { known |= BIT(c.r.rd); v[c.r.rd] = v[c.r.rt] << c.r.imm; } else { known &= ~BIT(c.r.rd); } break; case OP_SPECIAL_SRL: if (known & BIT(c.r.rt)) { known |= BIT(c.r.rd); v[c.r.rd] = v[c.r.rt] >> c.r.imm; } else { known &= ~BIT(c.r.rd); } break; case OP_SPECIAL_SRA: if (known & BIT(c.r.rt)) { known |= BIT(c.r.rd); v[c.r.rd] = (s32)v[c.r.rt] >> c.r.imm; } else { known &= ~BIT(c.r.rd); } break; case OP_SPECIAL_SLLV: if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { known |= BIT(c.r.rd); v[c.r.rd] = v[c.r.rt] << (v[c.r.rs] & 0x1f); } else { known &= ~BIT(c.r.rd); } break; case OP_SPECIAL_SRLV: if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { known |= BIT(c.r.rd); v[c.r.rd] = v[c.r.rt] >> (v[c.r.rs] & 0x1f); } else { known &= ~BIT(c.r.rd); } break; case OP_SPECIAL_SRAV: if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { known |= BIT(c.r.rd); v[c.r.rd] = (s32)v[c.r.rt] >> (v[c.r.rs] & 0x1f); } else { known &= ~BIT(c.r.rd); } break; case OP_SPECIAL_ADD: case OP_SPECIAL_ADDU: if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { known |= BIT(c.r.rd); v[c.r.rd] = (s32)v[c.r.rt] + (s32)v[c.r.rs]; } else { known &= ~BIT(c.r.rd); } break; case OP_SPECIAL_SUB: case OP_SPECIAL_SUBU: if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { known |= BIT(c.r.rd); v[c.r.rd] = v[c.r.rt] - v[c.r.rs]; } else { known &= ~BIT(c.r.rd); } break; case OP_SPECIAL_AND: if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { known |= BIT(c.r.rd); v[c.r.rd] = v[c.r.rt] & v[c.r.rs]; } else { known &= ~BIT(c.r.rd); } break; case OP_SPECIAL_OR: if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { known |= BIT(c.r.rd); v[c.r.rd] = v[c.r.rt] | v[c.r.rs]; } else { known &= ~BIT(c.r.rd); } break; case OP_SPECIAL_XOR: if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { known |= BIT(c.r.rd); v[c.r.rd] = v[c.r.rt] ^ v[c.r.rs]; } else { known &= ~BIT(c.r.rd); } break; case OP_SPECIAL_NOR: if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { known |= BIT(c.r.rd); v[c.r.rd] = ~(v[c.r.rt] | v[c.r.rs]); } else { known &= ~BIT(c.r.rd); } break; case OP_SPECIAL_SLT: if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { known |= BIT(c.r.rd); v[c.r.rd] = (s32)v[c.r.rs] < (s32)v[c.r.rt]; } else { known &= ~BIT(c.r.rd); } break; case OP_SPECIAL_SLTU: if (known & BIT(c.r.rt) && known & BIT(c.r.rs)) { known |= BIT(c.r.rd); v[c.r.rd] = v[c.r.rs] < v[c.r.rt]; } else { known &= ~BIT(c.r.rd); } break; default: break; } break; case OP_REGIMM: break; case OP_ADDI: case OP_ADDIU: if (known & BIT(c.i.rs)) { known |= BIT(c.i.rt); v[c.i.rt] = v[c.i.rs] + (s32)(s16)c.i.imm; } else { known &= ~BIT(c.i.rt); } break; case OP_SLTI: if (known & BIT(c.i.rs)) { known |= BIT(c.i.rt); v[c.i.rt] = (s32)v[c.i.rs] < (s32)(s16)c.i.imm; } else { known &= ~BIT(c.i.rt); } break; case OP_SLTIU: if (known & BIT(c.i.rs)) { known |= BIT(c.i.rt); v[c.i.rt] = v[c.i.rs] < (u32)(s32)(s16)c.i.imm; } else { known &= ~BIT(c.i.rt); } break; case OP_ANDI: if (known & BIT(c.i.rs)) { known |= BIT(c.i.rt); v[c.i.rt] = v[c.i.rs] & c.i.imm; } else { known &= ~BIT(c.i.rt); } break; case OP_ORI: if (known & BIT(c.i.rs)) { known |= BIT(c.i.rt); v[c.i.rt] = v[c.i.rs] | c.i.imm; } else { known &= ~BIT(c.i.rt); } break; case OP_XORI: if (known & BIT(c.i.rs)) { known |= BIT(c.i.rt); v[c.i.rt] = v[c.i.rs] ^ c.i.imm; } else { known &= ~BIT(c.i.rt); } break; case OP_LUI: known |= BIT(c.i.rt); v[c.i.rt] = c.i.imm << 16; break; case OP_CP0: switch (c.r.rs) { case OP_CP0_MFC0: case OP_CP0_CFC0: known &= ~BIT(c.r.rt); break; } break; case OP_CP2: if (c.r.op == OP_CP2_BASIC) { switch (c.r.rs) { case OP_CP2_BASIC_MFC2: case OP_CP2_BASIC_CFC2: known &= ~BIT(c.r.rt); break; } } break; case OP_LB: case OP_LH: case OP_LWL: case OP_LW: case OP_LBU: case OP_LHU: case OP_LWR: case OP_LWC2: known &= ~BIT(c.i.rt); break; case OP_META_MOV: if (known & BIT(c.r.rs)) { known |= BIT(c.r.rd); v[c.r.rd] = v[c.r.rs]; } else { known &= ~BIT(c.r.rd); } break; default: break; } return known; } static int lightrec_transform_ops(struct lightrec_state *state, struct block *block) { struct opcode *list; unsigned int i; for (i = 0; i < block->nb_ops; i++) { list = &block->opcode_list[i]; /* Transform all opcodes detected as useless to real NOPs * (0x0: SLL r0, r0, #0) */ if (list->opcode != 0 && is_nop(list->c)) { pr_debug("Converting useless opcode 0x%08x to NOP\n", list->opcode); list->opcode = 0x0; } if (!list->opcode) continue; switch (list->i.op) { /* Transform BEQ / BNE to BEQZ / BNEZ meta-opcodes if one of the * two registers is zero. */ case OP_BEQ: if ((list->i.rs == 0) ^ (list->i.rt == 0)) { list->i.op = OP_META_BEQZ; if (list->i.rs == 0) { list->i.rs = list->i.rt; list->i.rt = 0; } } else if (list->i.rs == list->i.rt) { list->i.rs = 0; list->i.rt = 0; } break; case OP_BNE: if (list->i.rs == 0) { list->i.op = OP_META_BNEZ; list->i.rs = list->i.rt; list->i.rt = 0; } else if (list->i.rt == 0) { list->i.op = OP_META_BNEZ; } break; /* Transform ORI/ADDI/ADDIU with imm #0 or ORR/ADD/ADDU/SUB/SUBU * with register $zero to the MOV meta-opcode */ case OP_ORI: case OP_ADDI: case OP_ADDIU: if (list->i.imm == 0) { pr_debug("Convert ORI/ADDI/ADDIU #0 to MOV\n"); list->i.op = OP_META_MOV; list->r.rd = list->i.rt; } break; case OP_SPECIAL: switch (list->r.op) { case OP_SPECIAL_SLL: case OP_SPECIAL_SRA: case OP_SPECIAL_SRL: if (list->r.imm == 0) { pr_debug("Convert SLL/SRL/SRA #0 to MOV\n"); list->i.op = OP_META_MOV; list->r.rs = list->r.rt; } break; case OP_SPECIAL_OR: case OP_SPECIAL_ADD: case OP_SPECIAL_ADDU: if (list->r.rs == 0) { pr_debug("Convert OR/ADD $zero to MOV\n"); list->i.op = OP_META_MOV; list->r.rs = list->r.rt; } case OP_SPECIAL_SUB: /* fall-through */ case OP_SPECIAL_SUBU: if (list->r.rt == 0) { pr_debug("Convert OR/ADD/SUB $zero to MOV\n"); list->i.op = OP_META_MOV; } default: /* fall-through */ break; } default: /* fall-through */ break; } } return 0; } static int lightrec_switch_delay_slots(struct lightrec_state *state, struct block *block) { struct opcode *list, *next = &block->opcode_list[0]; unsigned int i; union code op, next_op; u8 flags; for (i = 0; i < block->nb_ops - 1; i++) { list = next; next = &block->opcode_list[i + 1]; next_op = next->c; op = list->c; if (!has_delay_slot(op) || list->flags & (LIGHTREC_NO_DS | LIGHTREC_EMULATE_BRANCH) || op.opcode == 0 || next_op.opcode == 0) continue; if (i && has_delay_slot(block->opcode_list[i - 1].c) && !(block->opcode_list[i - 1].flags & LIGHTREC_NO_DS)) continue; if ((list->flags & LIGHTREC_SYNC) || (next->flags & LIGHTREC_SYNC)) continue; switch (list->i.op) { case OP_SPECIAL: switch (op.r.op) { case OP_SPECIAL_JALR: if (opcode_reads_register(next_op, op.r.rd) || opcode_writes_register(next_op, op.r.rd)) continue; case OP_SPECIAL_JR: /* fall-through */ if (opcode_writes_register(next_op, op.r.rs)) continue; default: /* fall-through */ break; } case OP_J: /* fall-through */ break; case OP_JAL: if (opcode_reads_register(next_op, 31) || opcode_writes_register(next_op, 31)) continue; else break; case OP_BEQ: case OP_BNE: if (op.i.rt && opcode_writes_register(next_op, op.i.rt)) continue; case OP_BLEZ: /* fall-through */ case OP_BGTZ: case OP_META_BEQZ: case OP_META_BNEZ: if (op.i.rs && opcode_writes_register(next_op, op.i.rs)) continue; break; case OP_REGIMM: switch (op.r.rt) { case OP_REGIMM_BLTZAL: case OP_REGIMM_BGEZAL: if (opcode_reads_register(next_op, 31) || opcode_writes_register(next_op, 31)) continue; case OP_REGIMM_BLTZ: /* fall-through */ case OP_REGIMM_BGEZ: if (op.i.rs && opcode_writes_register(next_op, op.i.rs)) continue; break; } default: /* fall-through */ break; } pr_debug("Swap branch and delay slot opcodes " "at offsets 0x%x / 0x%x\n", i << 2, (i + 1) << 2); flags = next->flags; list->c = next_op; next->c = op; next->flags = list->flags | LIGHTREC_NO_DS; list->flags = flags | LIGHTREC_NO_DS; } return 0; } static int shrink_opcode_list(struct lightrec_state *state, struct block *block, u16 new_size) { struct opcode *list; if (new_size >= block->nb_ops) { pr_err("Invalid shrink size (%u vs %u)\n", new_size, block->nb_ops); return -EINVAL; } list = lightrec_malloc(state, MEM_FOR_IR, sizeof(*list) * new_size); if (!list) { pr_err("Unable to allocate memory\n"); return -ENOMEM; } memcpy(list, block->opcode_list, sizeof(*list) * new_size); lightrec_free_opcode_list(state, block); block->opcode_list = list; block->nb_ops = new_size; pr_debug("Shrunk opcode list of block PC 0x%08x to %u opcodes\n", block->pc, new_size); return 0; } static int lightrec_detect_impossible_branches(struct lightrec_state *state, struct block *block) { struct opcode *op, *next = &block->opcode_list[0]; unsigned int i; int ret = 0; for (i = 0; i < block->nb_ops - 1; i++) { op = next; next = &block->opcode_list[i + 1]; if (!has_delay_slot(op->c) || (!load_in_delay_slot(next->c) && !has_delay_slot(next->c) && !(next->i.op == OP_CP0 && next->r.rs == OP_CP0_RFE))) continue; if (op->c.opcode == next->c.opcode) { /* The delay slot is the exact same opcode as the branch * opcode: this is effectively a NOP */ next->c.opcode = 0; continue; } op->flags |= LIGHTREC_EMULATE_BRANCH; if (op == block->opcode_list) { pr_debug("First opcode of block PC 0x%08x is an impossible branch\n", block->pc); /* If the first opcode is an 'impossible' branch, we * only keep the first two opcodes of the block (the * branch itself + its delay slot) */ if (block->nb_ops > 2) ret = shrink_opcode_list(state, block, 2); break; } } return ret; } static int lightrec_local_branches(struct lightrec_state *state, struct block *block) { struct opcode *list; unsigned int i; s32 offset; for (i = 0; i < block->nb_ops; i++) { list = &block->opcode_list[i]; if (should_emulate(list)) continue; switch (list->i.op) { case OP_BEQ: case OP_BNE: case OP_BLEZ: case OP_BGTZ: case OP_REGIMM: case OP_META_BEQZ: case OP_META_BNEZ: offset = i + 1 + (s16)list->i.imm; if (offset >= 0 && offset < block->nb_ops) break; default: /* fall-through */ continue; } pr_debug("Found local branch to offset 0x%x\n", offset << 2); if (should_emulate(&block->opcode_list[offset])) { pr_debug("Branch target must be emulated - skip\n"); continue; } if (offset && has_delay_slot(block->opcode_list[offset - 1].c)) { pr_debug("Branch target is a delay slot - skip\n"); continue; } pr_debug("Adding sync at offset 0x%x\n", offset << 2); block->opcode_list[offset].flags |= LIGHTREC_SYNC; list->flags |= LIGHTREC_LOCAL_BRANCH; } return 0; } bool has_delay_slot(union code op) { switch (op.i.op) { case OP_SPECIAL: switch (op.r.op) { case OP_SPECIAL_JR: case OP_SPECIAL_JALR: return true; default: return false; } case OP_J: case OP_JAL: case OP_BEQ: case OP_BNE: case OP_BLEZ: case OP_BGTZ: case OP_REGIMM: case OP_META_BEQZ: case OP_META_BNEZ: return true; default: return false; } } bool should_emulate(const struct opcode *list) { return has_delay_slot(list->c) && (list->flags & LIGHTREC_EMULATE_BRANCH); } static void lightrec_add_unload(struct opcode *op, u8 reg) { if (op->i.op == OP_SPECIAL && reg == op->r.rd) op->flags |= LIGHTREC_UNLOAD_RD; if (op->i.rs == reg) op->flags |= LIGHTREC_UNLOAD_RS; if (op->i.rt == reg) op->flags |= LIGHTREC_UNLOAD_RT; } static int lightrec_early_unload(struct lightrec_state *state, struct block *block) { unsigned int i, offset; struct opcode *op; u8 reg; for (reg = 1; reg < 34; reg++) { int last_r_id = -1, last_w_id = -1; for (i = 0; i < block->nb_ops; i++) { union code c = block->opcode_list[i].c; if (opcode_reads_register(c, reg)) last_r_id = i; if (opcode_writes_register(c, reg)) last_w_id = i; } if (last_w_id > last_r_id) offset = (unsigned int)last_w_id; else if (last_r_id >= 0) offset = (unsigned int)last_r_id; else continue; op = &block->opcode_list[offset]; if (has_delay_slot(op->c) && (op->flags & LIGHTREC_NO_DS)) offset++; if (offset == block->nb_ops) continue; lightrec_add_unload(&block->opcode_list[offset], reg); } return 0; } static int lightrec_flag_stores(struct lightrec_state *state, struct block *block) { struct opcode *list; u32 known = BIT(0); u32 values[32] = { 0 }; unsigned int i; for (i = 0; i < block->nb_ops; i++) { list = &block->opcode_list[i]; /* Register $zero is always, well, zero */ known |= BIT(0); values[0] = 0; switch (list->i.op) { case OP_SB: case OP_SH: case OP_SW: /* Mark all store operations that target $sp or $gp * as not requiring code invalidation. This is based * on the heuristic that stores using one of these * registers as address will never hit a code page. */ if (list->i.rs >= 28 && list->i.rs <= 29 && !state->maps[PSX_MAP_KERNEL_USER_RAM].ops) { pr_debug("Flaging opcode 0x%08x as not requiring invalidation\n", list->opcode); list->flags |= LIGHTREC_NO_INVALIDATE; } /* Detect writes whose destination address is inside the * current block, using constant propagation. When these * occur, we mark the blocks as not compilable. */ if ((known & BIT(list->i.rs)) && kunseg(values[list->i.rs]) >= kunseg(block->pc) && kunseg(values[list->i.rs]) < (kunseg(block->pc) + block->nb_ops * 4)) { pr_debug("Self-modifying block detected\n"); block->flags |= BLOCK_NEVER_COMPILE; list->flags |= LIGHTREC_SMC; } default: /* fall-through */ break; } known = lightrec_propagate_consts(list->c, known, values); } return 0; } static u8 get_mfhi_mflo_reg(const struct block *block, u16 offset, const struct opcode *last, u32 mask, bool sync, bool mflo, bool another) { const struct opcode *op, *next = &block->opcode_list[offset]; u32 old_mask; u8 reg2, reg = mflo ? REG_LO : REG_HI; u16 branch_offset; unsigned int i; for (i = offset; i < block->nb_ops; i++) { op = next; next = &block->opcode_list[i + 1]; old_mask = mask; /* If any other opcode writes or reads to the register * we'd use, then we cannot use it anymore. */ mask |= opcode_read_mask(op->c); mask |= opcode_write_mask(op->c); if (op->flags & LIGHTREC_SYNC) sync = true; switch (op->i.op) { case OP_BEQ: case OP_BNE: case OP_BLEZ: case OP_BGTZ: case OP_REGIMM: case OP_META_BEQZ: case OP_META_BNEZ: /* TODO: handle backwards branches too */ if (!last && (op->flags & LIGHTREC_LOCAL_BRANCH) && (s16)op->c.i.imm >= 0) { branch_offset = i + 1 + (s16)op->c.i.imm - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS)); reg = get_mfhi_mflo_reg(block, branch_offset, NULL, mask, sync, mflo, false); reg2 = get_mfhi_mflo_reg(block, offset + 1, next, mask, sync, mflo, false); if (reg > 0 && reg == reg2) return reg; if (!reg && !reg2) return 0; } return mflo ? REG_LO : REG_HI; case OP_SPECIAL: switch (op->r.op) { case OP_SPECIAL_MULT: case OP_SPECIAL_MULTU: case OP_SPECIAL_DIV: case OP_SPECIAL_DIVU: return 0; case OP_SPECIAL_MTHI: if (!mflo) return 0; continue; case OP_SPECIAL_MTLO: if (mflo) return 0; continue; case OP_SPECIAL_JR: if (op->r.rs != 31) return reg; if (!sync && !(op->flags & LIGHTREC_NO_DS) && (next->i.op == OP_SPECIAL) && ((!mflo && next->r.op == OP_SPECIAL_MFHI) || (mflo && next->r.op == OP_SPECIAL_MFLO))) return next->r.rd; return 0; case OP_SPECIAL_JALR: return reg; case OP_SPECIAL_MFHI: if (!mflo) { if (another) return op->r.rd; /* Must use REG_HI if there is another MFHI target*/ reg2 = get_mfhi_mflo_reg(block, i + 1, next, 0, sync, mflo, true); if (reg2 > 0 && reg2 != REG_HI) return REG_HI; if (!sync && !(old_mask & BIT(op->r.rd))) return op->r.rd; else return REG_HI; } continue; case OP_SPECIAL_MFLO: if (mflo) { if (another) return op->r.rd; /* Must use REG_LO if there is another MFLO target*/ reg2 = get_mfhi_mflo_reg(block, i + 1, next, 0, sync, mflo, true); if (reg2 > 0 && reg2 != REG_LO) return REG_LO; if (!sync && !(old_mask & BIT(op->r.rd))) return op->r.rd; else return REG_LO; } continue; default: break; } /* fall-through */ default: continue; } } return reg; } static void lightrec_replace_lo_hi(struct block *block, u16 offset, u16 last, bool lo) { unsigned int i; u32 branch_offset; /* This function will remove the following MFLO/MFHI. It must be called * only if get_mfhi_mflo_reg() returned a non-zero value. */ for (i = offset; i < last; i++) { struct opcode *op = &block->opcode_list[i]; switch (op->i.op) { case OP_BEQ: case OP_BNE: case OP_BLEZ: case OP_BGTZ: case OP_REGIMM: case OP_META_BEQZ: case OP_META_BNEZ: /* TODO: handle backwards branches too */ if ((op->flags & LIGHTREC_LOCAL_BRANCH) && (s16)op->c.i.imm >= 0) { branch_offset = i + 1 + (s16)op->c.i.imm - !!(OPT_SWITCH_DELAY_SLOTS && (op->flags & LIGHTREC_NO_DS)); lightrec_replace_lo_hi(block, branch_offset, last, lo); lightrec_replace_lo_hi(block, i + 1, branch_offset, lo); } break; case OP_SPECIAL: if (lo && op->r.op == OP_SPECIAL_MFLO) { pr_debug("Removing MFLO opcode at offset 0x%x\n", i << 2); op->opcode = 0; return; } else if (!lo && op->r.op == OP_SPECIAL_MFHI) { pr_debug("Removing MFHI opcode at offset 0x%x\n", i << 2); op->opcode = 0; return; } /* fall-through */ default: break; } } } static int lightrec_flag_mults_divs(struct lightrec_state *state, struct block *block) { struct opcode *list; u8 reg_hi, reg_lo; unsigned int i; for (i = 0; i < block->nb_ops - 1; i++) { list = &block->opcode_list[i]; if (list->i.op != OP_SPECIAL) continue; switch (list->r.op) { case OP_SPECIAL_MULT: case OP_SPECIAL_MULTU: case OP_SPECIAL_DIV: case OP_SPECIAL_DIVU: break; default: continue; } /* Don't support opcodes in delay slots */ if ((i && has_delay_slot(block->opcode_list[i - 1].c)) || (list->flags & LIGHTREC_NO_DS)) continue; reg_lo = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, true, false); if (reg_lo == 0) { pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as" " not writing LO\n", i << 2); list->flags |= LIGHTREC_NO_LO; } reg_hi = get_mfhi_mflo_reg(block, i + 1, NULL, 0, false, false, false); if (reg_hi == 0) { pr_debug("Mark MULT(U)/DIV(U) opcode at offset 0x%x as" " not writing HI\n", i << 2); list->flags |= LIGHTREC_NO_HI; } if (!reg_lo && !reg_hi) { pr_debug("Both LO/HI unused in this block, they will " "probably be used in parent block - removing " "flags.\n"); list->flags &= ~(LIGHTREC_NO_LO | LIGHTREC_NO_HI); } if (reg_lo > 0 && reg_lo != REG_LO) { pr_debug("Found register %s to hold LO (rs = %u, rt = %u)\n", lightrec_reg_name(reg_lo), list->r.rs, list->r.rt); lightrec_replace_lo_hi(block, i + 1, block->nb_ops, true); list->r.rd = reg_lo; } else { list->r.rd = 0; } if (reg_hi > 0 && reg_hi != REG_HI) { pr_debug("Found register %s to hold HI (rs = %u, rt = %u)\n", lightrec_reg_name(reg_hi), list->r.rs, list->r.rt); lightrec_replace_lo_hi(block, i + 1, block->nb_ops, false); list->r.imm = reg_hi; } else { list->r.imm = 0; } } return 0; } static bool remove_div_sequence(struct block *block, unsigned int offset) { struct opcode *op; unsigned int i, found = 0; /* * Scan for the zero-checking sequence that GCC automatically introduced * after most DIV/DIVU opcodes. This sequence checks the value of the * divisor, and if zero, executes a BREAK opcode, causing the BIOS * handler to crash the PS1. * * For DIV opcodes, this sequence additionally checks that the signed * operation does not overflow. * * With the assumption that the games never crashed the PS1, we can * therefore assume that the games never divided by zero or overflowed, * and these sequences can be removed. */ for (i = offset; i < block->nb_ops; i++) { op = &block->opcode_list[i]; if (!found) { if (op->i.op == OP_SPECIAL && (op->r.op == OP_SPECIAL_DIV || op->r.op == OP_SPECIAL_DIVU)) break; if ((op->opcode & 0xfc1fffff) == 0x14000002) { /* BNE ???, zero, +8 */ found++; } else { offset++; } } else if (found == 1 && !op->opcode) { /* NOP */ found++; } else if (found == 2 && op->opcode == 0x0007000d) { /* BREAK 0x1c00 */ found++; } else if (found == 3 && op->opcode == 0x2401ffff) { /* LI at, -1 */ found++; } else if (found == 4 && (op->opcode & 0xfc1fffff) == 0x14010004) { /* BNE ???, at, +16 */ found++; } else if (found == 5 && op->opcode == 0x3c018000) { /* LUI at, 0x8000 */ found++; } else if (found == 6 && (op->opcode & 0x141fffff) == 0x14010002) { /* BNE ???, at, +16 */ found++; } else if (found == 7 && !op->opcode) { /* NOP */ found++; } else if (found == 8 && op->opcode == 0x0006000d) { /* BREAK 0x1800 */ found++; break; } else { break; } } if (found >= 3) { if (found != 9) found = 3; pr_debug("Removing DIV%s sequence at offset 0x%x\n", found == 9 ? "" : "U", offset << 2); for (i = 0; i < found; i++) block->opcode_list[offset + i].opcode = 0; return true; } return false; } static int lightrec_remove_div_by_zero_check_sequence(struct lightrec_state *state, struct block *block) { struct opcode *op; unsigned int i; for (i = 0; i < block->nb_ops; i++) { op = &block->opcode_list[i]; if (op->i.op == OP_SPECIAL && (op->r.op == OP_SPECIAL_DIVU || op->r.op == OP_SPECIAL_DIV) && remove_div_sequence(block, i + 1)) op->flags |= LIGHTREC_NO_DIV_CHECK; } return 0; } static const u32 memset_code[] = { 0x10a00006, // beqz a1, 2f 0x24a2ffff, // addiu v0,a1,-1 0x2403ffff, // li v1,-1 0xac800000, // 1: sw zero,0(a0) 0x2442ffff, // addiu v0,v0,-1 0x1443fffd, // bne v0,v1, 1b 0x24840004, // addiu a0,a0,4 0x03e00008, // 2: jr ra 0x00000000, // nop }; static int lightrec_replace_memset(struct lightrec_state *state, struct block *block) { unsigned int i; union code c; for (i = 0; i < block->nb_ops; i++) { c = block->opcode_list[i].c; if (c.opcode != memset_code[i]) return 0; if (i == ARRAY_SIZE(memset_code) - 1) { /* success! */ pr_debug("Block at PC 0x%x is a memset\n", block->pc); block->flags |= BLOCK_IS_MEMSET | BLOCK_NEVER_COMPILE; /* Return non-zero to skip other optimizers. */ return 1; } } return 0; } static int (*lightrec_optimizers[])(struct lightrec_state *state, struct block *) = { IF_OPT(OPT_REMOVE_DIV_BY_ZERO_SEQ, &lightrec_remove_div_by_zero_check_sequence), IF_OPT(OPT_REPLACE_MEMSET, &lightrec_replace_memset), IF_OPT(OPT_DETECT_IMPOSSIBLE_BRANCHES, &lightrec_detect_impossible_branches), IF_OPT(OPT_TRANSFORM_OPS, &lightrec_transform_ops), IF_OPT(OPT_LOCAL_BRANCHES, &lightrec_local_branches), IF_OPT(OPT_SWITCH_DELAY_SLOTS, &lightrec_switch_delay_slots), IF_OPT(OPT_FLAG_STORES, &lightrec_flag_stores), IF_OPT(OPT_FLAG_MULT_DIV, &lightrec_flag_mults_divs), IF_OPT(OPT_EARLY_UNLOAD, &lightrec_early_unload), }; int lightrec_optimize(struct lightrec_state *state, struct block *block) { unsigned int i; int ret; for (i = 0; i < ARRAY_SIZE(lightrec_optimizers); i++) { if (lightrec_optimizers[i]) { ret = (*lightrec_optimizers[i])(state, block); if (ret) return ret; } } return 0; }