// Panopticon - A libre program analysis library for machine code // Copyright (C) 2014-2018 The Panopticon Developers // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA use std::ops::{RangeFull,Range}; use std::iter::FromIterator; use std::cmp; use std::usize; use std::collections::{HashSet,HashMap}; use std::fmt::Debug; use std::borrow::Cow; use petgraph::Graph; use petgraph::graph::{NodeIndices, NodeIndex}; use petgraph::visit::{Walker, DfsPostOrder}; use smallvec::SmallVec; use ron_uuid::UUID; use { Architecture, Region, Guard, Names, Strings, Str, StrRef, Result, Statement, Value, Constant, Variable, Segments, BasicBlock, BasicBlockIndex, Mnemonic, RewriteControl, Area, }; use statements::{StatementsIter,Statements}; use mnemonic::{MnemonicIterator,MnemonicIndex}; use basic_block::BasicBlockIterator; /// Convertable into a range of statements. pub trait IntoStatementRange { /// Converts Self into a range of statements for function `func`. fn into_statement_range(self, func: &Function) -> Range; } impl IntoStatementRange for RangeFull { fn into_statement_range(self, func: &Function) -> Range { 0..func.statements.len() } } impl IntoStatementRange for Range { fn into_statement_range(self, _: &Function) -> Range { self } } impl IntoStatementRange for BasicBlockIndex { fn into_statement_range(self, func: &Function) -> Range { debug!("into rgn: {}",self.index()); let bb = &func.basic_blocks[self.index()]; bb.into_statement_range(func) } } impl<'a> IntoStatementRange for &'a BasicBlock { fn into_statement_range(self, _: &Function) -> Range { self.statements.clone() } } /// Convertable into a range of mnemonics. pub trait IntoMnemonicRange: Debug { /// Converts Self into a range of mnemonics for function `func`. fn into_mnemonic_range(self, func: &Function) -> Range; } impl IntoMnemonicRange for RangeFull { fn into_mnemonic_range(self, func: &Function) -> Range { 0..func.mnemonics.len() } } impl IntoMnemonicRange for Range { fn into_mnemonic_range(self, _: &Function) -> Range { self } } impl IntoMnemonicRange for Range { fn into_mnemonic_range(self, _: &Function) -> Range { self.start.index()..self.end.index() } } impl IntoMnemonicRange for BasicBlockIndex { fn into_mnemonic_range(self, func: &Function) -> Range { let bb = &func.basic_blocks[self.index()]; bb.into_mnemonic_range(func) } } impl<'a> IntoMnemonicRange for &'a BasicBlock { fn into_mnemonic_range(self, _: &Function) -> Range { let start = self.mnemonics.start.index(); let end = self.mnemonics.end.index(); start..end } } impl IntoMnemonicRange for MnemonicIndex { fn into_mnemonic_range(self, _: &Function) -> Range { self.index()..(self.index() + 1) } } /// Iterator over all indirect, unresolved jumps/branches. pub struct IndirectJumps<'a> { pub graph: &'a Graph, pub iterator: NodeIndices, } impl<'a> Iterator for IndirectJumps<'a> { type Item = Variable; fn next(&mut self) -> Option { while let Some(idx) = self.iterator.next() { match self.graph.node_weight(idx) { Some(&CfgNode::Value(Value::Variable(ref v))) => { return Some(v.clone()); } _ => {} } } None } } /// Node in the control flow graph. #[derive(Debug, Clone, PartialEq, Eq)] pub enum CfgNode { /// Basic block BasicBlock(BasicBlockIndex), /// Indirect jump to this value. Value(Value), } #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum CfgTarget { Value(Value), Resolved(u64, usize), } /// A single function in the binary. /// /// A function is a set of basic blocks, with one serving as an entry point. Basic blocks form a /// control flow graph. /// /// Each function also has a user-changeable `name` and a unchangeable `uuid`. #[derive(Debug, Clone)] pub struct Function { /// Canonical name of the function. Can be anything. pub name: Str, /// Table mapping SSA variable names to integers. pub names: Names, /// Table mapping strings to integers. pub strings: Strings, /// Table mapping segments identifiers to integers. pub segments: Segments, /// Fixed UUID of this functions. Never changes. uuid: UUID, /// Region this function is in. region: UUID, /// Sort by rev. post order statements: Statements, /// Sort by rev. post order basic_blocks: Vec, /// Sort by area.start mnemonics: Vec, cflow_graph: Graph, entry_point: BasicBlockIndex, } impl Function { /// Creates a new function by disassembling from `region` starting at `start`. pub fn new(init: A::Configuration, start: u64, region: &Region, uuid: UUID) -> Result { let mut mnemonics = Vec::new(); let mut by_source = HashMap::new(); let mut by_destination = HashMap::new(); let mut func = Function{ name: format!("fn_{:x}", start).into(), names: Names::default(), strings: Strings::default(), segments: Segments::default(), uuid: uuid, region: region.uuid().clone(), statements: Statements::default(), basic_blocks: Vec::new(), mnemonics: Vec::new(), cflow_graph: Graph::new(), entry_point: BasicBlockIndex::new(0), }; disassemble::(init,vec![start],region, &mut func.names, &mut func.strings, &mut func.segments, &mut mnemonics, &mut by_source, &mut by_destination)?; assemble_function(&mut func, start, mnemonics, by_source, by_destination)?; Ok(func) } /// Disassemble a function with known control flow graph. pub fn known_cflow_graph(init: A::Configuration, mut names: Names, cfg: Vec<(Range, SmallVec<[(Value, Guard); 2]>)>, entry: u64, region: &Region, uuid: UUID, name: String) -> Result where A: Architecture { let mut strings = Strings::default(); let mut segs = Segments::default(); let mut basic_blocks = Vec::with_capacity(cfg.len()); let mut cflow_graph = Graph::::new(); let mut start_to_vx = HashMap::::new(); let mut entry_vx = None; for &(ref rgn, _) in cfg.iter() { let mut bb = BasicBlock{ uuid: UUID::now(), mnemonics: MnemonicIndex::new(0)..MnemonicIndex::new(0), node: NodeIndex::new(0), area: rgn.clone().into(), statements: 0..0, }; let nx = cflow_graph.add_node( CfgNode::BasicBlock(BasicBlockIndex::new(basic_blocks.len()))); bb.node = nx; basic_blocks.push(bb); start_to_vx.insert(rgn.start, nx); if rgn.start == entry { entry_vx = Some(nx); } } for &(ref rgn, ref jmps) in cfg.iter() { let from = start_to_vx[&rgn.start]; for &(ref tgt, ref g) in jmps.iter() { match tgt { Value::Constant(Constant{ value,.. }) => { match start_to_vx.get(&value) { Some(&to) => { cflow_graph.add_edge(from, to, g.clone()); } None => { return Err("unknown branch target".into()); } } } val => { let to = cflow_graph.add_node(CfgNode::Value(val.clone())); cflow_graph.add_edge(from, to, g.clone()); } } } } if entry_vx.is_none() { return Err("entrypoint not found".into()); } let mut postorder = DfsPostOrder::new(&cflow_graph,entry_vx.unwrap()) .iter(&cflow_graph).collect::>(); let mut postorder_rev = vec![0; basic_blocks.len()]; let mut po_idx = 0; let mut mnemonics = Vec::default(); let mut statements = Vec::default(); postorder.reverse(); for &n in postorder.iter() { if let Some(&CfgNode::BasicBlock(bb_idx)) = cflow_graph.node_weight(n) { postorder_rev[bb_idx.index()] = po_idx; po_idx += 1; } } basic_blocks.sort_by_key(|x| { if let Some(&CfgNode::BasicBlock(bb_idx)) = cflow_graph.node_weight(x.node) { postorder_rev[bb_idx.index()] } else { unreachable!() } }); for (idx,bb) in basic_blocks.iter().enumerate() { *cflow_graph.node_weight_mut(bb.node).unwrap() = CfgNode::BasicBlock(BasicBlockIndex::new(idx)); } for bb in basic_blocks.iter_mut() { let mut pos = bb.area.start; let mut matches = Vec::default(); let mne_start = MnemonicIndex::new(mnemonics.len()); let stmt_cnt = statements.len(); while pos < bb.area.end { A::decode(region, pos, &init, &mut names, &mut segs, &mut strings, &mut matches)?; for mut m in matches.drain(..) { pos = cmp::max(pos, m.area.end); mnemonics.push(Mnemonic{ area: m.area.into(), opcode: strings.insert(&m.opcode), operands: m.operands, statement_count: m.instructions.len(), }); statements.append(&mut m.instructions); } } bb.mnemonics = mne_start..MnemonicIndex::new(mnemonics.len()); bb.statements = stmt_cnt..statements.len(); } let entry = match cflow_graph.node_weight(entry_vx.unwrap()) { Some(CfgNode::BasicBlock(bb)) => bb.clone(), _ => { return Err("entry point is not a basic block".into()); } }; let f = Function{ name: name.into(), uuid: uuid, region: region.uuid().clone(), basic_blocks: basic_blocks, mnemonics: mnemonics, statements: Statements::Vector(statements), strings: strings, names: names, segments: segs, cflow_graph: cflow_graph, entry_point: entry, }; Ok(f) } /// Continues disassembling of `region`. The function looks for resolved, indirect control flow /// edges. pub fn extend(&mut self, init: A::Configuration, region: &Region) -> Result<()> { use petgraph::visit::EdgeRef; let mut by_source = HashMap::new(); let mut by_destination = HashMap::new(); let mut mnemonics = self.basic_blocks.iter().flat_map(|bb| { let mut stmt_idx = bb.statements.start; (bb.mnemonics.start.index()..bb.mnemonics.end.index()).map(|mne_idx| { let mne = &self.mnemonics[mne_idx]; let stmt_rgn = stmt_idx..(stmt_idx + mne.statement_count); let stmts = self.statements.iter_range(stmt_rgn).map(|x| { match x { Cow::Borrowed(s) => s.clone(), Cow::Owned(s) => s, } }).collect::>(); if mne_idx != bb.mnemonics.end.index() - 1 { by_source.entry((mne.area.start, mne.area.offset_start)) .or_insert_with(|| Vec::new()) .push((CfgTarget::Resolved(mne.area.end, mne.area.offset_end), Guard::True)); by_destination.entry((mne.area.end, mne.area.offset_end)) .or_insert_with(|| Vec::new()) .push((CfgTarget::Resolved(mne.area.start, mne.area.offset_start), Guard::True)); } stmt_idx += self.mnemonics[mne_idx].statement_count; (mne.clone(),stmts) }).collect::>() }).collect::>(); let mut starts = Vec::new(); for e in self.cflow_graph.edge_references() { let src = match self.cflow_graph.node_weight(e.source()) { Some(&CfgNode::BasicBlock(bb_idx)) => { let bb = &self.basic_blocks[bb_idx.index()]; let mne = &self.mnemonics[bb.mnemonics.end.index() - 1]; (mne.area.start, mne.area.offset_start) } _ => unreachable!() }; let dst = match self.cflow_graph.node_weight(e.target()) { Some(&CfgNode::BasicBlock(bb_idx)) => { let bb = &self.basic_blocks[bb_idx.index()]; let mne = &self.mnemonics[bb.mnemonics.start.index()]; CfgTarget::Resolved(mne.area.start, mne.area.offset_start) } Some(&CfgNode::Value(ref val)) => { CfgTarget::Value(val.clone()) } None => unreachable!() }; by_source.entry(src).or_insert_with(|| Vec::new()) .push((dst.clone(),e.weight().clone())); match dst { CfgTarget::Value(Value::Constant(Constant{ value,.. })) => { by_destination.entry((value, 0)).or_insert_with(|| Vec::new()) .push((CfgTarget::Resolved(src.0, src.1),e.weight().clone())); starts.push(value); } CfgTarget::Resolved(real, virt) => { by_destination.entry((real, virt)).or_insert_with(|| Vec::new()) .push((CfgTarget::Resolved(src.0, src.1),e.weight().clone())); } _ => { /* skip */ } } } mnemonics.sort_by_key(|&(ref mne, _)| (mne.area.start, mne.area.offset_start)); let entry = self.entry_address(); disassemble::(init, starts, region, &mut self.names, &mut self.strings, &mut self.segments, &mut mnemonics, &mut by_source, &mut by_destination)?; assemble_function(self,entry,mnemonics,by_source,by_destination) } /// Function entry point. pub fn entry_point(&self) -> BasicBlockIndex { self.entry_point } /// Iterator over all mnemonics in basic block `idx`. pub fn mnemonics<'a,Idx: IntoMnemonicRange + Sized>(&'a self, idx: Idx) -> MnemonicIterator<'a> { let idx = idx.into_mnemonic_range(self); MnemonicIterator::new(self,idx.start,idx.end - 1) } /// Iterator over all basic blocks in reverse post order. pub fn basic_blocks<'a>(&'a self) -> BasicBlockIterator<'a> { BasicBlockIterator::new(self,0,self.basic_blocks.len()) } /// The Functions control flow graph. pub fn cflow_graph<'a>(&'a self) -> &'a Graph { &self.cflow_graph } /// Returns a reference to the basic block `idx`. pub fn basic_block<'a>(&'a self, idx: BasicBlockIndex) -> &'a BasicBlock { &self.basic_blocks[idx.index()] } /// Returns a reference to the mnemonic `idx`. pub fn mnemonic<'a>(&'a self, idx: MnemonicIndex) -> &'a Mnemonic { &self.mnemonics[idx.index()] } /// The functions uuid. pub fn uuid<'a>(&'a self) -> &'a UUID { &self.uuid } /// The functions region. pub fn region<'a>(&'a self) -> &'a UUID { &self.region } /// Iterator over all IL statements in `rgn`. pub fn statements<'a, Idx: IntoStatementRange + Sized>(&'a self, rgn: Idx) -> StatementsIter<'a> { let rgn = rgn.into_statement_range(self); debug!("read statements {:?}",rgn); self.statements.iter_range(rgn) } /// Lowest address occupied by a basic block. Not neccecarly the entry point. pub fn first_address(&self) -> u64 { self.basic_blocks[0].area().start } /// First address of the functions entry basic block. pub fn entry_address(&self) -> u64 { let e = self.entry_point().index(); self.basic_blocks[e].area().start } /// Iterator over all indirect, unresolved jumps. pub fn indirect_jumps<'a>(&'a self) -> IndirectJumps<'a> { IndirectJumps{ graph: &self.cflow_graph, iterator: self.cflow_graph.node_indices() } } /// Mutable reference to the control flow graph. pub fn cflow_graph_mut<'a>(&'a mut self) -> &'a mut Graph { &mut self.cflow_graph } /// Calls `func` on each indirect jump and call instruction and rewrites it to jump/call the /// returned concrete addresses. pub fn resolve_indirect_jumps SmallVec<[Constant; 1]>>(&mut self, mut func: F) -> bool { use petgraph::Direction; use petgraph::visit::EdgeRef; let mut retval = false; 'outer: loop { let nodes = self.cflow_graph.node_indices().collect::>(); for n in nodes { let vals = match self.cflow_graph.node_weight(n) { Some(&CfgNode::Value(Value::Variable(ref var))) => { func(var) } _ => SmallVec::default(), }; if !vals.is_empty() { let nodes = vals.into_iter().map(|c| { self.cflow_graph.add_node(CfgNode::Value(Value::Constant(c))) }).collect::>(); let edges = self.cflow_graph.edges_directed(n, Direction::Incoming) .map(|e| (e.source(), e.weight().clone())).collect::>(); for (w, g) in edges { for &m in nodes.iter() { self.cflow_graph.add_edge(w, m, g.clone()); } } self.cflow_graph.remove_node(n); retval = true; continue 'outer; } } self.reindex_basic_blocks().unwrap(); return retval; } } /// Iterates thru all statements in `basic_block` calling `func` one each. The function is /// allowed to modify the IL. pub fn rewrite_basic_block Result + Sized>(&mut self, basic_block: BasicBlockIndex, mut func: F) -> Result<()> { debug!("start func rewrite of {:?}",basic_block); let mut bb_offset = 0; let mut stmt_offset = 0isize; { let bb = &self.basic_blocks[basic_block.index()]; for mne_idx in bb.mnemonics.start.index()..bb.mnemonics.end.index() { let mne = &mut self.mnemonics[mne_idx]; debug!("mne {} at {:#x}",self.strings.value(mne.opcode)?,mne.area.start); if mne.statement_count == 0 { debug!("skip this mnemonic (empty)"); continue; } let stmt_idx = (bb.statements.start as isize + stmt_offset) as usize; debug!("from {:?}",stmt_idx..(stmt_idx + mne.statement_count)); let new_rgn = self.statements.rewrite(stmt_idx..(stmt_idx + mne.statement_count),&mut self.names,&mut self.strings,&mut self.segments,&mut func)?; let new_stmt_num = new_rgn.end - new_rgn.start; let offset = new_stmt_num as isize - mne.statement_count as isize; debug!("...to {:?}",new_rgn); bb_offset += offset; mne.statement_count = new_stmt_num; stmt_offset += new_stmt_num as isize; } } if bb_offset != 0 { for bb_idx in (basic_block.index() + 1)..self.basic_blocks.len() { let bb = &mut self.basic_blocks[bb_idx]; bb.statements.start = (bb.statements.start as isize + bb_offset) as usize; bb.statements.end = (bb.statements.end as isize + bb_offset) as usize; } let bb_stmt = &mut self.basic_blocks[basic_block.index()].statements; bb_stmt.end = (bb_stmt.end as isize + bb_offset) as usize; } Ok(()) } /// Inserts a new mnemonic at position `pos` inside `basic_block` with `opcode` and semantics /// described by `stmts`. pub fn insert_mnemonic(&mut self, basic_block: BasicBlockIndex, pos: usize, opcode: StrRef, args: SmallVec<[Value; 3]>, stmts: Vec) -> Result<()> { debug!("insert mne {} in {:?} as {}",self.strings.value(opcode)?,basic_block,pos); let mne_idx = self.basic_blocks[basic_block.index()].mnemonics.start.index() + pos; let stmt_rgn = { let bb = &self.basic_blocks[basic_block.index()]; if bb.mnemonics.end == bb.mnemonics.start { return Err("Internal error: empty basic block".into()); } if mne_idx > bb.mnemonics.end.index() { return Err(format!("Internal error: mnemonic position out of range: {} > {:?}",mne_idx,bb.mnemonics).into()); } let stmts_pos = bb.statements.start + self.mnemonics[bb.mnemonics.start.index()..mne_idx].iter().map(|x| x.statement_count).sum::(); debug!("prepend mne at {} in {:?} {}: {:?}",stmts_pos,basic_block,self.strings.value(opcode)?,stmts); self.statements.insert(stmts_pos,stmts)? }; self.shift_areas(basic_block, pos, 1); self.shift_mnemonics(MnemonicIndex::new(mne_idx),1); self.shift_statements(BasicBlockIndex::new(basic_block.index() + 1),(stmt_rgn.end - stmt_rgn.start) as isize); let bb = &mut self.basic_blocks[basic_block.index()]; let addr = if pos == 0 { (bb.area.start, 0) } else { let a = &self.mnemonics[mne_idx - 1].area; (a.end, a.offset_end) }; let mne = Mnemonic{ opcode: opcode, area: Area{ start: addr.0, end: addr.0, offset_start: addr.1, offset_end: addr.1 + 1, }, operands: args, statement_count: stmt_rgn.end - stmt_rgn.start, }; self.mnemonics.insert(mne_idx,mne); bb.mnemonics.end = MnemonicIndex::new(bb.mnemonics.end.index() + 1); bb.statements.end += stmt_rgn.end - stmt_rgn.start; Ok(()) } /// Removes `mnemonic`. pub fn remove_mnemonic(&mut self, mnemonic: MnemonicIndex) -> Result<()> { let bb: Result = self.basic_blocks.iter().position(|bb| { bb.mnemonics.start <= mnemonic && bb.mnemonics.end > mnemonic }).ok_or("no such mnemonic".into()); let bb = bb?; let move_by = { let bb = &self.basic_blocks[bb]; let stmt_cnt = self.mnemonics[mnemonic.index()].statement_count; let stmt_off: usize = self.mnemonics[bb.mnemonics.start.index()..mnemonic.index()].iter().map(|mne| mne.statement_count).sum(); let stmt_rgn = (bb.statements.start + stmt_off)..(bb.statements.start + stmt_off + stmt_cnt); self.statements.remove(stmt_rgn.clone()); self.mnemonics.remove(mnemonic.index()); stmt_rgn.end - stmt_rgn.start }; self.shift_mnemonics(mnemonic,-1); self.shift_statements(BasicBlockIndex::new(bb + 1),-1 * move_by as isize); let bb = &mut self.basic_blocks[bb]; bb.mnemonics.end = MnemonicIndex::new(bb.mnemonics.end.index() - 1); bb.statements.end -= move_by; Ok(()) } /// Removes the first mnemonic of `basic_block`. Fails of `basic_block` has no mnemonics. pub fn drop_first_mnemonic(&mut self, basic_block: BasicBlockIndex) -> Result<()> { let mne = self.basic_blocks[basic_block.index()].mnemonics.start; self.remove_mnemonic(mne) } /// Deletes all basic blocks for which `f` returns `false`. pub fn retain_basic_blocks bool)>(&mut self, mut f: F) -> Result<()> { use vec_map::VecMap; // map from original block indices to start addresses let addr_to_old_bb = HashMap::<(u64, usize), BasicBlockIndex>::from_iter( self.basic_blocks().map(|x| { let a = x.1.area.clone(); ((a.start, a.offset_start), x.0) })); // blocks to delete let mut to_del = self.basic_blocks() .filter(|x| self.entry_point != x.0) .filter(|x| !f(&*self, x.0)).map(|x| x.0.index()).collect::>(); to_del.sort(); // delete mnemonics for &bb in to_del.iter() { loop { let mnes = self.basic_blocks[bb].mnemonics.clone(); if mnes.start == mnes.end { break; } else { self.drop_first_mnemonic(BasicBlockIndex::new(bb))?; } } } // delete basic blocks for bb in to_del.into_iter().rev() { self.basic_blocks.remove(bb); } let old_bb_to_new_bb = VecMap::::from_iter( self.basic_blocks().map(|x| { let a = x.1.area.clone(); (addr_to_old_bb[&(a.start, a.offset_start)].index(), x.0) })); // delete cfg nodes self.cflow_graph.retain_nodes(|mut g,vx| { match &mut g[vx] { &mut CfgNode::BasicBlock(ref mut bb) => { match old_bb_to_new_bb.get(bb.index()) { Some(new_bb) => { *bb = *new_bb; true } None => false, } } &mut CfgNode::Value(_) => true } }); for vx in self.cflow_graph.node_indices() { match self.cflow_graph.node_weight(vx) { Some(&CfgNode::BasicBlock(bb)) => { self.basic_blocks[bb.index()].node = vx; } _ => {} } } // update entry point self.entry_point = old_bb_to_new_bb[self.entry_point.index()]; Ok(()) } /// Create a dummy function with no basic blocks and an invalid entry point. #[cfg(test)] pub fn facade() -> Function { use Name; Function{ name: "(none)".into(), names: Names::facade(&Name::new("name-facade".into(), 0)), strings: Strings::facade(&"string-facade".into()), segments: Segments::facade(&Name::new("segment-facade".into(), 0)), uuid: UUID::now(), region: UUID::now(), statements: Statements::default(), basic_blocks: Vec::new(), mnemonics: Vec::new(), cflow_graph: Graph::new(), entry_point: BasicBlockIndex::new(0), } } /// Assembles a new function from a list of mnemonics and control flow edges. Used for /// deserializing functions. pub fn assemble(name: Str, names: Names, segments: Segments, region: UUID, strings: Strings, uuid: UUID, start: u64, mnemonics: Vec<(Mnemonic,Vec)>, by_source: HashMap<(u64, usize), Vec<(CfgTarget, Guard)>>, by_destination: HashMap<(u64, usize), Vec<(CfgTarget, Guard)>>) -> Result { let mut func = Function{ name: name, names: names, strings: strings, segments: segments, uuid: uuid, region: region, statements: Statements::default(), basic_blocks: Vec::new(), mnemonics: Vec::new(), cflow_graph: Graph::new(), entry_point: BasicBlockIndex::new(0), }; assemble_function(&mut func, start, mnemonics, by_source, by_destination)?; Ok(func) } fn shift_mnemonics(&mut self, start: MnemonicIndex, change: isize) { if start.index() >= self.mnemonics.len() { return; } for bb in self.basic_blocks.iter_mut() { if bb.mnemonics.start.index() > start.index() { bb.mnemonics.start = MnemonicIndex::new((bb.mnemonics.start.index() as isize + change) as usize); bb.mnemonics.end = MnemonicIndex::new((bb.mnemonics.end.index() as isize + change) as usize); } } } fn shift_statements(&mut self, start: BasicBlockIndex, change: isize) { if start.index() >= self.basic_blocks.len() { return; } let start_index = self.basic_blocks[start.index()].statements.start; for bb_idx in start.index()..self.basic_blocks.len() { let bb = &mut self.basic_blocks[bb_idx]; let rgn = bb.statements.clone(); let after_modification = rgn.start >= start_index; let no_underflow = change >= 0 || rgn.start as isize >= -change; if after_modification && no_underflow { bb.statements.start = (bb.statements.start as isize + change) as usize; bb.statements.end = (bb.statements.end as isize + change) as usize; } } } fn shift_areas(&mut self, mut bb: BasicBlockIndex, mnemonic: usize, change: usize) { let mut idx = self.basic_blocks[bb.index()].mnemonics.start.index() + mnemonic; let start_bb = bb; let start = if self.mnemonics.len() > idx { self.mnemonics[idx].area.start } else { self.mnemonics.last().unwrap().area.end }; // adjust mnemonics 'outer: loop { let idx_end = self.basic_blocks[bb.index()].mnemonics.end.index(); for i in idx..idx_end { let mne = &mut self.mnemonics[i]; if mne.area.start != start { break 'outer; } else { mne.area.offset_start += change; if mne.area.start == mne.area.end { mne.area.offset_end += change; } } } if self.basic_blocks[bb.index()].area.end == start { let area_end = self.basic_blocks[bb.index()].area.end; let area_offset_end = self.basic_blocks[bb.index()].area.offset_end; for (next_idx, next_bb) in self.basic_blocks.iter().enumerate() { if next_bb.area.start == area_end && next_bb.area.offset_start == area_offset_end { bb = BasicBlockIndex::new(next_idx); idx = self.basic_blocks[bb.index()].mnemonics.start.index(); continue 'outer; } } } break; } // adjust basic blocks for (idx, bb) in self.basic_blocks.iter_mut().enumerate() { if !(idx == start_bb.index() && mnemonic == 0) { bb.area.start = self.mnemonics[bb.mnemonics.start.index()].area.start; bb.area.offset_start = self.mnemonics[bb.mnemonics.start.index()].area.offset_start; } if bb.mnemonics.start.index() < bb.mnemonics.end.index() { bb.area.end = self.mnemonics[bb.mnemonics.end.index() - 1].area.end; bb.area.offset_end = self.mnemonics[bb.mnemonics.end.index() - 1].area.offset_end; } } } fn reindex_basic_blocks(&mut self) -> Result<()> { for n in self.cflow_graph.node_indices() { match self.cflow_graph.node_weight(n).unwrap() { &CfgNode::BasicBlock(bb_idx) => { let bb: Result<_> = self.basic_blocks.get_mut(bb_idx.index()).ok_or( "Internal error: re-indexing of basic blocks failed, missing block".into()); let bb = bb?; bb.node = n; } &CfgNode::Value(_) => { /* skip */ } } } Ok(()) } /// Compresses the internal representation of the function. Call this if you're running low on /// memory. pub fn pack(&mut self) -> Result<()> { self.statements.pack(&mut self.basic_blocks,&mut self.mnemonics) } /// Uncompresses the function into a faster to process representation. pub fn unpack(&mut self) -> Result<()> { self.statements.unpack(&mut self.basic_blocks,&mut self.mnemonics) } } fn disassemble(init: A::Configuration, starts: Vec, region: &Region, names: &mut Names, strings: &mut Strings, segments: &mut Segments, mnemonics: &mut Vec<(Mnemonic,Vec)>, by_source: &mut HashMap<(u64, usize),Vec<(CfgTarget, Guard)>>, by_destination: &mut HashMap<(u64, usize),Vec<(CfgTarget, Guard)>>) -> Result<()> { let mut todo = HashSet::::from_iter(starts.into_iter()); while let Some(addr) = todo.iter().next().cloned() { assert!(todo.remove(&addr)); match mnemonics.binary_search_by_key(&addr,|&(ref x,_)| x.area.start) { // Already disassembled here Ok(pos) => { let mne = &mnemonics[pos].0; if mne.area.start != addr { error!("{:#x}: Jump inside mnemonic {} at {:#x}",addr,strings.value(mne.opcode)?,mne.area.start); } } // New mnemonic Err(pos) => { let mut matches = Vec::with_capacity(2); match A::decode(region,addr,&init,names,segments,strings,&mut matches) { Ok(()) => { // Matched mnemonics if matches.is_empty() { error!("{:#x}: Unrecognized instruction",addr); } else { for m in matches { trace!("{:x}: {}",m.area.start,m.opcode); let this_mne = Mnemonic{ area: m.area.into(), opcode: strings.insert(&m.opcode), operands: m.operands, statement_count: 0, }; mnemonics.insert(pos,(this_mne,m.instructions)); // New control transfers for (origin, tgt, gu) in m.jumps { trace!("jump to {:?}", tgt); match tgt { Value::Constant(Constant{ value,.. }) => { by_source.entry((origin, 0)) .or_insert(Vec::new()) .push((CfgTarget::Resolved(value, 0), gu.clone())); by_destination.entry((value, 0)) .or_insert(Vec::new()) .push((CfgTarget::Resolved(origin, 0), gu)); todo.insert(value); } Value::Variable(Variable{ name, bits }) => { by_source.entry((origin, 0)) .or_insert(Vec::new()) .push((CfgTarget::Value(Value::var(name,bits)?), gu)); } Value::Undefined => { by_source.entry((origin, 0)) .or_insert(Vec::new()) .push((CfgTarget::Value(Value::undef()), gu)); } } } } } } Err(e) => { error!("{:#x} Failed to disassemble: {}",addr, e); } } } } } Ok(()) } fn assemble_function(function: &mut Function, entry: u64, mut mnemonics: Vec<(Mnemonic,Vec)>, by_source: HashMap<(u64, usize),Vec<(CfgTarget,Guard)>>, by_destination: HashMap<(u64, usize),Vec<(CfgTarget,Guard)>>) -> Result<()> { let mut basic_blocks = Vec::::new(); let mut idx = 0; // Partition mnemonics into basic blocks while idx < mnemonics.len() { if mnemonics.len() - idx > 1 { let next_bb = mnemonics .as_slice()[idx..].windows(2) .position(|x| is_basic_block_boundary(&x[0].0,&x[1].0,entry,&by_source,&by_destination)) .map(|x| x + 1 + idx) .unwrap_or(mnemonics.len()); let bb = BasicBlock{ uuid: UUID::now(), mnemonics: MnemonicIndex::new(idx)..MnemonicIndex::new(next_bb), area: Area{ start: mnemonics[idx].0.area.start, offset_start: mnemonics[idx].0.area.offset_start, end: mnemonics[next_bb - 1].0.area.end, offset_end: mnemonics[next_bb - 1].0.area.offset_end, }, node: NodeIndex::new(0), statements: 0..0, }; basic_blocks.push(bb); idx = next_bb; } else { let bb = BasicBlock{ uuid: UUID::now(), mnemonics: MnemonicIndex::new(idx)..MnemonicIndex::new(mnemonics.len()), area: mnemonics[idx].0.area.clone(), node: NodeIndex::new(0), statements: 0..0, }; basic_blocks.push(bb); break; } } // Build control flow graph let mut cfg = Graph::::with_capacity(basic_blocks.len(),3*basic_blocks.len() / 2); for (i,bb) in basic_blocks.iter_mut().enumerate() { bb.node = cfg.add_node(CfgNode::BasicBlock(BasicBlockIndex::new(i))); } for bb in basic_blocks.iter() { let last_mne = &mnemonics[bb.mnemonics.end.index() - 1].0; if let Some(ct) = by_source.get(&(last_mne.area.start, last_mne.area.offset_start)) { for &(ref val,ref guard) in ct.iter() { match val { &CfgTarget::Value(Value::Constant(_)) | &CfgTarget::Resolved(_, _) => { let (real, virt) = match val { &CfgTarget::Value(Value::Constant(Constant{ value,.. })) => (value, 0), &CfgTarget::Resolved(r, v) => (r, v), _ => unreachable!(), }; if let Ok(pos) = basic_blocks.binary_search_by_key(&(real, virt),|bb| (bb.area.start, bb.area.offset_start)) { cfg.update_edge(bb.node,basic_blocks[pos].node,guard.clone()); } else if virt == 0 { let n = cfg.add_node(CfgNode::Value(Value::val(real, 64)?)); cfg.update_edge(bb.node,n,guard.clone()); } else { error!("Internal error: drop jump to {}.{}", real, virt); } } &CfgTarget::Value(ref val) => { let n = cfg.add_node(CfgNode::Value(val.clone())); cfg.update_edge(bb.node,n,guard.clone()); } } } } } let entry_idx = basic_blocks .iter().position(|x| x.area.start == entry) .ok_or("Internal error: no basic block at the entry point")?; // Generate bitcode let mut postorder = DfsPostOrder::new(&cfg,basic_blocks[entry_idx].node).iter(&cfg).collect::>(); let mut bitcode = Statements::default(); let mut postorder_rev = vec![0; basic_blocks.len()]; let mut po_idx = 0; postorder.reverse(); for &n in postorder.iter() { if let Some(&CfgNode::BasicBlock(bb_idx)) = cfg.node_weight(n) { let bb = &mut basic_blocks[bb_idx.index()]; let sl = bb.mnemonics.start.index()..bb.mnemonics.end.index(); bb.statements = usize::MAX..usize::MIN; for &mut (ref mut mne,ref mut instr) in mnemonics.as_mut_slice()[sl].iter_mut() { let rgn = bitcode.append(instr.drain(..))?; mne.statement_count = rgn.end - rgn.start; bb.statements.start = cmp::min(bb.statements.start,rgn.start); bb.statements.end = cmp::max(bb.statements.end,rgn.end); } postorder_rev[bb_idx.index()] = po_idx; po_idx += 1; } } basic_blocks.sort_by_key(|x| { if let Some(&CfgNode::BasicBlock(bb_idx)) = cfg.node_weight(x.node) { postorder_rev[bb_idx.index()] } else { unreachable!() } }); for (idx,bb) in basic_blocks.iter().enumerate() { *cfg.node_weight_mut(bb.node).unwrap() = CfgNode::BasicBlock(BasicBlockIndex::new(idx)); } function.statements = bitcode; function.basic_blocks = basic_blocks; function.mnemonics = mnemonics.into_iter().map(|(mne,_)| mne).collect(); function.cflow_graph = cfg; function.entry_point = BasicBlockIndex::new(0); Ok(()) } fn is_basic_block_boundary(a: &Mnemonic, b: &Mnemonic, entry: u64, by_source: &HashMap<(u64, usize),Vec<(CfgTarget,Guard)>>, by_destination: &HashMap<(u64, usize), Vec<(CfgTarget,Guard)>>) -> bool { // if next mnemonics aren't adjacent let mut new_bb = a.area.end != b.area.start || a.area.offset_end != b.area.offset_start; // or any following jumps aren't to adjacent mnemonics new_bb |= by_source .get(&(a.area.start, a.area.offset_start)) .unwrap_or(&Vec::new()) .iter() .any(|&(ref opt_dest, _)| { match opt_dest { &CfgTarget::Resolved(r, v) => b.area.start != r || b.area.offset_start != v, &CfgTarget::Value(Value::Constant(Constant{ value, .. })) => { value != b.area.start || b.area.offset_start != 0 } _ => false, } }); // or any jumps pointing to the next that aren't from here new_bb |= by_destination .get(&(b.area.start, b.area.offset_start)) .unwrap_or(&Vec::new()) .iter() .any(|&(ref opt_src, _)| { match opt_src { &CfgTarget::Resolved(r, v) => a.area.start != r || a.area.offset_start != v, &CfgTarget::Value(Value::Constant(Constant{ value, .. })) => { value != a.area.start || a.area.offset_start != 0 } _ => false, } }); // or the entry point does not point here new_bb |= b.area.start == entry && b.area.offset_start == 0; new_bb } #[cfg(test)] mod tests { use super::*; use {Region, TestArch, Name, Operation}; use simple_logger; #[test] fn single_instruction() { let _ = simple_logger::init(); let reg = Region::from_buf("", 16, b"Ma0R".to_vec(), 0, None); let func = Function::new::((), 0, ®, UUID::now()).unwrap(); assert_eq!(func.cflow_graph().node_count(), 1); assert_eq!(func.cflow_graph().edge_count(), 0); let node = func.cflow_graph().node_indices().next().unwrap(); assert!(if let Some(&CfgNode::BasicBlock(_)) = func.cflow_graph().node_weight(node) { true } else { false }); assert_eq!(func.entry_address(), 0); assert_eq!(func.basic_blocks().len(), 1); let (bb_idx,bb) = func.basic_blocks().next().unwrap(); assert_eq!(bb.area(), &(0..4).into()); assert_eq!(func.mnemonics(bb_idx).len(), 2); let (_,mne) = func.mnemonics(bb_idx).next().unwrap(); assert_eq!(func.strings.value(mne.opcode).unwrap(), "mov"); let (_,mne) = func.mnemonics(bb_idx).skip(1).next().unwrap(); assert_eq!(func.strings.value(mne.opcode).unwrap(), "ret"); } #[test] fn branch() { let _ = simple_logger::init(); let reg = Region::from_buf("", 16, b"Bf4RR".to_vec(), 0, None); let func = Function::new::((), 0, ®, UUID::now()).unwrap(); assert_eq!(func.cflow_graph.node_count(), 3); assert_eq!(func.cflow_graph.edge_count(), 2); let mut bb0_vx = None; let mut bb1_vx = None; let mut bb2_vx = None; for n in func.cflow_graph.node_indices() { match func.cflow_graph().node_weight(n) { Some(&CfgNode::BasicBlock(bb_idx)) => { let bb = func.basic_block(bb_idx); let mnes = func.mnemonics(bb_idx).collect::>(); assert_eq!(bb.node, n); if bb.area.start == 0 { assert_eq!(mnes.len(), 1); assert_eq!(func.strings.value(mnes[0].1.opcode).unwrap(), "br"); assert_eq!(mnes[0].1.area, (0..3).into()); assert_eq!(bb.area, (0..3).into()); bb0_vx = Some(n); } else if bb.area.start == 3 { assert_eq!(mnes.len(), 1); assert_eq!(func.strings.value(mnes[0].1.opcode).unwrap(), "ret"); assert_eq!(mnes[0].1.area, (3..4).into()); assert_eq!(bb.area, (3..4).into()); bb1_vx = Some(n); } else if bb.area.start == 4 { assert_eq!(mnes.len(), 1); assert_eq!(func.strings.value(mnes[0].1.opcode).unwrap(), "ret"); assert_eq!(mnes[0].1.area, (4..5).into()); assert_eq!(bb.area, (4..5).into()); bb2_vx = Some(n); } else { unreachable!(); } } _ => { unreachable!(); } } } assert!(bb0_vx.is_some() && bb1_vx.is_some() && bb2_vx.is_some()); debug!("{:?}, {:?}, {:?}",bb0_vx, bb1_vx, bb2_vx); let entry_bb = func.entry_point(); assert_eq!(func.basic_block(entry_bb).node, bb0_vx.unwrap()); assert!(func.cflow_graph().find_edge(bb0_vx.unwrap(), bb1_vx.unwrap()).is_some()); assert!(func.cflow_graph().find_edge(bb0_vx.unwrap(), bb2_vx.unwrap()).is_some()); } #[test] fn single_loop() { let _ = simple_logger::init(); let reg = Region::from_buf("", 16, b"AaaaAxxxJ0".to_vec(), 0, None); let func = Function::new::((), 0, ®, UUID::now()).unwrap(); assert_eq!(func.cflow_graph.node_count(), 1); assert_eq!(func.cflow_graph.edge_count(), 1); let vx = func.cflow_graph.node_indices().next().unwrap(); if let Some(&CfgNode::BasicBlock(bb_idx)) = func.cflow_graph().node_weight(vx) { let bb = func.basic_block(bb_idx); let mnes = func.mnemonics(bb_idx).collect::>(); if bb.area.start == 0 { assert_eq!(mnes.len(), 3); assert_eq!(func.strings.value(mnes[0].1.opcode).unwrap(), "add"); assert_eq!(mnes[0].1.area, (0..4).into()); assert_eq!(func.strings.value(mnes[1].1.opcode).unwrap(), "add"); assert_eq!(mnes[1].1.area, (4..8).into()); assert_eq!(func.strings.value(mnes[2].1.opcode).unwrap(), "jmp"); assert_eq!(mnes[2].1.area, (8..10).into()); assert_eq!(bb.area, (0..10).into()); } else { unreachable!(); } } let entry_idx = func.entry_point(); assert_eq!(func.basic_block(entry_idx).node, vx); assert!(func.cflow_graph.find_edge(vx, vx).is_some()); } #[test] fn empty_function() { let reg = Region::from_buf("", 16, vec![], 0, None); assert!(Function::new::((), 0, ®, UUID::now()).is_err()); } #[test] fn resolve_indirect() { let _ = simple_logger::init(); let reg = Region::from_buf("", 16, b"AaaaJxAxxxR".to_vec(), 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); let a = func.names.index(&Name::new("x".into(),None)).unwrap(); assert_eq!(func.cflow_graph().node_count(), 2); assert_eq!(func.cflow_graph().edge_count(), 1); for n in func.cflow_graph().node_indices() { match func.cflow_graph.node_weight(n) { Some(&CfgNode::BasicBlock(bb)) => { assert_eq!(func.basic_block(bb).area, (0..6).into()); } Some(&CfgNode::Value(Value::Variable(Variable{ ref name, bits: 32 }))) if *name == a => {} a => unreachable!("got: {:?}",a) } } let unres = func.indirect_jumps().collect::>(); assert_eq!(unres.len(), 1); assert_eq!(unres[0], Variable{ name: a, bits: 32 }); assert!(func.resolve_indirect_jumps(|x| { if *x == Variable::new(a, 32).unwrap() { vec![Constant::new(6,32).unwrap()].into() } else { Default::default() } })); assert!(func.extend::((), ®).is_ok()); assert_eq!(func.cflow_graph().node_count(), 1); assert_eq!(func.cflow_graph().edge_count(), 0); for n in func.cflow_graph().node_indices() { match func.cflow_graph.node_weight(n) { Some(&CfgNode::BasicBlock(bb)) => { assert_eq!(func.basic_block(bb).area, (0..11).into()); } _ => unreachable!() } } } #[test] fn entry_split() { use petgraph::dot::Dot; let _ = simple_logger::init(); let reg = Region::from_buf("", 16, b"AaaaAaaaJx".to_vec(), 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); let unres = func.indirect_jumps().collect::>(); let a = func.names.index(&Name::new("x".into(),None)).unwrap(); assert_eq!(unres.len(), 1); assert_eq!(unres[0], Variable{ name: a, bits: 32 }); assert!(func.resolve_indirect_jumps(|x| { if *x == Variable::new(a, 32).unwrap() { vec![Constant::new(4,32).unwrap()].into() } else { Default::default() } })); assert!(func.extend::((), ®).is_ok()); println!("{:?}", Dot::new(func.cflow_graph())); assert_eq!(func.cflow_graph().node_count(), 2); assert_eq!(func.cflow_graph().edge_count(), 2); let mut bb0_vx = None; let mut bb1_vx = None; for n in func.cflow_graph().node_indices() { match func.cflow_graph.node_weight(n) { Some(&CfgNode::BasicBlock(bb)) => { if func.basic_block(bb).area == (4..10).into() { bb1_vx = Some(n); } else if func.basic_block(bb).area == (0..4).into() { bb0_vx = Some(n); } else { unreachable!(); } } _ => unreachable!() } } assert!(bb0_vx.is_some() && bb1_vx.is_some()); let entry_idx = func.entry_point(); assert_eq!(func.basic_block(entry_idx).node, bb0_vx.unwrap()); } #[test] fn resolve_indirect_to_multiple() { let _ = simple_logger::init(); let reg = Region::from_buf("", 16, b"AaaaJxAxxxRRRR".to_vec(), 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); let a = func.names.index(&Name::new("x".into(),None)).unwrap(); assert_eq!(func.cflow_graph().node_count(), 2); assert_eq!(func.cflow_graph().edge_count(), 1); for n in func.cflow_graph().node_indices() { match func.cflow_graph.node_weight(n) { Some(&CfgNode::BasicBlock(bb)) => { assert_eq!(func.basic_block(bb).area, (0..6).into()); } Some(&CfgNode::Value(Value::Variable(Variable{ ref name, bits: 32 }))) if *name == a => {} a => unreachable!("got: {:?}",a) } } let unres = func.indirect_jumps().collect::>(); assert_eq!(unres.len(), 1); assert_eq!(unres[0], Variable{ name: a, bits: 32 }); assert!(func.resolve_indirect_jumps(|x| { if *x == Variable::new(a, 32).unwrap() { vec![ Constant::new(6,32).unwrap(), Constant::new(11,32).unwrap(), Constant::new(12,32).unwrap(), Constant::new(13,32).unwrap(), ].into() } else { Default::default() } })); assert!(func.extend::((), ®).is_ok()); assert_eq!(func.cflow_graph().node_count(), 5); assert_eq!(func.cflow_graph().edge_count(), 4); for n in func.cflow_graph().node_indices() { match func.cflow_graph.node_weight(n) { Some(&CfgNode::BasicBlock(bb)) => { assert!( func.basic_block(bb).area == (0..6).into() || func.basic_block(bb).area == (11..12).into() || func.basic_block(bb).area == (12..13).into() || func.basic_block(bb).area == (13..14).into() || func.basic_block(bb).area == (6..11).into()); } _ => unreachable!() } } } #[test] fn issue_51_treat_entry_point_as_incoming_edge() { let _ = simple_logger::init(); let reg = Region::from_buf("", 16, b"AaaaAxxxJ0".to_vec(), 0, None); let func = Function::new::((), 4, ®, UUID::now()).unwrap(); assert_eq!(func.cflow_graph.node_count(), 2); assert_eq!(func.cflow_graph.edge_count(), 2); let mut bb0_vx = None; let mut bb1_vx = None; for vx in func.cflow_graph.node_indices() { if let Some(&CfgNode::BasicBlock(bb_idx)) = func.cflow_graph().node_weight(vx) { let bb = func.basic_block(bb_idx); let mnes = func.mnemonics(bb_idx).collect::>(); if bb.area.start == 0 { assert_eq!(mnes.len(), 1); assert_eq!(bb.area, (0..4).into()); bb0_vx = Some(vx); } else if bb.area.start == 4 { assert_eq!(mnes.len(), 2); assert_eq!(bb.area, (4..10).into()); bb1_vx = Some(vx); } else { unreachable!(); } } else { unreachable!(); } } assert!(bb0_vx.is_some() && bb1_vx.is_some()); let entry_idx = func.entry_point(); assert_eq!(func.basic_block(entry_idx).node, bb1_vx.unwrap()); assert!(func.cflow_graph.find_edge(bb0_vx.unwrap(), bb1_vx.unwrap()).is_some()); assert!(func.cflow_graph.find_edge(bb1_vx.unwrap(), bb0_vx.unwrap()).is_some()); } #[test] fn iter_range() { let data = b"AabcAabcAabcAabcAabcAabcAabcAabcR".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let func = Function::new::((), 0, ®, UUID::now()).unwrap(); let bb_idx = func.basic_blocks().map(|x| x.0).collect::>(); assert_eq!(bb_idx.len(), 1); let stmts = func.statements(bb_idx[0]).collect::>(); assert_eq!(stmts.len(), 9); let bb = func.basic_blocks().map(|x| x.1).collect::>(); assert_eq!(bb.len(), 1); let stmts = func.statements(bb[0]).collect::>(); assert_eq!(stmts.len(), 9); let stmts = func.statements(..).collect::>(); assert_eq!(stmts.len(), 9); let mne_idx = func.mnemonics(..).map(|x| x.0).collect::>(); assert_eq!(mne_idx.len(), 9); } /* * (B0) * 0: Mi1 ; mov i 1 * 3: Cfi0 ; cmp f i 0 * 7: Bf18 ; br f (B2) * * (B1) * 11: Aii3 ; add i i 3 * 15: J22 ; jmp (B3) * * (B2) * 18: Aii3 ; add i i 3 * * (B3) * 22: Ms3 ; mov s 3 * 25: R ; ret */ #[test] fn iter_basic_blocks() { let _ = simple_logger::init(); let data = b"Mi1Cfi0Bf18Aii3J22Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let func = Function::new::((), 0, ®, UUID::now()).unwrap(); let mut fwd = func.basic_blocks().map(|(idx,_)| idx).collect::>(); let mut bwd = func.basic_blocks().rev().map(|(idx,_)| idx).collect::>(); assert_eq!(fwd.len(), 4); assert_eq!(fwd[0], BasicBlockIndex::new(0)); assert!(fwd[1] == BasicBlockIndex::new(1) || fwd[1] == BasicBlockIndex::new(2)); assert!(fwd[2] == BasicBlockIndex::new(1) || fwd[2] == BasicBlockIndex::new(2)); assert!(fwd[1] != fwd[2]); assert_eq!(fwd[3], BasicBlockIndex::new(3)); fwd.sort(); fwd.dedup(); assert_eq!(fwd.len(), 4); assert_eq!(bwd.len(), 4); assert_eq!(bwd[0], BasicBlockIndex::new(3)); assert!(bwd[1] == BasicBlockIndex::new(1) || bwd[1] == BasicBlockIndex::new(2)); assert!(bwd[2] == BasicBlockIndex::new(1) || bwd[2] == BasicBlockIndex::new(2)); assert!(bwd[1] != bwd[2]); assert_eq!(bwd[3], BasicBlockIndex::new(0)); bwd.sort(); bwd.dedup(); assert_eq!(bwd.len(), 4); } /* * (B0) * 0: Mi1 ; mov i 1 * 3: Cfi0 ; cmp f i 0 * 7: Bf18 ; br f (B2) * * (B1) * 11: Aii3 ; add i i 3 * 15: J22 ; jmp (B3) * * (B2) * 18: Aii3 ; add i i 3 * * (B3) * 22: Ms3 ; mov s 3 * 25: R ; ret */ #[test] fn rewrite_increase() { let _ = simple_logger::init(); let data = b"Mi1Cfi0Bf18Aii3J22Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); let mut b0_idx = None; let mut b1_idx = None; let mut b2_idx = None; let mut b3_idx = None; func.pack().unwrap(); for (idx,bb) in func.basic_blocks() { if bb.area.start == 0 { b0_idx = Some(idx); } else if bb.area.start == 11 { b1_idx = Some(idx); } else if bb.area.start == 18 { b2_idx = Some(idx); } else if bb.area.start == 22 { b3_idx = Some(idx); } else { unreachable!() } } assert!(b0_idx.is_some() && b1_idx.is_some() && b2_idx.is_some() && b3_idx.is_some()); let _ = func.rewrite_basic_block(b2_idx.unwrap(),|stmt,_,_,_| { match stmt { &mut Statement::Expression{ op: Operation::Add(Value::Constant(ref mut a),Value::Constant(ref mut b)),.. } => { *a = Constant::new(0xffffffff,32).unwrap(); *b = Constant::new(0x11111111,32).unwrap(); } _ => {} } Ok(RewriteControl::Continue) }).unwrap(); let b0 = func.statements(b0_idx.unwrap()).collect::>(); if let &Statement::Expression{ op: Operation::Move(Value::Constant(_)),.. } = &*b0[0] {} else { unreachable!() } if let &Statement::Expression{ op: Operation::LessOrEqualUnsigned(Value::Variable(_),Value::Constant(_)),.. } = &*b0[1] {} else { unreachable!() } assert_eq!(b0.len(), 2); let mne0 = func.mnemonics(b0_idx.unwrap()).collect::>(); assert_eq!(mne0.len(), 3); let b1 = func.statements(b1_idx.unwrap()).collect::>(); if let &Statement::Expression{ op: Operation::Add(Value::Variable(_),Value::Constant(_)),.. } = &*b1[0] {} else { unreachable!() } assert_eq!(b1.len(), 1); let mne1 = func.mnemonics(b1_idx.unwrap()).collect::>(); assert_eq!(mne1.len(), 2); let b2 = func.statements(b2_idx.unwrap()).collect::>(); if let &Statement::Expression{ op: Operation::Add(Value::Variable(_),Value::Constant(_)),.. } = &*b2[0] {} else { unreachable!() } assert_eq!(b2.len(), 1); let mne2 = func.mnemonics(b2_idx.unwrap()).collect::>(); assert_eq!(mne2.len(), 1); let b3 = func.statements(b3_idx.unwrap()).collect::>(); if let &Statement::Expression{ op: Operation::Move(Value::Constant(_)),.. } = &*b3[0] {} else { unreachable!() } assert_eq!(b3.len(), 2); let mne3 = func.mnemonics(b3_idx.unwrap()).collect::>(); assert_eq!(mne3.len(), 2); } /* * (B0) * 0: Mi1 ; mov i 1 * 3: Cfi0 ; cmp f i 0 * 7: Bf18 ; br f (B2) * * (B1) * 11: Aii3 ; add i i 3 * 15: J22 ; jmp (B3) * * (B2) * 18: Aii3 ; add i i 3 * * (B3) * 22: Ms3 ; mov s 3 * 25: R ; ret */ #[test] fn rewrite_rename() { let _ = simple_logger::init(); let data = b"Mi1Cfi0Bf18Aii3J22Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); let mut b0_idx = None; let mut b1_idx = None; let mut b2_idx = None; let mut b3_idx = None; for (idx,bb) in func.basic_blocks() { if bb.area.start == 0 { b0_idx = Some(idx); } else if bb.area.start == 11 { b1_idx = Some(idx); } else if bb.area.start == 18 { b2_idx = Some(idx); } else if bb.area.start == 22 { b3_idx = Some(idx); } else { unreachable!() } } assert!(b0_idx.is_some() && b1_idx.is_some() && b2_idx.is_some() && b3_idx.is_some()); fn f(stmt: &mut Statement, names: &mut Names,_: &mut Strings, _: &mut Segments) -> ::Result<::RewriteControl> { match stmt { &mut Statement::Expression{ result: Variable{ ref mut name,.. },.. } => { let new_name = names.value(*name)?.clone(); let new_name = Name::new(new_name.base().to_string().to_uppercase().into(),new_name.subscript()); *name = names.insert(&new_name); } _ => {} } Ok(RewriteControl::Continue) } let _ = func.rewrite_basic_block(b0_idx.unwrap(),&f).unwrap(); let _ = func.rewrite_basic_block(b1_idx.unwrap(),&f).unwrap(); let _ = func.rewrite_basic_block(b2_idx.unwrap(),&f).unwrap(); let _ = func.rewrite_basic_block(b3_idx.unwrap(),&f).unwrap(); let b0 = func.statements(b0_idx.unwrap()).collect::>(); if let &Statement::Expression{ op: Operation::Move(Value::Constant(_)),.. } = &*b0[0] {} else { unreachable!() } if let &Statement::Expression{ op: Operation::LessOrEqualUnsigned(Value::Variable(_),Value::Constant(_)),.. } = &*b0[1] {} else { unreachable!() } assert_eq!(b0.len(), 2); let b1 = func.statements(b1_idx.unwrap()).collect::>(); if let &Statement::Expression{ op: Operation::Add(Value::Variable(_),Value::Constant(_)),.. } = &*b1[0] {} else { unreachable!() } assert_eq!(b1.len(), 1); let b2 = func.statements(b2_idx.unwrap()).collect::>(); if let &Statement::Expression{ op: Operation::Add(Value::Variable(_),Value::Constant(_)),.. } = &*b2[0] {} else { unreachable!() } assert_eq!(b2.len(), 1); let b3 = func.statements(b3_idx.unwrap()).collect::>(); if let &Statement::Expression{ op: Operation::Move(Value::Constant(_)),.. } = &*b3[0] {} else { unreachable!() } assert_eq!(b3.len(), 2); for stmt in func.statements(..) { match &*stmt { &Statement::Expression{ result: Variable{ ref name,.. },.. } => { assert!(func.names.value(*name).unwrap().base().chars().all(|x| x.is_uppercase())); } _ => {} } } } /* * (B0) * 0: Mi1 ; mov i 1 * 3: Cfi0 ; cmp f i 0 * 7: Bf18 ; br f (B2) * * (B1) * ; test * 11: Aii3 ; add i i 3 * 15: J22 ; jmp (B3) * * (B2) * 18: Ai2i ; add i 2 i * * (B3) * 22: Ms3 ; mov s 3 * 25: R ; ret */ #[test] fn rewrite_prepend_mnemonic_unpacked() { let _ = simple_logger::init(); let data = b"Mi1Cfi0Bf18Aii3J22Ai2iMs3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); let mut b0_idx = None; let mut b1_idx = None; let mut b2_idx = None; let mut b3_idx = None; for (idx,bb) in func.basic_blocks() { if bb.area.start == 0 { b0_idx = Some(idx); } else if bb.area.start == 11 { b1_idx = Some(idx); } else if bb.area.start == 18 { b2_idx = Some(idx); } else if bb.area.start == 22 { b3_idx = Some(idx); } else { unreachable!() } } assert!(b0_idx.is_some() && b1_idx.is_some() && b2_idx.is_some() && b3_idx.is_some()); let x = func.names.insert(&Name::new("x".into(),None)); let stmts = vec![ Statement::Expression{ op: Operation::And(Value::val(42,32).unwrap(),Value::var(x,32).unwrap()), result: Variable::new(x,32).unwrap() }, Statement::Expression{ op: Operation::Subtract(Value::val(42,32).unwrap(),Value::var(x,32).unwrap()), result: Variable::new(x,32).unwrap() }, ]; debug!("{:?}",func); { debug!("pre-read bb 0"); let b0 = func.basic_block(b0_idx.unwrap()); debug!("b0: {:?}",b0); debug!("pre-read bb 1"); let b1 = func.basic_block(b1_idx.unwrap()); debug!("b1: {:?}",b1); } let test = func.strings.insert(&"test".into()); let _ = func.insert_mnemonic(b1_idx.unwrap(),1,test,SmallVec::default(),stmts).unwrap(); debug!("{:?}",func); debug!("read bb 0"); let b0 = func.statements(b0_idx.unwrap()).collect::>(); assert_eq!(b0.len(), 2); debug!("b0: {:?}",b0); if let &Statement::Expression{ op: Operation::Move(Value::Constant(_)),.. } = &*b0[0] {} else { unreachable!() } if let &Statement::Expression{ op: Operation::LessOrEqualUnsigned(Value::Variable(_),Value::Constant(_)),.. } = &*b0[1] {} else { unreachable!() } let mne0 = func.mnemonics(b0_idx.unwrap()).collect::>(); assert_eq!(mne0.len(), 3); let b0 = func.basic_block(b0_idx.unwrap()); debug!("b0: {:?}",b0); assert_eq!(func.mnemonic(b0.mnemonics.start).area.start, 0); assert_eq!(func.mnemonic(MnemonicIndex::new(b0.mnemonics.start.index() + 1)).area.start, 3); assert_eq!(func.mnemonic(MnemonicIndex::new(b0.mnemonics.start.index() + 2)).area.start, 7); debug!("read bb 1"); let b1 = func.statements(b1_idx.unwrap()).collect::>(); debug!("b1: {:?}",b1); assert_eq!(b1.len(), 3); if let &Statement::Expression{ op: Operation::Add(Value::Variable(_),Value::Constant(_)),.. } = &*b1[0] {} else { unreachable!() } if let &Statement::Expression{ op: Operation::And(Value::Constant(_),Value::Variable(_)),.. } = &*b1[1] {} else { unreachable!() } if let &Statement::Expression{ op: Operation::Subtract(Value::Constant(_),Value::Variable(_)),.. } = &*b1[2] {} else { unreachable!() } let mne1 = func.mnemonics(b1_idx.unwrap()).collect::>(); assert_eq!(mne1.len(), 3); let b1 = func.basic_block(b1_idx.unwrap()); debug!("b1: {:?}",b1); assert_eq!(func.mnemonic(b1.mnemonics.start).area.start, 11); assert_eq!(func.mnemonic(MnemonicIndex::new(b1.mnemonics.start.index() + 1)).area.start, 15); assert_eq!(func.mnemonic(MnemonicIndex::new(b1.mnemonics.start.index() + 2)).area.start, 15); debug!("read bb 2"); let b2 = func.statements(b2_idx.unwrap()).collect::>(); assert_eq!(b2.len(), 1); if let &Statement::Expression{ op: Operation::Add(Value::Constant(_),Value::Variable(_)),.. } = &*b2[0] {} else { unreachable!() } let mne2 = func.mnemonics(b2_idx.unwrap()).collect::>(); assert_eq!(mne2.len(), 1); let b2 = func.basic_block(b2_idx.unwrap()); debug!("b2: {:?}",b2); debug!("read bb 3"); let b3 = func.statements(b3_idx.unwrap()).collect::>(); assert_eq!(b3.len(), 2); if let &Statement::Expression{ op: Operation::Move(Value::Constant(_)),.. } = &*b3[0] {} else { unreachable!() } let mne3 = func.mnemonics(b3_idx.unwrap()).collect::>(); assert_eq!(mne3.len(), 2); let b3 = func.basic_block(b3_idx.unwrap()); debug!("b3: {:?}",b3); } #[test] fn rewrite_remove_mnemonic() { let _ = simple_logger::init(); let data = b"Mi1Cfi0Bf18Aii3J22Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); let mut b0_idx = None; let mut b1_idx = None; let mut b2_idx = None; let mut b3_idx = None; for (idx,bb) in func.basic_blocks() { if bb.area.start == 0 { b0_idx = Some(idx); } else if bb.area.start == 11 { b1_idx = Some(idx); } else if bb.area.start == 18 { b2_idx = Some(idx); } else if bb.area.start == 22 { b3_idx = Some(idx); } else { unreachable!() } } assert!(b0_idx.is_some() && b1_idx.is_some() && b2_idx.is_some() && b3_idx.is_some()); debug!("{:?}",func); let _ = func.drop_first_mnemonic(b1_idx.unwrap()).unwrap(); debug!("{:?}",func); let b0 = func.statements(b0_idx.unwrap()).collect::>(); assert_eq!(b0.len(), 2); if let &Statement::Expression{ op: Operation::Move(Value::Constant(_)),.. } = &*b0[0] {} else { unreachable!() } if let &Statement::Expression{ op: Operation::LessOrEqualUnsigned(Value::Variable(_),Value::Constant(_)),.. } = &*b0[1] {} else { unreachable!() } let b1 = func.statements(b1_idx.unwrap()).collect::>(); assert_eq!(b1.len(), 0); let b2 = func.statements(b2_idx.unwrap()).collect::>(); assert_eq!(b2.len(), 1); if let &Statement::Expression{ op: Operation::Add(Value::Variable(_),Value::Constant(_)),.. } = &*b2[0] {} else { unreachable!() } let b3 = func.statements(b3_idx.unwrap()).collect::>(); assert_eq!(b3.len(), 2); if let &Statement::Expression{ op: Operation::Move(Value::Constant(_)),.. } = &*b3[0] {} else { unreachable!() } } #[test] fn remove_mnemonic_mid() { let _ = simple_logger::init(); let data = b"Mi1Mi2Cfi0Bf21Aii3J25Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); let mut b0_idx = None; let mut b1_idx = None; let mut b2_idx = None; let mut b3_idx = None; for (idx,bb) in func.basic_blocks() { if bb.area.start == 0 { b0_idx = Some(idx); } else if bb.area.start == 14 { b1_idx = Some(idx); } else if bb.area.start == 21 { b2_idx = Some(idx); } else if bb.area.start == 25 { b3_idx = Some(idx); } else { unreachable!() } } assert!(b0_idx.is_some() && b1_idx.is_some() && b2_idx.is_some() && b3_idx.is_some()); debug!("{:?}",func); let _ = func.remove_mnemonic(MnemonicIndex::new(1)).unwrap(); debug!("{:?}",func); let b0 = func.statements(b0_idx.unwrap()).collect::>(); assert_eq!(func.mnemonics(b0_idx.unwrap()).count(), 3); assert_eq!(b0.len(), 2); if let &Statement::Expression{ op: Operation::Move(Value::Constant(Constant{ value: 1,.. })),.. } = &*b0[0] {} else { unreachable!() } if let &Statement::Expression{ op: Operation::LessOrEqualUnsigned(Value::Variable(_),Value::Constant(_)),.. } = &*b0[1] {} else { unreachable!() } let b1 = func.statements(b1_idx.unwrap()).collect::>(); assert_eq!(b1.len(), 1); if let &Statement::Expression{ op: Operation::Add(Value::Variable(_),Value::Constant(_)),.. } = &*b1[0] {} else { unreachable!() } let b2 = func.statements(b2_idx.unwrap()).collect::>(); assert_eq!(b2.len(), 1); if let &Statement::Expression{ op: Operation::Add(Value::Variable(_),Value::Constant(_)),.. } = &*b2[0] {} else { unreachable!() } let b3 = func.statements(b3_idx.unwrap()).collect::>(); assert_eq!(b3.len(), 2); if let &Statement::Expression{ op: Operation::Move(Value::Constant(_)),.. } = &*b3[0] {} else { unreachable!() } } #[test] fn packed_iter_range() { let data = b"AabcAabcAabcAabcAabcAabcAabcAabcR".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); func.pack().unwrap(); let bb_idx = func.basic_blocks().map(|x| x.0).collect::>(); assert_eq!(bb_idx.len(), 1); let stmts = func.statements(bb_idx[0]).collect::>(); assert_eq!(stmts.len(), 9); let bb = func.basic_blocks().map(|x| x.1).collect::>(); assert_eq!(bb.len(), 1); let stmts = func.statements(bb[0]).collect::>(); assert_eq!(stmts.len(), 9); let stmts = func.statements(..).collect::>(); assert_eq!(stmts.len(), 9); let mne_idx = func.mnemonics(..).map(|x| x.0).collect::>(); assert_eq!(mne_idx.len(), 9); } #[test] fn rewrite_prepend_mnemonic_packed() { let _ = simple_logger::init(); let data = b"Mi1Cfi0Bf18Aii3J22Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); let mut b0_idx = None; let mut b1_idx = None; let mut b2_idx = None; let mut b3_idx = None; func.pack().unwrap(); for (idx,bb) in func.basic_blocks() { if bb.area.start == 0 { b0_idx = Some(idx); } else if bb.area.start == 11 { b1_idx = Some(idx); } else if bb.area.start == 18 { b2_idx = Some(idx); } else if bb.area.start == 22 { b3_idx = Some(idx); } else { unreachable!() } } assert!(b0_idx.is_some() && b1_idx.is_some() && b2_idx.is_some() && b3_idx.is_some()); let x = func.names.insert(&Name::new("x".into(),None)); let stmts = vec![ Statement::Expression{ op: Operation::And(Value::val(42,32).unwrap(),Value::var(x,32).unwrap()), result: Variable::new(x,32).unwrap() }, Statement::Expression{ op: Operation::Subtract(Value::val(42,32).unwrap(),Value::var(x,32).unwrap()), result: Variable::new(x,32).unwrap() }, ]; debug!("{:?}",func); let test = func.strings.insert(&"test".into()); let _ = func.insert_mnemonic(b1_idx.unwrap(),0,test,SmallVec::default(),stmts).unwrap(); debug!("{:?}",func); debug!("read bb 0"); let b0 = func.statements(b0_idx.unwrap()).collect::>(); assert_eq!(b0.len(), 2); if let &Statement::Expression{ op: Operation::Move(Value::Constant(_)),.. } = &*b0[0] {} else { unreachable!() } if let &Statement::Expression{ op: Operation::LessOrEqualUnsigned(Value::Variable(_),Value::Constant(_)),.. } = &*b0[1] {} else { unreachable!() } debug!("read bb 1"); let b1 = func.statements(b1_idx.unwrap()).collect::>(); debug!("{:?}",b1); assert_eq!(b1.len(), 3); if let &Statement::Expression{ op: Operation::And(Value::Constant(_),Value::Variable(_)),.. } = &*b1[0] {} else { unreachable!() } if let &Statement::Expression{ op: Operation::Subtract(Value::Constant(_),Value::Variable(_)),.. } = &*b1[1] {} else { unreachable!() } if let &Statement::Expression{ op: Operation::Add(Value::Variable(_),Value::Constant(_)),.. } = &*b1[2] {} else { unreachable!() } debug!("read bb 2"); let b2 = func.statements(b2_idx.unwrap()).collect::>(); assert_eq!(b2.len(), 1); if let &Statement::Expression{ op: Operation::Add(Value::Variable(_),Value::Constant(_)),.. } = &*b2[0] {} else { unreachable!() } debug!("read bb 3"); let b3 = func.statements(b3_idx.unwrap()).collect::>(); assert_eq!(b3.len(), 2); if let &Statement::Expression{ op: Operation::Move(Value::Constant(_)),.. } = &*b3[0] {} else { unreachable!() } } #[test] fn rewrite_remove_mnemonic_packed() { let _ = simple_logger::init(); let reg = Region::from_buf("", 16, b"Mi1Cfi0Bf18Aii3J22Aii3Ms3R".to_vec(), None, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); let mut b0_idx = None; let mut b1_idx = None; let mut b2_idx = None; let mut b3_idx = None; func.pack().unwrap(); for (idx,bb) in func.basic_blocks() { if bb.area.start == 0 { b0_idx = Some(idx); } else if bb.area.start == 11 { b1_idx = Some(idx); } else if bb.area.start == 18 { b2_idx = Some(idx); } else if bb.area.start == 22 { b3_idx = Some(idx); } else { unreachable!() } } assert!(b0_idx.is_some() && b1_idx.is_some() && b2_idx.is_some() && b3_idx.is_some()); debug!("{:?}",func); let _ = func.drop_first_mnemonic(b1_idx.unwrap()).unwrap(); debug!("{:?}",func); let b0 = func.statements(b0_idx.unwrap()).collect::>(); assert_eq!(b0.len(), 2); if let &Statement::Expression{ op: Operation::Move(Value::Constant(_)),.. } = &*b0[0] {} else { unreachable!() } if let &Statement::Expression{ op: Operation::LessOrEqualUnsigned(Value::Variable(_),Value::Constant(_)),.. } = &*b0[1] {} else { unreachable!() } let b1 = func.statements(b1_idx.unwrap()).collect::>(); assert_eq!(b1.len(), 0); let b2 = func.statements(b2_idx.unwrap()).collect::>(); assert_eq!(b2.len(), 1); if let &Statement::Expression{ op: Operation::Add(Value::Variable(_),Value::Constant(_)),.. } = &*b2[0] {} else { unreachable!() } let b3 = func.statements(b3_idx.unwrap()).collect::>(); assert_eq!(b3.len(), 2); if let &Statement::Expression{ op: Operation::Move(Value::Constant(_)),.. } = &*b3[0] {} else { unreachable!() } } #[test] fn remove_mnemonic_mid_packed() { let _ = simple_logger::init(); let data = b"Mi1Mi2Cfi0Bf21Aii3J25Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); let mut b0_idx = None; let mut b1_idx = None; let mut b2_idx = None; let mut b3_idx = None; func.pack().unwrap(); for (idx,bb) in func.basic_blocks() { if bb.area.start == 0 { b0_idx = Some(idx); } else if bb.area.start == 14 { b1_idx = Some(idx); } else if bb.area.start == 21 { b2_idx = Some(idx); } else if bb.area.start == 25 { b3_idx = Some(idx); } else { unreachable!() } } assert!(b0_idx.is_some() && b1_idx.is_some() && b2_idx.is_some() && b3_idx.is_some()); debug!("{:?}",func); let _ = func.remove_mnemonic(MnemonicIndex::new(1)).unwrap(); debug!("{:?}",func); let b0 = func.statements(b0_idx.unwrap()).collect::>(); assert_eq!(func.mnemonics(b0_idx.unwrap()).count(), 3); assert_eq!(b0.len(), 2); if let &Statement::Expression{ op: Operation::Move(Value::Constant(Constant{ value: 1,.. })),.. } = &*b0[0] {} else { unreachable!() } if let &Statement::Expression{ op: Operation::LessOrEqualUnsigned(Value::Variable(_),Value::Constant(_)),.. } = &*b0[1] {} else { unreachable!() } let b1 = func.statements(b1_idx.unwrap()).collect::>(); assert_eq!(b1.len(), 1); if let &Statement::Expression{ op: Operation::Add(Value::Variable(_),Value::Constant(_)),.. } = &*b1[0] {} else { unreachable!() } let b2 = func.statements(b2_idx.unwrap()).collect::>(); assert_eq!(b2.len(), 1); if let &Statement::Expression{ op: Operation::Add(Value::Variable(_),Value::Constant(_)),.. } = &*b2[0] {} else { unreachable!() } let b3 = func.statements(b3_idx.unwrap()).collect::>(); assert_eq!(b3.len(), 2); if let &Statement::Expression{ op: Operation::Move(Value::Constant(_)),.. } = &*b3[0] {} else { unreachable!() } } #[test] fn shift_areas() { let mut func = Function::facade(); let uu0 = UUID::now(); let uu1 = UUID::now(); let uu2 = UUID::now(); let mne0 = Mnemonic{ area: (0..1).into(), opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne1 = Mnemonic{ area: (1..2).into(), opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne2 = Mnemonic{ area: (2..3).into(), opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne22 = Mnemonic{ area: Area{ start: 3, end: 3, offset_start: 0, offset_end: 1 }, opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne3 = Mnemonic{ area: Area{ start: 3, end: 4, offset_start: 1, offset_end: 0 }, opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne4 = Mnemonic{ area: (4..5).into(), opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne5 = Mnemonic{ area: (5..6).into(), opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let bb0 = BasicBlock{ uuid: uu0.clone(), area: (0..2).into(), node: NodeIndex::new(0), mnemonics: MnemonicIndex::new(0)..MnemonicIndex::new(2), statements: 0..0 }; let bb1 = BasicBlock{ uuid: uu1.clone(), area: (2..4).into(), node: NodeIndex::new(0), mnemonics: MnemonicIndex::new(2)..MnemonicIndex::new(5), statements: 0..0 }; let bb2 = BasicBlock{ uuid: uu2.clone(), area: (4..6).into(), node: NodeIndex::new(0), mnemonics: MnemonicIndex::new(5)..MnemonicIndex::new(7), statements: 0..0 }; func.mnemonics = vec![mne0,mne1,mne2,mne22,mne3,mne4,mne5]; func.basic_blocks = vec![bb0,bb1,bb2]; func.shift_areas(BasicBlockIndex::new(1), 0, 2); let mne0 = Mnemonic{ area: (0..1).into(), opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne1 = Mnemonic{ area: (1..2).into(), opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne2 = Mnemonic{ area: Area{ start: 2, end: 3, offset_start: 2, offset_end: 0 }, opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne22 = Mnemonic{ area: Area{ start: 3, end: 3, offset_start: 0, offset_end: 1 }, opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne3 = Mnemonic{ area: Area{ start: 3, end: 4, offset_start: 1, offset_end: 0 }, opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne4 = Mnemonic{ area: (4..5).into(), opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne5 = Mnemonic{ area: (5..6).into(), opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let bb0 = BasicBlock{ uuid: uu0.clone(), area: (0..2).into(), node: NodeIndex::new(0), mnemonics: MnemonicIndex::new(0)..MnemonicIndex::new(2), statements: 0..0 }; let bb1 = BasicBlock{ uuid: uu1.clone(), area: (2..4).into(), node: NodeIndex::new(0), mnemonics: MnemonicIndex::new(2)..MnemonicIndex::new(5), statements: 0..0 }; let bb2 = BasicBlock{ uuid: uu2.clone(), area: (4..6).into(), node: NodeIndex::new(0), mnemonics: MnemonicIndex::new(5)..MnemonicIndex::new(7), statements: 0..0 }; assert_eq!(func.mnemonics, vec![mne0,mne1,mne2,mne22,mne3,mne4,mne5]); assert_eq!(func.basic_blocks, vec![bb0,bb1,bb2]); } #[test] fn shift_areas_multiple_block() { let mut func = Function::facade(); let uu0 = UUID::now(); let uu1 = UUID::now(); let uu2 = UUID::now(); let mne0 = Mnemonic{ area: (0..2).into(), opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne1 = Mnemonic{ area: Area{ start: 2, end: 2, offset_start: 0, offset_end: 1 }, opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne2 = Mnemonic{ area: Area{ start: 2, end: 2, offset_start: 1, offset_end: 2 }, opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne3 = Mnemonic{ area: Area{ start: 2, end: 2, offset_start: 2, offset_end: 3 }, opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne4 = Mnemonic{ area: Area{ start: 2, end: 5, offset_start: 3, offset_end: 0 }, opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne5 = Mnemonic{ area: (5..6).into(), opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let bb0 = BasicBlock{ uuid: uu0.clone(), area: Area{ start: 0, end: 2, offset_start: 0, offset_end: 1 }, node: NodeIndex::new(0), mnemonics: MnemonicIndex::new(0)..MnemonicIndex::new(2), statements: 0..0 }; let bb1 = BasicBlock{ uuid: uu1.clone(), area: Area{ start: 2, end: 2, offset_start: 1, offset_end: 3 }, node: NodeIndex::new(0), mnemonics: MnemonicIndex::new(2)..MnemonicIndex::new(4), statements: 0..0 }; let bb2 = BasicBlock{ uuid: uu2.clone(), area: Area{ start: 2, end: 6, offset_start: 3, offset_end: 0 }, node: NodeIndex::new(0), mnemonics: MnemonicIndex::new(4)..MnemonicIndex::new(6), statements: 0..0 }; func.mnemonics = vec![mne0,mne1,mne2,mne3,mne4,mne5]; func.basic_blocks = vec![bb0,bb1,bb2]; func.shift_areas(BasicBlockIndex::new(0), 1, 2); let mne0 = Mnemonic{ area: (0..2).into(), opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne1 = Mnemonic{ area: Area{ start: 2, end: 2, offset_start: 2, offset_end: 3 }, opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne2 = Mnemonic{ area: Area{ start: 2, end: 2, offset_start: 3, offset_end: 4 }, opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne3 = Mnemonic{ area: Area{ start: 2, end: 2, offset_start: 4, offset_end: 5 }, opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne4 = Mnemonic{ area: Area{ start: 2, end: 5, offset_start: 5, offset_end: 0 }, opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let mne5 = Mnemonic{ area: (5..6).into(), opcode: StrRef::new(0), operands: SmallVec::default(), statement_count: 0 }; let bb0 = BasicBlock{ uuid: uu0.clone(), area: Area{ start: 0, end: 2, offset_start: 0, offset_end: 3 }, node: NodeIndex::new(0), mnemonics: MnemonicIndex::new(0)..MnemonicIndex::new(2), statements: 0..0 }; let bb1 = BasicBlock{ uuid: uu1.clone(), area: Area{ start: 2, end: 2, offset_start: 3, offset_end: 5 }, node: NodeIndex::new(0), mnemonics: MnemonicIndex::new(2)..MnemonicIndex::new(4), statements: 0..0 }; let bb2 = BasicBlock{ uuid: uu2.clone(), area: Area{ start: 2, end: 6, offset_start: 5, offset_end: 0 }, node: NodeIndex::new(0), mnemonics: MnemonicIndex::new(4)..MnemonicIndex::new(6), statements: 0..0 }; assert_eq!(func.mnemonics, vec![mne0,mne1,mne2,mne3,mne4,mne5]); assert_eq!(func.basic_blocks, vec![bb0,bb1,bb2]); } /* * (B0) * 0: Mi1 ; mov i 1 * 3: Cfi0 ; cmp f i 0 * 7: Bf18 ; br f (B2) * * (B1) * 11: Aii3 ; add i i 3 * 15: J22 ; jmp (B3) * * (B2) * 18: Aii3 ; add i i 3 * * (B3) * 22: Ms3 ; mov s 3 * 25: R ; ret */ #[test] fn delete_single_dead_end() { let _ = simple_logger::init(); let data = b"Mi1Cfi0Bf18Aii3J22Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); func.retain_basic_blocks(|func, bb| { let bb = func.basic_block(bb); bb.area.start != 22 }).unwrap(); assert_eq!(func.cflow_graph().node_count(), 3); } #[test] fn delete_middle() { let _ = simple_logger::init(); let data = b"Mi1Cfi0Bf18Aii3J22Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); func.retain_basic_blocks(|func, bb| { let bb = func.basic_block(bb); bb.area.start != 11 }).unwrap(); assert_eq!(func.cflow_graph().node_count(), 3); } #[test] fn delete_multiple() { let _ = simple_logger::init(); let data = b"Mi1Cfi0Bf18Aii3J22Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); func.retain_basic_blocks(|func, bb| { let bb = func.basic_block(bb); bb.area.start == 11 }).unwrap(); assert_eq!(func.cflow_graph().node_count(), 2); } #[test] fn delete_all() { let _ = simple_logger::init(); let data = b"Mi1Cfi0Bf18Aii3J22Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); func.retain_basic_blocks(|_, _| { false }).unwrap(); assert_eq!(func.cflow_graph().node_count(), 1); } #[test] fn delete_none() { let _ = simple_logger::init(); let data = b"Mi1Cfi0Bf18Aii3J22Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); func.retain_basic_blocks(|_, _| { true }).unwrap(); assert_eq!(func.cflow_graph().node_count(), 4); } #[test] fn delete_twice() { let _ = simple_logger::init(); let data = b"Mi1Cfi0Bf18Aii3J22Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let mut func = Function::new::((), 0, ®, UUID::now()).unwrap(); func.retain_basic_blocks(|func, bb| { let bb = func.basic_block(bb); bb.area.start != 11 }).unwrap(); func.retain_basic_blocks(|func, bb| { let bb = func.basic_block(bb); bb.area.start != 11 }).unwrap(); assert_eq!(func.cflow_graph().node_count(), 3); } /* * (B0) * 0: Mi1 ; mov i 1 * 3: Cfi0 ; cmp f i 0 * 7: Bf18 ; br f (B2) * * (B1) * 11: Aii3 ; add i i 3 * 15: J22 ; jmp (B3) * * (B2) * 18: Aii3 ; add i i 3 * * (B3) * 22: Ms3 ; mov s 3 * 25: R ; ret */ #[test] fn recover_cfg() { let _ = simple_logger::init(); let data = b"Mi1Cfi0Bf18Aii3J22Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let cfg = vec![ (0..11, SmallVec::from_vec(vec![ (Value::val(11,32).unwrap(), Guard::True), (Value::val(18,32).unwrap(), Guard::True) ])), (11..18, vec![ (Value::val(22,32).unwrap(), Guard::True) ].into()), (18..22, vec![ (Value::val(22,32).unwrap(), Guard::True) ].into()), (22..26, vec![].into()), ]; let uu = UUID::now(); let func1 = Function::new::((), 0, ®, uu).unwrap(); let bb1a = func1.basic_blocks().map(|(_,bb)| bb.area.clone()).collect::>(); let bb1m = func1.basic_blocks().map(|(_,bb)| bb.mnemonics.clone()).collect::>(); let mne1 = func1.mnemonics(..).map(|(_,bb)| bb.area.clone()).collect::>(); let func2 = Function::known_cflow_graph::((), Names::default(), cfg, 0, ®, uu, "test".to_string()).unwrap(); let bb2a = func2.basic_blocks().map(|(_,bb)| bb.area.clone()).collect::>(); let bb2m = func2.basic_blocks().map(|(_,bb)| bb.mnemonics.clone()).collect::>(); let mne2 = func2.mnemonics(..).map(|(_,bb)| bb.area.clone()).collect::>(); assert_eq!(bb1a.len(), 4); assert_eq!(bb2a.len(), 4); assert_eq!(mne1.len(), 8); assert_eq!(mne2.len(), 8); assert_eq!(bb1a[0], bb2a[0]); assert_eq!(mne1[0..3], mne2[0..3]); assert_eq!(bb1a[3], bb2a[3]); assert_eq!(mne1[6..8], mne2[6..8]); let swapped = bb1a[1] == bb2a[2] && bb1a[2] == bb2a[1]; let bb11 = func1.mnemonics(bb1m[1].clone()).map(|(_,m)| m.area.clone()).collect::>(); let bb12 = func1.mnemonics(bb1m[2].clone()).map(|(_,m)| m.area.clone()).collect::>(); let bb21 = func2.mnemonics(bb2m[1].clone()).map(|(_,m)| m.area.clone()).collect::>(); let bb22 = func2.mnemonics(bb2m[2].clone()).map(|(_,m)| m.area.clone()).collect::>(); if swapped { assert!(bb1a[2] == bb2a[1] && bb1a[2] == bb2a[1]); assert_eq!(bb11,bb22); assert_eq!(bb12,bb21); } else { assert!(bb1a[1] == bb2a[1] && bb1a[2] == bb2a[2]); assert_eq!(bb11,bb21); assert_eq!(bb12,bb22); } } #[test] fn recover_cfg_bad_entry() { let _ = simple_logger::init(); let data = b"Mi1Cfi0Bf18Aii3J22Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let cfg = vec![ (0..11, SmallVec::from_vec(vec![ (Value::val(11,32).unwrap(), Guard::True), (Value::val(18,32).unwrap(), Guard::True) ])), (11..18, vec![ (Value::val(22,32).unwrap(), Guard::True) ].into()), (18..22, vec![ (Value::val(22,32).unwrap(), Guard::True) ].into()), (22..26, vec![].into()), ]; let uu = UUID::now(); assert!(Function::known_cflow_graph::((), Names::default(), cfg, 1, ®, uu, "test".to_string()).is_err()); } #[test] fn recover_cfg_indirect_jmp() { let _ = simple_logger::init(); let data = b"Mi1Cfi0Bf18Aii3J22Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let mut names = Names::default(); let a = names.var("a", None, 32).unwrap(); let cfg = vec![ (0..1, SmallVec::from_vec(vec![ (Value::Variable(a), Guard::True), ])), ]; let uu = UUID::now(); assert!(Function::known_cflow_graph::((), Names::default(), cfg, 0, ®, uu, "test".to_string()).is_ok()); } #[test] fn recover_cfg_bad_blocks() { let _ = simple_logger::init(); let data = b"Mi1Cfi0Bf18Aii3J22Aii3Ms3R".to_vec(); let reg = Region::from_buf("", 16, data, 0, None); let cfg = vec![ (0..12, SmallVec::from_vec(vec![ (Value::val(11,32).unwrap(), Guard::True), (Value::val(18,32).unwrap(), Guard::True) ])), ]; let uu = UUID::now(); assert!(Function::known_cflow_graph::((), Names::default(), cfg, 0, ®, uu, "test".to_string()).is_err()); } }