/* ### * IP: GHIDRA * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "dynamic.hh" #include "funcdata.hh" #include "crc32.hh" // Table for how to hash opcodes, lumps certain operators (i.e. ADD SUB PTRADD PTRSUB) into one hash // zero indicates the operator should be skipped uint4 DynamicHash::transtable[] = { 0, CPUI_COPY, CPUI_LOAD, CPUI_STORE, CPUI_BRANCH, CPUI_CBRANCH, CPUI_BRANCHIND, CPUI_CALL, CPUI_CALLIND, CPUI_CALLOTHER, CPUI_RETURN, CPUI_INT_EQUAL, CPUI_INT_EQUAL, // NOT_EQUAL hashes same as EQUAL CPUI_INT_SLESS, CPUI_INT_SLESS, // SLESSEQUAL hashes same as SLESS CPUI_INT_LESS, CPUI_INT_LESS, // LESSEQUAL hashes same as LESS CPUI_INT_ZEXT, CPUI_INT_SEXT, CPUI_INT_ADD, CPUI_INT_ADD, // SUB hases same as ADD CPUI_INT_CARRY, CPUI_INT_SCARRY, CPUI_INT_SBORROW, CPUI_INT_2COMP, CPUI_INT_NEGATE, CPUI_INT_XOR, CPUI_INT_AND, CPUI_INT_OR, CPUI_INT_MULT, // LEFT hases same as MULT CPUI_INT_RIGHT, CPUI_INT_SRIGHT, CPUI_INT_MULT, CPUI_INT_DIV, CPUI_INT_SDIV, CPUI_INT_REM, CPUI_INT_SREM, CPUI_BOOL_NEGATE, CPUI_BOOL_XOR, CPUI_BOOL_AND, CPUI_BOOL_OR, CPUI_FLOAT_EQUAL, CPUI_FLOAT_EQUAL, // NOTEQUAL hases same as EQUAL CPUI_FLOAT_LESS, CPUI_FLOAT_LESS, // LESSEQUAL hashes same as EQUAL 0, // Unused slot - skip CPUI_FLOAT_NAN, CPUI_FLOAT_ADD, CPUI_FLOAT_DIV, CPUI_FLOAT_MULT, CPUI_FLOAT_ADD, // SUB hashes same as ADD CPUI_FLOAT_NEG, CPUI_FLOAT_ABS, CPUI_FLOAT_SQRT, CPUI_FLOAT_INT2FLOAT, CPUI_FLOAT_FLOAT2FLOAT, CPUI_FLOAT_TRUNC, CPUI_FLOAT_CEIL, CPUI_FLOAT_FLOOR, CPUI_FLOAT_ROUND, CPUI_MULTIEQUAL, CPUI_INDIRECT, CPUI_PIECE, CPUI_SUBPIECE, 0, // CAST is skipped CPUI_INT_ADD, CPUI_INT_ADD, // PTRADD and PTRSUB hash same as INT_ADD CPUI_SEGMENTOP, CPUI_CPOOLREF, CPUI_NEW, CPUI_INSERT, CPUI_EXTRACT, CPUI_POPCOUNT }; /// These edges are sorted to provide consistency to the hash /// The sort is based on the PcodeOp sequence number first, then the Varnode slot /// \param op2 is the edge to compare \b this to /// \return \b true if \b this should be ordered before the other edge bool ToOpEdge::operator<(const ToOpEdge &op2) const { const Address &addr1( op->getSeqNum().getAddr() ); const Address &addr2( op2.op->getSeqNum().getAddr() ); if (addr1 != addr2) return (addr1 < addr2); uintm ord1 = op->getSeqNum().getOrder(); uintm ord2 = op2.op->getSeqNum().getOrder(); if (ord1 != ord2) return (ord1 < ord2); return (slot < op2.slot); } /// The hash accumulates: /// - the Varnode slot /// - the address of the PcodeOp /// - the op-code of the PcodeOp /// /// The op-codes are translated so that the hash is invariant under /// common variants. /// \param reg is the incoming hash accumulator value /// \return the accumulator value with \b this edge folded in uint4 ToOpEdge::hash(uint4 reg) const { reg = crc_update(reg,(uint4)slot); reg = crc_update(reg,DynamicHash::transtable[op->code()]); uintb val = op->getSeqNum().getAddr().getOffset(); int4 sz = op->getSeqNum().getAddr().getAddrSize(); for(int4 i=0;i>= 8; } return reg; } /// When building the edge, certain p-code ops (CAST) are effectively ignored so that /// we get the same hash whether or not these ops are present. /// \param vn is the given Varnode void DynamicHash::buildVnUp(const Varnode *vn) { const PcodeOp *op; for(;;) { if (!vn->isWritten()) return; op = vn->getDef(); if (transtable[op->code()] != 0) break; // Do not ignore this operation vn = op->getIn(0); } opedge.push_back(ToOpEdge(op,-1)); } /// When building edges, certain p-code ops (CAST) are effectively ignored so that /// we get the same hash whether or not these ops are present. /// \param vn is the given Varnode void DynamicHash::buildVnDown(const Varnode *vn) { list::const_iterator iter; uint4 insize = opedge.size(); for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) { const PcodeOp *op = *iter; const Varnode *tmpvn = vn; while(transtable[op->code()]==0) { tmpvn = op->getOut(); if (tmpvn == (const Varnode *)0) { op = (const PcodeOp *)0; break; } op = tmpvn->loneDescend(); if (op == (const PcodeOp *)0) break; } if (op == (const PcodeOp *)0) continue; int4 slot = op->getSlot(tmpvn); opedge.push_back(ToOpEdge(op,slot)); } if ((uint4)opedge.size()-insize > 1) sort(opedge.begin()+insize,opedge.end()); } /// \param op is the given PcodeOp thats already in the sub-graph void DynamicHash::buildOpUp(const PcodeOp *op) { for(int4 i=0;inumInput();++i) { const Varnode *vn = op->getIn(i); vnedge.push_back(vn); } } /// \param op is the given PcodeOp thats already in the sub-graph void DynamicHash::buildOpDown(const PcodeOp *op) { const Varnode *vn = op->getOut(); if (vn == (const Varnode *)0) return; vnedge.push_back(vn); } void DynamicHash::gatherUnmarkedVn(void) { for(int4 i=0;iisMark()) continue; markvn.push_back(vn); vn->setMark(); } vnedge.clear(); } void DynamicHash::gatherUnmarkedOp(void) { for(;opedgeprocisMark()) continue; markop.push_back(op); op->setMark(); } } void DynamicHash::clear(void) { markop.clear(); markvn.clear(); vnedge.clear(); opedge.clear(); } /// A sub-graph is formed extending from the given Varnode as the root. The /// method specifies how the sub-graph is extended. In particular: /// - Method 0 is extends to just immediate p-code ops reading or writing root /// - Method 1 extends to one more level of inputs from method 0. /// - Method 2 extends to one more level of outputs from method 0. /// - Method 3 extends to inputs and outputs /// /// The resulting hash and address can be obtained after calling this method /// through getHash() and getAddress(). /// \param root is the given root Varnode /// \param method is the hashing method to use: 0, 1, 2, 3 void DynamicHash::calcHash(const Varnode *root,uint4 method) { vnproc = 0; opproc = 0; opedgeproc = 0; vnedge.push_back(root); gatherUnmarkedVn(); for(uint4 i=vnproc;iclearMark(); for(uint4 i=0;iclearMark(); if (opedge.size() == 0) { hash = (uint8)0; addrresult = Address(); return; } uint4 reg = 0x3ba0fe06; // Calculate the 32-bit hash // Hash in information about the root reg = crc_update(reg,(uint4)root->getSize()); if (root->isConstant()) { uintb val = root->getOffset(); for(int4 i=0;igetSize();++i) { reg = crc_update(reg,(uint4)val); val >>= 8; } } for(uint4 i=0;igetOut() == root)) break; if ((slot >=0) && (op->getIn(slot)==root)) break; } if (ct == opedge.size()) { // If everything attached to the root was a skip op op = opedge[0].getOp(); // Return op that is not attached directly slot = opedge[0].getSlot(); attachedop = false; } // 15 bits unused hash = attachedop ? 0 : 1; hash <<= 4; hash |= method; // 4-bits hash <<= 7; hash |= (uint8)op->code(); // 7-bits hash <<= 5; hash |= (uint8)(slot & 0x1f); // 5-bits hash <<= 32; hash |= (uint8)reg; // 32-bits for the neighborhood hash addrresult = op->getSeqNum().getAddr(); } /// Collect the set of Varnodes at the same address as the given Varnode. /// Starting with method 0, increment the method and calculate hashes /// of the Varnodes until the given Varnode has a unique hash within the set. /// The resulting hash and address can be obtained after calling this method /// through getHash() and getAddress(). /// /// In the rare situation that the last method still does not yield a unique hash, /// the hash encodes: /// - the smallest number of hash collisions /// - the method that produced the smallest number of hash collisions /// - the position of the root within the collision list /// /// For most cases, this will still uniquely identify the root Varnode. /// \param root is the given root Varnode /// \param fd is the function (holding the data-flow graph) void DynamicHash::uniqueHash(const Varnode *root,Funcdata *fd) { vector vnlist; vector vnlist2; vector champion; uint4 method; uint8 tmphash; Address tmpaddr; uint4 maxduplicates = 8; for(method=0;method<4;++method) { clear(); calcHash(root,method); if (hash == 0) return; // Can't get a good hash tmphash = hash; tmpaddr = addrresult; vnlist.clear(); vnlist2.clear(); gatherFirstLevelVars(vnlist,fd,tmpaddr,tmphash); for(uint4 i=0;imaxduplicates) break; } } if (vnlist2.size() <= maxduplicates) { if ((champion.size()==0)||(vnlist2.size() < champion.size())) { champion = vnlist2; if (champion.size()==1) break; // Current hash is unique } } } if (champion.empty()) { hash = (uint8)0; addrresult = Address(); // Couldn't find a unique hash return; } uint4 total = (uint4)champion.size() - 1; // total is in range [0,maxduplicates-1] uint4 pos; for(pos=0;pos<=total;++pos) if (champion[pos] == root) break; if (pos > total) { hash = (uint8)0; addrresult = Address(); return; } hash = tmphash | ((uint8)pos << 49); // Store three bits for position with list of duplicate hashes hash |= ((uint8)total << 52); // Store three bits for total number of duplicate hashes addrresult = tmpaddr; } /// \brief Given an address and hash, find the unique matching Varnode /// /// The method, number of collisions, and position are pulled out of the hash. /// Hashes for the method are performed at Varnodes linked to the given address, /// and the Varnode which matches the hash (and the position) is returned. /// If the number of collisions for the hash does not match, this method /// will not return a Varnode, even if the position looks valid. /// \param fd is the function containing the data-flow /// \param addr is the given address /// \param h is the hash /// \return the matching Varnode or NULL Varnode *DynamicHash::findVarnode(const Funcdata *fd,const Address &addr,uint8 h) { uint4 method = getMethodFromHash(h); uint4 total = getTotalFromHash(h); uint4 pos = getPositionFromHash(h); clearTotalPosition(h); vector vnlist; vector vnlist2; gatherFirstLevelVars(vnlist,fd,addr,h); for(uint4 i=0;i &varlist,const Funcdata *fd,const Address &addr,uint8 h) { OpCode opc = getOpCodeFromHash(h); int4 slot = getSlotFromHash(h); bool isnotattached = getIsNotAttached(h); PcodeOpTree::const_iterator iter = fd->beginOp(addr); PcodeOpTree::const_iterator enditer = fd->endOp(addr); while(iter!=enditer) { PcodeOp *op = (*iter).second; ++iter; if (op->code() != opc) continue; if (slot <0) { Varnode *vn = op->getOut(); if (vn != (Varnode *)0) { if (isnotattached) { // If original varnode was not attached to (this) op op = vn->loneDescend(); if (op != (PcodeOp *)0) { if (transtable[op->code()] == 0) { // Check for skipped op vn = op->getOut(); if (vn == (Varnode *)0) continue; } } } varlist.push_back(vn); } } else if (slot < op->numInput()) { Varnode *vn = op->getIn(slot); if (isnotattached) { op = vn->getDef(); if ((op != (PcodeOp *)0)&&(transtable[op->code()]==0)) vn = op->getIn(0); } varlist.push_back(vn); } } } /// The hash encodes the input \e slot the root Varnode was attached to in its PcodeOp. /// \param h is the hash value /// \return the slot index or -1 if the Varnode was attached as output int4 DynamicHash::getSlotFromHash(uint8 h) { int4 res = (int4)((h>>32)&0x1f); if (res == 31) res = -1; return res; } /// The hash encodes the \e method used to produce it. /// \param h is the hash value /// \return the method: 0, 1, 2, 3 uint4 DynamicHash::getMethodFromHash(uint8 h) { return (uint4)((h>>44)&0xf); } /// The hash encodes the op-code of the p-code op attached to the root Varnode /// \param h is the hash value /// \return the op-code OpCode DynamicHash::getOpCodeFromHash(uint8 h) { return (OpCode)((h>>37)&0x7f); } /// The hash encodes the position of the root Varnode within the list of hash collisions /// \param h is the hash value /// \return the position of the root uint4 DynamicHash::getPositionFromHash(uint8 h) { return (uint4)((h>>49)&7); } /// The hash encodes the total number of collisions for that hash /// \param h is the hash value /// \return the total number of collisions uint4 DynamicHash::getTotalFromHash(uint8 h) { return ((uint4)((h>>52)&7)+1); } /// The hash encodes whether or not the root was directly attached to its PcodeOp /// \param h is the hash value /// \return \b true if the root was not attached bool DynamicHash::getIsNotAttached(uint8 h) { return (((h>>48)&1)!=0); } /// The position and total collisions fields are set by the uniqueness and /// need to be cleared when comparing raw hashes. /// \param h is a reference to the hash to modify void DynamicHash::clearTotalPosition(uint8 &h) { uint8 val = 0x3f; val <<= 49; val = ~val; h &= val; }