#!/usr/bin/env python
#
# Public Domain 2014-present MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

import itertools
import string, os, sys, random
from suite_subprocess import suite_subprocess
import wiredtiger, wttest

def timestamp_str(t):
    """Return the integer timestamp t formatted as lower-case hex digits."""
    return '%x' % t

# test_util01.py
#    Utilities: wt dump, as well as the dump cursor
class test_util01(wttest.WiredTigerTestCase, suite_subprocess):
    """
    Test wt dump.  We check for specific output.
    Note that we don't test dumping {key,value}_format that are integer
    here.  That's because the integer values are encoded and we don't
    want to duplicate the encoding/decoding algorithms.  Integer dump
    is tested implicitly by test_util02 (which loads dumps created
    in various ways).
    """

    tablename = 'test_util01.a'
    nentries = 1000
    session_config = 'isolation=snapshot'
    stringclass = ''.__class__

    def compare_config(self, expected_cfg, actual_cfg):
        """
        Return True if every key=value pair in expected_cfg also appears,
        with the same value, in actual_cfg.  Extra pairs in actual_cfg
        are ignored.
        """
        # Replace '(' characters so configuration groups don't break parsing.
        # If we ever want to look for config groups this will need to change.
        # Split on the first '=' only, so a value that itself contains '='
        # does not break the dictionary construction.
        actual_pairs = actual_cfg.strip().replace('(', ',').split(',')
        da = dict(kv.split('=', 1) for kv in actual_pairs)
        # Overlay the expected pairs on a copy of the actual ones: the result
        # is unchanged only if actual already held every expected pair.
        de = da.copy()
        de.update(kv.split('=', 1) for kv in expected_cfg.strip().split(','))
        return da == de

    def compare_files(self, filename1, filename2):
        """
        Compare an expected dump (filename1) against an actual dump
        (filename2), line by line.  Return True if they match.

        Lines must be identical, except for the configuration line in the
        header, which is compared with compare_config().  Files with a
        different number of lines never match.
        """
        inheader = isconfig = False
        with open(filename1, "r") as file1, open(filename2, "r") as file2:
            # zip_longest pads the shorter file with None, so a truncated
            # file is detected instead of being silently accepted.
            for l1, l2 in itertools.zip_longest(file1, file2):
                if l1 is None or l2 is None:
                    return False
                if isconfig:
                    if not self.compare_config(l1, l2):
                        self.tty('Failed comparing: ' + l1 + '<<<>>>' + l2)
                        return False
                elif l1 != l2:
                    return False
                # Inside the header, lines alternate between an object
                # name (compared exactly) and its configuration.
                if inheader:
                    isconfig = not isconfig
                stripped = l1.strip()
                if stripped == 'Header':
                    inheader = True
                if stripped == 'Data':
                    inheader = isconfig = False
        return True

    def get_bytes(self, i, len):
        """
        Return a pseudo-random, but predictable string that uses
        all characters.  As a special case, key 0 returns all
        characters 1-255 repeated
        """
        # Note: the 'len' parameter shadows the builtin; the name is kept
        # so existing callers are unaffected.
        ret = bytearray()
        if i == 0:
            ret.extend(j % 255 + 1 for j in range(0, len))
        else:
            for j in range(0, len // 3):
                k = i + j
                ret.extend((k % 255 + 1, (k * 3) % 255 + 1, (k * 7) % 255 + 1))
        ret.append(0)   # Add a final null byte
        return bytes(ret)

    def get_key(self, i):
        """Return the key for entry i: a six-digit number, ':' and 20 bytes' worth of get_bytes()."""
        return (b"%0.6d" % i) + b':' + self.get_bytes(i, 20)

    def get_value(self, i):
        """Return the value for entry i, generated by get_bytes(i, 1000)."""
        return self.get_bytes(i, 1000)

    def _ord(self, byte):
        """Return byte unchanged."""
        return byte

    def _byte_to_str(self, byte):
        """Return the one-character string whose code point is byte."""
        return chr(byte)

    def dumpstr(self, s, hexoutput):
        """
        Return a key or value string formatted just as 'wt dump' would.
        Most printable characters (except tab, newline,...) are printed
        as is, otherwise, backslash hex is used.  When hexoutput is set,
        every byte is printed as two hex digits.
        """
        pieces = []
        for c in s:
            c = self._byte_to_str(c)
            if hexoutput:
                pieces.append("%0.2x" % ord(c))
            elif c == '\\':
                pieces.append('\\\\')
            elif c == ' ' or (c in string.printable and c not in string.whitespace):
                pieces.append(c)
            else:
                pieces.append('\\' + "%0.2x" % ord(c))
        # Both the hex and print formats end each item with a newline.
        pieces.append('\n')
        return ''.join(pieces)

    def table_config(self):
        """Return the configuration string used to create the test table."""
        # Using u configuration lets us store and print all the byte values.
        return 'key_format=u,value_format=u'

    def dump_kv_to_line(self, b):
        """
        Convert a key or value returned by a dump cursor into one line of
        text, after stripping leading and trailing null bytes.
        """
        # The output from dump is a 'u' format.
        # Printable chars appear 'as is', unprintable chars
        # appear as \hh where hh are hex digits.
        # We can't decode the entire byte array, some Unicode decoders
        # will complain as the set of bytes don't represent UTF-8 encoded
        # characters.

        # Create byte representation of printable ascii chars
        printable_chars = bytes(string.printable, 'ascii')
        pieces = []
        for byte in b.strip(b'\x00'):
            if byte in printable_chars:
                pieces.append(chr(byte))
            else:
                pieces.append("\\{:02x}".format(byte))
        pieces.append('\n')
        return ''.join(pieces)

    def write_entries(self, cursor, expectout, hexoutput, commit_timestamp, write_expected):
        """
        Insert self.nentries key/value pairs through cursor.

        When write_expected is true, the values are the predictable ones
        from get_value(i) and each pair is also written, in dump format, to
        expectout.  Otherwise a value from a randomly offset index is
        stored and nothing is written to expectout.

        When commit_timestamp is not None, the inserts are wrapped in a
        transaction committed at that timestamp.
        """
        if commit_timestamp is not None:
            self.session.begin_transaction()
        for i in range(0, self.nentries):
            key = self.get_key(i)
            if write_expected:
                value = self.get_value(i)
            else:
                value = self.get_value(i + random.randint(1, self.nentries))
            cursor[key] = value
            if write_expected:
                expectout.write(self.dumpstr(key, hexoutput))
                expectout.write(self.dumpstr(value, hexoutput))
        if commit_timestamp is not None:
            self.session.commit_transaction(
                'commit_timestamp=' + timestamp_str(commit_timestamp))

    def dump(self, usingapi, hexoutput, commit_timestamp, read_timestamp):
        """
        Populate the table, dump it, and assert that the dump matches the
        expected output.

        usingapi selects a dump cursor instead of the 'wt dump' utility,
        hexoutput selects hex rather than print format.  commit_timestamp
        and read_timestamp, when both are given, control which of two
        successive writes the dump is expected to show.
        """
        params = self.table_config()
        self.session.create('table:' + self.tablename, params)
        cursor = self.session.open_cursor('table:' + self.tablename, None, None)
        ver = wiredtiger.wiredtiger_version()
        verstring = '.'.join(str(ver[idx]) for idx in (1, 2, 3))
        with open("expect.out", "w") as expectout:
            if not usingapi:
                # Note: this output is sensitive to the precise output format
                # generated by wt dump.  If this is likely to change, we should
                # make this test more accommodating.
                expectout.write('WiredTiger Dump (WiredTiger Version ' + verstring + ')\n')
                if hexoutput:
                    expectout.write('Format=hex\n')
                else:
                    expectout.write('Format=print\n')
                expectout.write('Header\n')
                expectout.write('table:' + self.tablename + '\n')
                expectout.write('colgroups=,columns=,' + params + '\n')
                expectout.write('Data\n')
            if commit_timestamp is not None and read_timestamp is not None:
                # Two sets of values are written, at commit_timestamp and at
                # commit_timestamp + 1.  The first set is expected when the
                # read timestamp equals the commit timestamp, the second
                # when the read timestamp is later, and neither when the
                # read timestamp is earlier.
                self.write_entries(cursor, expectout, hexoutput,
                    commit_timestamp, commit_timestamp == read_timestamp)
                self.write_entries(cursor, expectout, hexoutput,
                    commit_timestamp + 1, commit_timestamp < read_timestamp)
            else:
                self.write_entries(cursor, expectout, hexoutput, commit_timestamp, True)
        cursor.close()

        self.pr('calling dump')
        # NOTE(review): in the non-API case, dump.out is also handed to
        # runWt() as its output file while this handle is open; nothing is
        # written through this handle in that case.
        with open("dump.out", "w") as dumpout:
            if usingapi:
                if hexoutput:
                    dumpopt = "dump=hex"
                else:
                    dumpopt = "dump=print"
                dumpcurs = self.session.open_cursor('table:' + self.tablename,
                                                    None, dumpopt)
                for key, val in dumpcurs:
                    dumpout.write(self.dump_kv_to_line(key) +
                                  self.dump_kv_to_line(val))
                dumpcurs.close()
            else:
                dumpargs = ["dump"]
                if hexoutput:
                    dumpargs.append("-x")
                if read_timestamp:
                    dumpargs.append("-t " + str(read_timestamp))
                dumpargs.append(self.tablename)
                self.runWt(dumpargs, outfilename="dump.out")

        self.assertTrue(self.compare_files("expect.out", "dump.out"))

    def test_dump_process(self):
        self.dump(False, False, None, None)

    def test_dump_process_hex(self):
        self.dump(False, True, None, None)

    def test_dump_api(self):
        self.dump(True, False, None, None)

    def test_dump_api_hex(self):
        self.dump(True, True, None, None)

    def test_dump_process_timestamp_old(self):
        self.dump(False, False, 5, 5)

    def test_dump_process_timestamp_none(self):
        self.dump(False, False, 5 , 3)

    def test_dump_process_timestamp_new(self):
        self.dump(False, False, 5, 7)

if __name__ == '__main__':
    wttest.run()