#!/usr/bin/env python
#
# Public Domain 2014-present MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

import wiredtiger, wttest
from wiredtiger import stat
from wtscenario import make_scenarios

def timestamp_str(t):
    # Render a timestamp as lowercase hexadecimal digits with no '0x' prefix, ready to be
    # appended to configuration strings such as 'commit_timestamp='.
    hex_digits = '%x' % t
    return hex_digits

# test_hs06.py
# Verify that triggering history store usage does not cause a spike in memory usage
# to form an update chain from the history store contents.
#
# The required value should be fetched from the history store and then passed straight
# back to the user without putting together an update chain.
#
# TODO: Uncomment the checks after the main portion of the relevant history
# project work is complete.
class test_hs06(wttest.WiredTigerTestCase):
    # Force a small cache.
    conn_config = 'cache_size=50MB,statistics=(fast)'
    session_config = 'isolation=snapshot'
    key_format_values = [
        ('column', dict(key_format='r')),
        ('integer', dict(key_format='i')),
        ('string', dict(key_format='S'))
    ]
    scenarios = make_scenarios(key_format_values)

    def get_stat(self, stat):
        stat_cursor = self.session.open_cursor('statistics:')
        val = stat_cursor[stat][2]
        stat_cursor.close()
        return val

    def get_non_page_image_memory_usage(self):
        # Report the connection's cache_bytes_other statistic, which this test uses as its
        # measure of cache memory that is not page images.
        stat_key = stat.conn.cache_bytes_other
        return self.get_stat(stat_key)

    def create_key(self, i):
        if self.key_format == 'S':
            return str(i)
        return i

    def test_hs_reads(self):
        # Commit two versions of every key (timestamps 2 and 3), checkpoint with the stable
        # timestamp at 2, then read everything back at timestamp 2 while sampling the
        # non-page-image cache usage before and after the reads.
        table_uri = "table:test_hs06"
        self.session.create(
            table_uri, 'key_format={},value_format=S'.format(self.key_format))

        older_value = 'a' * 500
        newer_value = 'b' * 500
        keys = [self.create_key(n) for n in range(1, 2000)]
        read_at_2 = 'read_timestamp=' + timestamp_str(2)

        self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(1))
        cursor = self.session.open_cursor(table_uri)

        # Two load passes of about 1Mb each (1999 keys x 500 bytes), one transaction per
        # pass: the older value commits at timestamp 2 and the newer one at timestamp 3.
        for payload, commit_ts in ((older_value, 2), (newer_value, 3)):
            self.session.begin_transaction()
            for key in keys:
                cursor[key] = payload
            self.session.commit_transaction('commit_timestamp=' + timestamp_str(commit_ts))

        # Write a version of the data to disk.
        self.conn.set_timestamp('stable_timestamp=' + timestamp_str(2))
        self.session.checkpoint()

        # Confirm that every record read at timestamp 2 carries the older value.
        #
        # FIXME-WT-5927: Checkpoint cursors are known to have issues in durable history, so
        # the use of checkpoint handles has been removed from this test. As part of WT-5927,
        # either re-enable the testing of checkpoint cursors or remove this comment.
        #
        # scan_cursor = self.session.open_cursor(table_uri, None, 'checkpoint=WiredTigerCheckpoint')
        scan_cursor = self.session.open_cursor(table_uri)
        self.session.begin_transaction(read_at_2)
        for _, stored in scan_cursor:
            self.assertEqual(stored, older_value)
        self.session.commit_transaction()
        scan_cursor.close()

        start_usage = self.get_non_page_image_memory_usage()

        # Point reads at timestamp 2 are expected to be served straight from the history
        # store, without instantiating a full update chain of every version of the data.
        self.session.begin_transaction(read_at_2)
        for key in keys:
            self.assertEqual(cursor[key], older_value)
        self.session.rollback_transaction()

        end_usage = self.get_non_page_image_memory_usage()

        # Non-page related memory usage shouldn't spike significantly: before the history
        # store work, this kind of workload used a lot of memory recreating update lists
        # for each page. The bound could be tighter, but to avoid flakiness the intended
        # check only requires that usage has not doubled.
        #
        # TODO: Uncomment this once the project work is done.
        # self.assertLessEqual(end_usage, (start_usage * 2))

    # WT-5336 caused a read at timestamp 4 to return the value committed at timestamp 5 or 3.
    def test_hs_modify_reads(self):
        # Build four versions of every key (a full value, two single-byte modifies, then a
        # different full value), checkpoint, and check that reads at timestamps 3 and 4
        # return the value with exactly the modifies committed up to that timestamp.
        table_uri = "table:test_hs06"
        self.session.create(
            table_uri, 'key_format={},value_format=S'.format(self.key_format))

        # Initial large values.
        base_value = 'a' * 500
        final_value = 'd' * 500
        keys = [self.create_key(n) for n in range(1, 2000)]

        self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(1))
        cursor = self.session.open_cursor(table_uri)

        def commit_at(ts):
            self.session.commit_transaction('commit_timestamp=' + timestamp_str(ts))

        def overwrite_all(payload, ts):
            # Replace the value of every key inside a single transaction.
            self.session.begin_transaction()
            for key in keys:
                cursor[key] = payload
            commit_at(ts)

        def modify_all(byte, offset, ts):
            # Patch one byte of every key's value inside a single transaction.
            self.session.begin_transaction()
            for key in keys:
                cursor.set_key(key)
                patch = [wiredtiger.Modify(byte, offset, 1)]
                self.assertEqual(cursor.modify(patch), 0)
            commit_at(ts)

        def check_all(read_ts, wanted):
            # Every key must read back as `wanted` at the given read timestamp.
            self.session.begin_transaction('read_timestamp=' + timestamp_str(read_ts))
            for key in keys:
                self.assertEqual(cursor[key], wanted)
            self.session.rollback_transaction()

        # Load 1Mb of data, then two slight modifications at later timestamps, then
        # something completely different.
        overwrite_all(base_value, 2)
        modify_all('B', 100, 3)
        modify_all('C', 200, 4)
        overwrite_all(final_value, 5)

        # Now the latest version will get written to the data file.
        self.session.checkpoint()

        # Versions of each key at this point, newest first:
        #
        # t5: final_value (full update, the version on the page)
        # t4: base_value with 'B' and 'C' applied
        # t3: base_value with 'B' applied
        # t2: base_value
        #
        # Per the original notes on this test, t4 is held as a full update and t3 as a
        # reverse delta in the history store ("las" in those notes). A read at t3 therefore
        # starts from a modify and has to look forwards to the newest whole update (t4),
        # while a read at t4 is returned from the full update directly.
        with_b = base_value[:100] + 'B' + base_value[101:]
        check_all(3, with_b)

        with_b_and_c = with_b[:200] + 'C' + with_b[201:]
        check_all(4, with_b_and_c)

    def test_hs_prepare_reads(self):
        # Reads that land on a prepared but unresolved update must fail with a prepare
        # conflict; once the prepared transaction commits, its value must be readable at
        # the commit timestamp.
        #
        # FIXME-WT-6061: Prepare reads currently not supported with columnar store.
        # Remove this once prepare reads is supported.
        if self.key_format == 'r':
            return

        # Create a small table.
        uri = "table:test_hs06"
        create_params = 'key_format={},value_format=S'.format(self.key_format)
        self.session.create(uri, create_params)

        value1 = 'a' * 500
        value2 = 'b' * 500

        # Commit value1 for every key at timestamp 2, one transaction per key.
        self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(1))
        cursor = self.session.open_cursor(uri)
        for i in range(1, 2000):
            self.session.begin_transaction()
            cursor[self.create_key(i)] = value1
            self.session.commit_transaction('commit_timestamp=' + timestamp_str(2))

        # Load prepared data for the first ten keys from a second session and leave it in
        # a prepared state.
        prepare_session = self.conn.open_session(self.session_config)
        prepare_cursor = prepare_session.open_cursor(uri)
        prepare_session.begin_transaction()
        for i in range(1, 11):
            prepare_cursor[self.create_key(i)] = value2
        prepare_session.prepare_transaction(
            'prepare_timestamp=' + timestamp_str(3))

        # Write some more to cause eviction of the prepared data. Only keys outside the
        # prepared range are touched.
        for i in range(11, 2000):
            self.session.begin_transaction()
            cursor[self.create_key(i)] = value2
            self.session.commit_transaction('commit_timestamp=' + timestamp_str(4))

        self.session.checkpoint()

        # Try to read every key of the prepared data again.
        # Ensure that we read the history store to find the prepared update and
        # return a prepare conflict as appropriate.
        self.session.begin_transaction('read_timestamp=' + timestamp_str(3))
        for i in range(1, 11):
            cursor.set_key(self.create_key(i))
            self.assertRaisesException(
                wiredtiger.WiredTigerError,
                lambda: cursor.search(),
                '/conflict with a prepared update/')
        self.session.rollback_transaction()

        # Resolve the prepared transaction.
        prepare_session.commit_transaction(
            'commit_timestamp=' + timestamp_str(5) + ',durable_timestamp=' + timestamp_str(6))

        # The previously prepared value must now be visible at its commit timestamp.
        # assertEqual is used because assertEquals is a deprecated alias that newer
        # Python releases no longer provide.
        self.session.begin_transaction('read_timestamp=' + timestamp_str(5))
        for i in range(1, 11):
            self.assertEqual(cursor[self.create_key(i)], value2)
        self.session.rollback_transaction()

    def test_hs_multiple_updates(self):
        # When one transaction writes the same key twice, a later read at that
        # transaction's commit timestamp must return the second of the two values, even
        # after a newer version has been written on top.
        #
        # Create a small table.
        uri = "table:test_hs06"
        create_params = 'key_format={},value_format=S'.format(self.key_format)
        self.session.create(uri, create_params)

        value1 = 'a' * 500
        value2 = 'b' * 500
        value3 = 'c' * 500
        value4 = 'd' * 500

        # Load 1Mb of data, one transaction per key, committed at timestamp 2.
        self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(1))
        cursor = self.session.open_cursor(uri)
        for i in range(1, 2000):
            self.session.begin_transaction()
            cursor[self.create_key(i)] = value1
            self.session.commit_transaction('commit_timestamp=' + timestamp_str(2))

        # Do two different updates to the same key with the same timestamp.
        # We want to make sure that the second value is the one that is visible even after eviction.
        for i in range(1, 11):
            self.session.begin_transaction()
            cursor[self.create_key(i)] = value2
            cursor[self.create_key(i)] = value3
            self.session.commit_transaction('commit_timestamp=' + timestamp_str(3))

        # Write a newer value on top.
        for i in range(1, 2000):
            self.session.begin_transaction()
            cursor[self.create_key(i)] = value4
            self.session.commit_transaction('commit_timestamp=' + timestamp_str(4))

        # Ensure that we see the last of the two updates that got applied.
        # assertEqual is used because assertEquals is a deprecated alias that newer
        # Python releases no longer provide.
        self.session.begin_transaction('read_timestamp=' + timestamp_str(3))
        for i in range(1, 11):
            self.assertEqual(cursor[self.create_key(i)], value3)
        self.session.rollback_transaction()

    def test_hs_multiple_modifies(self):
        # Apply three separate modify calls to the same key inside one transaction, write a
        # newer full value on top, and check that a read at the modifies' commit timestamp
        # returns the base value with all three modifies applied.
        table_uri = "table:test_hs06"
        self.session.create(
            table_uri, 'key_format={},value_format=S'.format(self.key_format))

        base_value = 'a' * 500
        newest_value = 'b' * 500
        all_keys = [self.create_key(n) for n in range(1, 2000)]
        modified_keys = all_keys[:10]
        edits = (('B', 100), ('C', 200), ('D', 300))

        # The value expected at timestamp 3: the base value with each edit's byte
        # substituted at its offset.
        chars = list(base_value)
        for byte, offset in edits:
            chars[offset] = byte
        expected = ''.join(chars)

        # Load 1Mb of data, one transaction per key, committed at timestamp 2.
        self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(1))
        cursor = self.session.open_cursor(table_uri)
        commit_2 = 'commit_timestamp=' + timestamp_str(2)
        for key in all_keys:
            self.session.begin_transaction()
            cursor[key] = base_value
            self.session.commit_transaction(commit_2)

        # Apply three sets of modifies to the first ten keys.
        # They specifically need to be in separate modify calls.
        commit_3 = 'commit_timestamp=' + timestamp_str(3)
        for key in modified_keys:
            self.session.begin_transaction()
            cursor.set_key(key)
            for byte, offset in edits:
                self.assertEqual(cursor.modify([wiredtiger.Modify(byte, offset, 1)]), 0)
            self.session.commit_transaction(commit_3)

        # Write a newer value on top of every key.
        commit_4 = 'commit_timestamp=' + timestamp_str(4)
        for key in all_keys:
            self.session.begin_transaction()
            cursor[key] = newest_value
            self.session.commit_transaction(commit_4)

        # Go back and read. We should get the initial value with the 3 modifies applied on top.
        self.session.begin_transaction('read_timestamp=' + timestamp_str(3))
        for key in modified_keys:
            self.assertEqual(cursor[key], expected)
        self.session.rollback_transaction()

    def test_hs_instantiated_modify(self):
        # Commit a base value at timestamp 2 and three single-byte modifies at timestamps
        # 3, 4 and 5 while the stable timestamp stays at 1, churn a second table, and then
        # check that a read at timestamp 5 returns the base value with all three modifies.
        table_uri = "table:test_hs06"
        create_params = 'key_format={},value_format=S'.format(self.key_format)
        self.session.create(table_uri, create_params)

        base_value = 'a' * 500
        filler_value = 'b' * 500
        all_keys = [self.create_key(n) for n in range(1, 10000)]
        modified_keys = all_keys[:10]

        def commit_at(ts):
            self.session.commit_transaction('commit_timestamp=' + timestamp_str(ts))

        # Load 5Mb of data, one transaction per key.
        self.conn.set_timestamp(
            'oldest_timestamp=' + timestamp_str(1) + ',stable_timestamp=' + timestamp_str(1))
        cursor = self.session.open_cursor(table_uri)
        for key in all_keys:
            self.session.begin_transaction()
            cursor[key] = base_value
            commit_at(2)

        # Apply three sets of modifies, each set at its own timestamp and each key in its
        # own transaction.
        #
        # Intent of the last set, per the original notes on this test: since the stable
        # timestamp is still at 1 there will be no birthmark record, so history store
        # instantiation should choose the timestamp 5 modify as the most recent update,
        # and it must be converted into a standard update as appropriate.
        edits = (('B', 100, 3), ('C', 200, 4), ('D', 300, 5))
        for byte, offset, ts in edits:
            for key in modified_keys:
                self.session.begin_transaction()
                cursor.set_key(key)
                self.assertEqual(cursor.modify([wiredtiger.Modify(byte, offset, 1)]), 0)
                commit_at(ts)

        # Make a bunch of updates to another table to flush everything out of cache.
        extra_uri = 'table:test_hs06_extra'
        self.session.create(extra_uri, create_params)
        extra_cursor = self.session.open_cursor(extra_uri)
        for key in all_keys:
            self.session.begin_transaction()
            extra_cursor[key] = filler_value
            commit_at(6)

        chars = list(base_value)
        for byte, offset, _ in edits:
            chars[offset] = byte
        expected = ''.join(chars)

        # Go back and read. We should get the initial value with the 3 modifies applied on top.
        self.session.begin_transaction('read_timestamp=' + timestamp_str(5))
        for key in modified_keys:
            self.assertEqual(cursor[key], expected)
        self.session.rollback_transaction()

    def test_hs_modify_stable_is_base_update(self):
        # Commit the base value exactly at the stable timestamp (1), layer three single-byte
        # modifies on top at timestamps 2, 3 and 4, churn a second table, and then check
        # that a read at timestamp 4 returns the base value with all three modifies.
        table_uri = "table:test_hs06"
        create_params = 'key_format={},value_format=S'.format(self.key_format)
        self.session.create(table_uri, create_params)

        base_value = 'a' * 500
        filler_value = 'b' * 500
        all_keys = [self.create_key(n) for n in range(1, 10000)]
        modified_keys = all_keys[:10]

        self.conn.set_timestamp(
            'oldest_timestamp=' + timestamp_str(1) + ',stable_timestamp=' + timestamp_str(1))

        # Load 5Mb of data. The base update is at timestamp 1, so when these pages are
        # history store evicted, the base update is the only thing behind the stable
        # timestamp.
        cursor = self.session.open_cursor(table_uri)
        base_commit = 'commit_timestamp=' + timestamp_str(1)
        for key in all_keys:
            self.session.begin_transaction()
            cursor[key] = base_value
            self.session.commit_transaction(base_commit)

        # Apply three sets of modifies: one byte per set, one timestamp per set, one
        # transaction per key.
        chars = list(base_value)
        for commit_ts, (byte, offset) in enumerate((('B', 100), ('C', 200), ('D', 300)), 2):
            modify_commit = 'commit_timestamp=' + timestamp_str(commit_ts)
            for key in modified_keys:
                self.session.begin_transaction()
                cursor.set_key(key)
                self.assertEqual(cursor.modify([wiredtiger.Modify(byte, offset, 1)]), 0)
                self.session.commit_transaction(modify_commit)
            # Track the same edit in the value expected at the final read.
            chars[offset] = byte
        expected = ''.join(chars)

        # Make a bunch of updates to another table to flush everything out of cache.
        extra_uri = 'table:test_hs06_extra'
        self.session.create(extra_uri, create_params)
        extra_cursor = self.session.open_cursor(extra_uri)
        filler_commit = 'commit_timestamp=' + timestamp_str(5)
        for key in all_keys:
            self.session.begin_transaction()
            extra_cursor[key] = filler_value
            self.session.commit_transaction(filler_commit)

        # Go back and read at the timestamp of the last modify.
        self.session.begin_transaction('read_timestamp=' + timestamp_str(4))
        for key in modified_keys:
            self.assertEqual(cursor[key], expected)
        self.session.rollback_transaction()

    def test_hs_rec_modify(self):
        # Commit a base value and three single-byte modifies, overwrite every key at a
        # later timestamp, then checkpoint with the stable timestamp at the last modify and
        # check that a read at that timestamp returns the base value with all three
        # modifies.
        table_uri = "table:test_hs06"
        self.session.create(
            table_uri, 'key_format={},value_format=S'.format(self.key_format))

        base_value = 'a' * 500
        overwrite_value = 'b' * 500
        all_keys = [self.create_key(n) for n in range(1, 10000)]
        modified_keys = all_keys[:10]

        self.conn.set_timestamp(
            'oldest_timestamp=' + timestamp_str(1) + ',stable_timestamp=' + timestamp_str(1))
        cursor = self.session.open_cursor(table_uri)

        def put_everywhere(payload, ts):
            # Store `payload` under every key, one transaction per key.
            config = 'commit_timestamp=' + timestamp_str(ts)
            for key in all_keys:
                self.session.begin_transaction()
                cursor[key] = payload
                self.session.commit_transaction(config)

        def patch_first_keys(byte, offset, ts):
            # Modify one byte of the first ten keys, one transaction per key.
            config = 'commit_timestamp=' + timestamp_str(ts)
            for key in modified_keys:
                self.session.begin_transaction()
                cursor.set_key(key)
                self.assertEqual(cursor.modify([wiredtiger.Modify(byte, offset, 1)]), 0)
                self.session.commit_transaction(config)

        # Base update.
        put_everywhere(base_value, 2)

        # Apply three sets of modifies. The one at timestamp 5 is the one we want to be
        # selected by the checkpoint.
        patch_first_keys('B', 100, 3)
        patch_first_keys('C', 200, 4)
        patch_first_keys('D', 300, 5)

        # Apply another update and evict the pages with the modifies out of cache.
        put_everywhere(overwrite_value, 6)

        # Checkpoint such that the modifies will be selected. When we grab it from the history
        # store, we'll need to unflatten it before using it for reconciliation.
        self.conn.set_timestamp('stable_timestamp=' + timestamp_str(5))
        self.session.checkpoint()

        chars = list(base_value)
        chars[100], chars[200], chars[300] = 'B', 'C', 'D'
        expected = ''.join(chars)

        # Check that the correct value is visible after checkpoint.
        self.session.begin_transaction('read_timestamp=' + timestamp_str(5))
        for key in modified_keys:
            self.assertEqual(cursor[key], expected)
        self.session.rollback_transaction()

# When this file is executed directly (rather than imported), hand control to wttest's
# runner.
if __name__ == '__main__':
    wttest.run()