{ "cells": [ { "cell_type": "markdown", "id": "525de621-5f6e-4c4f-9f93-ce65c4daebe8", "metadata": {}, "source": [ "# Version Control with Icechunk" ] }, { "cell_type": "code", "execution_count": 1, "id": "020e322c-4323-4064-b17e-a1e95f710d21", "metadata": {}, "outputs": [], "source": [ "import zarr\n", "from icechunk import IcechunkStore, StorageConfig" ] }, { "cell_type": "markdown", "id": "33262ee6-4e77-423d-a4c7-7ff28fd6f3a6", "metadata": {}, "source": [ "## Create a new Zarr store backed by Icechunk\n", "\n", "This example uses an in-memory store." ] }, { "cell_type": "code", "execution_count": 2, "id": "dd35041c-7981-446a-8981-d1eae02f4fff", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "store = await IcechunkStore.create(\n", " storage=StorageConfig.memory(\"test\"),\n", " mode=\"w\",\n", ")\n", "store" ] }, { "cell_type": "markdown", "id": "fd4931a1-6988-4c1f-978a-70e375377681", "metadata": {}, "source": [ "1. Why not checkout main by default?\n", "2. Why can I create snapshots on the `None` branch?" ] }, { "cell_type": "markdown", "id": "fae0db7f-fc74-4fc6-afeb-fa4a7e59aa64", "metadata": {}, "source": [ "## Snapshotting\n", "\n", "### Concepts\n", "\n", "1. `store.commit` creates a _snapshot_ of the data.\n", "2. Every snapshot is associated with a _snapshot ID_.\n", "3. Use the _snapshot ID_ to time-travel within your data's history." 
] }, { "cell_type": "markdown", "id": "c8d89e5a-9205-45d4-b52e-cefef5ddfc4d", "metadata": {}, "source": [ "### Create a snapshot" ] }, { "cell_type": "code", "execution_count": 3, "id": "51654a0d-58b2-43a9-acd9-0214f22c3dc5", "metadata": {}, "outputs": [], "source": [ "root_group = zarr.group(store=store)" ] }, { "cell_type": "code", "execution_count": 4, "id": "d8bf3160-2a39-48be-82ea-e800fd3164b3", "metadata": {}, "outputs": [], "source": [ "root_group.attrs[\"attr\"] = \"first_attr\"" ] }, { "cell_type": "code", "execution_count": 5, "id": "3a33f69c-9949-458a-9d3a-1f0d7f451553", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'51MXCR5RTNGPC54Z7WJG'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "first_commit = await store.commit(\"first commit\")\n", "first_commit" ] }, { "cell_type": "code", "execution_count": 6, "id": "f41f701e-a513-4fe6-b23e-82200f5ab221", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'attr': 'first_attr'}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dict(root_group.attrs)" ] }, { "cell_type": "code", "execution_count": 7, "id": "10e40f91-7f90-4feb-91ba-b51b709d508d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'45AE3AT46RHZCZ50HWEG'" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "root_group.attrs[\"attr\"] = \"second_attr\"\n", "second_commit = await store.commit(\"second commit\")\n", "second_commit" ] }, { "cell_type": "markdown", "id": "fc5ae3f3-3add-45e7-8f5d-2e5909a6d579", "metadata": {}, "source": [ "### View the current snapshot ID" ] }, { "cell_type": "code", "execution_count": 8, "id": "34fff29b-2bec-490c-89ef-51e14fb4527f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'45AE3AT46RHZCZ50HWEG'" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "store.snapshot_id" ] }, { "cell_type": "markdown", "id": 
"c23784f4-7476-4e11-ac27-c5d8f98070b3", "metadata": {}, "source": [ "### Time-travel to a snapshot" ] }, { "cell_type": "markdown", "id": "5b4d922f-aea5-45f8-a9b5-2293ee46d3ba", "metadata": {}, "source": [ "Here's where we are:" ] }, { "cell_type": "code", "execution_count": 9, "id": "ff66cc99-84ca-4371-b63d-12efa6e98dc3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('45AE3AT46RHZCZ50HWEG', {'attr': 'second_attr'})" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "store.snapshot_id, dict(root_group.attrs)" ] }, { "cell_type": "code", "execution_count": 10, "id": "e785d9a1-36ec-4207-b334-20e0a68e3ac8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'attr': 'first_attr'}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "await store.checkout(snapshot_id=first_commit)\n", "root_group = zarr.group(store=store)\n", "dict(root_group.attrs)" ] }, { "cell_type": "markdown", "id": "2b549569-029d-4184-ba98-43c5288605a5", "metadata": {}, "source": [ "### Snapshotting is only allowed at the tip of a branch\n", "\n", "TODO: need better error message" ] }, { "cell_type": "code", "execution_count": 11, "id": "257215f2-fa09-4730-a1da-07a4d3d12b0c", "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "store error: all commits must be made on a branch", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[11], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m root_group\u001b[38;5;241m.\u001b[39mattrs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mattr\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwill_fail\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m 
store\u001b[38;5;241m.\u001b[39mcommit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mthis should fail\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", "File \u001b[0;32m~/Developer/icechunk/icechunk-python/python/icechunk/__init__.py:261\u001b[0m, in \u001b[0;36mIcechunkStore.commit\u001b[0;34m(self, message)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcommit\u001b[39m(\u001b[38;5;28mself\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mstr\u001b[39m:\n\u001b[1;32m 256\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Commit any uncommitted changes to the store.\u001b[39;00m\n\u001b[1;32m 257\u001b[0m \n\u001b[1;32m 258\u001b[0m \u001b[38;5;124;03m This will create a new snapshot on the current branch and return\u001b[39;00m\n\u001b[1;32m 259\u001b[0m \u001b[38;5;124;03m the snapshot id.\u001b[39;00m\n\u001b[1;32m 260\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 261\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_store\u001b[38;5;241m.\u001b[39mcommit(message)\n", "\u001b[0;31mValueError\u001b[0m: store error: all commits must be made on a branch" ] } ], "source": [ "root_group.attrs[\"attr\"] = \"will_fail\"\n", "await store.commit(\"this should fail\")" ] }, { "cell_type": "markdown", "id": "0fc2f749-19cd-46ff-9619-4d3091cd5bfa", "metadata": {}, "source": [ "## Branching" ] }, { "cell_type": "markdown", "id": "a4b22149-9877-40e7-a470-90505b5b3e81", "metadata": {}, "source": [ "\n", "### View the current branch\n", "\n", "TODO: why is this None" ] }, { "cell_type": "code", "execution_count": 12, "id": "4ccb3c84-787a-43c4-b9af-606e6b8212ed", "metadata": {}, "outputs": [], "source": [ "store.branch" ] }, { "cell_type": "markdown", "id": "7e80c8a1-6fba-4aba-aa04-4fcc9cf3c7e9", "metadata": {}, "source": [ 
"### Create a new branch\n", "\n", "We will create a new branch starting at `first_commit`" ] }, { "cell_type": "code", "execution_count": 13, "id": "81f676de-48f7-4dd1-bbf9-300f97700f32", "metadata": {}, "outputs": [], "source": [ "await store.reset()" ] }, { "cell_type": "code", "execution_count": 14, "id": "94b4e4d8-767a-45d0-9f4f-b0a473e9520a", "metadata": {}, "outputs": [], "source": [ "assert store.snapshot_id == first_commit" ] }, { "cell_type": "code", "execution_count": 15, "id": "b35947c7-e634-41e7-a78e-89447a5f4f8e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'51MXCR5RTNGPC54Z7WJG'" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "await store.new_branch(\"new-branch\")" ] }, { "cell_type": "code", "execution_count": 16, "id": "121d2b55-9311-4f1a-813a-c6c49bbc4a4f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'new-branch'" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "store.branch" ] }, { "cell_type": "code", "execution_count": 17, "id": "799cfce7-7385-4ae6-8868-77d4789c5cdb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'attr': 'first_attr'}" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "root_group = zarr.group(store=store)\n", "dict(root_group.attrs)" ] }, { "cell_type": "code", "execution_count": 18, "id": "c3484aba-d25b-4d26-aa59-714e1f236d24", "metadata": {}, "outputs": [], "source": [ "root_group.attrs[\"attr\"] = \"new_branch_attr\"\n", "new_branch_commit = await store.commit(\"commit on new branch\")" ] }, { "cell_type": "markdown", "id": "56beea9e-7c37-4d7b-ae38-46a19d8ad7bc", "metadata": {}, "source": [ "### Switch branches\n", "\n", "Use the `branch` argument of `checkout` to switch to a different branch" ] }, { "cell_type": "code", "execution_count": 19, "id": "5ed7e6ed-47db-4542-b773-6ab128a10395", "metadata": {}, "outputs": [], "source": [ "await 
store.checkout(branch=\"main\")" ] }, { "cell_type": "code", "execution_count": 20, "id": "b495cfbf-3f82-4f8c-9943-119e6a69dafb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "store.snapshot_id == second_commit" ] }, { "cell_type": "code", "execution_count": 21, "id": "c80da99a-a78f-419f-a92d-bcf770c0db53", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "await store.checkout(branch=\"new-branch\")\n", "store.snapshot_id == new_branch_commit" ] }, { "cell_type": "markdown", "id": "9f7ef9de-345a-4266-a475-a0635c4cca9f", "metadata": {}, "source": [ "## Tagging" ] }, { "cell_type": "markdown", "id": "acbda012-f712-44d2-9397-85c26efdfc0c", "metadata": {}, "source": [ "### Creating a new tag" ] }, { "cell_type": "code", "execution_count": 22, "id": "47a4a3a2-0ae2-4e93-a1a6-a2d4706339db", "metadata": {}, "outputs": [], "source": [ "await store.checkout(branch=\"main\")" ] }, { "cell_type": "markdown", "id": "359345d9-0757-4c2d-a0d8-2b608cca0508", "metadata": {}, "source": [ "TODO: use current snapshot_id by default?" 
] }, { "cell_type": "code", "execution_count": 23, "id": "6366cb69-cb02-4b61-9607-dc4b0ba08517", "metadata": {}, "outputs": [], "source": [ "await store.tag(\"v0\", snapshot_id=store.snapshot_id)" ] }, { "cell_type": "code", "execution_count": 24, "id": "81da441c-ccab-43c2-b50e-0588fb4c91bc", "metadata": {}, "outputs": [], "source": [ "await store.tag(\"v-1\", snapshot_id=first_commit)" ] }, { "cell_type": "markdown", "id": "09d79aa7-a6a5-4ba6-a843-f1d0da313c3f", "metadata": {}, "source": [ "### Time-travel to a tag\n", "\n", "Pass the `tag` argument to `checkout`" ] }, { "cell_type": "code", "execution_count": 25, "id": "dc509bde-2510-48f1-90b0-69a065393ced", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "await store.checkout(tag=\"v-1\")\n", "store.snapshot_id == first_commit" ] }, { "cell_type": "code", "execution_count": 26, "id": "b8b221c1-d94d-4971-97b9-ffa1948ce93d", "metadata": {}, "outputs": [], "source": [ "await store.checkout(branch=\"main\")" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 5 }