{ "cells": [ { "cell_type": "markdown", "id": "d38f2066-411f-4c74-bf6f-aaa54b3472f5", "metadata": {}, "source": [ "# Icechunk with dummy data\n", "\n", "This demo illustrates how to use Icechunk as a Zarr store" ] }, { "cell_type": "code", "execution_count": 2, "id": "2abaa80e-f07f-4e6d-b322-0ed280ec77e8", "metadata": {}, "outputs": [], "source": [ "import math\n", "\n", "import numpy as np\n", "import zarr\n", "from icechunk import IcechunkStore, StorageConfig" ] }, { "cell_type": "markdown", "id": "0a382ed5-cd95-481a-b692-c1010c492cff", "metadata": {}, "source": [ "## Create a new Zarr store backed by Icechunk\n", "\n", "This example uses an in-memory store." ] }, { "cell_type": "code", "execution_count": 3, "id": "4f40240b-eb4b-408b-8fd9-bb4e5a60a34d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "store = await IcechunkStore.create(\n", " storage=StorageConfig.memory(\"icechunk-demo\"),\n", " mode=\"w\",\n", ")\n", "store" ] }, { "cell_type": "markdown", "id": "0dc0d6b9-92be-42f0-9cc6-6930954deb3f", "metadata": {}, "source": [ "This dictionary will contain array paths and data that were written to Icechunk, so that we can check correctness." ] }, { "cell_type": "code", "execution_count": 4, "id": "8fa3197a-0674-431f-9dc1-c59fab055cc0", "metadata": {}, "outputs": [], "source": [ "expected = {}" ] }, { "cell_type": "markdown", "id": "cba285dc-d153-47f7-b854-4eb67b09995a", "metadata": {}, "source": [ "These two utility functions generate and write dummy array data to a group." ] }, { "cell_type": "code", "execution_count": 5, "id": "9690e567-2494-4421-bf47-d9e442c4975f", "metadata": {}, "outputs": [], "source": [ "def generate_array_chunks(size: int, dtype=np.int32):\n", " # dim sizes\n", " nz = 64\n", " nt = 128\n", " nx = ny = int(math.sqrt(size / nz / nt))\n", "\n", " # chunk sizes\n", " ct = 2\n", " cz = 8\n", " cx = max(nx // 3, 1)\n", " cy = max(ny // 2, 1)\n", " chunk_shape = (cx, cy, cz, ct)\n", " shape = (nx, ny, nz, nt)\n", "\n", " array = np.arange(nx * ny * nz * nt, dtype=dtype).reshape(shape)\n", "\n", " return array, chunk_shape\n", "\n", "\n", "def create_array(*, group, name, size, dtype, fill_value) -> np.ndarray:\n", " dims = (\"x\", \"y\", \"z\", \"t\")\n", " attrs = {\"description\": \"icechunk test data\"}\n", "\n", " array, chunk_shape = generate_array_chunks(size=size, dtype=dtype)\n", "\n", " group.create_array(\n", " name=name,\n", " shape=array.shape,\n", " dtype=dtype,\n", " fill_value=fill_value,\n", " chunk_shape=chunk_shape,\n", " dimension_names=dims,\n", " attributes=attrs,\n", " data=array,\n", " exists_ok=True,\n", " )\n", "\n", " return array" ] }, { "cell_type": "markdown", "id": "68a37c53-0c5c-44dd-a691-f37541c264d2", "metadata": {}, "source": [ "## A versioned transactional Zarr store\n", "\n", "### Open the root group, write an attribute, commit" ] }, { "cell_type": "code", "execution_count": 6, "id": "34a6780e-379d-47ec-bc2a-b599cfab105a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'foo': 'foo'}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "root_group = zarr.group(store=store, overwrite=True)\n", "root_group.attrs[\"foo\"] = \"foo\"\n", "dict(root_group.attrs) # check that it was written" ] }, { "cell_type": "markdown", "id": "6289c6c8-41e7-40a4-a451-074f72e12c98", "metadata": {}, "source": [ "Commit that change" ] }, { "cell_type": "code", "execution_count": 7, "id": "d43060dd-6678-45f0-91ed-6786dea6cfa7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'2471WQXNN789CTT07172HDDBQG'" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "first_commit = await store.commit(\"wrote a root group attribute\")\n", "first_commit" ] }, { "cell_type": "markdown", "id": "f10f79b2-c44c-4842-84ad-f301934464f6", "metadata": {}, "source": [ "### Add a array to the root group\n", "\n", "We save the created array in `expected` to check that the write was correct (later)." ] }, { "cell_type": "code", "execution_count": 8, "id": "bcaf1dec-65de-4572-ac05-a470ce45e100", "metadata": {}, "outputs": [], "source": [ "expected[\"root-foo\"] = create_array(\n", " group=root_group,\n", " name=\"root-foo\",\n", " size=1 * 1024 * 256,\n", " dtype=np.int32,\n", " fill_value=-1,\n", ")" ] }, { "cell_type": "code", "execution_count": 9, "id": "aae0f3e8-2db7-437a-8d67-11f07aa47d14", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "store_path \n", "{\n", " \"shape\": [\n", " 5,\n", " 5,\n", " 64,\n", " 128\n", " ],\n", " \"data_type\": \"int32\",\n", " \"chunk_grid\": {\n", " \"name\": \"regular\",\n", " \"configuration\": {\n", " \"chunk_shape\": [\n", " 1,\n", " 2,\n", " 8,\n", " 2\n", " ]\n", " }\n", " },\n", " \"chunk_key_encoding\": {\n", " \"name\": \"default\",\n", " \"configuration\": {\n", " \"separator\": \"/\"\n", " }\n", " },\n", " \"fill_value\": -1,\n", " \"codecs\": [\n", " {\n", " \"name\": \"bytes\",\n", " \"configuration\": {\n", " \"endian\": \"little\"\n", " }\n", " }\n", " ],\n", " \"dimension_names\": [\n", " \"x\",\n", " \"y\",\n", " \"z\",\n", " \"t\"\n", " ],\n", " \"attributes\": {\n", " \"description\": \"icechunk test data\"\n", " }\n", "}\n", "(('root-foo', /root-foo shape=(5, 5, 64, 128) dtype=int32>),)\n" ] } ], "source": [ "print(root_group.members())" ] }, { "cell_type": "code", "execution_count": 10, "id": "59254f22-48f2-4629-aec8-546cb12e8f02", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"shape\": [\n", " 5,\n", " 5,\n", " 64,\n", " 128\n", " ],\n", " \"data_type\": \"int32\",\n", " \"chunk_grid\": {\n", " \"name\": \"regular\",\n", " \"configuration\": {\n", " \"chunk_shape\": [\n", " 1,\n", " 2,\n", " 8,\n", " 2\n", " ]\n", " }\n", " },\n", " \"chunk_key_encoding\": {\n", " \"name\": \"default\",\n", " \"configuration\": {\n", " \"separator\": \"/\"\n", " }\n", " },\n", " \"fill_value\": -1,\n", " \"codecs\": [\n", " {\n", " \"name\": \"bytes\",\n", " \"configuration\": {\n", " \"endian\": \"little\"\n", " }\n", " }\n", " ],\n", " \"dimension_names\": [\n", " \"x\",\n", " \"y\",\n", " \"z\",\n", " \"t\"\n", " ],\n", " \"attributes\": {\n", " \"description\": \"icechunk test data\"\n", " }\n", "}\n" ] }, { "data": { "text/plain": [ "{'description': 'icechunk test data'}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dict(root_group[\"root-foo\"].attrs)" ] }, { "cell_type": "code", "execution_count": 11, "id": "78b45ec7-ead8-46c5-b553-476abbd2bca4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"shape\": [\n", " 5,\n", " 5,\n", " 64,\n", " 128\n", " ],\n", " \"data_type\": \"int32\",\n", " \"chunk_grid\": {\n", " \"name\": \"regular\",\n", " \"configuration\": {\n", " \"chunk_shape\": [\n", " 1,\n", " 2,\n", " 8,\n", " 2\n", " ]\n", " }\n", " },\n", " \"chunk_key_encoding\": {\n", " \"name\": \"default\",\n", " \"configuration\": {\n", " \"separator\": \"/\"\n", " }\n", " },\n", " \"fill_value\": -1,\n", " \"codecs\": [\n", " {\n", " \"name\": \"bytes\",\n", " \"configuration\": {\n", " \"endian\": \"little\"\n", " }\n", " }\n", " ],\n", " \"dimension_names\": [\n", " \"x\",\n", " \"y\",\n", " \"z\",\n", " \"t\"\n", " ],\n", " \"attributes\": {\n", " \"description\": \"icechunk test data\"\n", " }\n", "}\n" ] } ], "source": [ "root_group[\"root-foo\"].attrs[\"update\"] = \"new attr\"" ] }, { "cell_type": "code", "execution_count": 12, "id": "2399312c-d53f-443f-8be1-b8702ba6513e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'SQ4T6Y45DXY9F0EYXE7ECBWHPC'" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "second_commit = await store.commit(\"added array, updated attr\")\n", "second_commit" ] }, { "cell_type": "code", "execution_count": 13, "id": "edad201d-d9b3-4825-887a-1e6b3bf07e57", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"shape\": [\n", " 5,\n", " 5,\n", " 64,\n", " 128\n", " ],\n", " \"data_type\": \"int32\",\n", " \"chunk_grid\": {\n", " \"name\": \"regular\",\n", " \"configuration\": {\n", " \"chunk_shape\": [\n", " 1,\n", " 2,\n", " 8,\n", " 2\n", " ]\n", " }\n", " },\n", " \"chunk_key_encoding\": {\n", " \"name\": \"default\",\n", " \"configuration\": {\n", " \"separator\": \"/\"\n", " }\n", " },\n", " \"fill_value\": -1,\n", " \"codecs\": [\n", " {\n", " \"name\": \"bytes\",\n", " \"configuration\": {\n", " \"endian\": \"little\"\n", " }\n", " }\n", " ],\n", " \"dimension_names\": [\n", " \"x\",\n", " \"y\",\n", " \"z\",\n", " \"t\"\n", " ],\n", " \"attributes\": {\n", " \"description\": \"icechunk test data\",\n", " \"update\": \"new attr\"\n", " }\n", "}\n", "store_path \n", "{\n", " \"shape\": [\n", " 5,\n", " 5,\n", " 64,\n", " 128\n", " ],\n", " \"data_type\": \"int32\",\n", " \"chunk_grid\": {\n", " \"name\": \"regular\",\n", " \"configuration\": {\n", " \"chunk_shape\": [\n", " 1,\n", " 2,\n", " 8,\n", " 2\n", " ]\n", " }\n", " },\n", " \"chunk_key_encoding\": {\n", " \"name\": \"default\",\n", " \"configuration\": {\n", " \"separator\": \"/\"\n", " }\n", " },\n", " \"fill_value\": -1,\n", " \"codecs\": [\n", " {\n", " \"name\": \"bytes\",\n", " \"configuration\": {\n", " \"endian\": \"little\"\n", " }\n", " }\n", " ],\n", " \"dimension_names\": [\n", " \"x\",\n", " \"y\",\n", " \"z\",\n", " \"t\"\n", " ],\n", " \"attributes\": {\n", " \"description\": \"icechunk test data\",\n", " \"update\": \"new attr\"\n", " }\n", "}\n" ] } ], "source": [ "assert len(root_group[\"root-foo\"].attrs) == 2\n", "assert len(root_group.members()) == 1" ] }, { "cell_type": "markdown", "id": "f2b6a3c6-9518-4ec4-921f-5303d4e851c7", "metadata": {}, "source": [ "### Commiting when not on `HEAD` will fail." ] }, { "cell_type": "code", "execution_count": 14, "id": "d904f719-98cf-4f51-8e9a-1631dcb3fcba", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "store error: all commits must be made on a branch\n" ] } ], "source": [ "await store.checkout(first_commit)\n", "root_group.attrs[\"update\"] = \"new attr 2\"\n", "\n", "try:\n", " await store.commit(\"new attr 2\")\n", "except ValueError as e:\n", " print(e)\n", "else:\n", " raise ValueError(\"should have failed\")" ] }, { "cell_type": "markdown", "id": "7b135e56-d2c6-43dd-9222-7ecd251b1a7b", "metadata": {}, "source": [ "### Checkout `HEAD`, make a change, and commit." ] }, { "cell_type": "code", "execution_count": 19, "id": "d31009db-8f99-48f1-b7bb-3f66875575cc", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"shape\": [\n", " 5,\n", " 5,\n", " 64,\n", " 128\n", " ],\n", " \"data_type\": \"int32\",\n", " \"chunk_grid\": {\n", " \"name\": \"regular\",\n", " \"configuration\": {\n", " \"chunk_shape\": [\n", " 1,\n", " 2,\n", " 8,\n", " 2\n", " ]\n", " }\n", " },\n", " \"chunk_key_encoding\": {\n", " \"name\": \"default\",\n", " \"configuration\": {\n", " \"separator\": \"/\"\n", " }\n", " },\n", " \"fill_value\": -1,\n", " \"codecs\": [\n", " {\n", " \"name\": \"bytes\",\n", " \"configuration\": {\n", " \"endian\": \"little\"\n", " }\n", " }\n", " ],\n", " \"dimension_names\": [\n", " \"x\",\n", " \"y\",\n", " \"z\",\n", " \"t\"\n", " ],\n", " \"attributes\": {\n", " \"description\": \"icechunk test data\",\n", " \"update\": \"new attr\"\n", " }\n", "}\n" ] }, { "data": { "text/plain": [ "'DZNCW2X281JE5PXSE15N85THAW'" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "await store.reset()\n", "await store.checkout(branch=\"main\")\n", "root_group[\"root-foo\"].attrs[\"update\"] = \"new attr 2\"\n", "third_commit = await store.commit(\"new attr 2\")\n", "third_commit" ] }, { "cell_type": "code", "execution_count": 21, "id": "03f8d62b-d8a7-452c-b086-340bfcb76d50", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'DP8CF4KE7XYHVD0H1GPZ8H58V0'" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "root_group.attrs[\"update\"] = \"new attr 2\"\n", "fourth_commit = await store.commit(\"rewrote array\")\n", "fourth_commit" ] }, { "cell_type": "markdown", "id": "985f6512-1b90-452d-ad51-955f7ca78f1e", "metadata": {}, "source": [ "### Create a hierarchy" ] }, { "cell_type": "code", "execution_count": 22, "id": "aee87354-4c44-4428-a4bf-d38d99b7e608", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'root-foo': dtype('int32')}" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "{k: v.dtype for k, v in expected.items()}" ] }, { "cell_type": "code", "execution_count": 23, "id": "f389f3f9-03d5-4625-9856-145e065785f2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'3ZBWXTZEYPJH8MEZVKM5MW7S0G'" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "newgroup = zarr.group(store=store, path=\"group1/\")\n", "expected[\"group1/foo1\"] = create_array(\n", " group=newgroup, name=\"foo1\", dtype=np.float32, size=1 * 1024 * 128, fill_value=-1234\n", ")\n", "expected[\"group1/foo2\"] = create_array(\n", " group=newgroup, name=\"foo2\", dtype=np.float16, size=1 * 1024 * 64, fill_value=-1234\n", ")\n", "newgroup = zarr.group(store=store, path=\"group2/\")\n", "expected[\"group2/foo3\"] = create_array(\n", " group=newgroup, name=\"foo3\", dtype=np.int64, size=1 * 1024 * 32, fill_value=-1234\n", ")\n", "fifth_commit = await store.commit(\"added groups and arrays\")\n", "fifth_commit" ] }, { "cell_type": "markdown", "id": "f2edb658-d9ff-43d3-b028-2aebcf08c86a", "metadata": {}, "source": [ "### Overwrite an array" ] }, { "cell_type": "code", "execution_count": 24, "id": "bc9d1ef4-2c06-4147-ad4d-9e8051ac4ea8", "metadata": {}, "outputs": [], "source": [ "expected[\"root-foo\"] = create_array(\n", " group=root_group,\n", " name=\"root-foo\",\n", " size=1 * 1024 * 128,\n", " dtype=np.int32,\n", " fill_value=-1,\n", ")" ] }, { "cell_type": "code", "execution_count": 25, "id": "4264bbfa-4193-45e9-bc82-932f488bff28", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'B33DVM1FBXFYB0S1EVH8SHG29G'" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "await store.commit(\"overwrote root-foo\")" ] }, { "cell_type": "markdown", "id": "30217cff-d99e-4397-9c1a-a01d1c39c157", "metadata": {}, "source": [ "### Examine the hierarchy" ] }, { "cell_type": "code", "execution_count": 26, "id": "895faf9f-c1ec-4b9b-9676-f6b1745d73de", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "store_path \n", "{\n", " \"shape\": [\n", " 4,\n", " 4,\n", " 64,\n", " 128\n", " ],\n", " \"data_type\": \"int32\",\n", " \"chunk_grid\": {\n", " \"name\": \"regular\",\n", " \"configuration\": {\n", " \"chunk_shape\": [\n", " 1,\n", " 2,\n", " 8,\n", " 2\n", " ]\n", " }\n", " },\n", " \"chunk_key_encoding\": {\n", " \"name\": \"default\",\n", " \"configuration\": {\n", " \"separator\": \"/\"\n", " }\n", " },\n", " \"fill_value\": -1,\n", " \"codecs\": [\n", " {\n", " \"name\": \"bytes\",\n", " \"configuration\": {\n", " \"endian\": \"little\"\n", " }\n", " }\n", " ],\n", " \"dimension_names\": [\n", " \"x\",\n", " \"y\",\n", " \"z\",\n", " \"t\"\n", " ],\n", " \"attributes\": {\n", " \"description\": \"icechunk test data\"\n", " }\n", "}\n" ] }, { "data": { "text/plain": [ "(('group1',\n", " Group(_async_group=/group1>)),\n", " ('root-foo',\n", " /root-foo shape=(4, 4, 64, 128) dtype=int32>),\n", " ('group2',\n", " Group(_async_group=/group2>)))" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "root_group.members()" ] }, { "cell_type": "code", "execution_count": 27, "id": "14c5afc8-640f-464f-8ee0-b8631e0aacc7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "store_path group1\n", "{\n", " \"shape\": [\n", " 4,\n", " 4,\n", " 64,\n", " 128\n", " ],\n", " \"data_type\": \"float32\",\n", " \"chunk_grid\": {\n", " \"name\": \"regular\",\n", " \"configuration\": {\n", " \"chunk_shape\": [\n", " 1,\n", " 2,\n", " 8,\n", " 2\n", " ]\n", " }\n", " },\n", " \"chunk_key_encoding\": {\n", " \"name\": \"default\",\n", " \"configuration\": {\n", " \"separator\": \"/\"\n", " }\n", " },\n", " \"fill_value\": -1234.0,\n", " \"codecs\": [\n", " {\n", " \"name\": \"bytes\",\n", " \"configuration\": {\n", " \"endian\": \"little\"\n", " }\n", " }\n", " ],\n", " \"dimension_names\": [\n", " \"x\",\n", " \"y\",\n", " \"z\",\n", " \"t\"\n", " ],\n", " \"attributes\": {\n", " \"description\": \"icechunk test data\"\n", " }\n", "}\n", "{\n", " \"shape\": [\n", " 2,\n", " 2,\n", " 64,\n", " 128\n", " ],\n", " \"data_type\": \"float16\",\n", " \"chunk_grid\": {\n", " \"name\": \"regular\",\n", " \"configuration\": {\n", " \"chunk_shape\": [\n", " 1,\n", " 1,\n", " 8,\n", " 2\n", " ]\n", " }\n", " },\n", " \"chunk_key_encoding\": {\n", " \"name\": \"default\",\n", " \"configuration\": {\n", " \"separator\": \"/\"\n", " }\n", " },\n", " \"fill_value\": -1234.0,\n", " \"codecs\": [\n", " {\n", " \"name\": \"bytes\",\n", " \"configuration\": {\n", " \"endian\": \"little\"\n", " }\n", " }\n", " ],\n", " \"dimension_names\": [\n", " \"x\",\n", " \"y\",\n", " \"z\",\n", " \"t\"\n", " ],\n", " \"attributes\": {\n", " \"description\": \"icechunk test data\"\n", " }\n", "}\n" ] }, { "data": { "text/plain": [ "(('foo1',\n", " /group1/foo1 shape=(4, 4, 64, 128) dtype=float32>),\n", " ('foo2',\n", " /group1/foo2 shape=(2, 2, 64, 128) dtype=float16>))" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "root_group[\"group1\"].members()" ] }, { "cell_type": "code", "execution_count": 28, "id": "1fc3f29a-5915-4c66-bfed-5b75389e44e2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "store_path group2\n", "{\n", " \"shape\": [\n", " 2,\n", " 2,\n", " 64,\n", " 128\n", " ],\n", " \"data_type\": \"int64\",\n", " \"chunk_grid\": {\n", " \"name\": \"regular\",\n", " \"configuration\": {\n", " \"chunk_shape\": [\n", " 1,\n", " 1,\n", " 8,\n", " 2\n", " ]\n", " }\n", " },\n", " \"chunk_key_encoding\": {\n", " \"name\": \"default\",\n", " \"configuration\": {\n", " \"separator\": \"/\"\n", " }\n", " },\n", " \"fill_value\": -1234,\n", " \"codecs\": [\n", " {\n", " \"name\": \"bytes\",\n", " \"configuration\": {\n", " \"endian\": \"little\"\n", " }\n", " }\n", " ],\n", " \"dimension_names\": [\n", " \"x\",\n", " \"y\",\n", " \"z\",\n", " \"t\"\n", " ],\n", " \"attributes\": {\n", " \"description\": \"icechunk test data\"\n", " }\n", "}\n" ] }, { "data": { "text/plain": [ "(('foo3',\n", " /group2/foo3 shape=(2, 2, 64, 128) dtype=int64>),)" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "root_group[\"group2\"].members()" ] }, { "cell_type": "markdown", "id": "4ff3d952-a2e9-4065-aa3e-239bc0d04c45", "metadata": {}, "source": [ "### Append" ] }, { "cell_type": "code", "execution_count": 29, "id": "fb608382-04e8-4deb-8e2e-3f130845cf8c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"shape\": [\n", " 2,\n", " 2,\n", " 64,\n", " 128\n", " ],\n", " \"data_type\": \"int64\",\n", " \"chunk_grid\": {\n", " \"name\": \"regular\",\n", " \"configuration\": {\n", " \"chunk_shape\": [\n", " 1,\n", " 1,\n", " 8,\n", " 2\n", " ]\n", " }\n", " },\n", " \"chunk_key_encoding\": {\n", " \"name\": \"default\",\n", " \"configuration\": {\n", " \"separator\": \"/\"\n", " }\n", " },\n", " \"fill_value\": -1234,\n", " \"codecs\": [\n", " {\n", " \"name\": \"bytes\",\n", " \"configuration\": {\n", " \"endian\": \"little\"\n", " }\n", " }\n", " ],\n", " \"dimension_names\": [\n", " \"x\",\n", " \"y\",\n", " \"z\",\n", " \"t\"\n", " ],\n", " \"attributes\": {\n", " \"description\": \"icechunk test data\"\n", " }\n", "}\n", "/group2/foo3 shape=(2, 2, 64, 128) dtype=int64>\n", "/group2/foo3 shape=(4, 2, 64, 128) dtype=int64>\n", "[ 0 16384]\n" ] }, { "data": { "text/plain": [ "'JG601CAP09Q7P19RQ7JSH3AWNR'" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "array = root_group[\"group2/foo3\"]\n", "print(array)\n", "\n", "array = array.resize((array.shape[0] * 2, *array.shape[1:]))\n", "print(array)\n", "array[array.shape[0] // 2 :, ...] = expected[\"group2/foo3\"]\n", "print(array[2:, 0, 0, 0])\n", "expected[\"group2/foo3\"] = np.concatenate([expected[\"group2/foo3\"]] * 2, axis=0)\n", "\n", "await store.commit(\"appended to group2/foo3\")" ] }, { "cell_type": "markdown", "id": "26c1b807-ea2f-4f54-965b-c857dbef102a", "metadata": {}, "source": [ "### Check that values are correct" ] }, { "cell_type": "code", "execution_count": 30, "id": "820cb181-06cb-4ee2-af5b-f5904a147b32", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "root-foo\n", "{\n", " \"shape\": [\n", " 4,\n", " 4,\n", " 64,\n", " 128\n", " ],\n", " \"data_type\": \"int32\",\n", " \"chunk_grid\": {\n", " \"name\": \"regular\",\n", " \"configuration\": {\n", " \"chunk_shape\": [\n", " 1,\n", " 2,\n", " 8,\n", " 2\n", " ]\n", " }\n", " },\n", " \"chunk_key_encoding\": {\n", " \"name\": \"default\",\n", " \"configuration\": {\n", " \"separator\": \"/\"\n", " }\n", " },\n", " \"fill_value\": -1,\n", " \"codecs\": [\n", " {\n", " \"name\": \"bytes\",\n", " \"configuration\": {\n", " \"endian\": \"little\"\n", " }\n", " }\n", " ],\n", " \"dimension_names\": [\n", " \"x\",\n", " \"y\",\n", " \"z\",\n", " \"t\"\n", " ],\n", " \"attributes\": {\n", " \"description\": \"icechunk test data\"\n", " }\n", "}\n", "numchunks: 4096\n", "0.486346960067749\n", "group1/foo1\n", "{\n", " \"shape\": [\n", " 4,\n", " 4,\n", " 64,\n", " 128\n", " ],\n", " \"data_type\": \"float32\",\n", " \"chunk_grid\": {\n", " \"name\": \"regular\",\n", " \"configuration\": {\n", " \"chunk_shape\": [\n", " 1,\n", " 2,\n", " 8,\n", " 2\n", " ]\n", " }\n", " },\n", " \"chunk_key_encoding\": {\n", " \"name\": \"default\",\n", " \"configuration\": {\n", " \"separator\": \"/\"\n", " }\n", " },\n", " \"fill_value\": -1234.0,\n", " \"codecs\": [\n", " {\n", " \"name\": \"bytes\",\n", " \"configuration\": {\n", " \"endian\": \"little\"\n", " }\n", " }\n", " ],\n", " \"dimension_names\": [\n", " \"x\",\n", " \"y\",\n", " \"z\",\n", " \"t\"\n", " ],\n", " \"attributes\": {\n", " \"description\": \"icechunk test data\"\n", " }\n", "}\n", "numchunks: 4096\n", "0.42878198623657227\n", "group1/foo2\n", "{\n", " \"shape\": [\n", " 2,\n", " 2,\n", " 64,\n", " 128\n", " ],\n", " \"data_type\": \"float16\",\n", " \"chunk_grid\": {\n", " \"name\": \"regular\",\n", " \"configuration\": {\n", " \"chunk_shape\": [\n", " 1,\n", " 1,\n", " 8,\n", " 2\n", " ]\n", " }\n", " },\n", " \"chunk_key_encoding\": {\n", " \"name\": \"default\",\n", " \"configuration\": {\n", " \"separator\": \"/\"\n", " }\n", " },\n", " \"fill_value\": -1234.0,\n", " \"codecs\": [\n", " {\n", " \"name\": \"bytes\",\n", " \"configuration\": {\n", " \"endian\": \"little\"\n", " }\n", " }\n", " ],\n", " \"dimension_names\": [\n", " \"x\",\n", " \"y\",\n", " \"z\",\n", " \"t\"\n", " ],\n", " \"attributes\": {\n", " \"description\": \"icechunk test data\"\n", " }\n", "}\n", "numchunks: 2048\n", "0.2633249759674072\n", "group2/foo3\n", "{\n", " \"shape\": [\n", " 4,\n", " 2,\n", " 64,\n", " 128\n", " ],\n", " \"data_type\": \"int64\",\n", " \"chunk_grid\": {\n", " \"name\": \"regular\",\n", " \"configuration\": {\n", " \"chunk_shape\": [\n", " 1,\n", " 1,\n", " 8,\n", " 2\n", " ]\n", " }\n", " },\n", " \"chunk_key_encoding\": {\n", " \"name\": \"default\",\n", " \"configuration\": {\n", " \"separator\": \"/\"\n", " }\n", " },\n", " \"fill_value\": -1234,\n", " \"codecs\": [\n", " {\n", " \"name\": \"bytes\",\n", " \"configuration\": {\n", " \"endian\": \"little\"\n", " }\n", " }\n", " ],\n", " \"dimension_names\": [\n", " \"x\",\n", " \"y\",\n", " \"z\",\n", " \"t\"\n", " ],\n", " \"attributes\": {\n", " \"description\": \"icechunk test data\"\n", " }\n", "}\n", "numchunks: 4096\n", "0.4318978786468506\n" ] } ], "source": [ "import time\n", "\n", "for key, value in expected.items():\n", " print(key)\n", " tic = time.time()\n", " array = root_group[key]\n", " assert array.dtype == value.dtype, (array.dtype, value.dtype)\n", " print(f\"numchunks: {math.prod(s // c for s, c in zip(array.shape, array.chunks, strict=False))}\")\n", " np.testing.assert_array_equal(array[:], value)\n", " print(time.time() - tic)" ] }, { "cell_type": "markdown", "id": "4c728fd3-4dc0-4b23-91c8-7cfe0537050b", "metadata": {}, "source": [ "change values of \"group1/foo1\"" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 5 }