{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib\n",
    "import numpy as np\n",
    "from mpmath import mp\n",
    "import matplotlib.pyplot as plt\n",
    "from mpl_toolkits.axes_grid1 import host_subplot\n",
    "import mpl_toolkits.axisartist as AA\n",
    "import math\n",
    "import warnings\n",
    "from ipywidgets import interact, interactive, fixed, interact_manual\n",
    "plt.rcParams['figure.dpi'] = 180\n",
    "plt.rcParams['figure.figsize'] = [12.0, 8.0]\n",
    "# 'text.latex.unicode' no longer exists in newer Matplotlib releases and\n",
    "# assigning it there raises KeyError; only set it where the key is still known.\n",
    "if 'text.latex.unicode' in plt.rcParams:\n",
    "    plt.rcParams['text.latex.unicode'] = True\n",
    "plt.rcParams['text.usetex'] = True\n",
    "plt.rcParams['mathtext.fontset'] = 'stix'\n",
    "plt.rcParams['font.family'] = 'STIXGeneral'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "log_sizes = np.array([i for i in range(1,35)])\n",
    "sizes = np.array([2**i for i in range(1,35)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Register file size is speculative\n",
    "# Defaults are for AWS EC2 c5.2xlarge, an \"Intel(R) Xeon(R) Platinum 8124M CPU @ 3.00GHz\"\n",
    "def init_plot(reg_file=128 * 8, l1=32*1024, l2=1024**2, l3=24 * 1024**2, ram=16 * 1024**3, disk=256 * 1024**3, element_size=32):\n",
    "    \"\"\"Prepare the current pyplot axes for a log-log benchmark plot.\n",
    "\n",
    "    The x axis is log-scaled with one tick per entry of the notebook-level\n",
    "    `sizes` array, labelled with the matching `log_sizes` exponent; the y axis\n",
    "    is log-scaled time in seconds. For every memory level a dashed grey\n",
    "    vertical line is drawn at `capacity / element_size`.\n",
    "\n",
    "    Args:\n",
    "        reg_file, l1, l2, l3, ram, disk: capacity of each memory level in bytes.\n",
    "        element_size: divisor turning a capacity into an x position. Defaults\n",
    "            to 32, the value that was previously hard-coded (presumably the\n",
    "            size of one element in bytes -- TODO confirm).\n",
    "    \"\"\"\n",
    "    plt.xlabel('Size $\\\\left[\\\\log_2 n\\\\right]$')\n",
    "    plt.xscale('log')\n",
    "    plt.xticks(sizes, [str(n) for n in log_sizes])\n",
    "    plt.gca().xaxis.set_minor_locator(plt.NullLocator())\n",
    "\n",
    "    def mem_line(size, label):\n",
    "        \"\"\"Mark one memory level with a dashed line and a text label.\"\"\"\n",
    "        x = size / element_size\n",
    "        plt.axvline(x, color='grey', linestyle='--')\n",
    "        # y=100 is a fixed data-space height for the label.\n",
    "        plt.text(x, 100, label)\n",
    "\n",
    "    mem_line(reg_file, \"REG\")\n",
    "    mem_line(l1, \"L1\")\n",
    "    mem_line(l2, \"L2\")\n",
    "    mem_line(l3, \"L3\")\n",
    "    mem_line(ram, \"RAM\")\n",
    "    mem_line(disk, \"DISK\")\n",
    "\n",
    "    plt.yscale('log')\n",
    "    plt.ylabel('Time $\\\\left[\\\\mathtt{sec}\\\\right]$')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def series(data, 
color='tab:blue'):\n", " plt.plot(sizes[:len(data)], data, color=color, marker='.')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Benchmark c5.2xlarge" ] }, { "cell_type": "markdown", "metadata": { "toc-hr-collapsed": true, "toc-nb-collapsed": true }, "source": [ "On AWS EC2 instance type `c5.2xlarge`. Root drive size increased to 256GB and a 64GB swapfile is added. Using `--allocation heap`." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fft = np.fromstring(\"\"\"\n", "1\t0.00000531\n", "2\t0.000004477\n", "3\t0.000007344\n", "4\t0.000009439\n", "5\t0.000013781\n", "6\t0.000023936\n", "7\t0.000042144\n", "8\t0.000077292\n", "9\t0.000149075\n", "10\t0.000309063\n", "11\t0.000690227\n", "12\t0.001507601\n", "13\t0.00330798\n", "14\t0.00717378\n", "15\t0.002854655\n", "16\t0.005344267\n", "17\t0.010803624\n", "18\t0.022976793\n", "19\t0.046437339\n", "20\t0.097819326\n", "21\t0.203701168\n", "22\t0.424807287\n", "23\t0.907349772\n", "24\t1.858657419\n", "25\t4.064888369\n", "26\t8.003252785\n", "27\t17.67193235\n", "28\t34.221835982\n", "\"\"\", sep=' ').reshape((-1, 2))[:,1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fft_sqrt = np.fromstring(\"\"\"\n", "1\t0.000008935\n", "2\t0.000038013\n", "3\t0.000023374\n", "4\t0.000026747\n", "5\t0.000029887\n", "6\t0.00004039\n", "7\t0.000048373\n", "8\t0.000072872\n", "9\t0.000123616\n", "10\t0.000250539\n", "11\t0.000507387\n", "12\t0.001089775\n", "13\t0.002354191\n", "14\t0.005096965\n", "15\t0.010595209\n", "16\t0.022575648\n", "17\t0.0473638\n", "18\t0.100595503\n", "19\t0.211823046\n", "20\t0.446282983\n", "21\t0.938822169\n", "22\t1.9707616190000001\n", "23\t4.142269804\n", "24\t8.607713068\n", "25\t18.171836223\n", "26\t37.403999458\n", "\"\"\", sep=' ').reshape((-1, 2))[:,1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fft_rec = 
np.fromstring(\"\"\"\n", "1\t0.000006159\n", "2\t0.000004487\n", "3\t0.000006596\n", "4\t0.000008758\n", "5\t0.000011726\n", "6\t0.000026389\n", "7\t0.000039159\n", "8\t0.000073156\n", "9\t0.000148793\n", "10\t0.00030885\n", "11\t0.000692644\n", "12\t0.001506197\n", "13\t0.003307426\n", "14\t0.00717706\n", "15\t0.015458752\n", "16\t0.033280211\n", "17\t0.071393263\n", "18\t0.152663299\n", "19\t0.325223417\n", "20\t0.691438377\n", "21\t1.455103663\n", "22\t3.058479201\n", "23\t6.4278282\n", "24\t13.515026047\n", "\"\"\", sep=' ').reshape((-1, 2))[:,1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "init_plot()\n", "series(fft, 'tab:red')\n", "series(fft_sqrt, 'tab:orange')\n", "series(fft_rec, 'tab:blue')" ] }, { "cell_type": "markdown", "metadata": { "toc-hr-collapsed": true, "toc-nb-collapsed": true }, "source": [ "## Benchmark c5.2xlarge (64MiB RAM)" ] }, { "cell_type": "markdown", "metadata": { "toc-hr-collapsed": true, "toc-nb-collapsed": true }, "source": [ "On AWS EC2 instance type `c5.2xlarge`. 
Root drive size increased to 256GiB and a 64GiB swapfile is added.\n", "\n", "```\n", "L1 Instruction-Cache: (32 KiB, 8-way associativity, direct-mapped)\n", "L1 Data-Cache: (32 KiB, 8-way associativity, direct-mapped)\n", "L2 Unified-Cache: (1024 KiB, 16-way associativity, direct-mapped)\n", "L3 Unified-Cache: (24 MiB, 11-way associativity, hash-based-mapping)\n", "```\n", "\n", "Memory is restricted to 64MiB RAM using cgroups:\n", "\n", "```\n", "sudo cgcreate -t $USER:$USER -a $USER:$USER -g memory:limited\n", "echo 67108864 > /sys/fs/cgroup/memory/limited/memory.limit_in_bytes\n", "cgexec -g memory:limited ./fft\n", "```" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fft_heap = np.fromstring(\"\"\"\n", "1\t0.00034372\n", "2\t0.000039486\n", "3\t0.000035912\n", "4\t0.000037605\n", "5\t0.000041504\n", "6\t0.000042154\n", "7\t0.000055761\n", "8\t0.000055802\n", "9\t0.000086509\n", "10\t0.000104544\n", "11\t0.000249147\n", "12\t0.000277132\n", "13\t0.000917944\n", "14\t0.001096818\n", "15\t0.00341159\n", "16\t0.004738028\n", "17\t0.015229078\n", "18\t0.020695995\n", "19\t0.070773651\n", "20\t0.091959886\n", "21\t30.928260041\n", "22\t40.091185241\n", "23\t163.181767742\n", "24\t184.1793349\n", "25\t832.334289622\n", "\"\"\", sep=' ').reshape((-1, 2))[:,1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fft_mmap = np.fromstring(\"\"\"\n", "1\t0.000327282\n", "2\t0.000035651\n", "3\t0.000028616\n", "4\t0.000040656\n", "5\t0.000043662\n", "6\t0.000036332\n", "7\t0.000054138\n", "8\t0.000056564\n", "9\t0.000078225\n", "10\t0.000108604\n", "11\t0.000244958\n", "12\t0.000287139\n", "13\t0.000859705\n", "14\t0.001116579\n", "15\t0.003552164\n", "16\t0.004715079\n", "17\t0.015413409\n", "18\t0.021136953\n", "19\t0.073038566\n", "20\t0.092504724\n", "21\t76.226779587\n", "22\t68.887253346\n", "23\t726.08383654\n", "24\t527.209818911\n", "\"\"\", sep=' ').reshape((-1, 
2))[:,1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "transpose_heap = np.fromstring(\"\"\"\n", "1\t0.000000407\n", "2\t0.000000201\n", "3\t0.000000508\n", "4\t0.000000106\n", "5\t0.000000378\n", "6\t0.000000184\n", "7\t0.000004731\n", "8\t0.000000604\n", "9\t0.000007464\n", "10\t0.000001546\n", "11\t0.000031108\n", "12\t0.000005326\n", "13\t0.00012615\n", "14\t0.000028176\n", "15\t0.000708114\n", "16\t0.000138124\n", "17\t0.003178672\n", "18\t0.000945967\n", "19\t0.015287759\n", "20\t0.005595849\n", "21\t10.570897667\n", "22\t11.281882518\n", "23\t48.99322635\n", "24\t44.853152764\n", "25\t685.421398633\n", "\"\"\", sep=' ').reshape((-1, 2))[:,1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "transpose_mmap = np.fromstring(\"\"\"\n", "1\t0.000001035\n", "2\t0.000000202\n", "3\t0.000000511\n", "4\t0.000000152\n", "5\t0.000000656\n", "6\t0.000000288\n", "7\t0.000007064\n", "8\t0.000000804\n", "9\t0.000011185\n", "10\t0.000002899\n", "11\t0.000031892\n", "12\t0.00000538\n", "13\t0.000134255\n", "14\t0.000028246\n", "15\t0.000710412\n", "16\t0.000142058\n", "17\t0.003230439\n", "18\t0.001012398\n", "19\t0.015829714\n", "20\t0.005500206\n", "21\t10.533343567\n", "22\t11.358831156\n", "23\t48.803206975\n", "24\t44.780711319\n", "25\t186.202560067\n", "26\t145.289483772\n", "\"\"\", sep=' ').reshape((-1, 2))[:,1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "init_plot(ram=64*1024**2)\n", "series(fft_heap, 'tab:red')\n", "series(fft_mmap, 'tab:blue')\n", "series(transpose_heap, 'tab:orange')\n", "series(transpose_mmap, 'tab:cyan')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Benchmark single thread" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "c5.2xlarge" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fft = np.fromstring(\"\"\"\n", 
"1\t0.000010837\n", "2\t0.00003163\n", "3\t0.000021725\n", "4\t0.000027168\n", "5\t0.000025975\n", "6\t0.000034325\n", "7\t0.000043394\n", "8\t0.000069395\n", "9\t0.00013186\n", "10\t0.000252507\n", "11\t0.000502172\n", "12\t0.001078727\n", "13\t0.00234277\n", "14\t0.00504515\n", "15\t0.010528185\n", "16\t0.022451856\n", "17\t0.046697365\n", "18\t0.099328349\n", "19\t0.207932491\n", "20\t0.444358145\n", "21\t0.93529772\n", "22\t1.964368904\n", "23\t4.132257624\n", "24\t8.600371875\n", "25\t18.178107202\n", "26\t37.357505465\n", "\"\"\", sep=' ').reshape((-1, 2))[:,1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fft_iterative = np.fromstring(\"\"\"\n", "1\t0.000005481\n", "2\t0.000004502\n", "3\t0.00000692\n", "4\t0.000008215\n", "5\t0.000011034\n", "6\t0.000020269\n", "7\t0.000032115\n", "8\t0.000056194\n", "9\t0.000098589\n", "10\t0.000203465\n", "11\t0.000447601\n", "12\t0.000986478\n", "13\t0.002059617\n", "14\t0.004453699\n", "15\t0.009803471\n", "16\t0.022309177\n", "17\t0.047588306\n", "18\t0.106476312\n", "19\t0.26432274\n", "20\t0.60108076\n", "21\t1.2733614389999999\n", "22\t2.688171277\n", "23\t5.689161517\n", "24\t12.132268398\n", "25\t25.850465222\n", "26\t55.871878074\n", "\"\"\", sep=' ').reshape((-1, 2))[:,1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fft_depth_first = np.fromstring(\"\"\"\n", "1\t0.000005586\n", "2\t0.000005259\n", "3\t0.000009514\n", "4\t0.000010033\n", "5\t0.000013038\n", "6\t0.000027076\n", "7\t0.000037728\n", "8\t0.000062199\n", "9\t0.000136259\n", "10\t0.000282032\n", "11\t0.000628744\n", "12\t0.001382235\n", "13\t0.003047723\n", "14\t0.006714292\n", "15\t0.014992353\n", "16\t0.033965664\n", "17\t0.073531157\n", "18\t0.170479579\n", "19\t0.409346494\n", "20\t1.049937156\n", "21\t2.444037146\n", "22\t5.505871476\n", "23\t12.131428218\n", "24\t26.768131456\n", "25\t61.079266758\n", "26\t133.57830445\n", "\"\"\", sep=' 
').reshape((-1, 2))[:,1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fft_recursive = np.fromstring(\"\"\"\n",
    "1\t0.000005541\n",
    "2\t0.000004514\n",
    "3\t0.000004572\n",
    "4\t0.000005506\n",
    "5\t0.000008072\n",
    "6\t0.000014245\n",
    "7\t0.000031183\n",
    "8\t0.00006486\n",
    "9\t0.000143123\n",
    "10\t0.000324709\n",
    "11\t0.000689698\n",
    "12\t0.00150638\n",
    "13\t0.003293801\n",
    "14\t0.007106039\n",
    "15\t0.01528987\n",
    "16\t0.032758035\n",
    "17\t0.069945614\n",
    "18\t0.148304918\n",
    "19\t0.315367176\n",
    "20\t0.670398505\n",
    "21\t1.417594193\n",
    "22\t2.980605325\n",
    "23\t6.24611864\n",
    "24\t13.096406455\n",
    "25\t27.498664066\n",
    "26\t57.386700655\n",
    "\"\"\", sep=' ').reshape((-1, 2))[:,1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "init_plot()\n",
    "series(fft, 'black')\n",
    "series(fft_iterative, 'tab:blue')\n",
    "series(fft_depth_first, 'tab:orange')\n",
    "series(fft_recursive, 'tab:pink')\n",
    "# FIXME: `fft2_heap` is not defined anywhere in this notebook, so this call\n",
    "# raised NameError on a fresh kernel. Re-enable once the series is added.\n",
    "# series(fft2_heap, 'tab:cyan')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "toc-hr-collapsed": true,
    "toc-nb-collapsed": true
   },
   "source": [
    "## Memory access pattern"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fft_df(values, size, offset, stride, loop):\n",
    "    \"\"\"Append the sequence of indices visited by the recursion to `values`.\n",
    "\n",
    "    Presumably models the memory access order of a depth-first FFT; the\n",
    "    resulting list is what the next cells plot.\n",
    "\n",
    "    NOTE(review): the block nesting below was reconstructed from a\n",
    "    whitespace-collapsed copy of this cell -- confirm against the original.\n",
    "\n",
    "    Args:\n",
    "        values: list that receives the visited indices (mutated in place).\n",
    "        size: number of elements in the current sub-problem.\n",
    "        offset: index of the first element of the current sub-problem.\n",
    "        stride: distance between consecutive elements of the sub-problem.\n",
    "        loop: number of adjacent indices visited together per step.\n",
    "\n",
    "    Returns:\n",
    "        The same `values` list.\n",
    "    \"\"\"\n",
    "    if size == 1:\n",
    "        values.append(offset)\n",
    "    else:\n",
    "        half = size // 2\n",
    "        if stride == loop and loop < 128:\n",
    "            # Both halves are handled by one call with a doubled `loop`.\n",
    "            fft_df(values, half, offset, 2 * stride, 2 * loop)\n",
    "        else:\n",
    "            fft_df(values, half, offset, 2 * stride, loop)\n",
    "            fft_df(values, half, offset + stride, 2 * stride, loop)\n",
    "        for i in range(half):\n",
    "            for j in range(loop):\n",
    "                base = offset + 2 * i * stride + j\n",
    "                values.append(base)\n",
    "                values.append(base + stride)\n",
    "    return values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = fft_df([], 16384, 0, 1, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(a, linestyle='', marker='.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "2**15 / 64 / 8"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Threads"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fft_threads = np.fromstring(\"\"\"\n",
    "24\t8.602388099\n",
    "24\t4.433958019\n",
    "24\t3.055124852\n",
    "24\t2.35013958\n",
    "24\t2.18961738\n",
    "24\t2.051058217\n",
    "24\t1.933472554\n",
    "24\t1.826348783\n",
    "24\t1.8278633229999999\n",
    "24\t1.837789269\n",
    "\"\"\", sep=' ').reshape((-1, 2))[:,1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "threads = np.array(range(1,20))\n",
    "plt.xticks(threads, [str(n) for n in threads])\n",
    "plt.axvline(4, color='grey', linestyle='--')\n",
    "plt.text(4, 1, 'Cores')\n",
    "plt.axvline(8, color='grey', linestyle='--')\n",
    "plt.text(8, 1, 'Hyper threads')\n",
    "plt.plot(threads[:len(fft_threads)], fft_threads * threads[:len(fft_threads)] / fft_threads[0], marker = '.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "threads[:len(fft_threads)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2.99\n",
    "\n",
    "\n",
    "\\$69.7 billion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "104 / 2.3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
"skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }