{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "md-intro",
   "metadata": {},
   "source": [
    "# Test-fixture preparation for CiteSeer and Cora\n",
    "\n",
    "This notebook writes the node/edge list CSVs under `tests/data/`:\n",
    "\n",
    "- `tests/data/citeseer/node_list.csv`: the `node_type` column of `nodes.tsv`, one value per line, no header.\n",
    "- `tests/data/citeseer/edge_list.csv`: the first two columns of `edges.tsv`, keeping only rows whose ids are both below `CITESEER_NODE_ID_LIMIT` and that are not self-loops.\n",
    "- `tests/data/cora/edge_list.csv`: rewritten in place, keeping only rows whose ids are both below `CORA_NODE_ID_LIMIT`.\n",
    "\n",
    "All data paths are relative, so run the notebook from the directory that contains `tests/`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ab2335c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cfg-constants",
   "metadata": {},
   "outputs": [],
   "source": [
    "CITESEER_DIR = \"tests/data/citeseer\"\n",
    "CORA_DIR = \"tests/data/cora\"\n",
    "\n",
    "# Machine-specific file, only inspected with `head` below; nothing is derived from it.\n",
    "CITESEER_NODE_TYPES_TSV = \"/bfd/graphs/linqs/CiteSeer/latest/preprocessed/undirected/733d2a25a673d8b777ecd366efd2cef45483474937ae74abfe53ef1cdd791b6d/node_types.tsv\"\n",
    "\n",
    "# An edge is kept only when both endpoint ids are strictly below the limit.\n",
    "# NOTE(review): presumably the number of labelled nodes in each dataset; confirm.\n",
    "CITESEER_NODE_ID_LIMIT = 3312\n",
    "CORA_NODE_ID_LIMIT = 2708"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-citeseer",
   "metadata": {},
   "source": [
    "## CiteSeer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f7db5c05",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the node_type column and write it without header or index.\n",
    "citeseer_labels = pd.read_csv(f\"{CITESEER_DIR}/nodes.tsv\", sep=\"\\t\")[[\"node_type\"]]\n",
    "citeseer_labels.to_csv(f\"{CITESEER_DIR}/node_list.csv\", header=False, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a5657bca",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "node_type\r\n",
      "Agents\r\n",
      "IR\r\n",
      "DB\r\n",
      "AI\r\n",
      "HCI\r\n",
      "ML\r\n",
      "Word\r\n",
      "Unknown\r\n"
     ]
    }
   ],
   "source": [
    "!head {CITESEER_NODE_TYPES_TSV}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "df8e68b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# edges.tsv has no header row; only the first two columns are used.\n",
    "citeseer_edges_raw = pd.read_csv(f\"{CITESEER_DIR}/edges.tsv\", sep=\"\\t\", header=None)[[0, 1]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "81f44615",
   "metadata": {},
   "outputs": [],
   "source": [
    "ids_in_range = (citeseer_edges_raw < CITESEER_NODE_ID_LIMIT).all(axis=1)\n",
    "not_self_loop = citeseer_edges_raw[0] != citeseer_edges_raw[1]\n",
    "\n",
    "citeseer_edges = citeseer_edges_raw[ids_in_range & not_self_loop]\n",
    "citeseer_edges.to_csv(f\"{CITESEER_DIR}/edge_list.csv\", header=False, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-cora",
   "metadata": {},
   "source": [
    "## Cora\n",
    "\n",
    "The edge list is read from and written back to the same file, so it must stay headerless: it is parsed with `header=None`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb766d80",
   "metadata": {},
   "outputs": [],
   "source": [
    "cora_edges_raw = pd.read_csv(f\"{CORA_DIR}/edge_list.csv\", header=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "26edc03f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "           0     1\n",
       "0          0     1\n",
       "1          0     1\n",
       "2          0     8\n",
       "3          0    14\n",
       "4          0   258\n",
       "...      ...   ...\n",
       "108985  4140  1772\n",
       "108986  4140  1859\n",
       "108987  4140  1894\n",
       "108988  4140  2228\n",
       "108989  4140  2400\n",
       "\n",
       "[108990 rows x 2 columns]"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cora_edges_raw"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-cora-lookup",
   "metadata": {},
   "source": [
    "### Note: label lookup on the unfiltered edges\n",
    "\n",
    "An earlier session evaluated `labels.values[edges[0]] == 7` (and `edges[labels.values[edges[0]] == 7]`) against the unfiltered Cora edges. Both raised `IndexError: index 4140 is out of bounds for axis 0 with size 4140`: source ids reach 4140, one past the end of a 4140-row label array.\n",
    "\n",
    "It is unclear which `labels` frame was in the kernel at that time; the only `labels` this notebook ever defined came from the CiteSeer `nodes.tsv`. The lookup is therefore recorded here rather than kept as a failing cell.\n",
    "\n",
    "**TODO**: if the lookup is still needed, load the Cora labels explicitly and run it on the filtered edges."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "780c2e3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "cora_edges = cora_edges_raw[(cora_edges_raw < CORA_NODE_ID_LIMIT).all(axis=1)]\n",
    "\n",
    "# header=False matters: this file is read back with header=None, so a header row\n",
    "# (the integer column names 0 and 1) would be parsed as an extra 0,1 edge on the next run.\n",
    "cora_edges.to_csv(f\"{CORA_DIR}/edge_list.csv\", header=False, index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-scratch",
   "metadata": {},
   "source": [
    "## Scratch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "720ec18a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3412"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Scratch arithmetic; its purpose is not recorded in this notebook.\n",
    "# TODO(review): explain what this count represents or delete the cell.\n",
    "12 * 4**4 + 4**4+ 4**3 + 4**2 + 4"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}