mirror of
https://github.com/believethehype/nostrdvm.git
synced 2025-11-19 18:56:26 +01:00
579 lines
25 KiB
Plaintext
579 lines
25 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c1935f9d-02db-468c-ade3-87f164b6d4f6",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Pagerank on subgraphs—efficient Monte-Carlo estimation\n",
|
||
"\n",
|
||
"In this repo you can find the reference code for my novel Subrank algorithm for efficiently computing the Pagerank distribution over $S$ subgraph of $G$.\n",
|
||
"For the reasoning behind the algorithm, the definition and the analysis, I invite the interested reader to [read the paper](https://pippellia.com/pippellia/Social+Graph/Pagerank+on+subgraphs%E2%80%94efficient+Monte-Carlo+estimation).\n",
|
||
"\n",
|
||
"To play with it, follow these steps:"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "bcce6ba6-2539-4bd3-85c8-608eb83bfeb3",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Step 0: Build and store the Graph"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"outputs": [],
|
||
"source": [
|
||
"# Imports\n",
|
||
"from nostr_dvm.utils.wot_utils import build_wot_network, save_network, load_network, get_mc_pagerank, get_subrank, get_metadata, print_results\n",
|
||
"import time\n",
|
||
"import networkx as nx\n",
|
||
"import random\n",
|
||
"\n",
|
||
"\n"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"ExecuteTime": {
|
||
"end_time": "2024-07-30T14:00:01.633887Z",
|
||
"start_time": "2024-07-30T14:00:01.054687Z"
|
||
}
|
||
},
|
||
"id": "5ac404375ef61608"
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [],
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"id": "eadbd0b491bc2f00"
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"outputs": [],
|
||
"source": [
|
||
"user = '99bb5591c9116600f845107d31f9b59e2f7c7e09a1ff802e84f1d43da557ca64'\n",
|
||
"show_results_num = 20\n",
|
||
"use_files = False\n",
|
||
"fetch_metadata = True"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"ExecuteTime": {
|
||
"end_time": "2024-07-30T14:02:33.979216Z",
|
||
"start_time": "2024-07-30T14:02:33.977432Z"
|
||
}
|
||
},
|
||
"id": "8d89d517de8b506e"
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Step 1: fetching kind 3 events from relays & pre-processing\n",
|
||
"current network: 44014 npubs\r\n",
|
||
"Finished in 58.22388768196106\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"index_map, G = await build_wot_network(user, depth=2, max_batch=500, max_time_request=10)\n",
|
||
"if use_files:\n",
|
||
" save_network(index_map, G, user)"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"ExecuteTime": {
|
||
"end_time": "2024-07-30T14:01:58.889751Z",
|
||
"start_time": "2024-07-30T14:01:00.661550Z"
|
||
}
|
||
},
|
||
"id": "c5de7bf0bac361ff"
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c68b3da4-39c0-4fb0-8687-f08aea70f25d",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Step 1: load the graph database\n",
|
||
"\n",
|
||
"First, you have to load the networkx graph database into memory by running the following code."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "f5a48d91-80e1-4016-8030-9ac9ef0ab1af",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2024-07-30T14:02:05.554025Z",
|
||
"start_time": "2024-07-30T14:02:05.546180Z"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"if use_files:\n",
|
||
" # loading the database\n",
|
||
" print('loading the database...')\n",
|
||
" tic = time.time()\n",
|
||
" \n",
|
||
" index_map, G = load_network(user)\n",
|
||
" \n",
|
||
" toc = time.time()\n",
|
||
" print(f'finished in {toc-tic} seconds')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "33a1dc28-5af4-4097-9987-103520588f81",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Step 2: Compute Pagerank over $G$\n",
|
||
"\n",
|
||
"Compute the pagerank over $G$ by using the networkx built-in pagerank function that uses the power iteration method.\n",
|
||
"This vector will be considered as the real Pagerank vector and will be used to compute the errors of the Monte-Carlo algorithm."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"id": "bc5a4f83-d5b9-4def-a7f7-403cd0beedfe",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2024-07-30T14:03:25.690080Z",
|
||
"start_time": "2024-07-30T14:02:59.127942Z"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"computing global pagerank...\n",
|
||
"Don't ₿elieve the Hype 🦊(npub1nxa4tywfz9nqp7z9zp7nr7d4nchhclsf58lcqt5y782rmf2hefjquaa6q8) 8.163684335574227e-05\n",
|
||
"The: Daniel⚡️(npub1aeh2zw4elewy5682lxc6xnlqzjnxksq303gwu2npfaxd49vmde6qcq4nwx) 5.7772373353072425e-05\n",
|
||
"zach(npub10fu0hlkx3s4n4dsgfu0cpqephga4afr4qtzpz9vsyqf7vj88v2yqdp8vp4) 4.225869500489523e-05\n",
|
||
"elsat(npub1zafcms4xya5ap9zr7xxr0jlrtrattwlesytn2s42030lzu0dwlzqpd26k5) 4.740428451060313e-05\n",
|
||
"opreturnbot(npub1e30jt8crv6phnrj22gr3mwuhywrs9lak7ry94akjw0ydm0juptas5xmkwq) 2.8600984950627014e-05\n",
|
||
"Seth For Privacy(npub1tr4dstaptd2sp98h7hlysp8qle6mw7wmauhfkgz3rmxdd8ndprusnw2y5g) 4.345057953406442e-05\n",
|
||
"JeffG(npub1zuuajd7u3sx8xu92yav9jwxpr839cs0kc3q6t56vd5u9q033xmhsk6c2uc) 6.123308353585933e-05\n",
|
||
"CARLA(npub1hu3hdctm5nkzd8gslnyedfr5ddz3z547jqcl5j88g4fame2jd08qh6h8nh) 7.928631886973004e-05\n",
|
||
"Derek Ross(npub18ams6ewn5aj2n3wt2qawzglx9mr4nzksxhvrdc4gzrecw7n5tvjqctp424) 9.094182894540007e-05\n",
|
||
"DickWhitman(npub102a0auqvye3eayugvfwy44un9l477t45uck8s2p08xzpgh784uvslsh7w9) 3.6363382167542e-05\n",
|
||
"Gutenberg(npub148xfa6g0e6rqxne88vzfwqag43qk8xuugthptgkzp6qencs6ad9s3rzddg) 2.664332507864098e-05\n",
|
||
"Jeff Booth(npub1s05p3ha7en49dv8429tkk07nnfa9pcwczkf5x5qrdraqshxdje9sq6eyhe) 8.054632796072222e-05\n",
|
||
"Kanuto(npub1yp7wfa7msdpusf4vupzttttu2mx3cns7whx5cgkt4yr9pkpvujus2mzys7) 3.4660795662680846e-05\n",
|
||
"NVK(npub1az9xj85cmxv8e9j9y80lvqp97crsqdu2fpu3srwthd99qfu9qsgstam8y8) 9.447440217961344e-05\n",
|
||
"ИΛKΛDΛI(npub1sqaxzwvh5fhgw9q3d7v658ucapvfeds3dcd2587fcwyesn7dnwuqt2r45v) 4.8115435493306357e-05\n",
|
||
"NunyaBidness(npub1vwymuey3u7mf860ndrkw3r7dz30s0srg6tqmhtjzg7umtm6rn5eq2qzugd) 5.391411823278926e-05\n",
|
||
"ODELL(npub1qny3tkh0acurzla8x3zy4nhrjz5zd8l9sy9jys09umwng00manysew95gx) 0.00010958137198559553\n",
|
||
"Platte(npub1jhgmf58wdd4mwe4t95ffea079kjxc7f62ncg9gdjmwcrmy0x6x8sfd8u8c) 2.9063569661285428e-05\n",
|
||
"preston(npub1s5yq6wadwrxde4lhfs56gn64hwzuhnfa6r9mj476r5s4hkunzgzqrs6q7z) 8.571223889338872e-05\n",
|
||
"saylor(npub15dqlghlewk84wz3pkqqvzl2w2w36f97g89ljds8x6c094nlu02vqjllm5m) 8.256293517091252e-05\n",
|
||
"finished in 26.598531246185303 seconds\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# computing the pagerank\n",
|
||
"print('computing global pagerank...')\n",
|
||
"tic = time.time()\n",
|
||
"\n",
|
||
"p_G = nx.pagerank(G, tol=1e-12)\n",
|
||
" \n",
|
||
"await print_results(p_G, index_map, show_results_num, getmetadata=fetch_metadata)\n",
|
||
" \n",
|
||
"toc = time.time()\n",
|
||
"print(f'finished in {toc-tic} seconds')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "50552e62-d66d-4a02-b49f-33c299e8311b",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Step 3: Approximate Pagerank over $G$ using Monte-Carlo\n",
|
||
"\n",
|
||
"Compute the pagerank over $G$ using a simple Monte-Carlo implementation and compute the L1 error.\n",
|
||
"This step is essential because it returns the csr-matrix `walk_visited_count`, that will be used later by the Subrank algorithm."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"id": "68bbe6d2-c19a-4a84-8ba0-13ae2b6e13d6",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2024-07-30T14:05:35.375935Z",
|
||
"start_time": "2024-07-30T14:05:18.516484Z"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"progress = 100% \r\n",
|
||
"Total walks performed: 440140\n",
|
||
"Don't ₿elieve the Hype 🦊(npub1nxa4tywfz9nqp7z9zp7nr7d4nchhclsf58lcqt5y782rmf2hefjquaa6q8) 8.164129902339354e-05\n",
|
||
"The: Daniel⚡️(npub1aeh2zw4elewy5682lxc6xnlqzjnxksq303gwu2npfaxd49vmde6qcq4nwx) 4.854347509499075e-05\n",
|
||
"zach(npub10fu0hlkx3s4n4dsgfu0cpqephga4afr4qtzpz9vsyqf7vj88v2yqdp8vp4) 4.413043190453705e-05\n",
|
||
"elsat(npub1zafcms4xya5ap9zr7xxr0jlrtrattwlesytn2s42030lzu0dwlzqpd26k5) 4.63369534997639e-05\n",
|
||
"opreturnbot(npub1e30jt8crv6phnrj22gr3mwuhywrs9lak7ry94akjw0ydm0juptas5xmkwq) 3.0891302333175936e-05\n",
|
||
"Seth For Privacy(npub1tr4dstaptd2sp98h7hlysp8qle6mw7wmauhfkgz3rmxdd8ndprusnw2y5g) 4.63369534997639e-05\n",
|
||
"JeffG(npub1zuuajd7u3sx8xu92yav9jwxpr839cs0kc3q6t56vd5u9q033xmhsk6c2uc) 7.722825583293983e-05\n",
|
||
"CARLA(npub1hu3hdctm5nkzd8gslnyedfr5ddz3z547jqcl5j88g4fame2jd08qh6h8nh) 7.060869104725928e-05\n",
|
||
"Derek Ross(npub18ams6ewn5aj2n3wt2qawzglx9mr4nzksxhvrdc4gzrecw7n5tvjqctp424) 8.605434221384724e-05\n",
|
||
"DickWhitman(npub102a0auqvye3eayugvfwy44un9l477t45uck8s2p08xzpgh784uvslsh7w9) 2.8684780737949082e-05\n",
|
||
"Gutenberg(npub148xfa6g0e6rqxne88vzfwqag43qk8xuugthptgkzp6qencs6ad9s3rzddg) 2.4271737547495376e-05\n",
|
||
"Jeff Booth(npub1s05p3ha7en49dv8429tkk07nnfa9pcwczkf5x5qrdraqshxdje9sq6eyhe) 7.502173423771299e-05\n",
|
||
"Kanuto(npub1yp7wfa7msdpusf4vupzttttu2mx3cns7whx5cgkt4yr9pkpvujus2mzys7) 3.3097823928402784e-05\n",
|
||
"NVK(npub1az9xj85cmxv8e9j9y80lvqp97crsqdu2fpu3srwthd99qfu9qsgstam8y8) 9.70869501899815e-05\n",
|
||
"ИΛKΛDΛI(npub1sqaxzwvh5fhgw9q3d7v658ucapvfeds3dcd2587fcwyesn7dnwuqt2r45v) 5.074999669021761e-05\n",
|
||
"NunyaBidness(npub1vwymuey3u7mf860ndrkw3r7dz30s0srg6tqmhtjzg7umtm6rn5eq2qzugd) 6.840216945203243e-05\n",
|
||
"ODELL(npub1qny3tkh0acurzla8x3zy4nhrjz5zd8l9sy9jys09umwng00manysew95gx) 9.70869501899815e-05\n",
|
||
"Platte(npub1jhgmf58wdd4mwe4t95ffea079kjxc7f62ncg9gdjmwcrmy0x6x8sfd8u8c) 3.0891302333175936e-05\n",
|
||
"preston(npub1s5yq6wadwrxde4lhfs56gn64hwzuhnfa6r9mj476r5s4hkunzgzqrs6q7z) 8.384782061862039e-05\n",
|
||
"saylor(npub15dqlghlewk84wz3pkqqvzl2w2w36f97g89ljds8x6c094nlu02vqjllm5m) 9.70869501899815e-05\n",
|
||
"performed random walks in 16.875550031661987 seconds\n",
|
||
"error pagerank vs mc pagerank in G = 0.019704373428918964\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"\n",
|
||
"# number of the random walks per node\n",
|
||
"R = 10\n",
|
||
"\n",
|
||
"# fix the order of the nodes\n",
|
||
"nodelist = list(G.nodes())\n",
|
||
"\n",
|
||
"tic = time.time()\n",
|
||
"\n",
|
||
"# perform the random walks and get the monte-carlo pagerank\n",
|
||
"walk_visited_count, mc_pagerank = get_mc_pagerank(G, R, nodelist)\n",
|
||
"\n",
|
||
"await print_results(mc_pagerank, index_map, show_results_num, getmetadata=fetch_metadata)\n",
|
||
"\n",
|
||
"toc = time.time()\n",
|
||
"print(f'performed random walks in {toc-tic} seconds')\n",
|
||
"\n",
|
||
"# computing the L1 error\n",
|
||
"error_G_mc = sum( abs(p_G[node] - mc_pagerank[node])\n",
|
||
" for node in G.nodes() )\n",
|
||
"\n",
|
||
"print(f'error pagerank vs mc pagerank in G = {error_G_mc}')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "a7a3bf52-7841-4842-a3f2-6f6d066e0c94",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Step 4: Select random subgraph $S$ and compute its Pagerank distribution\n",
|
||
"\n",
|
||
"Select a random subgraph $S$ consisting of 50k nodes, and compute its Pagerank distribution."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"id": "c00c005e-2064-4975-acb6-3bcf52eb8594",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2024-07-30T14:05:57.760680Z",
|
||
"start_time": "2024-07-30T14:05:38.651646Z"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"computing local pagerank...\n",
|
||
"Robert(npub1zv62e6wxx4lnsnfuwek9xpxlt3ahx6xda7e3zh5w5dkzz5md9lps6ggzf0) 0.0019634124467849253\n",
|
||
"aragol7(npub1ptwt040pjt3pd2lx9x0reysshwu5l7t7gtclza94aty3f008y4csut9jy7) 0.0019634124467849253\n",
|
||
"Alex Jones(npub1fg38s8xuhn4petadndvekvspkz7vpmdundq4vza5fc4v9el4cd9qwghuct) 0.0019634124467849253\n",
|
||
"rohitkumarjain(npub17jp3xlr5quxul9nxh2muhqk5qm76thq974cx4wfvvztav9fejkrqc0w0tj) 0.0019634124467849253\n",
|
||
"Feynman(npub19xt4d6epa8xtse8x6wh0fqz0hc5kzu7cwr0677t2kshlrjzs2nzserr5fk) 0.0019634124467849253\n",
|
||
"saunter(npub1l3gfsderx4ktqhcmwzgegwatkv9v6fs0hujvlwznje0c90xm7m6qs2s6a5) 0.0019634124467849253\n",
|
||
"ACME(npub1pu5x5dmkryc7sp20399lvm6sh9rnp9gydwuc9jug6r88kcq6t85qalqymy) 0.0022018268153709605\n",
|
||
"Ali (npub13es8zhzmvmhfa0ekxm74ah94nhall24ke2005kdlkkcwwxlm5qaqpdxfxk) 0.0025831954811405765\n",
|
||
"(npub1egw0ecrcyxytmsl7kx2hjmrp2pua354dt2k23mjc8z4g4pwkqqvs68cr06) 0.0019634124467849253\n",
|
||
"nekio(npub1hzdf5vjg0hz7yxjvzrtvatv0wcjg52gd6a3ryerv5w79rfj5kzws3yf3mm) 0.0019634124467849253\n",
|
||
"rafbe(npub1f4z7l8x59ftwp76zn57uxu5pxvm5ut5r2ppgpxl9wkn6u0l9q87s8hkycf) 0.0019634124467849253\n",
|
||
"(npub1vl9m8kpcqrxp4ah462pp78y3rupnags7zf5l72kkzw4n4cyek8es5spupu) 0.002960186365116031\n",
|
||
"nobody(npub1elff7suqhwxk32s39z2lvp9sfad6fkewt76ygae2te5c4f283rzsfz2jra) 0.0019634124467849253\n",
|
||
"Dunderheid(npub1g3dx4zq6s4qv5n67sgzx6akekhmyggeass2g4qge3kjxpg9cn85qk7clcm) 0.0019634124467849253\n",
|
||
"甬男(npub1e9sh3syhmunycmggh4ndu2ajgzcydtuk9yxeltlmmvya7zfnyssqq5zjlt) 0.0019634124467849253\n",
|
||
"icota(npub16cyjeutda5599u28nddjfvc3zgjczg8rtp92ln4vpvd4wclegkksecz07g) 0.0019634124467849253\n",
|
||
"(npub1dm79u9tmzr5je7jtm5x6w9y5vnevus0plz8jcdj4e4qz72v8jflsxwf4ct) 0.0019634124467849253\n",
|
||
"(npub1qrm74fhua2udhzat9ycsw6jwy28t9z48zylv4jgk0nmp24pkak0qkn0nvj) 0.0019634124467849253\n",
|
||
"griff(npub1xf0ferksekrc7g9r2atfc8v2za23lx7lklqujq5agwn6lqtfutuqzf3rz8) 0.0019634124467849253\n",
|
||
"(npub1srhjym4dngwsgtdf9j3ejhxaf2w8eg2zj8rychzklnray78kmwasq7r8yn) 0.0019634124467849253\n",
|
||
"finished in 19.10466504096985 seconds\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# selecting random subgraph S\n",
|
||
"S_nodes = set(random.sample(list(G.nodes()), k=500)) #50000\n",
|
||
"S = G.subgraph(S_nodes).copy()\n",
|
||
"\n",
|
||
"# computing pagerank over S\n",
|
||
"print('computing local pagerank...')\n",
|
||
"tic = time.time()\n",
|
||
"\n",
|
||
"p_S = nx.pagerank(S, tol=1e-12)\n",
|
||
"await print_results(p_S, index_map, show_results_num, getmetadata=fetch_metadata)\n",
|
||
"\n",
|
||
"\n",
|
||
"toc = time.time()\n",
|
||
"print(f'finished in {toc-tic} seconds')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "8eb0ae95-87c1-4f5d-890f-42ff5892d881",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Step 4b: Use integrated functions\n",
|
||
"\n",
|
||
"Run the Subrank algorithm to approximate the Pagerank over $S$ subgraph of $G$. Then compute the L1 error."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"computing inteegrated pagerang function\n",
|
||
"Don't ₿elieve the Hype 🦊(npub1nxa4tywfz9nqp7z9zp7nr7d4nchhclsf58lcqt5y782rmf2hefjquaa6q8) 7.33073810886042e-05\n",
|
||
"The: Daniel⚡️(npub1aeh2zw4elewy5682lxc6xnlqzjnxksq303gwu2npfaxd49vmde6qcq4nwx) 4.82284734499105e-05\n",
|
||
"zach(npub10fu0hlkx3s4n4dsgfu0cpqephga4afr4qtzpz9vsyqf7vj88v2yqdp8vp4) 3.6096112371534015e-05\n",
|
||
"elsat(npub1zafcms4xya5ap9zr7xxr0jlrtrattwlesytn2s42030lzu0dwlzqpd26k5) 3.932073936226785e-05\n",
|
||
"opreturnbot(npub1e30jt8crv6phnrj22gr3mwuhywrs9lak7ry94akjw0ydm0juptas5xmkwq) 2.6532755448179916e-05\n",
|
||
"Seth For Privacy(npub1tr4dstaptd2sp98h7hlysp8qle6mw7wmauhfkgz3rmxdd8ndprusnw2y5g) 3.7662079240686915e-05\n",
|
||
"JeffG(npub1zuuajd7u3sx8xu92yav9jwxpr839cs0kc3q6t56vd5u9q033xmhsk6c2uc) 5.037600211657074e-05\n",
|
||
"CARLA(npub1hu3hdctm5nkzd8gslnyedfr5ddz3z547jqcl5j88g4fame2jd08qh6h8nh) 6.244046482882089e-05\n",
|
||
"Derek Ross(npub18ams6ewn5aj2n3wt2qawzglx9mr4nzksxhvrdc4gzrecw7n5tvjqctp424) 7.071551628113585e-05\n",
|
||
"DickWhitman(npub102a0auqvye3eayugvfwy44un9l477t45uck8s2p08xzpgh784uvslsh7w9) 3.2614318807201426e-05\n",
|
||
"Gutenberg(npub148xfa6g0e6rqxne88vzfwqag43qk8xuugthptgkzp6qencs6ad9s3rzddg) 2.555216772659302e-05\n",
|
||
"Jeff Booth(npub1s05p3ha7en49dv8429tkk07nnfa9pcwczkf5x5qrdraqshxdje9sq6eyhe) 6.4400793810946e-05\n",
|
||
"Kanuto(npub1yp7wfa7msdpusf4vupzttttu2mx3cns7whx5cgkt4yr9pkpvujus2mzys7) 3.209949445351193e-05\n",
|
||
"NVK(npub1az9xj85cmxv8e9j9y80lvqp97crsqdu2fpu3srwthd99qfu9qsgstam8y8) 7.061206554109254e-05\n",
|
||
"ИΛKΛDΛI(npub1sqaxzwvh5fhgw9q3d7v658ucapvfeds3dcd2587fcwyesn7dnwuqt2r45v) 4.055409483359867e-05\n",
|
||
"NunyaBidness(npub1vwymuey3u7mf860ndrkw3r7dz30s0srg6tqmhtjzg7umtm6rn5eq2qzugd) 4.5094809254399846e-05\n",
|
||
"ODELL(npub1qny3tkh0acurzla8x3zy4nhrjz5zd8l9sy9jys09umwng00manysew95gx) 8.515245131214811e-05\n",
|
||
"Platte(npub1jhgmf58wdd4mwe4t95ffea079kjxc7f62ncg9gdjmwcrmy0x6x8sfd8u8c) 2.7728750667446487e-05\n",
|
||
"preston(npub1s5yq6wadwrxde4lhfs56gn64hwzuhnfa6r9mj476r5s4hkunzgzqrs6q7z) 6.665485620723231e-05\n",
|
||
"saylor(npub15dqlghlewk84wz3pkqqvzl2w2w36f97g89ljds8x6c094nlu02vqjllm5m) 6.688756088914574e-05\n",
|
||
"performed random walks in -72.14416885375977 seconds\n",
|
||
"error pagerank vs subrank in S = 0.9886733121143664\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# computing subrank\n",
|
||
"print('computing integrated pagerang function')\n",
|
||
"tic = time.time()\n",
|
||
"\n",
|
||
"pr = nx.pagerank(G)\n",
|
||
"await print_results(pr, index_map, show_results_num, getmetadata=fetch_metadata)\n",
|
||
"print(f'performed random walks in {toc-tic} seconds')\n",
|
||
"\n",
|
||
"# computing the L1 error\n",
|
||
"error_S_subrank = sum( abs(p_S[node] - pr[node])\n",
|
||
" for node in S_nodes )\n",
|
||
"\n",
|
||
"print(f'error pagerank vs subrank in S = {error_S_subrank}')"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"ExecuteTime": {
|
||
"end_time": "2024-07-30T14:07:28.145678Z",
|
||
"start_time": "2024-07-30T14:07:09.900257Z"
|
||
}
|
||
},
|
||
"id": "98adf91155b2429b"
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"id": "41637e2b-1998-43ff-9811-a7bbe658d742",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2024-07-30T14:07:43.572005Z",
|
||
"start_time": "2024-07-30T14:07:30.074996Z"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"computing subrank over S...\n",
|
||
"walks performed = 75\n",
|
||
"Robert(npub1zv62e6wxx4lnsnfuwek9xpxlt3ahx6xda7e3zh5w5dkzz5md9lps6ggzf0) 0.0019623233908948193\n",
|
||
"aragol7(npub1ptwt040pjt3pd2lx9x0reysshwu5l7t7gtclza94aty3f008y4csut9jy7) 0.0019623233908948193\n",
|
||
"Alex Jones(npub1fg38s8xuhn4petadndvekvspkz7vpmdundq4vza5fc4v9el4cd9qwghuct) 0.0019623233908948193\n",
|
||
"rohitkumarjain(npub17jp3xlr5quxul9nxh2muhqk5qm76thq974cx4wfvvztav9fejkrqc0w0tj) 0.0019623233908948193\n",
|
||
"Feynman(npub19xt4d6epa8xtse8x6wh0fqz0hc5kzu7cwr0677t2kshlrjzs2nzserr5fk) 0.0019623233908948193\n",
|
||
"saunter(npub1l3gfsderx4ktqhcmwzgegwatkv9v6fs0hujvlwznje0c90xm7m6qs2s6a5) 0.0019623233908948193\n",
|
||
"ACME(npub1pu5x5dmkryc7sp20399lvm6sh9rnp9gydwuc9jug6r88kcq6t85qalqymy) 0.0021585557299843012\n",
|
||
"Ali (npub13es8zhzmvmhfa0ekxm74ah94nhall24ke2005kdlkkcwwxlm5qaqpdxfxk) 0.002551020408163265\n",
|
||
"(npub1egw0ecrcyxytmsl7kx2hjmrp2pua354dt2k23mjc8z4g4pwkqqvs68cr06) 0.0019623233908948193\n",
|
||
"nekio(npub1hzdf5vjg0hz7yxjvzrtvatv0wcjg52gd6a3ryerv5w79rfj5kzws3yf3mm) 0.0019623233908948193\n",
|
||
"rafbe(npub1f4z7l8x59ftwp76zn57uxu5pxvm5ut5r2ppgpxl9wkn6u0l9q87s8hkycf) 0.0019623233908948193\n",
|
||
"(npub1vl9m8kpcqrxp4ah462pp78y3rupnags7zf5l72kkzw4n4cyek8es5spupu) 0.0027472527472527475\n",
|
||
"nobody(npub1elff7suqhwxk32s39z2lvp9sfad6fkewt76ygae2te5c4f283rzsfz2jra) 0.0019623233908948193\n",
|
||
"Dunderheid(npub1g3dx4zq6s4qv5n67sgzx6akekhmyggeass2g4qge3kjxpg9cn85qk7clcm) 0.0019623233908948193\n",
|
||
"甬男(npub1e9sh3syhmunycmggh4ndu2ajgzcydtuk9yxeltlmmvya7zfnyssqq5zjlt) 0.0019623233908948193\n",
|
||
"icota(npub16cyjeutda5599u28nddjfvc3zgjczg8rtp92ln4vpvd4wclegkksecz07g) 0.0019623233908948193\n",
|
||
"(npub1dm79u9tmzr5je7jtm5x6w9y5vnevus0plz8jcdj4e4qz72v8jflsxwf4ct) 0.0019623233908948193\n",
|
||
"(npub1qrm74fhua2udhzat9ycsw6jwy28t9z48zylv4jgk0nmp24pkak0qkn0nvj) 0.0019623233908948193\n",
|
||
"griff(npub1xf0ferksekrc7g9r2atfc8v2za23lx7lklqujq5agwn6lqtfutuqzf3rz8) 0.0019623233908948193\n",
|
||
"(npub1srhjym4dngwsgtdf9j3ejhxaf2w8eg2zj8rychzklnray78kmwasq7r8yn) 0.0019623233908948193\n",
|
||
"performed random walks in -92.26541304588318 seconds\n",
|
||
"performed random walks in -92.26541304588318 seconds\n",
|
||
"error pagerank vs subrank in S = 0.004822184121875954\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# computing subrank\n",
|
||
"print('computing subrank over S...')\n",
|
||
"tic = time.time()\n",
|
||
"\n",
|
||
"subrank = get_subrank(S, G, walk_visited_count, nodelist)\n",
|
||
"await print_results(subrank, index_map, show_results_num, getmetadata=fetch_metadata)\n",
|
||
" \n",
|
||
"print(f'performed random walks in {toc-tic} seconds')\n",
|
||
"print(f'performed random walks in {toc-tic} seconds')\n",
|
||
"\n",
|
||
"# computing the L1 error\n",
|
||
"error_S_subrank = sum( abs(p_S[node] - subrank[node])\n",
|
||
" for node in S_nodes )\n",
|
||
"\n",
|
||
"print(f'error pagerank vs subrank in S = {error_S_subrank}')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "a929d6dc-37ef-4a4a-bc9c-c37bdafd012a",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Step 6: Approximate Pagerank over $S$ using Monte-Carlo naive recomputation\n",
|
||
"\n",
|
||
"Run the Monte-Carlo Pagerank algorithm on $S$ as a reference for the number of random walks required and the error achieved."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"id": "14907960-116b-405d-9f73-85e625958050",
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2024-07-30T14:08:12.513807Z",
|
||
"start_time": "2024-07-30T14:07:53.683940Z"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"computing naive monte-carlo pagerank over S\n",
|
||
"progress = 100% \r\n",
|
||
"Total walks performed: 5000\n",
|
||
"Robert(npub1zv62e6wxx4lnsnfuwek9xpxlt3ahx6xda7e3zh5w5dkzz5md9lps6ggzf0) 0.0019654088050314465\n",
|
||
"aragol7(npub1ptwt040pjt3pd2lx9x0reysshwu5l7t7gtclza94aty3f008y4csut9jy7) 0.0019654088050314465\n",
|
||
"Alex Jones(npub1fg38s8xuhn4petadndvekvspkz7vpmdundq4vza5fc4v9el4cd9qwghuct) 0.0019654088050314465\n",
|
||
"rohitkumarjain(npub17jp3xlr5quxul9nxh2muhqk5qm76thq974cx4wfvvztav9fejkrqc0w0tj) 0.0019654088050314465\n",
|
||
"Feynman(npub19xt4d6epa8xtse8x6wh0fqz0hc5kzu7cwr0677t2kshlrjzs2nzserr5fk) 0.0019654088050314465\n",
|
||
"saunter(npub1l3gfsderx4ktqhcmwzgegwatkv9v6fs0hujvlwznje0c90xm7m6qs2s6a5) 0.0019654088050314465\n",
|
||
"ACME(npub1pu5x5dmkryc7sp20399lvm6sh9rnp9gydwuc9jug6r88kcq6t85qalqymy) 0.0019654088050314465\n",
|
||
"Ali (npub13es8zhzmvmhfa0ekxm74ah94nhall24ke2005kdlkkcwwxlm5qaqpdxfxk) 0.0033411949685534592\n",
|
||
"(npub1egw0ecrcyxytmsl7kx2hjmrp2pua354dt2k23mjc8z4g4pwkqqvs68cr06) 0.0019654088050314465\n",
|
||
"nekio(npub1hzdf5vjg0hz7yxjvzrtvatv0wcjg52gd6a3ryerv5w79rfj5kzws3yf3mm) 0.0019654088050314465\n",
|
||
"rafbe(npub1f4z7l8x59ftwp76zn57uxu5pxvm5ut5r2ppgpxl9wkn6u0l9q87s8hkycf) 0.0019654088050314465\n",
|
||
"(npub1vl9m8kpcqrxp4ah462pp78y3rupnags7zf5l72kkzw4n4cyek8es5spupu) 0.002358490566037736\n",
|
||
"nobody(npub1elff7suqhwxk32s39z2lvp9sfad6fkewt76ygae2te5c4f283rzsfz2jra) 0.0019654088050314465\n",
|
||
"Dunderheid(npub1g3dx4zq6s4qv5n67sgzx6akekhmyggeass2g4qge3kjxpg9cn85qk7clcm) 0.0019654088050314465\n",
|
||
"甬男(npub1e9sh3syhmunycmggh4ndu2ajgzcydtuk9yxeltlmmvya7zfnyssqq5zjlt) 0.0019654088050314465\n",
|
||
"icota(npub16cyjeutda5599u28nddjfvc3zgjczg8rtp92ln4vpvd4wclegkksecz07g) 0.0019654088050314465\n",
|
||
"(npub1dm79u9tmzr5je7jtm5x6w9y5vnevus0plz8jcdj4e4qz72v8jflsxwf4ct) 0.0019654088050314465\n",
|
||
"(npub1qrm74fhua2udhzat9ycsw6jwy28t9z48zylv4jgk0nmp24pkak0qkn0nvj) 0.0019654088050314465\n",
|
||
"griff(npub1xf0ferksekrc7g9r2atfc8v2za23lx7lklqujq5agwn6lqtfutuqzf3rz8) 0.0019654088050314465\n",
|
||
"(npub1srhjym4dngwsgtdf9j3ejhxaf2w8eg2zj8rychzklnray78kmwasq7r8yn) 0.0019654088050314465\n",
|
||
"finished in 18.817513942718506 seconds\n",
|
||
"error pagerank vs mc pagerank in S = 0.00972193209444151\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"\n",
|
||
"# computing the monte-carlo pagerank \n",
|
||
"print('computing naive monte-carlo pagerank over S')\n",
|
||
"tic = time.time()\n",
|
||
"\n",
|
||
"_, mc_pagerank_S_naive = get_mc_pagerank(S,R)\n",
|
||
"\n",
|
||
"await print_results(mc_pagerank_S_naive, index_map, show_results_num, getmetadata=fetch_metadata)\n",
|
||
"\n",
|
||
"\n",
|
||
"toc = time.time()\n",
|
||
"print(f'finished in {toc-tic} seconds')\n",
|
||
"\n",
|
||
"# computing the L1 error\n",
|
||
"error_S_naive = sum( abs(p_S[node] - mc_pagerank_S_naive[node])\n",
|
||
" for node in S.nodes())\n",
|
||
"\n",
|
||
"print(f'error pagerank vs mc pagerank in S = {error_S_naive}')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"outputs": [],
|
||
"source": [],
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"id": "c000a6be0e1a6c0b"
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.12"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|