# Play with the Orkut graph using evcxr_jupyter and plotters

The purpose of this notebook is to see how the communities of the Orkut graph are situated with respect to the
block decomposition.


## We reload the density decomposition and the graph. 
**Code copy/pasted from graphembed/examples/orkut.rs**

In [2]:
:dep graphembed = {path = "/home/jpboth/Rust/graphembed", features=["openblas-system"]}
:dep indxvec

:dep anyhow

extern crate petgraph;
extern crate log;

use graphembed::prelude::*;
use petgraph::prelude::*;

use petgraph::{Graph, Undirected};

use std::path::{Path};

pub fn read_orkutdir(dirpath : &Path) -> Result<Graph<u32, f64 , Undirected, u32>, anyhow::Error> {
    let fpath = dirpath.clone().join("com-orkut.ungraph.txt");
    // use csv to unweighted graph_map
    log::info!("read_orkutdir : reading {fpath:?}");
    let graphmap = weighted_csv_to_graphmap::<u32 ,f64, Undirected>(&fpath, b'\t');
    log::info!("read_orkutdir : reading {fpath:?}, done");
    if graphmap.is_err() {
        std::panic!("cannot open file : {fpath:?}");
    }
    let graph = graphmap.unwrap().into_graph::<u32>();
    log::info!("graph loaded");
    Ok(graph)
} // end of read_orkutdirq

let res = StableDecomposition::reload_json(Path::new("/home/jpboth/Rust/graphembed/orkut-decomposition.json"));

let stable = res.unwrap();

let b0 = stable.get_block_points(0).unwrap();
println!("b0.len = {}", b0.len()); 

In [3]:
let mut b0s = b0.clone();
b0s.sort();

b0.len = 28289


In [4]:
b0s[0..5]

[52, 471, 535, 664, 692]

In [5]:
b0s[0..5]

[52, 471, 535, 664, 692]

#### load the graph
adapt path for local situation

In [6]:
let graph = read_orkutdir(Path::new("/home/jpboth/Data/Graphs/Orkut")).unwrap();


In [7]:
/// Returns the degree of the node of index `rank` in the undirected `graph`,
/// computed as the number of items yielded by `graph.neighbors` for this node.
///
/// # Errors
/// Returns an error if `rank` is not smaller than the number of nodes of the graph.
pub fn get_degree_undirected<N,F>(graph : &Graph<N, F, Undirected>, rank : usize) -> Result<usize, anyhow::Error> {
    let nb_nodes = graph.node_count();
    if rank < nb_nodes {
        // valid index : count the neighbours of the node
        Ok(graph.neighbors(NodeIndex::new(rank)).count())
    } else {
        Err(anyhow::anyhow!("bad index, nb_nodes : {nb_nodes}"))
    }
}




In [8]:
let degree_384225 = get_degree_undirected(&graph, 384225).unwrap();

In [9]:
degree_384225

1316

In [10]:
let degree_2357582 = get_degree_undirected(&graph, 2357582).unwrap();


In [11]:
degree_2357582

771

In [12]:
let mut b0mut = b0.clone();


In [13]:
:dep indxvec

In [14]:
use indxvec::Vecops;

In [15]:
// degree of each node of block 0, in the same order as the nodes in b0
let mut degrees_b0 : Vec<usize> = b0.iter().map(|&node| get_degree_undirected(&graph, node).unwrap()).collect();


In [16]:
let mean_deg_b0 : f64 = degrees_b0.iter().sum::<usize>() as f64 / degrees_b0.len() as f64;


In [17]:
mean_deg_b0

942.6188978048004

**We get an index to examine degrees of block 0**

In [18]:
let degrees_b0_idx = degrees_b0.mergesort_indexed();
    

In [19]:
degrees_b0[degrees_b0_idx[0]]

560

In [20]:
degrees_b0[degrees_b0_idx[b0.len()-1]]

33313

In [21]:
degrees_b0[degrees_b0_idx[5]]

560

In [22]:
degrees_b0[degrees_b0_idx[500]]

564

In [23]:
// `sort` works in place and returns `()`, so the copy must be bound before being sorted
let mut b0_sorted = b0.clone();
b0_sorted.sort();

In [24]:
stable.get_densest_block(72).unwrap()

107

#### add code to read the first 5000 communities. 
**code copy/pasted from graphembed/examples/orkut.rs**

In [25]:
use anyhow::{anyhow};

use std::io::{BufReader};
use std::fs::{OpenOptions};
use std::io::prelude::*;
use std::str::FromStr;

const ORKUT_DATA_DIR : &'static str = "/home/jpboth/Data/Graphs/Orkut/";

fn read_orkut_com(dirpath : &Path) -> anyhow::Result<Vec<Vec<usize>>> {
    let fpath = dirpath.clone().join("com-orkut.top5000.cmty.txt");
    log::info!("read_orkut_com : reading {fpath:?}");
    let fileres = OpenOptions::new().read(true).open(&fpath);
    if fileres.is_err() {
        log::error!("read_orkut_com : reload could not open file {:?}", fpath.as_os_str());
        println!("read_orkut_com could not open file {:?}", fpath.as_os_str());
        return Err(anyhow!("read_orkut_com could not open file {}", fpath.display()));            
    }
    let file = fileres?;
    let bufreader = BufReader::new(file);
    let lines = bufreader.lines();
    let mut numline = 0;
    let mut communities = Vec::<Vec<usize>>::with_capacity(5000);
    for line in lines {
        if line.is_err() {
            log::error!("error reading file : {:?} at line : {}",fpath.as_os_str(),numline);
            return Err(anyhow!(" error reading file : {:?} at line : {}",fpath.as_os_str(),numline));
        }
        // split and decode line. line consists in usize separated by a tab
        let line = line.unwrap();
        let splitted : Vec<&str>= line.split('\t').collect();
        let communitiy : Vec<usize> = splitted.iter().map(|s| usize::from_str(*s).unwrap()).collect();
        communities.push(communitiy);
        numline += 1;
    }
    //
    return Ok(communities);
} // end of read_orkut_com



In [26]:
let communities = read_orkut_com(Path::new(ORKUT_DATA_DIR)).unwrap();

In [27]:
communities.len()

5000

The first community has 4249 nodes

In [28]:
communities[0].len()

4249

**Examine first community**

In [29]:
let mut bc0 = communities[0].clone();

In [30]:
bc0.sort()

()

In [31]:
bc0[0..25]

[2, 5, 6, 37, 38, 41, 44, 47, 53, 56, 62, 67, 72, 73, 74, 80, 81, 173, 175, 177, 180, 309, 314, 483, 619]

In [32]:
stable.get_densest_block(74).unwrap()

191

In [33]:
let degree_73 = get_degree_undirected(&graph, 73).unwrap();
degree_73

211

In [34]:
get_degree_undirected(&graph, 74).unwrap()

18

In [35]:
stable.get_densest_block(619).unwrap()

185

**get blocks of first community**

In [36]:
let mut c0blocks = stable.get_blocks(&communities[0]);

In [37]:
c0blocks.sort();

In [38]:
c0blocks[0..25]

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

**compute the mean block of community 0**

In [39]:
let meanc0 = c0blocks.iter().sum::<usize>() as f64/ c0blocks.len() as f64;
meanc0

133.2485290656625

**get list of degrees of the nodes of the first community**

In [40]:
let mut com0_degrees : Vec<usize> = communities[0].iter().map(|n| get_degree_undirected(&graph, *n).unwrap()).collect();

In [41]:
com0_degrees.sort()

()

In [42]:
com0_degrees[0..25]

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [43]:
com0_degrees[2000..2025]

[63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 64]

In [44]:
com0_degrees[4000..4025]

[251, 251, 251, 251, 252, 252, 252, 253, 254, 254, 254, 255, 255, 255, 255, 258, 258, 258, 259, 259, 259, 259, 260, 260, 260]

In [45]:
let com0_mean_degree = com0_degrees.iter().sum::<usize>() as f64 / com0_degrees.len() as f64;
com0_mean_degree

95.44175100023534

#### get size of communities

In [46]:
let com_length : Vec<usize> = communities.iter().map(|c| c.len()).collect();

In [47]:
com_length[0..5]

[4249, 837, 373, 176, 55]

get an index to sort com_length

In [48]:
let com_length_idx = com_length.mergesort_indexed();

In [49]:
let com_max_len = com_length[*com_length_idx.last().unwrap()];
com_max_len

4785

In [50]:
*com_length_idx.last().unwrap()

22

**So the largest community has 4785 nodes. It is community 22**

### add dependencies to get plotters

In [51]:
:dep plotters = {version = "0.3.4" ,default_features = false, features = ["evcxr", "all_series", "all_elements"] }
extern crate plotters;
use plotters::prelude::*;


histogram of blocks of community 0 (4249 nodes, blocks from 0 to 208)

## define a function displaying histogram of blocks of nodes in a community

In [52]:
use plotters::prelude::*;
use plotters::evcxr::SVGWrapper;

/// Draws a histogram over the blocks listed in `block_com` and returns it as an SVG figure.
///
/// `block_com` holds one block number per node of a community. The x axis ranges from 0 to the
/// value returned by `stable.get_nb_blocks()`, the y axis from 0 to 1 (labelled in percent).
/// The size of `block_com` is printed before drawing.
///
/// # Panics
/// Panics if `stable.get_nbpoints_in_block` fails for a block of `block_com`.
fn display_blocks(block_com : &Vec<usize>, stable : &StableDecomposition) -> SVGWrapper {
    let nb_blocks = stable.get_nb_blocks();
    println!(" communitity size : {}", block_com.len());

    evcxr_figure((800, 480), |root| {
        // chart context : caption, label areas and axis ranges
        let mut chart = ChartBuilder::on(&root)
            .caption("blocks histogram", ("Arial", 20).into_font())
            .x_label_area_size(40)
            .y_label_area_size(40)
            .build_cartesian_2d(0usize..nb_blocks, 0f64..1.0f64)?;

        // axis : no mesh, 5 labels on y shown as percentages
        chart
            .configure_mesh()
            .disable_x_mesh()
            .disable_y_mesh()
            .y_labels(5)
            .x_label_formatter(&|x| format!("{:.1}", *x as usize))
            .y_label_formatter(&|y| format!("{}%", (*y * 100.0) as u32))
            .draw()?;

        // normalize by size of block : each entry of block_com is given the weight
        // 200 / (number of points in its block) -- NOTE(review): meaning of the constant 200 to be confirmed
        let weighted_blocks = block_com
            .iter()
            .map(|&blk| (blk, 200. / stable.get_nbpoints_in_block(blk).unwrap() as f64));
        let bars = Histogram::vertical(&chart)
            .style(RED.filled())
            .margin(0)
            .data(weighted_blocks);

        // the result of the drawing is not checked
        let _res = chart.draw_series(bars);

        Ok(())
    })
    .style("width:80%")
}

In [53]:
let mut c0blocks = stable.get_blocks(&communities[0]);
display_blocks(&c0blocks, &stable)

 communitity size : 4249


In [54]:
let mut c1blocks = stable.get_blocks(&communities[1]);
display_blocks(&c1blocks, &stable)

 communitity size : 837


#### Histogram for the largest community

In [55]:
let mut c22blocks = stable.get_blocks(&communities[22]);
display_blocks(&c22blocks, &stable)

 communitity size : 4785


#### histogram of a small community (community 4) , size 55

In [56]:
let mut c4blocks = stable.get_blocks(&communities[4]);
display_blocks(&c4blocks, &stable)

 communitity size : 55


**There is a node in block 0. This small community has a member in the highest density block**

In [57]:
c4blocks.sort()

()

In [58]:
c4blocks[0..5]

[0, 73, 73, 79, 79]

**in community 4, search what is the node and degree which is in block 0**

In [59]:
let c4_degrees : Vec<usize> = communities[4].iter().map(|n| get_degree_undirected(&graph, *n).unwrap()).collect();

In [60]:
let degrees_c4_idx = c4_degrees.mergesort_indexed();


get minimal degree

In [61]:
c4_degrees[degrees_c4_idx[0]]

1

get maximal degree

In [62]:
c4_degrees[degrees_c4_idx[degrees_c4_idx.len()-1]]

722

get index of node of maximal degree in community 4. 

In [63]:
degrees_c4_idx[degrees_c4_idx.len()-1]

12

get node corresponding to index 12 in community (same index as degrees of the same community)

In [64]:
communities[4][12]

26665

**we check  for the degree. So node 26665 is the maximal degree node in community 4. It has degree 722 and is in highest density block**

In [65]:
get_degree_undirected(&graph, 26665).unwrap()

722

#### community 6 is a very small community

In [66]:
let mut c6blocks = stable.get_blocks(&communities[6]);
display_blocks(&c6blocks, &stable)

 communitity size : 3


In [67]:
communities[6]

[960274, 960277, 960279]

In [68]:
get_degree_undirected(&graph, 960274).unwrap()

33

In [69]:
get_degree_undirected(&graph, 960277).unwrap()

2

In [70]:
get_degree_undirected(&graph, 960279).unwrap()

29

#### Display communities size histogram

In [126]:
extern crate hdrhistogram;

//use hdrhistogram;

//
// Displays the number of items of data found at each value, as a bar chart.
// For length of communities, data to send to function is the vector of lengths.
// dmin and dmax is the window (on the x axis) to display.
//
// Panics if the hdrhistogram cannot be created.
//
fn display_as_histo(data : &Vec<usize>, dmin : usize , dmax : usize) -> SVGWrapper {
    // The histogram must be able to record the largest VALUE of data (not the largest index),
    // otherwise recording with += panics. The bound is kept at 2 or more as hdrhistogram
    // refuses a smaller upper bound (case of empty data or of data with only small values).
    let datamax = data.iter().copied().max().unwrap_or(0) as u64;
    // histo with precision 2 significative digits
    let mut data_histo = hdrhistogram::Histogram::<u64>::new_with_max(datamax.max(2), 2).unwrap();
    for d in data {
        data_histo += *d as u64;
    }
    // the values printed are the bounds of the displayed window
    println!("data min  : {dmin}, data max = {dmax}");
    // iteration through data_histo (by steps of 1) to get a serie for graphic,
    // keeping track of the largest value and the largest count to size the axes
    let mut xmax : u64 = 0;
    let mut ymax : u64 = 0;
    let mut serie = Vec::<(u64,u64)>::new();
    for item in data_histo.iter_linear(1) {
        let v = item.value_iterated_to();
        let c = item.count_since_last_iteration();
        xmax = xmax.max(v);
        ymax = ymax.max(c);
        //
        serie.push((v,c));
    }
    //
    let fig = evcxr_figure((800, 480), |root| {
    // The following code will create a chart context
    let mut chart = ChartBuilder::on(&root)
        .caption("raw histogram", ("Arial", 20).into_font())
        .x_label_area_size(40)
        .y_label_area_size(40)
        .build_cartesian_2d((dmin as u64)..xmax.min(dmax as u64), 0..ymax+1)?;
    
    // configure axis
    chart.configure_mesh()
            .disable_x_mesh()
            .disable_y_mesh()
            .y_labels(5)
            .x_label_formatter(&|x| format!("{:.1}", *x as usize))
            .y_label_formatter(&|y| format!("{}", (*y) as u32))
            .draw()?;
    
    // one bar per (value, count) pair of the serie
    let histo = plotters::series::Histogram::vertical(&chart)
        .style(RED.filled())
        .margin(0)
        .data(serie.iter().copied());
    
    // the result of the drawing is not checked
    let _res = chart.draw_series(histo);
    
    Ok(())
    }).style("width:80%");
    fig
}

In [89]:
com_length[0..10]

[4249, 837, 373, 176, 55, 3433, 3, 731, 2609, 184]

In [127]:
display_as_histo(&com_length,0, 5000)

data min  : 0, data max = 5000


In [129]:
display_as_histo(&com_length,0, 200)

data min  : 0, data max = 200


In [130]:
display_as_histo(&com_length,20, 40)

data min  : 20, data max = 40


**We have a block of small communities of size between 3 and 20, a gap for communities of size 20 to 40, and then something like a gamma distribution of lengths**