Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Page rank algorithm support #623

Merged
merged 15 commits into from
Apr 1, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
36 changes: 36 additions & 0 deletions benches/page_rank.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#![feature(test)]
extern crate petgraph;
extern crate test;

use test::Bencher;

use petgraph::algo::page_rank;

#[allow(dead_code)]
mod common;

use common::directed_fan;

#[cfg(feature = "rayon")]
use petgraph::algo::page_rank::parallel_page_rank;
#[cfg(feature = "rayon")]
use rayon::prelude::*;

/// Benchmarks the sequential `page_rank` on a directed fan graph built from
/// `NODE_COUNT` with a damping factor of 0.6 and 10 iterations.
#[bench]
fn page_rank_bench(bench: &mut Bencher) {
    const NODE_COUNT: usize = 500;
    let g = directed_fan(NODE_COUNT);
    // Return the ranks from the closure so that `Bencher::iter` consumes them;
    // a discarded result gives the optimizer room to remove the work.
    bench.iter(|| page_rank(&g, 0.6_f64, 10));
}

/// Benchmarks `parallel_page_rank` (only with the `rayon` feature) on a directed
/// fan graph built from `NODE_COUNT`, with a damping factor of 0.6, at most 100
/// iterations and the default tolerance.
#[bench]
#[cfg(feature = "rayon")]
fn par_page_rank_bench(bench: &mut Bencher) {
    const NODE_COUNT: usize = 2_000;
    let g = directed_fan(NODE_COUNT);
    // Return the ranks from the closure so that `Bencher::iter` consumes them;
    // a discarded result gives the optimizer room to remove the work.
    bench.iter(|| parallel_page_rank(&g, 0.6_f64, 100, None));
}
41 changes: 41 additions & 0 deletions src/algo/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub mod floyd_warshall;
pub mod isomorphism;
pub mod k_shortest_path;
pub mod matching;
pub mod page_rank;
pub mod simple_paths;
pub mod tred;

Expand Down Expand Up @@ -44,6 +45,7 @@ pub use isomorphism::{
};
pub use k_shortest_path::k_shortest_path;
pub use matching::{greedy_matching, maximum_matching, Matching};
pub use page_rank::page_rank;
pub use simple_paths::all_simple_paths;

/// \[Generic\] Return the number of connected components of the graph.
Expand Down Expand Up @@ -901,3 +903,42 @@ macro_rules! impl_bounded_measure_float(
);

impl_bounded_measure_float!(f32, f64);

pub trait UnitMeasure:
ABorgna marked this conversation as resolved.
Show resolved Hide resolved
Measure
+ std::ops::Sub<Self, Output = Self>
+ std::ops::Mul<Self, Output = Self>
+ std::ops::Div<Self, Output = Self>
+ std::iter::Sum
{
fn zero() -> Self;
fn one() -> Self;
fn from_usize(nb: usize) -> Self;
fn default_tol() -> Self;
}

// Implements `UnitMeasure` for every listed primitive type by casting the
// literals `0`, `1` and `1e-6`, and the `usize` argument, with `as`.
macro_rules! impl_unit_measure {
    ($($t:ty),*) => {
        $(
            impl UnitMeasure for $t {
                fn zero() -> Self {
                    0 as $t
                }

                fn one() -> Self {
                    1 as $t
                }

                fn from_usize(nb: usize) -> Self {
                    nb as $t
                }

                fn default_tol() -> Self {
                    1e-6 as $t
                }
            }
        )*
    };
}

impl_unit_measure!(f32, f64);
180 changes: 180 additions & 0 deletions src/algo/page_rank.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
use crate::visit::{EdgeRef, IntoEdges, NodeCount, NodeIndexable};

#[cfg(feature = "rayon")]
use rayon::prelude::*;

use super::UnitMeasure;
/// \[Generic\] Page Rank algorithm.
///
/// Computes the ranks of every node in a graph using the [Page Rank algorithm][pr].
///
/// Starting from a uniform distribution, the ranks are updated `nb_iter` times and
/// re-normalised after every update so that they keep summing to one.
///
/// Returns a `Vec` container mapping each node index to its rank.
///
/// # Panics
/// Panics if the graph has no node, or if the damping factor is not between
/// 0 and 1 (both included).
///
/// # Complexity
/// Time complexity is **O(N|V|²|E|)**.
/// Space complexity is **O(|V| + |E|)**
/// where **N** is the number of iterations, **|V|** the number of vertices (i.e nodes) and **|E|** the number of edges.
///
/// [pr]: https://en.wikipedia.org/wiki/PageRank
///
/// # Example
/// ```rust
/// use petgraph::Graph;
/// use petgraph::algo::page_rank;
/// let mut g: Graph<(), usize> = Graph::new();
/// let a = g.add_node(());
/// let b = g.add_node(());
/// let c = g.add_node(());
/// let d = g.add_node(());
/// let e = g.add_node(());
/// g.extend_with_edges(&[(0, 1), (0, 3), (1, 2), (1, 3)]);
/// // With the following dot representation.
/// //digraph {
/// //    0 [ label = "()" ]
/// //    1 [ label = "()" ]
/// //    2 [ label = "()" ]
/// //    3 [ label = "()" ]
/// //    4 [ label = "()" ]
/// //    0 -> 1 [ label = "0" ]
/// //    0 -> 3 [ label = "0" ]
/// //    1 -> 2 [ label = "0" ]
/// //    1 -> 3 [ label = "0" ]
/// //}
/// let damping_factor = 0.7_f32;
/// let number_iterations = 10;
/// let output_ranks = page_rank(&g, damping_factor, number_iterations);
/// let expected_ranks = vec![0.14685437, 0.20267677, 0.22389607, 0.27971846, 0.14685437];
/// assert_eq!(expected_ranks, output_ranks);
/// ```
pub fn page_rank<G, D>(graph: G, damping_factor: D, nb_iter: usize) -> Vec<D>
where
    G: NodeCount + IntoEdges + NodeIndexable,
    D: UnitMeasure + Copy,
{
    let node_count = graph.node_count();
    assert!(node_count > 0, "Graph must have nodes.");
    assert!(
        D::zero() <= damping_factor && damping_factor <= D::one(),
        "Damping factor should be between 0 et 1."
    );
    let nb = D::from_usize(node_count);
    // Start from the uniform distribution.
    let mut ranks = vec![D::one() / nb; node_count];
    let nodeix = |i| graph.from_index(i);
    // Out-degrees do not change between iterations, so count them once.
    let out_degrees: Vec<D> = (0..node_count)
        .map(|i| graph.edges(nodeix(i)).map(|_| D::one()).sum::<D>())
        .collect();

    for _ in 0..nb_iter {
        let pi = (0..node_count)
            .map(|v| {
                // The target node is the same for every `w`; look it up once.
                let target = nodeix(v);
                ranks
                    .iter()
                    .enumerate()
                    .map(|(w, r)| {
                        if graph.edges(nodeix(w)).any(|e| e.target() == target) {
                            damping_factor * *r / out_degrees[w]
                        } else if out_degrees[w] == D::zero() {
                            damping_factor * *r / nb // stochastic matrix condition
                        } else {
                            (D::one() - damping_factor) * *r / nb // random jumps
                        }
                    })
                    .sum::<D>()
            })
            .collect::<Vec<D>>();
        // Re-normalise so that the ranks keep summing to one.
        let sum = pi.iter().copied().sum::<D>();
        ranks = pi.iter().map(|r| *r / sum).collect::<Vec<D>>();
    }
    ranks
}

/// Scans the outgoing edges of the node at `index_w` once and returns its
/// out-degree together with a flag telling whether at least one of those edges
/// targets the node at `index_v`.
///
/// The scan never stops early because the full out-degree is needed even after
/// a matching edge has been found.
// In the visible code the only caller is `parallel_page_rank`, which is gated
// on the `rayon` feature, so this helper is dead code without that feature.
#[allow(dead_code)]
fn out_edges_info<G, D>(graph: G, index_w: usize, index_v: usize) -> (D, bool)
where
    G: NodeCount + IntoEdges + NodeIndexable + std::marker::Sync,
    D: UnitMeasure + Copy + std::marker::Send + std::marker::Sync,
{
    let node_w = graph.from_index(index_w);
    let node_v = graph.from_index(index_v);
    let mut out_degree = D::zero();
    let mut flag_points_to = false;
    for edge in graph.edges(node_w) {
        out_degree = out_degree + D::one();
        if edge.target() == node_v {
            flag_points_to = true;
        }
    }
    (out_degree, flag_points_to)
}
/// \[Generic\] Parallel Page Rank algorithm.
///
/// Computes the rank of every node like the sequential page rank, but evaluates
/// the per-node updates of each iteration in parallel with `rayon`.
///
/// The iteration stops after `nb_iter` rounds, or earlier as soon as the squared
/// Euclidean distance between two successive rank vectors is at most `tol`.
/// When `tol` is `None`, `D::default_tol()` is used.
///
/// Returns a `Vec` container mapping each node index to its rank; the most
/// recently computed ranks are returned.
///
/// # Panics
/// Panics if the graph has no node, or if the damping factor is not between
/// 0 and 1 (both included).
///
/// # Example
/// ```rust
/// use petgraph::Graph;
/// use petgraph::algo::page_rank::parallel_page_rank;
/// let mut g: Graph<(), usize> = Graph::new();
/// for _ in 0..5 {
///     g.add_node(());
/// }
/// g.extend_with_edges(&[(0, 1), (0, 3), (1, 2), (1, 3)]);
/// let ranks = parallel_page_rank(&g, 0.7_f32, 10, None);
/// assert_eq!(ranks.len(), 5);
/// // The ranks are normalised, so they sum to 1 up to rounding.
/// assert!((ranks.iter().sum::<f32>() - 1.0).abs() < 1e-5);
/// ```
#[cfg(feature = "rayon")]
pub fn parallel_page_rank<G, D>(
    graph: G,
    damping_factor: D,
    nb_iter: usize,
    tol: Option<D>,
) -> Vec<D>
where
    G: NodeCount + IntoEdges + NodeIndexable + std::marker::Sync,
    D: UnitMeasure + Copy + std::marker::Send + std::marker::Sync,
{
    let node_count = graph.node_count();
    assert!(node_count > 0, "Graph must have nodes.");
    assert!(
        D::zero() <= damping_factor && damping_factor <= D::one(),
        "Damping factor should be between 0 et 1."
    );
    let tolerance = tol.unwrap_or_else(D::default_tol);
    let nb = D::from_usize(node_count);
    // Start from the uniform distribution.
    let mut ranks: Vec<D> = vec![D::one() / nb; node_count];
    for _ in 0..nb_iter {
        let pi = (0..node_count)
            .into_par_iter()
            .map(|v| {
                ranks
                    .iter()
                    .enumerate()
                    .map(|(w, r)| {
                        let (out_deg, w_points_to_v) = out_edges_info(graph, w, v);
                        if w_points_to_v {
                            damping_factor * *r / out_deg
                        } else if out_deg == D::zero() {
                            damping_factor * *r / nb // stochastic matrix condition
                        } else {
                            (D::one() - damping_factor) * *r / nb // random jumps
                        }
                    })
                    .sum::<D>()
            })
            .collect::<Vec<D>>();
        // Re-normalise so that the ranks keep summing to one.
        let sum = pi.par_iter().map(|score| *score).sum::<D>();
        let new_ranks = pi.par_iter().map(|r| *r / sum).collect::<Vec<D>>();
        let squared_norm_2 = new_ranks
            .par_iter()
            .zip(&ranks)
            .map(|(new, old)| (*new - *old) * (*new - *old))
            .sum::<D>();
        // Always keep the freshest iterate, including on the converging step,
        // instead of throwing it away and returning the previous one.
        ranks = new_ranks;
        if squared_norm_2 <= tolerance {
            break;
        }
    }
    ranks
}
83 changes: 83 additions & 0 deletions tests/page_rank.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
use petgraph::{algo::page_rank, Graph};

#[cfg(feature = "rayon")]
use petgraph::algo::page_rank::parallel_page_rank;

/// Builds the directed example graph shared by the page rank tests: 13 nodes
/// labelled "A" through "M" and 19 edges with default `f32` weights.
fn graph_example() -> Graph<String, f32> {
    // Taken and adapted from https://github.com/neo4j-labs/graph?tab=readme-ov-file#how-to-run-algorithms
    let mut graph = Graph::<String, f32>::new();
    // Nodes are added in alphabetical order, so A = 0, B = 1, ..., M = 12.
    for label in "ABCDEFGHIJKLM".chars() {
        graph.add_node(label.to_string());
    }
    graph.extend_with_edges(&[
        (1, 2),  // B->C
        (2, 1),  // C->B
        (4, 0),  // E->A
        (4, 1),  // E->B
        (5, 4),  // F->E
        (5, 1),  // F->B
        (5, 6),  // F->G
        (6, 1),  // G->B
        (6, 5),  // G->F
        (7, 1),  // H->B
        (7, 5),  // H->F
        (8, 1),  // I->B
        (8, 5),  // I->F
        (9, 1),  // J->B
        (9, 5),  // J->F
        (10, 1), // K->B
        (10, 5), // K->F
        (11, 5), // L->F
        (12, 5), // M->F
    ]);
    graph
}

/// Reference ranks for the 13 nodes of the example graph, listed by node index.
fn expected_ranks() -> Vec<f32> {
    // Two values repeat in the reference data; name them once.
    const LOWEST: f32 = 0.014170233;
    const SHARED: f32 = 0.035662483;

    [
        0.029228685,
        0.38176042,
        0.3410649,
        LOWEST,
        SHARED,
        0.077429585,
        SHARED,
        LOWEST,
        LOWEST,
        LOWEST,
        LOWEST,
        LOWEST,
        LOWEST,
    ]
    .to_vec()
}

/// The sequential page rank must reproduce the reference ranks exactly after
/// 100 iterations with a damping factor of 0.85.
#[test]
fn test_page_rank() {
    let example = graph_example();
    let computed = page_rank(&example, 0.85_f32, 100);
    assert_eq!(expected_ranks(), computed);
}

/// The parallel page rank must match the reference ranks within `1e-6` on
/// every node of the example graph.
#[test]
#[cfg(feature = "rayon")]
fn test_par_page_rank() {
    let graph = graph_example();
    let expected = expected_ranks();
    let output_ranks = parallel_page_rank(&graph, 0.85_f32, 100, Some(1e-12));
    // `zip` stops at the shorter side, so a truncated result would otherwise
    // go unnoticed.
    assert_eq!(expected.len(), output_ranks.len());
    // `<=` is false whenever either side is NaN, so NaN ranks fail as well.
    assert!(expected
        .iter()
        .zip(&output_ranks)
        .all(|(expected, computed)| (expected - computed).abs() <= 1e-6));
}
20 changes: 19 additions & 1 deletion tests/quickcheck.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use petgraph::algo::{
bellman_ford, condensation, dijkstra, find_negative_cycle, floyd_warshall,
greedy_feedback_arc_set, greedy_matching, is_cyclic_directed, is_cyclic_undirected,
is_isomorphic, is_isomorphic_matching, k_shortest_path, kosaraju_scc, maximum_matching,
min_spanning_tree, tarjan_scc, toposort, Matching,
min_spanning_tree, page_rank, tarjan_scc, toposort, Matching,
};
use petgraph::data::FromElements;
use petgraph::dot::{Config, Dot};
Expand Down Expand Up @@ -1312,3 +1312,21 @@ quickcheck! {
true
}
}

quickcheck! {
    // The ranks are probabilities,
    // as such they are non-negative and they should sum up to 1.
    fn test_page_rank_proba(gr: Graph<(), f32>) -> bool {
        // `page_rank` panics on an empty graph, so there is nothing to check.
        if gr.node_count() == 0 {
            return true;
        }
        let tol = 1e-10;
        let ranks: Vec<f64> = page_rank(&gr, 0.85_f64, 5);
        // Both checks are written so that a NaN rank makes them fail:
        // `NaN >= 0.` and `NaN <= tol` are false.
        let all_non_negative = ranks.iter().all(|rank| *rank >= 0.);
        let sums_up_to_one = (ranks.iter().sum::<f64>() - 1.).abs() <= tol;
        all_non_negative && sums_up_to_one
    }
}