Skip to content

Commit

Permalink
time to establish connection (#3134)
Browse files Browse the repository at this point in the history
Implementing #2745 , adding  a metric to break down time from connection pending to connection established, per protocol stack.

````
$curl -s http://127.0.0.1:42183/metrics | grep nt_duration
# HELP libp2p_swarm_connection_establishment_duration Time it took (locally) to finish establishing connections.
# TYPE libp2p_swarm_connection_establishment_duration histogram
libp2p_swarm_connection_establishment_duration_sum{role="Listener",protocols="/ip4/tcp"} 0.007
libp2p_swarm_connection_establishment_duration_count{role="Listener",protocols="/ip4/tcp"} 1
libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.001"} 0
libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.002"} 0
libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.004"} 0
libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.008"} 1
libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.016"} 1
libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.032"} 1
libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.064"} 1
libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.128"} 1
libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.256"} 1
libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="0.512"} 1
libp2p_swarm_connection_establishment_duration_bucket{role="Listener",protocols="/ip4/tcp",le="+Inf"} 1

lbl@chomp:~lbl
$curl -s http://127.0.0.1:34283/metrics | grep nt_duration
# HELP libp2p_swarm_connection_establishment_duration Time it took (locally) to finish establishing connections.
# TYPE libp2p_swarm_connection_establishment_duration histogram
libp2p_swarm_connection_establishment_duration_sum{role="Dialer",protocols="/ip4/tcp"} 0.009
libp2p_swarm_connection_establishment_duration_count{role="Dialer",protocols="/ip4/tcp"} 1
libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.001"} 0
libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.002"} 0
libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.004"} 0
libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.008"} 0
libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.016"} 1
libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.032"} 1
libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.064"} 1
libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.128"} 1
libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.256"} 1
libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="0.512"} 1
libp2p_swarm_connection_establishment_duration_bucket{role="Dialer",protocols="/ip4/tcp",le="+Inf"} 1
````
  • Loading branch information
John-LittleBearLabs committed Dec 12, 2022
1 parent 63ffc7f commit f8f19ba
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 10 deletions.
4 changes: 4 additions & 0 deletions misc/metrics/CHANGELOG.md
@@ -1,5 +1,7 @@
# 0.12.0 [unreleased]

- Add `connections_establishment_duration` metric. See [PR 3134].

- Update to `libp2p-dcutr` `v0.9.0`.

- Update to `libp2p-ping` `v0.42.0`.
Expand All @@ -12,6 +14,8 @@

- Update to `libp2p-swarm` `v0.42.0`.

[PR 3134]: https://github.com/libp2p/rust-libp2p/pull/3134/

# 0.11.0

- Update to `libp2p-dcutr` `v0.8.0`.
Expand Down
44 changes: 35 additions & 9 deletions misc/metrics/src/swarm.rs
Expand Up @@ -20,15 +20,19 @@

use crate::protocol_stack;
use prometheus_client::encoding::text::Encode;
use prometheus_client::metrics::counter::Counter;
use prometheus_client::metrics::family::Family;
use prometheus_client::metrics::{
counter::Counter,
family::Family,
histogram::{exponential_buckets, Histogram},
};
use prometheus_client::registry::Registry;

pub struct Metrics {
connections_incoming: Family<AddressLabels, Counter>,
connections_incoming_error: Family<IncomingConnectionErrorLabels, Counter>,

connections_established: Family<ConnectionEstablishedLabels, Counter>,
connections_establishment_duration: Family<ConnectionEstablishmentDurationLabels, Histogram>,
connections_closed: Family<ConnectionClosedLabels, Counter>,

new_listen_addr: Family<AddressLabels, Counter>,
Expand Down Expand Up @@ -123,6 +127,15 @@ impl Metrics {
Box::new(connections_closed.clone()),
);

let connections_establishment_duration = Family::new_with_constructor(
create_connection_establishment_duration_histogram as fn() -> Histogram,
);
sub_registry.register(
"connections_establishment_duration",
"Time it took (locally) to establish connections",
Box::new(connections_establishment_duration.clone()),
);

Self {
connections_incoming,
connections_incoming_error,
Expand All @@ -135,6 +148,7 @@ impl Metrics {
dial_attempt,
outgoing_connection_error,
connected_to_banned_peer,
connections_establishment_duration,
}
}
}
Expand All @@ -143,13 +157,19 @@ impl<TBvEv, THandleErr> super::Recorder<libp2p_swarm::SwarmEvent<TBvEv, THandleE
fn record(&self, event: &libp2p_swarm::SwarmEvent<TBvEv, THandleErr>) {
match event {
libp2p_swarm::SwarmEvent::Behaviour(_) => {}
libp2p_swarm::SwarmEvent::ConnectionEstablished { endpoint, .. } => {
self.connections_established
.get_or_create(&ConnectionEstablishedLabels {
role: endpoint.into(),
protocols: protocol_stack::as_string(endpoint.get_remote_address()),
})
.inc();
libp2p_swarm::SwarmEvent::ConnectionEstablished {
endpoint,
established_in: time_taken,
..
} => {
let labels = ConnectionEstablishedLabels {
role: endpoint.into(),
protocols: protocol_stack::as_string(endpoint.get_remote_address()),
};
self.connections_established.get_or_create(&labels).inc();
self.connections_establishment_duration
.get_or_create(&labels)
.observe(time_taken.as_secs_f64());
}
libp2p_swarm::SwarmEvent::ConnectionClosed { endpoint, .. } => {
self.connections_closed
Expand Down Expand Up @@ -279,6 +299,8 @@ struct ConnectionEstablishedLabels {
protocols: String,
}

type ConnectionEstablishmentDurationLabels = ConnectionEstablishedLabels;

#[derive(Encode, Hash, Clone, Eq, PartialEq)]
struct ConnectionClosedLabels {
role: Role,
Expand Down Expand Up @@ -372,3 +394,7 @@ impl<TTransErr> From<&libp2p_swarm::PendingInboundConnectionError<TTransErr>>
}
}
}

fn create_connection_establishment_duration_histogram() -> Histogram {
Histogram::new(exponential_buckets(1e-3, 2., 10))
}
1 change: 1 addition & 0 deletions protocols/autonat/tests/test_server.rs
Expand Up @@ -195,6 +195,7 @@ async fn test_dial_back() {
},
num_established,
concurrent_dial_errors,
established_in: _,
} => {
assert_eq!(peer_id, client_id);
assert_eq!(num_established, NonZeroU32::new(2).unwrap());
Expand Down
3 changes: 3 additions & 0 deletions swarm/CHANGELOG.md
Expand Up @@ -2,7 +2,10 @@

- Removed deprecated Swarm constructors. For transition notes see [0.41.0](#0.41.0). See [PR 3170].

- Add `estblished_in` to `SwarmEvent::ConnectionEstablished`. See [PR 3134].

[PR 3170]: https://github.com/libp2p/rust-libp2p/pull/3170
[PR 3134]: https://github.com/libp2p/rust-libp2p/pull/3134

# 0.41.1

Expand Down
12 changes: 11 additions & 1 deletion swarm/src/connection/pool.rs
Expand Up @@ -38,6 +38,7 @@ use futures::{
ready,
stream::FuturesUnordered,
};
use instant::Instant;
use libp2p_core::connection::{ConnectionId, Endpoint, PendingPoint};
use libp2p_core::muxing::{StreamMuxerBox, StreamMuxerExt};
use std::{
Expand Down Expand Up @@ -195,6 +196,8 @@ struct PendingConnection<THandler> {
endpoint: PendingPoint,
/// When dropped, notifies the task which then knows to terminate.
abort_notifier: Option<oneshot::Sender<Void>>,
/// The moment we became aware of this possible connection, useful for timing metrics.
accepted_at: Instant,
}

impl<THandler> PendingConnection<THandler> {
Expand Down Expand Up @@ -237,6 +240,8 @@ where
/// Addresses are dialed in parallel. Contains the addresses and errors
/// of dial attempts that failed before the one successful dial.
concurrent_dial_errors: Option<Vec<(Multiaddr, TransportError<TTrans::Error>)>>,
/// How long it took to establish this connection.
established_in: std::time::Duration,
},

/// An established connection was closed.
Expand Down Expand Up @@ -493,6 +498,7 @@ where
handler,
endpoint,
abort_notifier: Some(abort_notifier),
accepted_at: Instant::now(),
},
);
Ok(connection_id)
Expand Down Expand Up @@ -540,6 +546,7 @@ where
handler,
endpoint: endpoint.into(),
abort_notifier: Some(abort_notifier),
accepted_at: Instant::now(),
},
);
Ok(connection_id)
Expand Down Expand Up @@ -634,6 +641,7 @@ where
handler,
endpoint,
abort_notifier: _,
accepted_at,
} = self
.pending
.remove(&id)
Expand Down Expand Up @@ -783,13 +791,14 @@ where
)
.boxed(),
);

let established_in = accepted_at.elapsed();
return Poll::Ready(PoolEvent::ConnectionEstablished {
peer_id: obtained_peer_id,
endpoint,
id,
other_established_connection_ids,
concurrent_dial_errors,
established_in,
});
}
task::PendingConnectionEvent::PendingFailed { id, error } => {
Expand All @@ -798,6 +807,7 @@ where
handler,
endpoint,
abort_notifier: _,
accepted_at: _, // Ignoring the time it took for the connection to fail.
}) = self.pending.remove(&id)
{
self.counters.dec_pending(&endpoint);
Expand Down
4 changes: 4 additions & 0 deletions swarm/src/lib.rs
Expand Up @@ -191,6 +191,8 @@ pub enum SwarmEvent<TBehaviourOutEvent, THandlerErr> {
/// Addresses are dialed concurrently. Contains the addresses and errors
/// of dial attempts that failed before the one successful dial.
concurrent_dial_errors: Option<Vec<(Multiaddr, TransportError<io::Error>)>>,
/// How long it took to establish this connection
established_in: std::time::Duration,
},
/// A connection with the given peer has been closed,
/// possibly as a result of an error.
Expand Down Expand Up @@ -808,6 +810,7 @@ where
endpoint,
other_established_connection_ids,
concurrent_dial_errors,
established_in,
} => {
if self.banned_peers.contains(&peer_id) {
// Mark the connection for the banned peer as banned, thus withholding any
Expand Down Expand Up @@ -848,6 +851,7 @@ where
num_established,
endpoint,
concurrent_dial_errors,
established_in,
});
}
}
Expand Down

0 comments on commit f8f19ba

Please sign in to comment.