Skip to content

Commit 163115a

Browse files
authored
Add metrics to measure the time spend on EC checkpointing (#962)
Meter the time spent on EC checkpointing tagged by status
1 parent 1b56aa8 commit 163115a

File tree

2 files changed

+11
-1
lines changed

2 files changed

+11
-1
lines changed

host.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"github.com/filecoin-project/go-f3/manifest"
2121
pubsub "github.com/libp2p/go-libp2p-pubsub"
2222
"github.com/libp2p/go-libp2p/core/peer"
23+
"go.opentelemetry.io/otel/metric"
2324
"go.uber.org/multierr"
2425
"golang.org/x/sync/errgroup"
2526
)
@@ -287,13 +288,16 @@ func (h *gpbftRunner) Start(ctx context.Context) (_err error) {
287288
return errors.New("cert store subscription to finalize tipsets was closed unexpectedly")
288289
}
289290
if h.manifest.EC.Finalize {
291+
start := time.Now()
290292
key := cert.ECChain.Head().Key
291-
if err := h.ec.Finalize(h.runningCtx, key); err != nil {
293+
err := h.ec.Finalize(h.runningCtx, key)
294+
if err != nil {
292295
// There is not much we can do here other than logging. The next instance start
293296
// will effectively retry checkpointing the latest finalized tipset. This error
294297
// will not impact the selection of next instance chain.
295298
log.Errorf("error while finalizing decision at EC: %+v", err)
296299
}
300+
metrics.ecFinalizeTime.Record(context.Background(), time.Since(start).Seconds(), metric.WithAttributes(attrStatusFromErr(err)))
297301
} else {
298302
ts := cert.ECChain.Head()
299303
log.Debugw("skipping finalization of a new head because the current manifest specifies that tipsets should not be finalized",

metrics.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ var metrics = struct {
5454
partialMessagesDropped metric.Int64Counter
5555
partialMessageInstances metric.Int64UpDownCounter
5656
partialValidationCache metric.Int64Counter
57+
ecFinalizeTime metric.Float64Histogram
5758
}{
5859
headDiverged: measurements.Must(meter.Int64Counter("f3_head_diverged", metric.WithDescription("Number of times we encountered the head has diverged from base scenario."))),
5960
reconfigured: measurements.Must(meter.Int64Counter("f3_reconfigured", metric.WithDescription("Number of times we reconfigured due to new manifest being delivered."))),
@@ -85,6 +86,11 @@ var metrics = struct {
8586
metric.WithDescription("Number of instances with partial GPBFT messages pending fulfilment."))),
8687
partialValidationCache: measurements.Must(meter.Int64Counter("f3_partial_validation_cache",
8788
metric.WithDescription("The number of times partial validation cache resulted in hit or miss."))),
89+
ecFinalizeTime: measurements.Must(meter.Float64Histogram("f3_ec_finalize_time",
90+
metric.WithDescription("Histogram of the number of seconds spent checkpointing an F3 decision in EC tagged by status"),
91+
metric.WithExplicitBucketBoundaries(0.001, 0.002, 0.003, 0.005, 0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 1.0, 10.0),
92+
metric.WithUnit("s"),
93+
)),
8894
}
8995

9096
func recordValidatedMessage(ctx context.Context, msg gpbft.ValidatedMessage) {

0 commit comments

Comments
 (0)