[linux.git] net/rxrpc/input_rack.c (Linux 6.14-rc3)
// SPDX-License-Identifier: GPL-2.0-or-later
/* RACK-TLP [RFC8985] Implementation
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include "ar-internal.h"

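/*
 * Determine if one segment was sent after another: a later transmission
 * timestamp wins, with the sequence number breaking ties between segments
 * that carry the same timestamp.
 */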
static bool rxrpc_rack_sent_after(ktime_t t1, rxrpc_seq_t seq1,
                                  ktime_t t2, rxrpc_seq_t seq2)
{
        if (ktime_after(t1, t2))
                return true;
        return t1 == t2 && after(seq1, seq2);
}

/*
 * Mark a packet lost.
 */
static void rxrpc_rack_mark_lost(struct rxrpc_call *call,
                                 struct rxrpc_txqueue *tq, unsigned int ix)
{
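        /* If the segment was already marked lost, then a retransmission of
         * it has presumably been lost as well, so drop the retransmitted bit
         * and correct the resend count; otherwise account a fresh loss.
         * Either way, invalidate the transmission timestamp so that the
         * segment can't contribute an RTT sample (see rxrpc_get_xmit_ts()).
         */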
        if (__test_and_set_bit(ix, &tq->segment_lost)) {
                if (__test_and_clear_bit(ix, &tq->segment_retransmitted))
                        call->tx_nr_resent--;
        } else {
                call->tx_nr_lost++;
        }
        tq->segment_xmit_ts[ix] = UINT_MAX;
}

/*
 * Get the transmission time of a packet in the Tx queue.  A segment whose
 * timestamp has been invalidated (marked lost) yields KTIME_MAX.
 */
static ktime_t rxrpc_get_xmit_ts(const struct rxrpc_txqueue *tq, unsigned int ix)
{
        if (tq->segment_xmit_ts[ix] == UINT_MAX)
                return KTIME_MAX;
        return ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]);
}

/*
 * Get a bitmask of nack bits for a queue segment and mask off any that aren't
 * yet reported.  For example, with nr_reported_acks = 5 and segment_acked =
 * 0b10110, this returns 0b01001.
 */
static unsigned long rxrpc_tq_nacks(const struct rxrpc_txqueue *tq)
{
        unsigned long nacks = ~tq->segment_acked;

        if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE)
                nacks &= (1UL << tq->nr_reported_acks) - 1;
        return nacks;
}

/*
 * Update the RACK state for the most recently sent packet that has been
 * delivered [RFC8985 6.2 Step 2].
 */
static void rxrpc_rack_update(struct rxrpc_call *call,
                              struct rxrpc_ack_summary *summary,
                              struct rxrpc_txqueue *tq,
                              unsigned int ix)
{
        rxrpc_seq_t seq = tq->qbase + ix;
        ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix);
        ktime_t rtt = ktime_sub(call->acks_latest_ts, xmit_ts);

        if (__test_and_clear_bit(ix, &tq->segment_lost))
                call->tx_nr_lost--;

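        /* An ACK for a segment that's been retransmitted is ambiguous: it
         * may be a belated ACK of the original transmission.  Only take it
         * into account if the peer's highest echoed serial shows that it saw
         * the retransmission and the RTT isn't implausibly short.
         */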
        if (test_bit(ix, &tq->segment_retransmitted)) {
                /* Use Rx.serial instead of TCP.ACK.ts_option.echo_reply. */
                if (before(call->acks_highest_serial, tq->segment_serial[ix]))
                        return;
                /* min_rtt is tracked in us, so compare like with like. */
                if (ktime_before(rtt, us_to_ktime(minmax_get(&call->min_rtt))))
                        return;
        }

        /* The RACK algorithm requires the segment ACKs to be traversed in
         * order of segment transmission - but the only thing this seems to
         * matter for is that RACK.rtt is set to the rtt of the most recently
         * transmitted segment.  We should be able to achieve the same by only
         * setting RACK.rtt if the xmit time is later.
         */
        if (ktime_after(xmit_ts, call->rack_rtt_ts)) {
                call->rack_rtt    = rtt;
                call->rack_rtt_ts = xmit_ts;
        }

        if (rxrpc_rack_sent_after(xmit_ts, seq, call->rack_xmit_ts, call->rack_end_seq)) {
                call->rack_rtt = rtt;
                call->rack_xmit_ts = xmit_ts;
                call->rack_end_seq = seq;
        }
}

/*
 * Detect data segment reordering [RFC8985 6.2 Step 3].
 */
static void rxrpc_rack_detect_reordering(struct rxrpc_call *call,
                                         struct rxrpc_ack_summary *summary,
                                         struct rxrpc_txqueue *tq,
                                         unsigned int ix)
{
        rxrpc_seq_t seq = tq->qbase + ix;

        /* Track the highest sequence number so far ACK'd.  This is not
         * necessarily the same as ack.firstPacket + ack.nAcks - 1 as the peer
         * could put a NACK in the last SACK slot.
         */
        if (after(seq, call->rack_fack))
                call->rack_fack = seq;
        else if (before(seq, call->rack_fack) &&
                 test_bit(ix, &tq->segment_retransmitted))
                call->rack_reordering_seen = true;
}

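/*
 * Process one newly ACK'd segment: update the RACK state [RFC8985 6.2 Step 2]
 * and look for reordering [RFC8985 6.2 Step 3].
 */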
void rxrpc_input_rack_one(struct rxrpc_call *call,
                          struct rxrpc_ack_summary *summary,
                          struct rxrpc_txqueue *tq,
                          unsigned int ix)
{
        rxrpc_rack_update(call, summary, tq, ix);
        rxrpc_rack_detect_reordering(call, summary, tq, ix);
}

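/*
 * Run the per-segment RACK input for each segment newly ACK'd in a Tx queue
 * segment, in ascending sequence order.
 */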
void rxrpc_input_rack(struct rxrpc_call *call,
                      struct rxrpc_ack_summary *summary,
                      struct rxrpc_txqueue *tq,
                      unsigned long new_acks)
{
        while (new_acks) {
                unsigned int ix = __ffs(new_acks);

                __clear_bit(ix, &new_acks);
                rxrpc_input_rack_one(call, summary, tq, ix);
        }

        trace_rxrpc_rack_update(call, summary);
}

/*
 * Update the reordering window [RFC8985 6.2 Step 4].  Returns the updated
 * duration of the reordering window.
 *
 * Note that the Rx protocol doesn't have a 'DSACK option' per se, but ACKs can
 * be given a 'DUPLICATE' reason with the serial number referring to the
 * duplicated DATA packet.  Rx does not inform as to whether this was a
 * reception of the same packet twice or of a retransmission of a packet we
 * already received (though this could be determined by the transmitter based
 * on the serial number).
 */
static ktime_t rxrpc_rack_update_reo_wnd(struct rxrpc_call *call,
                                         struct rxrpc_ack_summary *summary)
{
        rxrpc_seq_t snd_una = call->acks_lowest_nak; /* Lowest unack'd seq */
        rxrpc_seq_t snd_nxt = call->tx_transmitted + 1; /* Next seq to be sent */
        bool have_dsack_option = summary->ack_reason == RXRPC_ACK_DUPLICATE;
        int dup_thresh = 3;

        /* DSACK-based reordering window adaptation */
        if (!call->rack_dsack_round_none &&
            after_eq(snd_una, call->rack_dsack_round))
                call->rack_dsack_round_none = true;

        /* Grow the reordering window per round that sees DSACK.  Reset the
         * window after 16 DSACK-free recoveries.
         */
        if (call->rack_dsack_round_none && have_dsack_option) {
                call->rack_dsack_round_none = false;
                call->rack_dsack_round = snd_nxt;
                call->rack_reo_wnd_mult++;
                call->rack_reo_wnd_persist = 16;
        } else if (summary->exiting_fast_or_rto_recovery) {
                call->rack_reo_wnd_persist--;
                if (call->rack_reo_wnd_persist <= 0)
                        call->rack_reo_wnd_mult = 1;
        }

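        /* If no reordering has yet been observed on the connection, the
         * reordering window can be collapsed to zero once loss recovery
         * starts or once the classic three-dupack threshold has been reached
         * [RFC8985 7.1].
         */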
        if (!call->rack_reordering_seen) {
                if (summary->in_fast_or_rto_recovery)
                        return 0;
                if (call->acks_nr_sacks >= dup_thresh)
                        return 0;
        }

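        /* RACK.reo_wnd = min(RACK.min_RTT / 4 * RACK.reo_wnd_mult, SRTT)
         * [RFC8985 7.1].  srtt_us is kept in TCP-style fixed point, shifted
         * left by 3 bits, hence the >> 3.
         */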
        return us_to_ktime(umin(call->rack_reo_wnd_mult * minmax_get(&call->min_rtt) / 4,
                                call->srtt_us >> 3));
}

/*
 * Detect losses [RFC8985 6.2 Step 5].
 */
static ktime_t rxrpc_rack_detect_loss(struct rxrpc_call *call,
                                      struct rxrpc_ack_summary *summary)
{
        struct rxrpc_txqueue *tq;
        ktime_t timeout = 0, lost_after, now = ktime_get_real();

        call->rack_reo_wnd = rxrpc_rack_update_reo_wnd(call, summary);
        lost_after = ktime_add(call->rack_rtt, call->rack_reo_wnd);
        trace_rxrpc_rack_scan_loss(call);

        for (tq = call->tx_queue; tq; tq = tq->next) {
                unsigned long nacks = rxrpc_tq_nacks(tq);

                if (after(tq->qbase, call->tx_transmitted))
                        break;
                trace_rxrpc_rack_scan_loss_tq(call, tq, nacks);

                /* Skip ones marked lost but not yet retransmitted */
                nacks &= ~tq->segment_lost | tq->segment_retransmitted;

                while (nacks) {
                        unsigned int ix = __ffs(nacks);
                        rxrpc_seq_t seq = tq->qbase + ix;
                        ktime_t remaining;
                        ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix);

                        __clear_bit(ix, &nacks);

                        if (rxrpc_rack_sent_after(call->rack_xmit_ts, call->rack_end_seq,
                                                  xmit_ts, seq)) {
                                remaining = ktime_sub(ktime_add(xmit_ts, lost_after), now);
                                if (remaining <= 0) {
                                        rxrpc_rack_mark_lost(call, tq, ix);
                                        trace_rxrpc_rack_detect_loss(call, summary, seq);
                                } else {
                                        timeout = max(remaining, timeout);
                                }
                        }
                }
        }

        return timeout;
}

/*
 * Detect losses and set a timer to retry the detection [RFC8985 6.2 Step 5].
 */
void rxrpc_rack_detect_loss_and_arm_timer(struct rxrpc_call *call,
                                          struct rxrpc_ack_summary *summary)
{
        ktime_t timeout = rxrpc_rack_detect_loss(call, summary);

        if (timeout) {
                call->rack_timer_mode = RXRPC_CALL_RACKTIMER_RACK_REORDER;
                call->rack_timo_at = ktime_add(ktime_get_real(), timeout);
                trace_rxrpc_rack_timer(call, timeout, false);
                trace_rxrpc_timer_set(call, timeout, rxrpc_timer_trace_rack_reo);
        }
}

/*
 * Handle RACK-TLP RTO expiration [RFC8985 6.3].
 */
static void rxrpc_rack_mark_losses_on_rto(struct rxrpc_call *call)
{
        struct rxrpc_txqueue *tq;
        rxrpc_seq_t snd_una = call->acks_lowest_nak; /* Lowest unack'd seq */
        ktime_t lost_after = ktime_add(call->rack_rtt, call->rack_reo_wnd);
        ktime_t deadline = ktime_sub(ktime_get_real(), lost_after);

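        /* Mark as lost every unacked segment transmitted longer ago than
         * RACK.rtt + RACK.reo_wnd, plus the lowest unacked segment itself,
         * which the expiry of the RTO timer implies didn't arrive.
         */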
        for (tq = call->tx_queue; tq; tq = tq->next) {
                unsigned long unacked = ~tq->segment_acked;

                trace_rxrpc_rack_mark_loss_tq(call, tq);
                while (unacked) {
                        unsigned int ix = __ffs(unacked);
                        rxrpc_seq_t seq = tq->qbase + ix;
                        ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix);

                        if (after(seq, call->tx_transmitted))
                                return;
                        __clear_bit(ix, &unacked);

                        if (seq == snd_una ||
                            ktime_before(xmit_ts, deadline))
                                rxrpc_rack_mark_lost(call, tq, ix);
                }
        }
}

/*
 * Calculate the TLP loss probe timeout (PTO) [RFC8985 7.2].
 */
ktime_t rxrpc_tlp_calc_pto(struct rxrpc_call *call, ktime_t now)
{
        unsigned int flight_size = rxrpc_tx_in_flight(call);
        ktime_t rto_at = ktime_add(call->tx_last_sent,
                                   rxrpc_get_rto_backoff(call, false));
        ktime_t pto;

        if (call->rtt_count > 0) {
                /* Use 2*SRTT as the timeout (srtt_us is kept left-shifted by
                 * 3 bits, so 2 * SRTT in ns is srtt_us * NSEC_PER_USEC / 4).
                 */
                pto = ns_to_ktime(call->srtt_us * NSEC_PER_USEC / 4);
                if (flight_size)
                        pto = ktime_add(pto, call->tlp_max_ack_delay);
        } else {
                pto = NSEC_PER_SEC;
        }

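        /* Don't let the probe fire later than the RTO timer would: if the
         * PTO would expire beyond the RTO deadline, clamp it back so that it
         * fires at the RTO deadline instead [RFC8985 7.2].
         */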
        if (ktime_after(ktime_add(now, pto), rto_at))
                pto = ktime_sub(rto_at, now);
        return pto;
}

/*
 * Send a TLP loss probe on PTO expiration [RFC8985 7.3].
 */
void rxrpc_tlp_send_probe(struct rxrpc_call *call)
{
        unsigned int in_flight = rxrpc_tx_in_flight(call);

        if (after_eq(call->acks_hard_ack, call->tx_transmitted))
                return; /* Everything we transmitted has been acked. */

        /* There must be no other loss probe still in flight and we need to
         * have taken a new RTT sample since last probe or the start of
         * connection.
         */
        if (!call->tlp_serial &&
            call->tlp_rtt_taken != call->rtt_taken) {
                call->tlp_is_retrans = false;
                if (after(call->send_top, call->tx_transmitted) &&
                    rxrpc_tx_window_space(call) > 0) {
                        /* Transmit the lowest-sequence unsent DATA */
                        call->tx_last_serial = 0;
                        rxrpc_transmit_some_data(call, 1, rxrpc_txdata_tlp_new_data);
                        call->tlp_serial = call->tx_last_serial;
                        call->tlp_seq = call->tx_transmitted;
                        trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_transmit_new);
                        in_flight = rxrpc_tx_in_flight(call);
                } else {
                        /* Retransmit the highest-sequence DATA sent */
                        call->tx_last_serial = 0;
                        rxrpc_resend_tlp(call);
                        call->tlp_is_retrans = true;
                        trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_retransmit);
                }
        } else {
                trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_busy);
        }

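        /* If data is still in flight, rearm the timer in RTO mode so that
         * everything outstanding can be marked lost if nothing ACKs the
         * probe [RFC8985 7.3].
         */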
        if (in_flight != 0) {
                ktime_t rto = rxrpc_get_rto_backoff(call, false);

                call->rack_timer_mode = RXRPC_CALL_RACKTIMER_RTO;
                call->rack_timo_at = ktime_add(ktime_get_real(), rto);
                trace_rxrpc_rack_timer(call, rto, false);
                trace_rxrpc_timer_set(call, rto, rxrpc_timer_trace_rack_rto);
        }
}

/*
 * Detect losses using the ACK of a TLP loss probe [RFC8985 7.4].
 */
void rxrpc_tlp_process_ack(struct rxrpc_call *call, struct rxrpc_ack_summary *summary)
{
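        /* Only act if a probe is outstanding and the hard-ACK point has
         * caught up with the probe's sequence number.
         */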
        if (!call->tlp_serial || after(call->tlp_seq, call->acks_hard_ack))
                return;

        if (!call->tlp_is_retrans) {
                /* TLP of new data delivered */
                trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_new_data);
                call->tlp_serial = 0;
        } else if (summary->ack_reason == RXRPC_ACK_DUPLICATE &&
                   summary->acked_serial == call->tlp_serial) {
                /* General Case: Detected packet losses using RACK [7.4.1] */
                trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_dup_acked);
                call->tlp_serial = 0;
        } else if (after(call->acks_hard_ack, call->tlp_seq)) {
                /* Repaired the single loss */
                trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_hard_beyond);
                call->tlp_serial = 0;
                // TODO: Invoke congestion control to react to the loss
                // event the probe has repaired
        } else if (summary->tlp_probe_acked) {
                trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_acked);
                /* Special Case: Detected a single loss repaired by the loss
                 * probe [7.4.2]
                 */
                call->tlp_serial = 0;
        } else {
                trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_incomplete);
        }
}

/*
 * Handle RACK timer expiration.
 */
void rxrpc_rack_timer_expired(struct rxrpc_call *call, ktime_t overran_by)
{
        struct rxrpc_ack_summary summary = {};
        enum rxrpc_rack_timer_mode mode = call->rack_timer_mode;

        trace_rxrpc_rack_timer(call, overran_by, true);
        call->rack_timer_mode = RXRPC_CALL_RACKTIMER_OFF;

        switch (mode) {
        case RXRPC_CALL_RACKTIMER_RACK_REORDER:
                rxrpc_rack_detect_loss_and_arm_timer(call, &summary);
                break;
        case RXRPC_CALL_RACKTIMER_TLP_PTO:
                rxrpc_tlp_send_probe(call);
                break;
        case RXRPC_CALL_RACKTIMER_RTO:
                // Might need to poke the congestion algo in some way
                rxrpc_rack_mark_losses_on_rto(call);
                break;
        //case RXRPC_CALL_RACKTIMER_ZEROWIN:
        default:
                pr_warn("Unexpected rack timer %u\n", call->rack_timer_mode);
        }
}