]>
Commit | Line | Data |
---|---|---|
c410bf01 DH |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* RTT/RTO calculation. | |
3 | * | |
4 | * Adapted from TCP for AF_RXRPC by David Howells ([email protected]) | |
5 | * | |
6 | * https://tools.ietf.org/html/rfc6298 | |
7 | * https://tools.ietf.org/html/rfc1122#section-4.2.3.1 | |
8 | * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf | |
9 | */ | |
10 | ||
11 | #include <linux/net.h> | |
12 | #include "ar-internal.h" | |
13 | ||
14 | #define RXRPC_RTO_MAX ((unsigned)(120 * HZ)) | |
15 | #define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ | |
16 | #define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */ | |
17 | #define rxrpc_min_rtt_wlen 300 /* As sysctl_tcp_min_rtt_wlen */ | |
18 | ||
19 | static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) | |
20 | { | |
21 | return 200; | |
22 | } | |
23 | ||
24 | static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) | |
25 | { | |
26 | return _usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us); | |
27 | } | |
28 | ||
29 | static u32 rxrpc_bound_rto(u32 rto) | |
30 | { | |
31 | return min(rto, RXRPC_RTO_MAX); | |
32 | } | |
33 | ||
34 | /* | |
35 | * Called to compute a smoothed rtt estimate. The data fed to this | |
36 | * routine either comes from timestamps, or from segments that were | |
37 | * known _not_ to have been retransmitted [see Karn/Partridge | |
38 | * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 | |
39 | * piece by Van Jacobson. | |
40 | * NOTE: the next three routines used to be one big routine. | |
41 | * To save cycles in the RFC 1323 implementation it was better to break | |
42 | * it up into three procedures. -- erics | |
43 | */ | |
44 | static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) | |
45 | { | |
46 | long m = sample_rtt_us; /* RTT */ | |
47 | u32 srtt = peer->srtt_us; | |
48 | ||
49 | /* The following amusing code comes from Jacobson's | |
50 | * article in SIGCOMM '88. Note that rtt and mdev | |
51 | * are scaled versions of rtt and mean deviation. | |
52 | * This is designed to be as fast as possible | |
53 | * m stands for "measurement". | |
54 | * | |
55 | * On a 1990 paper the rto value is changed to: | |
56 | * RTO = rtt + 4 * mdev | |
57 | * | |
58 | * Funny. This algorithm seems to be very broken. | |
59 | * These formulae increase RTO, when it should be decreased, increase | |
60 | * too slowly, when it should be increased quickly, decrease too quickly | |
61 | * etc. I guess in BSD RTO takes ONE value, so that it is absolutely | |
62 | * does not matter how to _calculate_ it. Seems, it was trap | |
63 | * that VJ failed to avoid. 8) | |
64 | */ | |
65 | if (srtt != 0) { | |
66 | m -= (srtt >> 3); /* m is now error in rtt est */ | |
67 | srtt += m; /* rtt = 7/8 rtt + 1/8 new */ | |
68 | if (m < 0) { | |
69 | m = -m; /* m is now abs(error) */ | |
70 | m -= (peer->mdev_us >> 2); /* similar update on mdev */ | |
71 | /* This is similar to one of Eifel findings. | |
72 | * Eifel blocks mdev updates when rtt decreases. | |
73 | * This solution is a bit different: we use finer gain | |
74 | * for mdev in this case (alpha*beta). | |
75 | * Like Eifel it also prevents growth of rto, | |
76 | * but also it limits too fast rto decreases, | |
77 | * happening in pure Eifel. | |
78 | */ | |
79 | if (m > 0) | |
80 | m >>= 3; | |
81 | } else { | |
82 | m -= (peer->mdev_us >> 2); /* similar update on mdev */ | |
83 | } | |
84 | ||
85 | peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ | |
86 | if (peer->mdev_us > peer->mdev_max_us) { | |
87 | peer->mdev_max_us = peer->mdev_us; | |
88 | if (peer->mdev_max_us > peer->rttvar_us) | |
89 | peer->rttvar_us = peer->mdev_max_us; | |
90 | } | |
91 | } else { | |
92 | /* no previous measure. */ | |
93 | srtt = m << 3; /* take the measured time to be rtt */ | |
94 | peer->mdev_us = m << 1; /* make sure rto = 3*rtt */ | |
95 | peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer)); | |
96 | peer->mdev_max_us = peer->rttvar_us; | |
97 | } | |
98 | ||
99 | peer->srtt_us = max(1U, srtt); | |
100 | } | |
101 | ||
102 | /* | |
103 | * Calculate rto without backoff. This is the second half of Van Jacobson's | |
104 | * routine referred to above. | |
105 | */ | |
106 | static void rxrpc_set_rto(struct rxrpc_peer *peer) | |
107 | { | |
108 | u32 rto; | |
109 | ||
110 | /* 1. If rtt variance happened to be less 50msec, it is hallucination. | |
111 | * It cannot be less due to utterly erratic ACK generation made | |
112 | * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ | |
113 | * to do with delayed acks, because at cwnd>2 true delack timeout | |
114 | * is invisible. Actually, Linux-2.4 also generates erratic | |
115 | * ACKs in some circumstances. | |
116 | */ | |
117 | rto = __rxrpc_set_rto(peer); | |
118 | ||
119 | /* 2. Fixups made earlier cannot be right. | |
120 | * If we do not estimate RTO correctly without them, | |
121 | * all the algo is pure shit and should be replaced | |
122 | * with correct one. It is exactly, which we pretend to do. | |
123 | */ | |
124 | ||
125 | /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo | |
126 | * guarantees that rto is higher. | |
127 | */ | |
128 | peer->rto_j = rxrpc_bound_rto(rto); | |
129 | } | |
130 | ||
131 | static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us) | |
132 | { | |
133 | if (rtt_us < 0) | |
134 | return; | |
135 | ||
136 | //rxrpc_update_rtt_min(peer, rtt_us); | |
137 | rxrpc_rtt_estimator(peer, rtt_us); | |
138 | rxrpc_set_rto(peer); | |
139 | ||
140 | /* RFC6298: only reset backoff on valid RTT measurement. */ | |
141 | peer->backoff = 0; | |
142 | } | |
143 | ||
144 | /* | |
145 | * Add RTT information to cache. This is called in softirq mode and has | |
146 | * exclusive access to the peer RTT data. | |
147 | */ | |
148 | void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, | |
149 | rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, | |
150 | ktime_t send_time, ktime_t resp_time) | |
151 | { | |
152 | struct rxrpc_peer *peer = call->peer; | |
153 | s64 rtt_us; | |
154 | ||
155 | rtt_us = ktime_to_us(ktime_sub(resp_time, send_time)); | |
156 | if (rtt_us < 0) | |
157 | return; | |
158 | ||
159 | spin_lock(&peer->rtt_input_lock); | |
160 | rxrpc_ack_update_rtt(peer, rtt_us); | |
161 | if (peer->rtt_count < 3) | |
162 | peer->rtt_count++; | |
163 | spin_unlock(&peer->rtt_input_lock); | |
164 | ||
165 | trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, | |
166 | peer->srtt_us >> 3, peer->rto_j); | |
167 | } | |
168 | ||
169 | /* | |
170 | * Get the retransmission timeout to set in jiffies, backing it off each time | |
171 | * we retransmit. | |
172 | */ | |
173 | unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans) | |
174 | { | |
175 | u64 timo_j; | |
176 | u8 backoff = READ_ONCE(peer->backoff); | |
177 | ||
178 | timo_j = peer->rto_j; | |
179 | timo_j <<= backoff; | |
180 | if (retrans && timo_j * 2 <= RXRPC_RTO_MAX) | |
181 | WRITE_ONCE(peer->backoff, backoff + 1); | |
182 | ||
183 | if (timo_j < 1) | |
184 | timo_j = 1; | |
185 | ||
186 | return timo_j; | |
187 | } | |
188 | ||
189 | void rxrpc_peer_init_rtt(struct rxrpc_peer *peer) | |
190 | { | |
191 | peer->rto_j = RXRPC_TIMEOUT_INIT; | |
192 | peer->mdev_us = jiffies_to_usecs(RXRPC_TIMEOUT_INIT); | |
193 | peer->backoff = 0; | |
194 | //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U); | |
195 | } |