]>
Commit | Line | Data |
---|---|---|
00e0f34c AG |
1 | /* |
2 | * Copyright (c) 2006 Oracle. All rights reserved. | |
3 | * | |
4 | * This software is available to you under a choice of one of two | |
5 | * licenses. You may choose to be licensed under the terms of the GNU | |
6 | * General Public License (GPL) Version 2, available from the file | |
7 | * COPYING in the main directory of this source tree, or the | |
8 | * OpenIB.org BSD license below: | |
9 | * | |
10 | * Redistribution and use in source and binary forms, with or | |
11 | * without modification, are permitted provided that the following | |
12 | * conditions are met: | |
13 | * | |
14 | * - Redistributions of source code must retain the above | |
15 | * copyright notice, this list of conditions and the following | |
16 | * disclaimer. | |
17 | * | |
18 | * - Redistributions in binary form must reproduce the above | |
19 | * copyright notice, this list of conditions and the following | |
20 | * disclaimer in the documentation and/or other materials | |
21 | * provided with the distribution. | |
22 | * | |
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
30 | * SOFTWARE. | |
31 | * | |
32 | */ | |
33 | #include <linux/kernel.h> | |
34 | #include <linux/random.h> | |
bc3b2d7f | 35 | #include <linux/export.h> |
00e0f34c AG |
36 | |
37 | #include "rds.h" | |
38 | ||
39 | /* | |
40 | * All of connection management is simplified by serializing it through | |
41 | * work queues that execute in a connection managing thread. | |
42 | * | |
43 | * TCP wants to send acks through sendpage() in response to data_ready(), | |
44 | * but it needs a process context to do so. | |
45 | * | |
46 | * The receive paths need to allocate but can't drop packets (!) so we have | |
47 | * a thread around to block allocating if the receive fast path sees an | |
48 | * allocation failure. | |
49 | */ | |
50 | ||
51 | /* Grand Unified Theory of connection life cycle: | |
52 | * At any point in time, the connection can be in one of these states: | |
53 | * DOWN, CONNECTING, UP, DISCONNECTING, ERROR | |
54 | * | |
55 | * The following transitions are possible: | |
56 | * ANY -> ERROR | |
57 | * UP -> DISCONNECTING | |
58 | * ERROR -> DISCONNECTING | |
59 | * DISCONNECTING -> DOWN | |
60 | * DOWN -> CONNECTING | |
61 | * CONNECTING -> UP | |
62 | * | |
63 | * Transition to state DISCONNECTING/DOWN: | |
64 | * - Inside the shutdown worker; synchronizes with xmit path | |
0f4b1c7e | 65 | * through RDS_IN_XMIT, and with connection management callbacks |
00e0f34c AG |
66 | * via c_cm_lock. |
67 | * | |
68 | * For receive callbacks, we rely on the underlying transport | |
69 | * (TCP, IB/RDMA) to provide the necessary synchronisation. | |
70 | */ | |
71 | struct workqueue_struct *rds_wq; | |
616b757a | 72 | EXPORT_SYMBOL_GPL(rds_wq); |
00e0f34c AG |
73 | |
74 | void rds_connect_complete(struct rds_connection *conn) | |
75 | { | |
76 | if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) { | |
77 | printk(KERN_WARNING "%s: Cannot transition to state UP, " | |
78 | "current state is %d\n", | |
79 | __func__, | |
80 | atomic_read(&conn->c_state)); | |
310886dd | 81 | rds_conn_drop(conn); |
00e0f34c AG |
82 | return; |
83 | } | |
84 | ||
85 | rdsdebug("conn %p for %pI4 to %pI4 complete\n", | |
86 | conn, &conn->c_laddr, &conn->c_faddr); | |
87 | ||
88 | conn->c_reconnect_jiffies = 0; | |
89 | set_bit(0, &conn->c_map_queued); | |
90 | queue_delayed_work(rds_wq, &conn->c_send_w, 0); | |
91 | queue_delayed_work(rds_wq, &conn->c_recv_w, 0); | |
92 | } | |
616b757a | 93 | EXPORT_SYMBOL_GPL(rds_connect_complete); |
00e0f34c AG |
94 | |
95 | /* | |
96 | * This random exponential backoff is relied on to eventually resolve racing | |
97 | * connects. | |
98 | * | |
99 | * If connect attempts race then both parties drop both connections and come | |
100 | * here to wait for a random amount of time before trying again. Eventually | |
101 | * the backoff range will be so much greater than the time it takes to | |
102 | * establish a connection that one of the pair will establish the connection | |
103 | * before the other's random delay fires. | |
104 | * | |
105 | * Connection attempts that arrive while a connection is already established | |
106 | * are also considered to be racing connects. This lets a connection from | |
107 | * a rebooted machine replace an existing stale connection before the transport | |
108 | * notices that the connection has failed. | |
109 | * | |
110 | * We should *always* start with a random backoff; otherwise a broken connection | |
111 | * will always take several iterations to be re-established. | |
112 | */ | |
2dc39357 | 113 | void rds_queue_reconnect(struct rds_connection *conn) |
00e0f34c AG |
114 | { |
115 | unsigned long rand; | |
116 | ||
117 | rdsdebug("conn %p for %pI4 to %pI4 reconnect jiffies %lu\n", | |
118 | conn, &conn->c_laddr, &conn->c_faddr, | |
119 | conn->c_reconnect_jiffies); | |
120 | ||
121 | set_bit(RDS_RECONNECT_PENDING, &conn->c_flags); | |
122 | if (conn->c_reconnect_jiffies == 0) { | |
123 | conn->c_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; | |
124 | queue_delayed_work(rds_wq, &conn->c_conn_w, 0); | |
125 | return; | |
126 | } | |
127 | ||
128 | get_random_bytes(&rand, sizeof(rand)); | |
129 | rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n", | |
130 | rand % conn->c_reconnect_jiffies, conn->c_reconnect_jiffies, | |
131 | conn, &conn->c_laddr, &conn->c_faddr); | |
132 | queue_delayed_work(rds_wq, &conn->c_conn_w, | |
133 | rand % conn->c_reconnect_jiffies); | |
134 | ||
135 | conn->c_reconnect_jiffies = min(conn->c_reconnect_jiffies * 2, | |
136 | rds_sysctl_reconnect_max_jiffies); | |
137 | } | |
138 | ||
139 | void rds_connect_worker(struct work_struct *work) | |
140 | { | |
141 | struct rds_connection *conn = container_of(work, struct rds_connection, c_conn_w.work); | |
142 | int ret; | |
143 | ||
144 | clear_bit(RDS_RECONNECT_PENDING, &conn->c_flags); | |
145 | if (rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { | |
146 | ret = conn->c_trans->conn_connect(conn); | |
147 | rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n", | |
148 | conn, &conn->c_laddr, &conn->c_faddr, ret); | |
149 | ||
150 | if (ret) { | |
151 | if (rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_DOWN)) | |
152 | rds_queue_reconnect(conn); | |
153 | else | |
154 | rds_conn_error(conn, "RDS: connect failed\n"); | |
155 | } | |
156 | } | |
157 | } | |
158 | ||
00e0f34c AG |
159 | void rds_send_worker(struct work_struct *work) |
160 | { | |
161 | struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work); | |
162 | int ret; | |
163 | ||
164 | if (rds_conn_state(conn) == RDS_CONN_UP) { | |
165 | ret = rds_send_xmit(conn); | |
166 | rdsdebug("conn %p ret %d\n", conn, ret); | |
167 | switch (ret) { | |
168 | case -EAGAIN: | |
169 | rds_stats_inc(s_send_immediate_retry); | |
170 | queue_delayed_work(rds_wq, &conn->c_send_w, 0); | |
171 | break; | |
172 | case -ENOMEM: | |
173 | rds_stats_inc(s_send_delayed_retry); | |
174 | queue_delayed_work(rds_wq, &conn->c_send_w, 2); | |
175 | default: | |
176 | break; | |
177 | } | |
178 | } | |
179 | } | |
180 | ||
181 | void rds_recv_worker(struct work_struct *work) | |
182 | { | |
183 | struct rds_connection *conn = container_of(work, struct rds_connection, c_recv_w.work); | |
184 | int ret; | |
185 | ||
186 | if (rds_conn_state(conn) == RDS_CONN_UP) { | |
187 | ret = conn->c_trans->recv(conn); | |
188 | rdsdebug("conn %p ret %d\n", conn, ret); | |
189 | switch (ret) { | |
190 | case -EAGAIN: | |
191 | rds_stats_inc(s_recv_immediate_retry); | |
192 | queue_delayed_work(rds_wq, &conn->c_recv_w, 0); | |
193 | break; | |
194 | case -ENOMEM: | |
195 | rds_stats_inc(s_recv_delayed_retry); | |
196 | queue_delayed_work(rds_wq, &conn->c_recv_w, 2); | |
197 | default: | |
198 | break; | |
199 | } | |
200 | } | |
201 | } | |
202 | ||
2dc39357 AG |
203 | void rds_shutdown_worker(struct work_struct *work) |
204 | { | |
205 | struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w); | |
206 | ||
207 | rds_conn_shutdown(conn); | |
208 | } | |
209 | ||
00e0f34c AG |
210 | void rds_threads_exit(void) |
211 | { | |
212 | destroy_workqueue(rds_wq); | |
213 | } | |
214 | ||
ef87b7ea | 215 | int rds_threads_init(void) |
00e0f34c | 216 | { |
80c51be5 | 217 | rds_wq = create_singlethread_workqueue("krdsd"); |
8690bfa1 | 218 | if (!rds_wq) |
00e0f34c AG |
219 | return -ENOMEM; |
220 | ||
221 | return 0; | |
222 | } |