drivers/infiniband/hw/qib/qib_rc.c
/*
 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/io.h>

#include "qib.h"

/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x

static void rc_timeout(unsigned long arg);

static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
                       u32 psn, u32 pmtu)
{
        u32 len;

        len = ((psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
        ss->sge = wqe->sg_list[0];
        ss->sg_list = wqe->sg_list + 1;
        ss->num_sge = wqe->wr.num_sge;
        ss->total_len = wqe->length;
        qib_skip_sge(ss, len, 0);
        return wqe->length - len;
}
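
/*
 * Editorial note: each PSN covers exactly one MTU-sized payload, so the
 * resend offset is just (psn delta) * pmtu.  Illustrative numbers, not
 * from the driver: with pmtu = 4096 and a restart 3 PSNs past wqe->psn,
 * restart_sge() skips 3 * 4096 = 12288 bytes into the WQE's SGE list
 * and returns wqe->length - 12288 as the bytes left to (re)send.
 */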

static void start_timer(struct rvt_qp *qp)
{
        qp->s_flags |= RVT_S_TIMER;
        qp->s_timer.function = rc_timeout;
        /* 4.096 usec. * (1 << qp->timeout) */
        qp->s_timer.expires = jiffies + qp->timeout_jiffies;
        add_timer(&qp->s_timer);
}
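
/*
 * Editorial note: the local ACK timeout is 4.096 usec * 2^timeout,
 * where "timeout" is the 5-bit value set at QP modify time.  A sketch
 * of the conversion rdmavt uses to precompute qp->timeout_jiffies
 * (illustrative; 4096 ns * 2^timeout, scaled to usec for
 * usecs_to_jiffies()):
 *
 *      timeout_jiffies =
 *              usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL);
 */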

/**
 * qib_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
 * @dev: the device for this QP
 * @qp: a pointer to the QP
 * @ohdr: a pointer to the IB header being constructed
 * @pmtu: the path MTU
 *
 * Return 1 if constructed; otherwise, return 0.
 * Note that we are in the responder's side of the QP context.
 * Note the QP s_lock must be held.
 */
static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
                           struct ib_other_headers *ohdr, u32 pmtu)
{
        struct rvt_ack_entry *e;
        u32 hwords;
        u32 len;
        u32 bth0;
        u32 bth2;

        /* Don't send an ACK if we aren't supposed to. */
        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
                goto bail;

        /* header size in 32-bit words LRH+BTH = (8+12)/4. */
        hwords = 5;

        switch (qp->s_ack_state) {
        case OP(RDMA_READ_RESPONSE_LAST):
        case OP(RDMA_READ_RESPONSE_ONLY):
                e = &qp->s_ack_queue[qp->s_tail_ack_queue];
                if (e->rdma_sge.mr) {
                        rvt_put_mr(e->rdma_sge.mr);
                        e->rdma_sge.mr = NULL;
                }
                /* FALLTHROUGH */
        case OP(ATOMIC_ACKNOWLEDGE):
                /*
                 * We can increment the tail pointer now that the last
                 * response has been sent instead of only being
                 * constructed.
                 */
                if (++qp->s_tail_ack_queue > QIB_MAX_RDMA_ATOMIC)
                        qp->s_tail_ack_queue = 0;
                /* FALLTHROUGH */
        case OP(SEND_ONLY):
        case OP(ACKNOWLEDGE):
                /* Check for no next entry in the queue. */
                if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
                        if (qp->s_flags & RVT_S_ACK_PENDING)
                                goto normal;
                        goto bail;
                }

                e = &qp->s_ack_queue[qp->s_tail_ack_queue];
                if (e->opcode == OP(RDMA_READ_REQUEST)) {
                        /*
                         * If a RDMA read response is being resent and
                         * we haven't seen the duplicate request yet,
                         * then stop sending the remaining responses the
                         * responder has seen until the requester resends it.
                         */
                        len = e->rdma_sge.sge_length;
                        if (len && !e->rdma_sge.mr) {
                                qp->s_tail_ack_queue = qp->r_head_ack_queue;
                                goto bail;
                        }
                        /* Copy SGE state in case we need to resend */
                        qp->s_rdma_mr = e->rdma_sge.mr;
                        if (qp->s_rdma_mr)
                                rvt_get_mr(qp->s_rdma_mr);
                        qp->s_ack_rdma_sge.sge = e->rdma_sge;
                        qp->s_ack_rdma_sge.num_sge = 1;
                        qp->s_cur_sge = &qp->s_ack_rdma_sge;
                        if (len > pmtu) {
                                len = pmtu;
                                qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
                        } else {
                                qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
                                e->sent = 1;
                        }
                        ohdr->u.aeth = qib_compute_aeth(qp);
                        hwords++;
                        qp->s_ack_rdma_psn = e->psn;
                        bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
                } else {
                        /* COMPARE_SWAP or FETCH_ADD */
                        qp->s_cur_sge = NULL;
                        len = 0;
                        qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
                        ohdr->u.at.aeth = qib_compute_aeth(qp);
                        ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
                        hwords += sizeof(ohdr->u.at) / sizeof(u32);
                        bth2 = e->psn & QIB_PSN_MASK;
                        e->sent = 1;
                }
                bth0 = qp->s_ack_state << 24;
                break;

        case OP(RDMA_READ_RESPONSE_FIRST):
                qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
                /* FALLTHROUGH */
        case OP(RDMA_READ_RESPONSE_MIDDLE):
                qp->s_cur_sge = &qp->s_ack_rdma_sge;
                qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
                if (qp->s_rdma_mr)
                        rvt_get_mr(qp->s_rdma_mr);
                len = qp->s_ack_rdma_sge.sge.sge_length;
                if (len > pmtu)
                        len = pmtu;
                else {
                        ohdr->u.aeth = qib_compute_aeth(qp);
                        hwords++;
                        qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
                        e = &qp->s_ack_queue[qp->s_tail_ack_queue];
                        e->sent = 1;
                }
                bth0 = qp->s_ack_state << 24;
                bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
                break;

        default:
normal:
                /*
                 * Send a regular ACK.
                 * Set the s_ack_state so we wait until after sending
                 * the ACK before setting s_ack_state to ACKNOWLEDGE
                 * (see above).
                 */
                qp->s_ack_state = OP(SEND_ONLY);
                qp->s_flags &= ~RVT_S_ACK_PENDING;
                qp->s_cur_sge = NULL;
                if (qp->s_nak_state)
                        ohdr->u.aeth =
                                cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
                                            (qp->s_nak_state <<
                                             QIB_AETH_CREDIT_SHIFT));
                else
                        ohdr->u.aeth = qib_compute_aeth(qp);
                hwords++;
                len = 0;
                bth0 = OP(ACKNOWLEDGE) << 24;
                bth2 = qp->s_ack_psn & QIB_PSN_MASK;
        }
        qp->s_rdma_ack_cnt++;
        qp->s_hdrwords = hwords;
        qp->s_cur_size = len;
        qib_make_ruc_header(qp, ohdr, bth0, bth2);
        return 1;

bail:
        qp->s_ack_state = OP(ACKNOWLEDGE);
        qp->s_flags &= ~(RVT_S_RESP_PENDING | RVT_S_ACK_PENDING);
        return 0;
}
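
/*
 * Editorial note: s_ack_queue[] is a ring of QIB_MAX_RDMA_ATOMIC + 1
 * entries.  The receive side advances r_head_ack_queue as RDMA read
 * and atomic requests arrive; qib_make_rc_ack() advances
 * s_tail_ack_queue only after the corresponding response has actually
 * gone out, so head == tail means there is nothing left to respond to,
 * and a duplicate request can still find its queued response.
 */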

/**
 * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
 * @qp: a pointer to the QP
 *
 * Assumes the s_lock is held.
 *
 * Return 1 if constructed; otherwise, return 0.
 */
int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
{
        struct qib_qp_priv *priv = qp->priv;
        struct qib_ibdev *dev = to_idev(qp->ibqp.device);
        struct ib_other_headers *ohdr;
        struct rvt_sge_state *ss;
        struct rvt_swqe *wqe;
        u32 hwords;
        u32 len;
        u32 bth0;
        u32 bth2;
        u32 pmtu = qp->pmtu;
        char newreq;
        int ret = 0;
        int delta;

        ohdr = &priv->s_hdr->u.oth;
        if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
                ohdr = &priv->s_hdr->u.l.oth;

        /* Sending responses has higher priority than sending requests. */
        if ((qp->s_flags & RVT_S_RESP_PENDING) &&
            qib_make_rc_ack(dev, qp, ohdr, pmtu))
                goto done;

        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
                if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
                        goto bail;
                /* We are in the error state, flush the work request. */
                smp_read_barrier_depends(); /* see post_one_send() */
                if (qp->s_last == ACCESS_ONCE(qp->s_head))
                        goto bail;
                /* If DMAs are in progress, we can't flush immediately. */
                if (atomic_read(&priv->s_dma_busy)) {
                        qp->s_flags |= RVT_S_WAIT_DMA;
                        goto bail;
                }
                wqe = rvt_get_swqe_ptr(qp, qp->s_last);
                qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
                        IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
                /* will get called again */
                goto done;
        }

        if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
                goto bail;

        if (qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) {
                if (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
                        qp->s_flags |= RVT_S_WAIT_PSN;
                        goto bail;
                }
                qp->s_sending_psn = qp->s_psn;
                qp->s_sending_hpsn = qp->s_psn - 1;
        }
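        /*
         * Editorial note: [s_sending_psn, s_sending_hpsn] brackets the
         * PSNs of packets handed to the PIO/SDMA engines but not yet
         * confirmed sent.  If a resend would overlap that window,
         * RVT_S_WAIT_PSN parks the requester until
         * qib_rc_send_complete() drains the range and reschedules us;
         * otherwise the (empty) window is reset to just below s_psn.
         */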

        /* header size in 32-bit words LRH+BTH = (8+12)/4. */
        hwords = 5;
        bth0 = 0;

        /* Send a request. */
        wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
        switch (qp->s_state) {
        default:
                if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
                        goto bail;
                /*
                 * Resend an old request or start a new one.
                 *
                 * We keep track of the current SWQE so that
                 * we don't reset the "furthest progress" state
                 * if we need to back up.
                 */
                newreq = 0;
                if (qp->s_cur == qp->s_tail) {
                        /* Check if send work queue is empty. */
                        if (qp->s_tail == qp->s_head)
                                goto bail;
                        /*
                         * If a fence is requested, wait for previous
                         * RDMA read and atomic operations to finish.
                         */
                        if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
                            qp->s_num_rd_atomic) {
                                qp->s_flags |= RVT_S_WAIT_FENCE;
                                goto bail;
                        }
                        newreq = 1;
                        qp->s_psn = wqe->psn;
                }
                /*
                 * Note that we have to be careful not to modify the
                 * original work request since we may need to resend
                 * it.
                 */
                len = wqe->length;
                ss = &qp->s_sge;
                bth2 = qp->s_psn & QIB_PSN_MASK;
                switch (wqe->wr.opcode) {
                case IB_WR_SEND:
                case IB_WR_SEND_WITH_IMM:
                        /* If no credit, return. */
                        if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
                            qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
                                qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
                                goto bail;
                        }
                        if (len > pmtu) {
                                qp->s_state = OP(SEND_FIRST);
                                len = pmtu;
                                break;
                        }
                        if (wqe->wr.opcode == IB_WR_SEND)
                                qp->s_state = OP(SEND_ONLY);
                        else {
                                qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
                                /* Immediate data comes after the BTH */
                                ohdr->u.imm_data = wqe->wr.ex.imm_data;
                                hwords += 1;
                        }
                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                                bth0 |= IB_BTH_SOLICITED;
                        bth2 |= IB_BTH_REQ_ACK;
                        if (++qp->s_cur == qp->s_size)
                                qp->s_cur = 0;
                        break;

                case IB_WR_RDMA_WRITE:
                        if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
                                qp->s_lsn++;
                        /* FALLTHROUGH */
                case IB_WR_RDMA_WRITE_WITH_IMM:
                        /* If no credit, return. */
                        if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
                            qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
                                qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
                                goto bail;
                        }

                        ohdr->u.rc.reth.vaddr =
                                cpu_to_be64(wqe->rdma_wr.remote_addr);
                        ohdr->u.rc.reth.rkey =
                                cpu_to_be32(wqe->rdma_wr.rkey);
                        ohdr->u.rc.reth.length = cpu_to_be32(len);
                        hwords += sizeof(struct ib_reth) / sizeof(u32);
                        if (len > pmtu) {
                                qp->s_state = OP(RDMA_WRITE_FIRST);
                                len = pmtu;
                                break;
                        }
                        if (wqe->rdma_wr.wr.opcode == IB_WR_RDMA_WRITE)
                                qp->s_state = OP(RDMA_WRITE_ONLY);
                        else {
                                qp->s_state = OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
                                /* Immediate data comes after RETH */
                                ohdr->u.rc.imm_data =
                                        wqe->rdma_wr.wr.ex.imm_data;
                                hwords += 1;
                                if (wqe->rdma_wr.wr.send_flags & IB_SEND_SOLICITED)
                                        bth0 |= IB_BTH_SOLICITED;
                        }
                        bth2 |= IB_BTH_REQ_ACK;
                        if (++qp->s_cur == qp->s_size)
                                qp->s_cur = 0;
                        break;

                case IB_WR_RDMA_READ:
                        /*
                         * Don't allow more operations to be started
                         * than the QP limits allow.
                         */
                        if (newreq) {
                                if (qp->s_num_rd_atomic >=
                                    qp->s_max_rd_atomic) {
                                        qp->s_flags |= RVT_S_WAIT_RDMAR;
                                        goto bail;
                                }
                                qp->s_num_rd_atomic++;
                                if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
                                        qp->s_lsn++;
                        }

                        ohdr->u.rc.reth.vaddr =
                                cpu_to_be64(wqe->rdma_wr.remote_addr);
                        ohdr->u.rc.reth.rkey =
                                cpu_to_be32(wqe->rdma_wr.rkey);
                        ohdr->u.rc.reth.length = cpu_to_be32(len);
                        qp->s_state = OP(RDMA_READ_REQUEST);
                        hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
                        ss = NULL;
                        len = 0;
                        bth2 |= IB_BTH_REQ_ACK;
                        if (++qp->s_cur == qp->s_size)
                                qp->s_cur = 0;
                        break;

                case IB_WR_ATOMIC_CMP_AND_SWP:
                case IB_WR_ATOMIC_FETCH_AND_ADD:
                        /*
                         * Don't allow more operations to be started
                         * than the QP limits allow.
                         */
                        if (newreq) {
                                if (qp->s_num_rd_atomic >=
                                    qp->s_max_rd_atomic) {
                                        qp->s_flags |= RVT_S_WAIT_RDMAR;
                                        goto bail;
                                }
                                qp->s_num_rd_atomic++;
                                if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
                                        qp->s_lsn++;
                        }
                        if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
                                qp->s_state = OP(COMPARE_SWAP);
                                put_ib_ateth_swap(wqe->atomic_wr.swap,
                                                  &ohdr->u.atomic_eth);
                                put_ib_ateth_compare(wqe->atomic_wr.compare_add,
                                                     &ohdr->u.atomic_eth);
                        } else {
                                qp->s_state = OP(FETCH_ADD);
                                put_ib_ateth_swap(wqe->atomic_wr.compare_add,
                                                  &ohdr->u.atomic_eth);
                                put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
                        }
                        put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
                                           &ohdr->u.atomic_eth);
                        ohdr->u.atomic_eth.rkey = cpu_to_be32(
                                wqe->atomic_wr.rkey);
                        hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
                        ss = NULL;
                        len = 0;
                        bth2 |= IB_BTH_REQ_ACK;
                        if (++qp->s_cur == qp->s_size)
                                qp->s_cur = 0;
                        break;

                default:
                        goto bail;
                }
                qp->s_sge.sge = wqe->sg_list[0];
                qp->s_sge.sg_list = wqe->sg_list + 1;
                qp->s_sge.num_sge = wqe->wr.num_sge;
                qp->s_sge.total_len = wqe->length;
                qp->s_len = wqe->length;
                if (newreq) {
                        qp->s_tail++;
                        if (qp->s_tail >= qp->s_size)
                                qp->s_tail = 0;
                }
                if (wqe->wr.opcode == IB_WR_RDMA_READ)
                        qp->s_psn = wqe->lpsn + 1;
                else
                        qp->s_psn++;
                break;

        case OP(RDMA_READ_RESPONSE_FIRST):
                /*
                 * qp->s_state is normally set to the opcode of the
                 * last packet constructed for new requests and therefore
                 * is never set to RDMA read response.
                 * RDMA_READ_RESPONSE_FIRST is used by the ACK processing
                 * thread to indicate a SEND needs to be restarted from an
                 * earlier PSN without interfering with the sending thread.
                 * See qib_restart_rc().
                 */
                qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
                /* FALLTHROUGH */
        case OP(SEND_FIRST):
                qp->s_state = OP(SEND_MIDDLE);
                /* FALLTHROUGH */
        case OP(SEND_MIDDLE):
                bth2 = qp->s_psn++ & QIB_PSN_MASK;
                ss = &qp->s_sge;
                len = qp->s_len;
                if (len > pmtu) {
                        len = pmtu;
                        break;
                }
                if (wqe->wr.opcode == IB_WR_SEND)
                        qp->s_state = OP(SEND_LAST);
                else {
                        qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
                        /* Immediate data comes after the BTH */
                        ohdr->u.imm_data = wqe->wr.ex.imm_data;
                        hwords += 1;
                }
                if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                        bth0 |= IB_BTH_SOLICITED;
                bth2 |= IB_BTH_REQ_ACK;
                qp->s_cur++;
                if (qp->s_cur >= qp->s_size)
                        qp->s_cur = 0;
                break;

        case OP(RDMA_READ_RESPONSE_LAST):
                /*
                 * qp->s_state is normally set to the opcode of the
                 * last packet constructed for new requests and therefore
                 * is never set to RDMA read response.
                 * RDMA_READ_RESPONSE_LAST is used by the ACK processing
                 * thread to indicate a RDMA write needs to be restarted from
                 * an earlier PSN without interfering with the sending thread.
                 * See qib_restart_rc().
                 */
                qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
                /* FALLTHROUGH */
        case OP(RDMA_WRITE_FIRST):
                qp->s_state = OP(RDMA_WRITE_MIDDLE);
                /* FALLTHROUGH */
        case OP(RDMA_WRITE_MIDDLE):
                bth2 = qp->s_psn++ & QIB_PSN_MASK;
                ss = &qp->s_sge;
                len = qp->s_len;
                if (len > pmtu) {
                        len = pmtu;
                        break;
                }
                if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
                        qp->s_state = OP(RDMA_WRITE_LAST);
                else {
                        qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
                        /* Immediate data comes after the BTH */
                        ohdr->u.imm_data = wqe->wr.ex.imm_data;
                        hwords += 1;
                        if (wqe->wr.send_flags & IB_SEND_SOLICITED)
                                bth0 |= IB_BTH_SOLICITED;
                }
                bth2 |= IB_BTH_REQ_ACK;
                qp->s_cur++;
                if (qp->s_cur >= qp->s_size)
                        qp->s_cur = 0;
                break;

        case OP(RDMA_READ_RESPONSE_MIDDLE):
                /*
                 * qp->s_state is normally set to the opcode of the
                 * last packet constructed for new requests and therefore
                 * is never set to RDMA read response.
                 * RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing
                 * thread to indicate a RDMA read needs to be restarted from
                 * an earlier PSN without interfering with the sending thread.
                 * See qib_restart_rc().
                 */
                len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
                ohdr->u.rc.reth.vaddr =
                        cpu_to_be64(wqe->rdma_wr.remote_addr + len);
                ohdr->u.rc.reth.rkey =
                        cpu_to_be32(wqe->rdma_wr.rkey);
                ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
                qp->s_state = OP(RDMA_READ_REQUEST);
                hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
                bth2 = (qp->s_psn & QIB_PSN_MASK) | IB_BTH_REQ_ACK;
                qp->s_psn = wqe->lpsn + 1;
                ss = NULL;
                len = 0;
                qp->s_cur++;
                if (qp->s_cur == qp->s_size)
                        qp->s_cur = 0;
                break;
        }
        qp->s_sending_hpsn = bth2;
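        /*
         * Editorial note: PSNs are 24 bits wide, so the shift pair
         * below sign-extends the 24-bit difference into a 32-bit int.
         * Illustrative values (not from a trace): with bth2 = 0x000005
         * and wqe->psn = 0xfffffa, i.e. the PSN space just wrapped,
         * (((int) 0x5 - (int) 0xfffffa) << 8) >> 8 yields 11, so the
         * "request an ACK every QIB_PSN_CREDIT packets" cadence keeps
         * working across the wrap.
         */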
        delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8;
        if (delta && delta % QIB_PSN_CREDIT == 0)
                bth2 |= IB_BTH_REQ_ACK;
        if (qp->s_flags & RVT_S_SEND_ONE) {
                qp->s_flags &= ~RVT_S_SEND_ONE;
                qp->s_flags |= RVT_S_WAIT_ACK;
                bth2 |= IB_BTH_REQ_ACK;
        }
        qp->s_len -= len;
        qp->s_hdrwords = hwords;
        qp->s_cur_sge = ss;
        qp->s_cur_size = len;
        qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
done:
        return 1;
bail:
        qp->s_flags &= ~RVT_S_BUSY;
        return ret;
}

/**
 * qib_send_rc_ack - Construct an ACK packet and send it
 * @qp: a pointer to the QP
 *
 * This is called from qib_rc_rcv() and qib_kreceive().
 * Note that RDMA reads and atomics are handled in the
 * send side QP state and tasklet.
 */
void qib_send_rc_ack(struct rvt_qp *qp)
{
        struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
        struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
        struct qib_pportdata *ppd = ppd_from_ibp(ibp);
        u64 pbc;
        u16 lrh0;
        u32 bth0;
        u32 hwords;
        u32 pbufn;
        u32 __iomem *piobuf;
        struct ib_header hdr;
        struct ib_other_headers *ohdr;
        u32 control;
        unsigned long flags;

        spin_lock_irqsave(&qp->s_lock, flags);

        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
                goto unlock;

        /* Don't send ACK or NAK if a RDMA read or atomic is pending. */
        if ((qp->s_flags & RVT_S_RESP_PENDING) || qp->s_rdma_ack_cnt)
                goto queue_ack;

        /* Construct the header with s_lock held so APM doesn't change it. */
        ohdr = &hdr.u.oth;
        lrh0 = QIB_LRH_BTH;
        /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
        hwords = 6;
        if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
                hwords += qib_make_grh(ibp, &hdr.u.l.grh,
                                       &qp->remote_ah_attr.grh, hwords, 0);
                ohdr = &hdr.u.l.oth;
                lrh0 = QIB_LRH_GRH;
        }
        /* read pkey_index w/o lock (it's atomic) */
        bth0 = qib_get_pkey(ibp, qp->s_pkey_index) | (OP(ACKNOWLEDGE) << 24);
        if (qp->s_mig_state == IB_MIG_MIGRATED)
                bth0 |= IB_BTH_MIG_REQ;
        if (qp->r_nak_state)
                ohdr->u.aeth = cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
                                            (qp->r_nak_state <<
                                             QIB_AETH_CREDIT_SHIFT));
        else
                ohdr->u.aeth = qib_compute_aeth(qp);
        lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
                qp->remote_ah_attr.sl << 4;
        hdr.lrh[0] = cpu_to_be16(lrh0);
        hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
        hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
        hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
        ohdr->bth[0] = cpu_to_be32(bth0);
        ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
        ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & QIB_PSN_MASK);

        spin_unlock_irqrestore(&qp->s_lock, flags);

        /* Don't try to send ACKs if the link isn't ACTIVE */
        if (!(ppd->lflags & QIBL_LINKACTIVE))
                goto done;

        control = dd->f_setpbc_control(ppd, hwords + SIZE_OF_CRC,
                                       qp->s_srate, lrh0 >> 12);
        /* length is + 1 for the control dword */
        pbc = ((u64) control << 32) | (hwords + 1);

        piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
        if (!piobuf) {
                /*
                 * We are out of PIO buffers at the moment.
                 * Pass responsibility for sending the ACK to the
                 * send tasklet so that when a PIO buffer becomes
                 * available, the ACK is sent ahead of other outgoing
                 * packets.
                 */
                spin_lock_irqsave(&qp->s_lock, flags);
                goto queue_ack;
        }

        /*
         * Write the pbc.
         * We have to flush after the PBC for correctness
         * on some cpus or WC buffer can be written out of order.
         */
        writeq(pbc, piobuf);

        if (dd->flags & QIB_PIO_FLUSH_WC) {
                u32 *hdrp = (u32 *) &hdr;

                qib_flush_wc();
                qib_pio_copy(piobuf + 2, hdrp, hwords - 1);
                qib_flush_wc();
                __raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
        } else
                qib_pio_copy(piobuf + 2, (u32 *) &hdr, hwords);

        if (dd->flags & QIB_USE_SPCL_TRIG) {
                u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;

                qib_flush_wc();
                __raw_writel(0xaebecede, piobuf + spcl_off);
        }

        qib_flush_wc();
        qib_sendbuf_done(dd, pbufn);

        this_cpu_inc(ibp->pmastats->n_unicast_xmit);
        goto done;

queue_ack:
        if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
                this_cpu_inc(*ibp->rvp.rc_qacks);
                qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
                qp->s_nak_state = qp->r_nak_state;
                qp->s_ack_psn = qp->r_ack_psn;

                /* Schedule the send tasklet. */
                qib_schedule_send(qp);
        }
unlock:
        spin_unlock_irqrestore(&qp->s_lock, flags);
done:
        return;
}
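
/*
 * Editorial note on the PIO fast path above: store ordering into the
 * write-combining PIO buffer matters.  The PBC (length + control) is
 * written first and the WC buffers are flushed so it lands before any
 * header data; the header is then copied, and on chips that need it
 * the final header word (or the 0xaebecede trigger word) is made the
 * last store, again behind a flush, so the hardware never sees a
 * complete-looking packet before all of it is in the buffer.
 */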

/**
 * reset_psn - reset the QP state to send starting from PSN
 * @qp: the QP
 * @psn: the packet sequence number to restart at
 *
 * This is called from qib_rc_rcv() to process an incoming RC ACK
 * for the given QP.
 * Called at interrupt level with the QP s_lock held.
 */
static void reset_psn(struct rvt_qp *qp, u32 psn)
{
        u32 n = qp->s_acked;
        struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
        u32 opcode;

        qp->s_cur = n;

        /*
         * If we are starting the request from the beginning,
         * let the normal send code handle initialization.
         */
        if (qib_cmp24(psn, wqe->psn) <= 0) {
                qp->s_state = OP(SEND_LAST);
                goto done;
        }

        /* Find the work request opcode corresponding to the given PSN. */
        opcode = wqe->wr.opcode;
        for (;;) {
                int diff;

                if (++n == qp->s_size)
                        n = 0;
                if (n == qp->s_tail)
                        break;
                wqe = rvt_get_swqe_ptr(qp, n);
                diff = qib_cmp24(psn, wqe->psn);
                if (diff < 0)
                        break;
                qp->s_cur = n;
                /*
                 * If we are starting the request from the beginning,
                 * let the normal send code handle initialization.
                 */
                if (diff == 0) {
                        qp->s_state = OP(SEND_LAST);
                        goto done;
                }
                opcode = wqe->wr.opcode;
        }

        /*
         * Set the state to restart in the middle of a request.
         * Don't change the s_sge, s_cur_sge, or s_cur_size.
         * See qib_make_rc_req().
         */
        switch (opcode) {
        case IB_WR_SEND:
        case IB_WR_SEND_WITH_IMM:
                qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
                break;

        case IB_WR_RDMA_WRITE:
        case IB_WR_RDMA_WRITE_WITH_IMM:
                qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
                break;

        case IB_WR_RDMA_READ:
                qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
                break;

        default:
                /*
                 * This case shouldn't happen since there is only
                 * one PSN per request.
                 */
                qp->s_state = OP(SEND_LAST);
        }
done:
        qp->s_psn = psn;
        /*
         * Set RVT_S_WAIT_PSN as qib_rc_complete() may start the timer
         * asynchronously before the send tasklet can get scheduled.
         * Doing it in qib_make_rc_req() is too late.
         */
        if ((qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
            (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
                qp->s_flags |= RVT_S_WAIT_PSN;
}
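
/*
 * Editorial note: the requester never legitimately leaves s_state set
 * to one of the RDMA_READ_RESPONSE_* opcodes, so reset_psn() borrows
 * them as private "restart mid-request" markers: _FIRST for a SEND,
 * _LAST for an RDMA write and _MIDDLE for an RDMA read.  The matching
 * cases in qib_make_rc_req()'s switch decode them and rebuild the SGE
 * state via restart_sge().
 */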

/*
 * Back up requester to resend the last un-ACKed request.
 * The QP r_lock and s_lock should be held and interrupts disabled.
 */
static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
{
        struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
        struct qib_ibport *ibp;

        if (qp->s_retry == 0) {
                if (qp->s_mig_state == IB_MIG_ARMED) {
                        qib_migrate_qp(qp);
                        qp->s_retry = qp->s_retry_cnt;
                } else if (qp->s_last == qp->s_acked) {
                        qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
                        rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
                        return;
                } else /* XXX need to handle delayed completion */
                        return;
        } else
                qp->s_retry--;

        ibp = to_iport(qp->ibqp.device, qp->port_num);
        if (wqe->wr.opcode == IB_WR_RDMA_READ)
                ibp->rvp.n_rc_resends++;
        else
                ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;

        qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
                         RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
                         RVT_S_WAIT_ACK);
        if (wait)
                qp->s_flags |= RVT_S_SEND_ONE;
        reset_psn(qp, psn);
}

/*
 * This is called from s_timer for missing responses.
 */
static void rc_timeout(unsigned long arg)
{
        struct rvt_qp *qp = (struct rvt_qp *)arg;
        struct qib_ibport *ibp;
        unsigned long flags;

        spin_lock_irqsave(&qp->r_lock, flags);
        spin_lock(&qp->s_lock);
        if (qp->s_flags & RVT_S_TIMER) {
                ibp = to_iport(qp->ibqp.device, qp->port_num);
                ibp->rvp.n_rc_timeouts++;
                qp->s_flags &= ~RVT_S_TIMER;
                del_timer(&qp->s_timer);
                qib_restart_rc(qp, qp->s_last_psn + 1, 1);
                qib_schedule_send(qp);
        }
        spin_unlock(&qp->s_lock);
        spin_unlock_irqrestore(&qp->r_lock, flags);
}

/*
 * This is called from s_timer for RNR timeouts.
 */
void qib_rc_rnr_retry(unsigned long arg)
{
        struct rvt_qp *qp = (struct rvt_qp *)arg;
        unsigned long flags;

        spin_lock_irqsave(&qp->s_lock, flags);
        if (qp->s_flags & RVT_S_WAIT_RNR) {
                qp->s_flags &= ~RVT_S_WAIT_RNR;
                del_timer(&qp->s_timer);
                qib_schedule_send(qp);
        }
        spin_unlock_irqrestore(&qp->s_lock, flags);
}

/*
 * Set qp->s_sending_psn to the next PSN after the given one.
 * This would be psn+1 except when RDMA reads are present.
 */
static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
{
        struct rvt_swqe *wqe;
        u32 n = qp->s_last;

        /* Find the work request corresponding to the given PSN. */
        for (;;) {
                wqe = rvt_get_swqe_ptr(qp, n);
                if (qib_cmp24(psn, wqe->lpsn) <= 0) {
                        if (wqe->wr.opcode == IB_WR_RDMA_READ)
                                qp->s_sending_psn = wqe->lpsn + 1;
                        else
                                qp->s_sending_psn = psn + 1;
                        break;
                }
                if (++n == qp->s_size)
                        n = 0;
                if (n == qp->s_tail)
                        break;
        }
}
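
/*
 * Editorial note: for an RDMA read the requester only transmits the
 * single request packet, while the PSNs wqe->psn..wqe->lpsn are
 * consumed by the responder's read-response packets.  That is why the
 * sending PSN skips straight to lpsn + 1 here instead of psn + 1.
 */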

/*
 * This should be called with the QP s_lock held and interrupts disabled.
 */
void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
{
        struct ib_other_headers *ohdr;
        struct rvt_swqe *wqe;
        u32 opcode;
        u32 psn;

        if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
                return;

        /* Find out where the BTH is */
        if ((be16_to_cpu(hdr->lrh[0]) & 3) == QIB_LRH_BTH)
                ohdr = &hdr->u.oth;
        else
                ohdr = &hdr->u.l.oth;

        opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
        if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
            opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
                WARN_ON(!qp->s_rdma_ack_cnt);
                qp->s_rdma_ack_cnt--;
                return;
        }

        psn = be32_to_cpu(ohdr->bth[2]);
        reset_sending_psn(qp, psn);

        /*
         * Start timer after a packet requesting an ACK has been sent and
         * there are still requests that haven't been acked.
         */
        if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
            !(qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
            (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
                start_timer(qp);

        while (qp->s_last != qp->s_acked) {
                u32 s_last;

                wqe = rvt_get_swqe_ptr(qp, qp->s_last);
                if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 &&
                    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
                        break;
                s_last = qp->s_last;
                if (++s_last >= qp->s_size)
                        s_last = 0;
                qp->s_last = s_last;
                /* see post_send() */
                barrier();
                rvt_put_swqe(wqe);
                rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
        }
        /*
         * If we were waiting for sends to complete before resending,
         * and they are now complete, restart sending.
         */
        if (qp->s_flags & RVT_S_WAIT_PSN &&
            qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
                qp->s_flags &= ~RVT_S_WAIT_PSN;
                qp->s_sending_psn = qp->s_psn;
                qp->s_sending_hpsn = qp->s_psn - 1;
                qib_schedule_send(qp);
        }
}

static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
{
        qp->s_last_psn = psn;
}

/*
 * Generate a SWQE completion.
 * This is similar to qib_send_complete but has to check to be sure
 * that the SGEs are not being referenced if the SWQE is being resent.
 */
static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
                                         struct rvt_swqe *wqe,
                                         struct qib_ibport *ibp)
{
        /*
         * Don't decrement refcount and don't generate a
         * completion if the SWQE is being resent until the send
         * is finished.
         */
        if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 ||
            qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
                u32 s_last;

                rvt_put_swqe(wqe);
                s_last = qp->s_last;
                if (++s_last >= qp->s_size)
                        s_last = 0;
                qp->s_last = s_last;
                /* see post_send() */
                barrier();
                rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
        } else
                this_cpu_inc(*ibp->rvp.rc_delayed_comp);

        qp->s_retry = qp->s_retry_cnt;
        update_last_psn(qp, wqe->lpsn);

        /*
         * If we are completing a request which is in the process of
         * being resent, we can stop resending it since we know the
         * responder has already seen it.
         */
        if (qp->s_acked == qp->s_cur) {
                if (++qp->s_cur >= qp->s_size)
                        qp->s_cur = 0;
                qp->s_acked = qp->s_cur;
                wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
                if (qp->s_acked != qp->s_tail) {
                        qp->s_state = OP(SEND_LAST);
                        qp->s_psn = wqe->psn;
                }
        } else {
                if (++qp->s_acked >= qp->s_size)
                        qp->s_acked = 0;
                if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
                        qp->s_draining = 0;
                wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
        }
        return wqe;
}
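
/*
 * Editorial note: the send queue indices always satisfy, circularly,
 * s_last <= s_acked <= s_cur <= s_tail <= s_head: s_last is the oldest
 * WQE not yet reported to the CQ, s_acked the oldest not fully ACK'ed,
 * s_cur the WQE packets are being built from, s_tail the end of the
 * initialized requests, and s_head where new work requests are posted.
 * do_rc_completion() advances s_acked (and s_last once the hardware is
 * done with the buffers) while keeping that ordering intact.
 */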

/**
 * do_rc_ack - process an incoming RC ACK
 * @qp: the QP the ACK came in on
 * @psn: the packet sequence number of the ACK
 * @opcode: the opcode of the request that resulted in the ACK
 *
 * This is called from qib_rc_rcv_resp() to process an incoming RC ACK
 * for the given QP.
 * Called at interrupt level with the QP s_lock held.
 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
 */
static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
                     u64 val, struct qib_ctxtdata *rcd)
{
        struct qib_ibport *ibp;
        enum ib_wc_status status;
        struct rvt_swqe *wqe;
        int ret = 0;
        u32 ack_psn;
        int diff;

        /* Remove QP from retry timer */
        if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
                qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
                del_timer(&qp->s_timer);
        }

        /*
         * Note that NAKs implicitly ACK outstanding SEND and RDMA write
         * requests and implicitly NAK RDMA read and atomic requests issued
         * before the NAK'ed request.  The MSN won't include the NAK'ed
         * request but will include any ACK'ed requests.
         */
        ack_psn = psn;
        if (aeth >> 29)
                ack_psn--;
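        /*
         * Editorial note on the AETH layout: bits [31:29] carry the
         * 3-bit code tested in the switch below (0 = ACK, 1 = RNR NAK,
         * 2 = reserved, 3 = NAK), bits [28:24] carry the credit count
         * or NAK sub-code (extracted with QIB_AETH_CREDIT_SHIFT and
         * QIB_AETH_CREDIT_MASK), and bits [23:0] carry the responder's
         * MSN.
         */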
        wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
        ibp = to_iport(qp->ibqp.device, qp->port_num);

        /*
         * The MSN might be for a later WQE than the PSN indicates so
         * only complete WQEs that the PSN finishes.
         */
        while ((diff = qib_cmp24(ack_psn, wqe->lpsn)) >= 0) {
                /*
                 * RDMA_READ_RESPONSE_ONLY is a special case since
                 * we want to generate completion events for everything
                 * before the RDMA read, copy the data, then generate
                 * the completion for the read.
                 */
                if (wqe->wr.opcode == IB_WR_RDMA_READ &&
                    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
                    diff == 0) {
                        ret = 1;
                        goto bail;
                }
                /*
                 * If this request is a RDMA read or atomic, and the ACK is
                 * for a later operation, this ACK NAKs the RDMA read or
                 * atomic.  In other words, only a RDMA_READ_LAST or ONLY
                 * can ACK a RDMA read and likewise for atomic ops.  Note
                 * that the NAK case can only happen if relaxed ordering is
                 * used and requests are sent after an RDMA read or atomic
                 * is sent but before the response is received.
                 */
                if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
                     (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
                    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
                      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
                     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
                        /* Retry this request. */
                        if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
                                qp->r_flags |= RVT_R_RDMAR_SEQ;
                                qib_restart_rc(qp, qp->s_last_psn + 1, 0);
                                if (list_empty(&qp->rspwait)) {
                                        qp->r_flags |= RVT_R_RSP_SEND;
                                        rvt_get_qp(qp);
                                        list_add_tail(&qp->rspwait,
                                                      &rcd->qp_wait_list);
                                }
                        }
                        /*
                         * No need to process the ACK/NAK since we are
                         * restarting an earlier request.
                         */
                        goto bail;
                }
                if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
                    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
                        u64 *vaddr = wqe->sg_list[0].vaddr;
                        *vaddr = val;
                }
                if (qp->s_num_rd_atomic &&
                    (wqe->wr.opcode == IB_WR_RDMA_READ ||
                     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
                     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
                        qp->s_num_rd_atomic--;
                        /* Restart sending task if fence is complete */
                        if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
                            !qp->s_num_rd_atomic) {
                                qp->s_flags &= ~(RVT_S_WAIT_FENCE |
                                                 RVT_S_WAIT_ACK);
                                qib_schedule_send(qp);
                        } else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
                                qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
                                                 RVT_S_WAIT_ACK);
                                qib_schedule_send(qp);
                        }
                }
                wqe = do_rc_completion(qp, wqe, ibp);
                if (qp->s_acked == qp->s_tail)
                        break;
        }

        switch (aeth >> 29) {
        case 0:         /* ACK */
                this_cpu_inc(*ibp->rvp.rc_acks);
                if (qp->s_acked != qp->s_tail) {
                        /*
                         * We are expecting more ACKs so
                         * reset the retransmit timer.
                         */
                        start_timer(qp);
                        /*
                         * We can stop resending the earlier packets and
                         * continue with the next packet the receiver wants.
                         */
                        if (qib_cmp24(qp->s_psn, psn) <= 0)
                                reset_psn(qp, psn + 1);
                } else if (qib_cmp24(qp->s_psn, psn) <= 0) {
                        qp->s_state = OP(SEND_LAST);
                        qp->s_psn = psn + 1;
                }
                if (qp->s_flags & RVT_S_WAIT_ACK) {
                        qp->s_flags &= ~RVT_S_WAIT_ACK;
                        qib_schedule_send(qp);
                }
                qib_get_credit(qp, aeth);
                qp->s_rnr_retry = qp->s_rnr_retry_cnt;
                qp->s_retry = qp->s_retry_cnt;
                update_last_psn(qp, psn);
                ret = 1;
                goto bail;

        case 1:         /* RNR NAK */
                ibp->rvp.n_rnr_naks++;
                if (qp->s_acked == qp->s_tail)
                        goto bail;
                if (qp->s_flags & RVT_S_WAIT_RNR)
                        goto bail;
                if (qp->s_rnr_retry == 0) {
                        status = IB_WC_RNR_RETRY_EXC_ERR;
                        goto class_b;
                }
                if (qp->s_rnr_retry_cnt < 7)
                        qp->s_rnr_retry--;

                /* The last valid PSN is the previous PSN. */
                update_last_psn(qp, psn - 1);

                ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;

                reset_psn(qp, psn);

                qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
                qp->s_flags |= RVT_S_WAIT_RNR;
                qp->s_timer.function = qib_rc_rnr_retry;
                qp->s_timer.expires = jiffies + usecs_to_jiffies(
                        ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) &
                                           QIB_AETH_CREDIT_MASK]);
                add_timer(&qp->s_timer);
                goto bail;

        case 3:         /* NAK */
                if (qp->s_acked == qp->s_tail)
                        goto bail;
                /* The last valid PSN is the previous PSN. */
                update_last_psn(qp, psn - 1);
                switch ((aeth >> QIB_AETH_CREDIT_SHIFT) &
                        QIB_AETH_CREDIT_MASK) {
                case 0: /* PSN sequence error */
                        ibp->rvp.n_seq_naks++;
                        /*
                         * Back up to the responder's expected PSN.
                         * Note that we might get a NAK in the middle of an
                         * RDMA READ response which terminates the RDMA
                         * READ.
                         */
                        qib_restart_rc(qp, psn, 0);
                        qib_schedule_send(qp);
                        break;

                case 1: /* Invalid Request */
                        status = IB_WC_REM_INV_REQ_ERR;
                        ibp->rvp.n_other_naks++;
                        goto class_b;

                case 2: /* Remote Access Error */
                        status = IB_WC_REM_ACCESS_ERR;
                        ibp->rvp.n_other_naks++;
                        goto class_b;

                case 3: /* Remote Operation Error */
                        status = IB_WC_REM_OP_ERR;
                        ibp->rvp.n_other_naks++;
class_b:
                        if (qp->s_last == qp->s_acked) {
                                qib_send_complete(qp, wqe, status);
                                rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
                        }
                        break;

                default:
                        /* Ignore other reserved NAK error codes */
                        goto reserved;
                }
                qp->s_retry = qp->s_retry_cnt;
                qp->s_rnr_retry = qp->s_rnr_retry_cnt;
                goto bail;

        default:                /* 2: reserved */
reserved:
                /* Ignore reserved NAK codes. */
                goto bail;
        }

bail:
        return ret;
}
1295
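/*
 * For reference only, not part of the driver: PSNs are 24-bit sequence
 * numbers that wrap, so ordering tests such as qib_cmp24() sign-extend
 * the 24-bit difference.  A minimal sketch of the idea:
 *
 *	static inline int cmp24_sketch(u32 a, u32 b)
 *	{
 *		return (((int) a) - ((int) b)) << 8 >> 8;
 *	}
 *
 * so that cmp24_sketch(1, 0xffffff) > 0, i.e. PSN 1 follows PSN
 * 0xffffff across the wrap.
 */
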
1296 /*
1297  * We have seen an out-of-sequence RDMA read middle or last packet.
1298  * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
1299  */
1300 static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
1301                          struct qib_ctxtdata *rcd)
1302 {
1303         struct rvt_swqe *wqe;
1304
1305         /* Remove QP from retry timer */
1306         if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
1307                 qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
1308                 del_timer(&qp->s_timer);
1309         }
1310
1311         wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1312
1313         while (qib_cmp24(psn, wqe->lpsn) > 0) {
1314                 if (wqe->wr.opcode == IB_WR_RDMA_READ ||
1315                     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1316                     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
1317                         break;
1318                 wqe = do_rc_completion(qp, wqe, ibp);
1319         }
1320
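        /*
         * Flag the restarted read so qib_rc_rcv_resp() ignores all but
         * the retried PSN, and queue the QP so the resend is driven
         * from the deferred receive processing.
         */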
1321         ibp->rvp.n_rdma_seq++;
1322         qp->r_flags |= RVT_R_RDMAR_SEQ;
1323         qib_restart_rc(qp, qp->s_last_psn + 1, 0);
1324         if (list_empty(&qp->rspwait)) {
1325                 qp->r_flags |= RVT_R_RSP_SEND;
1326                 rvt_get_qp(qp);
1327                 list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
1328         }
1329 }
1330
1331 /**
1332  * qib_rc_rcv_resp - process an incoming RC response packet
1333  * @ibp: the port this packet came in on
1334  * @ohdr: the other headers for this packet
1335  * @data: the packet data
1336  * @tlen: the packet length
1337  * @qp: the QP for this packet
1338  * @opcode: the opcode for this packet
1339  * @psn: the packet sequence number for this packet
1340  * @hdrsize: the header length
1341  * @pmtu: the path MTU
 * @rcd: the context pointer
1342  *
1343  * This is called from qib_rc_rcv() to process an incoming RC response
1344  * packet for the given QP.
1345  * Called at interrupt level.
1346  */
1347 static void qib_rc_rcv_resp(struct qib_ibport *ibp,
1348                             struct ib_other_headers *ohdr,
1349                             void *data, u32 tlen,
1350                             struct rvt_qp *qp,
1351                             u32 opcode,
1352                             u32 psn, u32 hdrsize, u32 pmtu,
1353                             struct qib_ctxtdata *rcd)
1354 {
1355         struct rvt_swqe *wqe;
1356         struct qib_pportdata *ppd = ppd_from_ibp(ibp);
1357         enum ib_wc_status status;
1358         unsigned long flags;
1359         int diff;
1360         u32 pad;
1361         u32 aeth;
1362         u64 val;
1363
1364         if (opcode != OP(RDMA_READ_RESPONSE_MIDDLE)) {
1365                 /*
1366                  * If the ACK'd PSN is on the SDMA busy list, try to make
1367                  * progress to reclaim SDMA credits.
1368                  */
1369                 if ((qib_cmp24(psn, qp->s_sending_psn) >= 0) &&
1370                     (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) {
1371
1372                         /*
1373                          * If the send tasklet is not running, attempt to
1374                          * make progress on the SDMA queue.
1375                          */
1376                         if (!(qp->s_flags & RVT_S_BUSY)) {
1377                                 /* Acquire SDMA Lock */
1378                                 spin_lock_irqsave(&ppd->sdma_lock, flags);
1379                                 /* Invoke sdma make progress */
1380                                 qib_sdma_make_progress(ppd);
1381                                 /* Release SDMA Lock */
1382                                 spin_unlock_irqrestore(&ppd->sdma_lock, flags);
1383                         }
1384                 }
1385         }
1386
1387         spin_lock_irqsave(&qp->s_lock, flags);
1388         if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
1389                 goto ack_done;
1390
1391         /* Ignore invalid responses. */
1392         smp_read_barrier_depends(); /* see post_one_send */
1393         if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
1394                 goto ack_done;
1395
1396         /* Ignore duplicate responses. */
1397         diff = qib_cmp24(psn, qp->s_last_psn);
1398         if (unlikely(diff <= 0)) {
1399                 /* Update credits for "ghost" ACKs */
1400                 if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
1401                         aeth = be32_to_cpu(ohdr->u.aeth);
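                        /* An ACK type of 0 (bits 31..29) is a plain ACK. */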
1402                         if ((aeth >> 29) == 0)
1403                                 qib_get_credit(qp, aeth);
1404                 }
1405                 goto ack_done;
1406         }
1407
1408         /*
1409          * Skip everything other than the PSN we expect, if we are waiting
1410          * for a reply to a restarted RDMA read or atomic op.
1411          */
1412         if (qp->r_flags & RVT_R_RDMAR_SEQ) {
1413                 if (qib_cmp24(psn, qp->s_last_psn + 1) != 0)
1414                         goto ack_done;
1415                 qp->r_flags &= ~RVT_R_RDMAR_SEQ;
1416         }
1417
1418         if (unlikely(qp->s_acked == qp->s_tail))
1419                 goto ack_done;
1420         wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1421         status = IB_WC_SUCCESS;
1422
1423         switch (opcode) {
1424         case OP(ACKNOWLEDGE):
1425         case OP(ATOMIC_ACKNOWLEDGE):
1426         case OP(RDMA_READ_RESPONSE_FIRST):
1427                 aeth = be32_to_cpu(ohdr->u.aeth);
1428                 if (opcode == OP(ATOMIC_ACKNOWLEDGE))
1429                         val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
1430                 else
1431                         val = 0;
1432                 if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
1433                     opcode != OP(RDMA_READ_RESPONSE_FIRST))
1434                         goto ack_done;
1435                 hdrsize += 4;
1436                 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1437                 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1438                         goto ack_op_err;
1439                 /*
1440                  * If this is a response to a resent RDMA read, we
1441                  * have to be careful to copy the data to the right
1442                  * location.
1443                  */
1444                 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1445                                                   wqe, psn, pmtu);
1446                 goto read_middle;
1447
1448         case OP(RDMA_READ_RESPONSE_MIDDLE):
1449                 /* no AETH, no ACK */
1450                 if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1)))
1451                         goto ack_seq_err;
1452                 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1453                         goto ack_op_err;
1454 read_middle:
1455                 if (unlikely(tlen != (hdrsize + pmtu + 4)))
1456                         goto ack_len_err;
1457                 if (unlikely(pmtu >= qp->s_rdma_read_len))
1458                         goto ack_len_err;
1459
1460                 /*
1461                  * We got a response so update the timeout.
1462                  * 4.096 usec. * (1 << qp->timeout)
1463                  */
1464                 qp->s_flags |= RVT_S_TIMER;
1465                 mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
1466                 if (qp->s_flags & RVT_S_WAIT_ACK) {
1467                         qp->s_flags &= ~RVT_S_WAIT_ACK;
1468                         qib_schedule_send(qp);
1469                 }
1470
1471                 if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
1472                         qp->s_retry = qp->s_retry_cnt;
1473
1474                 /*
1475                  * Update the RDMA receive state but do the copy w/o
1476                  * holding the locks and blocking interrupts.
1477                  */
1478                 qp->s_rdma_read_len -= pmtu;
1479                 update_last_psn(qp, psn);
1480                 spin_unlock_irqrestore(&qp->s_lock, flags);
1481                 qib_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0);
1482                 goto bail;
1483
1484         case OP(RDMA_READ_RESPONSE_ONLY):
1485                 aeth = be32_to_cpu(ohdr->u.aeth);
1486                 if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
1487                         goto ack_done;
1488                 /* Get the number of bytes the message was padded by. */
1489                 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1490                 /*
1491                  * Check that the data size is >= 0 && <= pmtu.
1492                  * Remember to account for the AETH header (4) and
1493                  * ICRC (4).
1494                  */
1495                 if (unlikely(tlen < (hdrsize + pad + 8)))
1496                         goto ack_len_err;
1497                 /*
1498                  * If this is a response to a resent RDMA read, we
1499                  * have to be careful to copy the data to the right
1500                  * location.
1501                  */
1502                 wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1503                 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
1504                                                   wqe, psn, pmtu);
1505                 goto read_last;
1506
1507         case OP(RDMA_READ_RESPONSE_LAST):
1508                 /* This response also ACKs the RDMA read request. */
1509                 if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1)))
1510                         goto ack_seq_err;
1511                 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1512                         goto ack_op_err;
1513                 /* Get the number of bytes the message was padded by. */
1514                 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1515                 /*
1516                  * Check that the data size is >= 1 && <= pmtu.
1517                  * Remember to account for the AETH header (4) and
1518                  * ICRC (4).
1519                  */
1520                 if (unlikely(tlen <= (hdrsize + pad + 8)))
1521                         goto ack_len_err;
1522 read_last:
1523                 tlen -= hdrsize + pad + 8;
1524                 if (unlikely(tlen != qp->s_rdma_read_len))
1525                         goto ack_len_err;
1526                 aeth = be32_to_cpu(ohdr->u.aeth);
1527                 qib_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0);
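                /* The read data should exactly fill the remaining SGE. */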
1528                 WARN_ON(qp->s_rdma_read_sge.num_sge);
1529                 (void) do_rc_ack(qp, aeth, psn,
1530                                  OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
1531                 goto ack_done;
1532         }
1533
1534 ack_op_err:
1535         status = IB_WC_LOC_QP_OP_ERR;
1536         goto ack_err;
1537
1538 ack_seq_err:
1539         rdma_seq_err(qp, ibp, psn, rcd);
1540         goto ack_done;
1541
1542 ack_len_err:
1543         status = IB_WC_LOC_LEN_ERR;
1544 ack_err:
1545         if (qp->s_last == qp->s_acked) {
1546                 qib_send_complete(qp, wqe, status);
1547                 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1548         }
1549 ack_done:
1550         spin_unlock_irqrestore(&qp->s_lock, flags);
1551 bail:
1552         return;
1553 }
1554
1555 /**
1556  * qib_rc_rcv_error - process an incoming duplicate or error RC packet
1557  * @ohdr: the other headers for this packet
1558  * @data: the packet data
1559  * @qp: the QP for this packet
1560  * @opcode: the opcode for this packet
1561  * @psn: the packet sequence number for this packet
1562  * @diff: the difference between the PSN and the expected PSN
 * @rcd: the context pointer
1563  *
1564  * This is called from qib_rc_rcv() to process an unexpected
1565  * incoming RC packet for the given QP.
1566  * Called at interrupt level.
1567  * Return 1 if no more processing is needed; otherwise return 0 to
1568  * schedule a response to be sent.
1569  */
1570 static int qib_rc_rcv_error(struct ib_other_headers *ohdr,
1571                             void *data,
1572                             struct rvt_qp *qp,
1573                             u32 opcode,
1574                             u32 psn,
1575                             int diff,
1576                             struct qib_ctxtdata *rcd)
1577 {
1578         struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
1579         struct rvt_ack_entry *e;
1580         unsigned long flags;
1581         u8 i, prev;
1582         int old_req;
1583
1584         if (diff > 0) {
1585                 /*
1586                  * Packet sequence error.
1587                  * A NAK will ACK earlier sends and RDMA writes.
1588                  * Don't queue the NAK if we already sent one.
1589                  */
1590                 if (!qp->r_nak_state) {
1591                         ibp->rvp.n_rc_seqnak++;
1592                         qp->r_nak_state = IB_NAK_PSN_ERROR;
1593                         /* Use the expected PSN. */
1594                         qp->r_ack_psn = qp->r_psn;
1595                         /*
1596                          * Wait to send the sequence NAK until all packets
1597                          * in the receive queue have been processed.
1598                          * Otherwise, we end up propagating congestion.
1599                          */
1600                         if (list_empty(&qp->rspwait)) {
1601                                 qp->r_flags |= RVT_R_RSP_NAK;
1602                                 rvt_get_qp(qp);
1603                                 list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
1604                         }
1605                 }
1606                 goto done;
1607         }
1608
1609         /*
1610          * Handle a duplicate request.  Don't re-execute SEND, RDMA
1611          * write or atomic op.  Don't NAK errors, just silently drop
1612          * the duplicate request.  Note that r_sge, r_len, and
1613          * r_rcv_len may be in use so don't modify them.
1614          *
1615          * We are supposed to ACK the earliest duplicate PSN but we
1616          * can coalesce an outstanding duplicate ACK.  We have to
1617          * send the earliest so that RDMA reads can be restarted at
1618          * the requester's expected PSN.
1619          *
1620          * First, find where this duplicate PSN falls within the
1621          * ACKs previously sent.
1622          * old_req is true if there is an older response that is scheduled
1623          * to be sent before sending this one.
1624          */
1625         e = NULL;
1626         old_req = 1;
1627         ibp->rvp.n_rc_dupreq++;
1628
1629         spin_lock_irqsave(&qp->s_lock, flags);
1630
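        /*
         * Walk backward from the newest ack queue entry toward
         * s_tail_ack_queue, wrapping around the ring of
         * QIB_MAX_RDMA_ATOMIC + 1 entries, to find the entry whose PSN
         * range contains the duplicate PSN.
         */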
1631         for (i = qp->r_head_ack_queue; ; i = prev) {
1632                 if (i == qp->s_tail_ack_queue)
1633                         old_req = 0;
1634                 if (i)
1635                         prev = i - 1;
1636                 else
1637                         prev = QIB_MAX_RDMA_ATOMIC;
1638                 if (prev == qp->r_head_ack_queue) {
1639                         e = NULL;
1640                         break;
1641                 }
1642                 e = &qp->s_ack_queue[prev];
1643                 if (!e->opcode) {
1644                         e = NULL;
1645                         break;
1646                 }
1647                 if (qib_cmp24(psn, e->psn) >= 0) {
1648                         if (prev == qp->s_tail_ack_queue &&
1649                             qib_cmp24(psn, e->lpsn) <= 0)
1650                                 old_req = 0;
1651                         break;
1652                 }
1653         }
1654         switch (opcode) {
1655         case OP(RDMA_READ_REQUEST): {
1656                 struct ib_reth *reth;
1657                 u32 offset;
1658                 u32 len;
1659
1660                 /*
1661                  * If we didn't find the RDMA read request in the ack queue,
1662                  * we can ignore this request.
1663                  */
1664                 if (!e || e->opcode != OP(RDMA_READ_REQUEST))
1665                         goto unlock_done;
1666                 /* RETH comes after BTH */
1667                 reth = &ohdr->u.rc.reth;
1668                 /*
1669                  * Address range must be a subset of the original
1670                  * request and start on pmtu boundaries.
1671                  * We reuse the old ack_queue slot since the requester
1672                  * should not back up and request an earlier PSN for the
1673                  * same request.
1674                  */
1675                 offset = ((psn - e->psn) & QIB_PSN_MASK) *
1676                         qp->pmtu;
1677                 len = be32_to_cpu(reth->length);
1678                 if (unlikely(offset + len != e->rdma_sge.sge_length))
1679                         goto unlock_done;
1680                 if (e->rdma_sge.mr) {
1681                         rvt_put_mr(e->rdma_sge.mr);
1682                         e->rdma_sge.mr = NULL;
1683                 }
1684                 if (len != 0) {
1685                         u32 rkey = be32_to_cpu(reth->rkey);
1686                         u64 vaddr = be64_to_cpu(reth->vaddr);
1687                         int ok;
1688
1689                         ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
1690                                          IB_ACCESS_REMOTE_READ);
1691                         if (unlikely(!ok))
1692                                 goto unlock_done;
1693                 } else {
1694                         e->rdma_sge.vaddr = NULL;
1695                         e->rdma_sge.length = 0;
1696                         e->rdma_sge.sge_length = 0;
1697                 }
1698                 e->psn = psn;
1699                 if (old_req)
1700                         goto unlock_done;
1701                 qp->s_tail_ack_queue = prev;
1702                 break;
1703         }
1704
1705         case OP(COMPARE_SWAP):
1706         case OP(FETCH_ADD): {
1707                 /*
1708                  * If we didn't find the atomic request in the ack queue
1709                  * or the send tasklet is already backed up to send an
1710                  * earlier entry, we can ignore this request.
1711                  */
1712                 if (!e || e->opcode != (u8) opcode || old_req)
1713                         goto unlock_done;
1714                 qp->s_tail_ack_queue = prev;
1715                 break;
1716         }
1717
1718         default:
1719                 /*
1720                  * Ignore this operation if it doesn't request an ACK
1721                  * or an earlier RDMA read or atomic is going to be resent.
1722                  */
1723                 if (!(psn & IB_BTH_REQ_ACK) || old_req)
1724                         goto unlock_done;
1725                 /*
1726                  * Resend the most recent ACK if this request is
1727                  * after all the previous RDMA reads and atomics.
1728                  */
1729                 if (i == qp->r_head_ack_queue) {
1730                         spin_unlock_irqrestore(&qp->s_lock, flags);
1731                         qp->r_nak_state = 0;
1732                         qp->r_ack_psn = qp->r_psn - 1;
1733                         goto send_ack;
1734                 }
1735                 /*
1736                  * Try to send a simple ACK to work around a Mellanox bug
1737                  * which doesn't accept an RDMA read response or atomic
1738                  * response as an ACK for earlier SENDs or RDMA writes.
1739                  */
1740                 if (!(qp->s_flags & RVT_S_RESP_PENDING)) {
1741                         spin_unlock_irqrestore(&qp->s_lock, flags);
1742                         qp->r_nak_state = 0;
1743                         qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
1744                         goto send_ack;
1745                 }
1746                 /*
1747                  * Resend the RDMA read or atomic op which
1748                  * ACKs this duplicate request.
1749                  */
1750                 qp->s_tail_ack_queue = i;
1751                 break;
1752         }
1753         qp->s_ack_state = OP(ACKNOWLEDGE);
1754         qp->s_flags |= RVT_S_RESP_PENDING;
1755         qp->r_nak_state = 0;
1756         qib_schedule_send(qp);
1757
1758 unlock_done:
1759         spin_unlock_irqrestore(&qp->s_lock, flags);
1760 done:
1761         return 1;
1762
1763 send_ack:
1764         return 0;
1765 }
1766
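/*
 * Move the QP into the error state on behalf of the caller and raise
 * the last-WQE-reached event if rvt_error_qp() indicates the final
 * WQE was flushed.
 */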
1767 void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
1768 {
1769         unsigned long flags;
1770         int lastwqe;
1771
1772         spin_lock_irqsave(&qp->s_lock, flags);
1773         lastwqe = rvt_error_qp(qp, err);
1774         spin_unlock_irqrestore(&qp->s_lock, flags);
1775
1776         if (lastwqe) {
1777                 struct ib_event ev;
1778
1779                 ev.device = qp->ibqp.device;
1780                 ev.element.qp = &qp->ibqp;
1781                 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1782                 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1783         }
1784 }
1785
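/*
 * Advance s_tail_ack_queue past entry n, wrapping at the end of the
 * s_ack_queue ring (QIB_MAX_RDMA_ATOMIC + 1 entries), and reset the
 * responder to the idle ACK state.
 */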
1786 static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n)
1787 {
1788         unsigned next;
1789
1790         next = n + 1;
1791         if (next > QIB_MAX_RDMA_ATOMIC)
1792                 next = 0;
1793         qp->s_tail_ack_queue = next;
1794         qp->s_ack_state = OP(ACKNOWLEDGE);
1795 }
1796
1797 /**
1798  * qib_rc_rcv - process an incoming RC packet
1799  * @rcd: the context pointer
1800  * @hdr: the header of this packet
1801  * @has_grh: true if the header has a GRH
1802  * @data: the packet data
1803  * @tlen: the packet length
1804  * @qp: the QP for this packet
1805  *
1806  * This is called from qib_qp_rcv() to process an incoming RC packet
1807  * for the given QP.
1808  * Called at interrupt level.
1809  */
1810 void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
1811                 int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
1812 {
1813         struct qib_ibport *ibp = &rcd->ppd->ibport_data;
1814         struct ib_other_headers *ohdr;
1815         u32 opcode;
1816         u32 hdrsize;
1817         u32 psn;
1818         u32 pad;
1819         struct ib_wc wc;
1820         u32 pmtu = qp->pmtu;
1821         int diff;
1822         struct ib_reth *reth;
1823         unsigned long flags;
1824         int ret;
1825
1826         /* Check for GRH */
1827         if (!has_grh) {
1828                 ohdr = &hdr->u.oth;
1829                 hdrsize = 8 + 12;       /* LRH + BTH */
1830         } else {
1831                 ohdr = &hdr->u.l.oth;
1832                 hdrsize = 8 + 40 + 12;  /* LRH + GRH + BTH */
1833         }
1834
1835         opcode = be32_to_cpu(ohdr->bth[0]);
1836         if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
1837                 return;
1838
1839         psn = be32_to_cpu(ohdr->bth[2]);
1840         opcode >>= 24;
1841
1842         /*
1843          * Process responses (ACKs) before anything else.  Note that the
1844          * packet sequence number will be for something in the send work
1845          * queue rather than the expected receive packet sequence number.
1846          * In other words, this QP is the requester.
1847          */
1848         if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
1849             opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
1850                 qib_rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn,
1851                                 hdrsize, pmtu, rcd);
1852                 return;
1853         }
1854
1855         /* Compute 24 bits worth of difference. */
1856         diff = qib_cmp24(psn, qp->r_psn);
1857         if (unlikely(diff)) {
1858                 if (qib_rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
1859                         return;
1860                 goto send_ack;
1861         }
1862
1863         /* Check for opcode sequence errors. */
1864         switch (qp->r_state) {
1865         case OP(SEND_FIRST):
1866         case OP(SEND_MIDDLE):
1867                 if (opcode == OP(SEND_MIDDLE) ||
1868                     opcode == OP(SEND_LAST) ||
1869                     opcode == OP(SEND_LAST_WITH_IMMEDIATE))
1870                         break;
1871                 goto nack_inv;
1872
1873         case OP(RDMA_WRITE_FIRST):
1874         case OP(RDMA_WRITE_MIDDLE):
1875                 if (opcode == OP(RDMA_WRITE_MIDDLE) ||
1876                     opcode == OP(RDMA_WRITE_LAST) ||
1877                     opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1878                         break;
1879                 goto nack_inv;
1880
1881         default:
1882                 if (opcode == OP(SEND_MIDDLE) ||
1883                     opcode == OP(SEND_LAST) ||
1884                     opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
1885                     opcode == OP(RDMA_WRITE_MIDDLE) ||
1886                     opcode == OP(RDMA_WRITE_LAST) ||
1887                     opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1888                         goto nack_inv;
1889                 /*
1890                  * Note that it is up to the requester to not send a new
1891                  * RDMA read or atomic operation before receiving an ACK
1892                  * for the previous operation.
1893                  */
1894                 break;
1895         }
1896
1897         if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) {
1898                 qp->r_flags |= RVT_R_COMM_EST;
1899                 if (qp->ibqp.event_handler) {
1900                         struct ib_event ev;
1901
1902                         ev.device = qp->ibqp.device;
1903                         ev.element.qp = &qp->ibqp;
1904                         ev.event = IB_EVENT_COMM_EST;
1905                         qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1906                 }
1907         }
1908
1909         /* OK, process the packet. */
1910         switch (opcode) {
1911         case OP(SEND_FIRST):
1912                 ret = qib_get_rwqe(qp, 0);
1913                 if (ret < 0)
1914                         goto nack_op_err;
1915                 if (!ret)
1916                         goto rnr_nak;
1917                 qp->r_rcv_len = 0;
1918                 /* FALLTHROUGH */
1919         case OP(SEND_MIDDLE):
1920         case OP(RDMA_WRITE_MIDDLE):
1921 send_middle:
1922                 /* Check length: must be a full PMTU and fit the posted RWQE. */
1923                 if (unlikely(tlen != (hdrsize + pmtu + 4)))
1924                         goto nack_inv;
1925                 qp->r_rcv_len += pmtu;
1926                 if (unlikely(qp->r_rcv_len > qp->r_len))
1927                         goto nack_inv;
1928                 qib_copy_sge(&qp->r_sge, data, pmtu, 1);
1929                 break;
1930
1931         case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
1932                 /* consume RWQE */
1933                 ret = qib_get_rwqe(qp, 1);
1934                 if (ret < 0)
1935                         goto nack_op_err;
1936                 if (!ret)
1937                         goto rnr_nak;
1938                 goto send_last_imm;
1939
1940         case OP(SEND_ONLY):
1941         case OP(SEND_ONLY_WITH_IMMEDIATE):
1942                 ret = qib_get_rwqe(qp, 0);
1943                 if (ret < 0)
1944                         goto nack_op_err;
1945                 if (!ret)
1946                         goto rnr_nak;
1947                 qp->r_rcv_len = 0;
1948                 if (opcode == OP(SEND_ONLY))
1949                         goto no_immediate_data;
1950                 /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */
1951         case OP(SEND_LAST_WITH_IMMEDIATE):
1952 send_last_imm:
1953                 wc.ex.imm_data = ohdr->u.imm_data;
1954                 hdrsize += 4;
1955                 wc.wc_flags = IB_WC_WITH_IMM;
1956                 goto send_last;
1957         case OP(SEND_LAST):
1958         case OP(RDMA_WRITE_LAST):
1959 no_immediate_data:
1960                 wc.wc_flags = 0;
1961                 wc.ex.imm_data = 0;
1962 send_last:
1963                 /* Get the number of bytes the message was padded by. */
1964                 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
1965                 /* Check for invalid length. */
1966                 /* XXX LAST len should be >= 1 */
1967                 if (unlikely(tlen < (hdrsize + pad + 4)))
1968                         goto nack_inv;
1969                 /* Don't count the CRC. */
1970                 tlen -= (hdrsize + pad + 4);
1971                 wc.byte_len = tlen + qp->r_rcv_len;
1972                 if (unlikely(wc.byte_len > qp->r_len))
1973                         goto nack_inv;
1974                 qib_copy_sge(&qp->r_sge, data, tlen, 1);
1975                 rvt_put_ss(&qp->r_sge);
1976                 qp->r_msn++;
1977                 if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
1978                         break;
1979                 wc.wr_id = qp->r_wr_id;
1980                 wc.status = IB_WC_SUCCESS;
1981                 if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
1982                     opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
1983                         wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
1984                 else
1985                         wc.opcode = IB_WC_RECV;
1986                 wc.qp = &qp->ibqp;
1987                 wc.src_qp = qp->remote_qpn;
1988                 wc.slid = qp->remote_ah_attr.dlid;
1989                 wc.sl = qp->remote_ah_attr.sl;
1990                 /* zero fields that are N/A */
1991                 wc.vendor_err = 0;
1992                 wc.pkey_index = 0;
1993                 wc.dlid_path_bits = 0;
1994                 wc.port_num = 0;
1995                 /* Signal completion event if the solicited bit is set. */
1996                 rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
1997                              (ohdr->bth[0] &
1998                               cpu_to_be32(IB_BTH_SOLICITED)) != 0);
1999                 break;
2000
2001         case OP(RDMA_WRITE_FIRST):
2002         case OP(RDMA_WRITE_ONLY):
2003         case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
2004                 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
2005                         goto nack_inv;
2006                 /* RETH follows the BTH; the RWQE (if needed) is consumed below */
2007                 reth = &ohdr->u.rc.reth;
2008                 hdrsize += sizeof(*reth);
2009                 qp->r_len = be32_to_cpu(reth->length);
2010                 qp->r_rcv_len = 0;
2011                 qp->r_sge.sg_list = NULL;
2012                 if (qp->r_len != 0) {
2013                         u32 rkey = be32_to_cpu(reth->rkey);
2014                         u64 vaddr = be64_to_cpu(reth->vaddr);
2015                         int ok;
2016
2017                         /* Check rkey & NAK */
2018                         ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
2019                                          rkey, IB_ACCESS_REMOTE_WRITE);
2020                         if (unlikely(!ok))
2021                                 goto nack_acc;
2022                         qp->r_sge.num_sge = 1;
2023                 } else {
2024                         qp->r_sge.num_sge = 0;
2025                         qp->r_sge.sge.mr = NULL;
2026                         qp->r_sge.sge.vaddr = NULL;
2027                         qp->r_sge.sge.length = 0;
2028                         qp->r_sge.sge.sge_length = 0;
2029                 }
2030                 if (opcode == OP(RDMA_WRITE_FIRST))
2031                         goto send_middle;
2032                 else if (opcode == OP(RDMA_WRITE_ONLY))
2033                         goto no_immediate_data;
2034                 ret = qib_get_rwqe(qp, 1);
2035                 if (ret < 0)
2036                         goto nack_op_err;
2037                 if (!ret)
2038                         goto rnr_nak;
2039                 wc.ex.imm_data = ohdr->u.rc.imm_data;
2040                 hdrsize += 4;
2041                 wc.wc_flags = IB_WC_WITH_IMM;
2042                 goto send_last;
2043
2044         case OP(RDMA_READ_REQUEST): {
2045                 struct rvt_ack_entry *e;
2046                 u32 len;
2047                 u8 next;
2048
2049                 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
2050                         goto nack_inv;
2051                 next = qp->r_head_ack_queue + 1;
2052                 /* s_ack_queue is size QIB_MAX_RDMA_ATOMIC+1 so use > not >= */
2053                 if (next > QIB_MAX_RDMA_ATOMIC)
2054                         next = 0;
2055                 spin_lock_irqsave(&qp->s_lock, flags);
2056                 if (unlikely(next == qp->s_tail_ack_queue)) {
2057                         if (!qp->s_ack_queue[next].sent)
2058                                 goto nack_inv_unlck;
2059                         qib_update_ack_queue(qp, next);
2060                 }
2061                 e = &qp->s_ack_queue[qp->r_head_ack_queue];
2062                 if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
2063                         rvt_put_mr(e->rdma_sge.mr);
2064                         e->rdma_sge.mr = NULL;
2065                 }
2066                 reth = &ohdr->u.rc.reth;
2067                 len = be32_to_cpu(reth->length);
2068                 if (len) {
2069                         u32 rkey = be32_to_cpu(reth->rkey);
2070                         u64 vaddr = be64_to_cpu(reth->vaddr);
2071                         int ok;
2072
2073                         /* Check rkey & NAK */
2074                         ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
2075                                          rkey, IB_ACCESS_REMOTE_READ);
2076                         if (unlikely(!ok))
2077                                 goto nack_acc_unlck;
2078                         /*
2079                          * Update the next expected PSN.  We add 1 later
2080                          * below, so only add the remainder here.
2081                          */
2082                         qp->r_psn += rvt_div_mtu(qp, len - 1);
2083                 } else {
2084                         e->rdma_sge.mr = NULL;
2085                         e->rdma_sge.vaddr = NULL;
2086                         e->rdma_sge.length = 0;
2087                         e->rdma_sge.sge_length = 0;
2088                 }
2089                 e->opcode = opcode;
2090                 e->sent = 0;
2091                 e->psn = psn;
2092                 e->lpsn = qp->r_psn;
2093                 /*
2094                  * We need to increment the MSN here instead of when we
2095                  * finish sending the result since a duplicate request would
2096                  * increment it more than once.
2097                  */
2098                 qp->r_msn++;
2099                 qp->r_psn++;
2100                 qp->r_state = opcode;
2101                 qp->r_nak_state = 0;
2102                 qp->r_head_ack_queue = next;
2103
2104                 /* Schedule the send tasklet. */
2105                 qp->s_flags |= RVT_S_RESP_PENDING;
2106                 qib_schedule_send(qp);
2107
2108                 goto sunlock;
2109         }
2110
2111         case OP(COMPARE_SWAP):
2112         case OP(FETCH_ADD): {
2113                 struct ib_atomic_eth *ateth;
2114                 struct rvt_ack_entry *e;
2115                 u64 vaddr;
2116                 atomic64_t *maddr;
2117                 u64 sdata;
2118                 u32 rkey;
2119                 u8 next;
2120
2121                 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
2122                         goto nack_inv;
2123                 next = qp->r_head_ack_queue + 1;
2124                 if (next > QIB_MAX_RDMA_ATOMIC)
2125                         next = 0;
2126                 spin_lock_irqsave(&qp->s_lock, flags);
2127                 if (unlikely(next == qp->s_tail_ack_queue)) {
2128                         if (!qp->s_ack_queue[next].sent)
2129                                 goto nack_inv_unlck;
2130                         qib_update_ack_queue(qp, next);
2131                 }
2132                 e = &qp->s_ack_queue[qp->r_head_ack_queue];
2133                 if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
2134                         rvt_put_mr(e->rdma_sge.mr);
2135                         e->rdma_sge.mr = NULL;
2136                 }
2137                 ateth = &ohdr->u.atomic_eth;
2138                 vaddr = get_ib_ateth_vaddr(ateth);
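                /* The target of an atomic op must be 8-byte aligned. */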
2139                 if (unlikely(vaddr & (sizeof(u64) - 1)))
2140                         goto nack_inv_unlck;
2141                 rkey = be32_to_cpu(ateth->rkey);
2142                 /* Check rkey & NAK */
2143                 if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
2144                                           vaddr, rkey,
2145                                           IB_ACCESS_REMOTE_ATOMIC)))
2146                         goto nack_acc_unlck;
2147                 /* Perform atomic OP and save result. */
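                /*
                 * atomic64_add_return() returns the post-add value, so
                 * subtract sdata to report the original contents;
                 * cmpxchg() already returns the prior value.
                 */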
2148                 maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
2149                 sdata = get_ib_ateth_swap(ateth);
2150                 e->atomic_data = (opcode == OP(FETCH_ADD)) ?
2151                         (u64) atomic64_add_return(sdata, maddr) - sdata :
2152                         (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
2153                                       get_ib_ateth_compare(ateth),
2154                                       sdata);
2155                 rvt_put_mr(qp->r_sge.sge.mr);
2156                 qp->r_sge.num_sge = 0;
2157                 e->opcode = opcode;
2158                 e->sent = 0;
2159                 e->psn = psn;
2160                 e->lpsn = psn;
2161                 qp->r_msn++;
2162                 qp->r_psn++;
2163                 qp->r_state = opcode;
2164                 qp->r_nak_state = 0;
2165                 qp->r_head_ack_queue = next;
2166
2167                 /* Schedule the send tasklet. */
2168                 qp->s_flags |= RVT_S_RESP_PENDING;
2169                 qib_schedule_send(qp);
2170
2171                 goto sunlock;
2172         }
2173
2174         default:
2175                 /* NAK unknown opcodes. */
2176                 goto nack_inv;
2177         }
2178         qp->r_psn++;
2179         qp->r_state = opcode;
2180         qp->r_ack_psn = psn;
2181         qp->r_nak_state = 0;
2182         /* Send an ACK if requested or required. */
2183         if (psn & IB_BTH_REQ_ACK)
2184                 goto send_ack;
2185         return;
2186
2187 rnr_nak:
2188         qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
2189         qp->r_ack_psn = qp->r_psn;
2190         /* Queue RNR NAK for later */
2191         if (list_empty(&qp->rspwait)) {
2192                 qp->r_flags |= RVT_R_RSP_NAK;
2193                 rvt_get_qp(qp);
2194                 list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2195         }
2196         return;
2197
2198 nack_op_err:
2199         qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2200         qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
2201         qp->r_ack_psn = qp->r_psn;
2202         /* Queue NAK for later */
2203         if (list_empty(&qp->rspwait)) {
2204                 qp->r_flags |= RVT_R_RSP_NAK;
2205                 rvt_get_qp(qp);
2206                 list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2207         }
2208         return;
2209
2210 nack_inv_unlck:
2211         spin_unlock_irqrestore(&qp->s_lock, flags);
2212 nack_inv:
2213         qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2214         qp->r_nak_state = IB_NAK_INVALID_REQUEST;
2215         qp->r_ack_psn = qp->r_psn;
2216         /* Queue NAK for later */
2217         if (list_empty(&qp->rspwait)) {
2218                 qp->r_flags |= RVT_R_RSP_NAK;
2219                 rvt_get_qp(qp);
2220                 list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2221         }
2222         return;
2223
2224 nack_acc_unlck:
2225         spin_unlock_irqrestore(&qp->s_lock, flags);
2226 nack_acc:
2227         qib_rc_error(qp, IB_WC_LOC_PROT_ERR);
2228         qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
2229         qp->r_ack_psn = qp->r_psn;
2230 send_ack:
2231         qib_send_rc_ack(qp);
2232         return;
2233
2234 sunlock:
2235         spin_unlock_irqrestore(&qp->s_lock, flags);
2236 }