Git Repo - qemu.git/blob - slirp/tcp_input.c
slirp: Reindent after refactoring
[qemu.git] / slirp / tcp_input.c
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *      @(#)tcp_input.c 8.5 (Berkeley) 4/10/94
30  * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp
31  */
32
33 /*
34  * Changes and additions relating to SLiRP
35  * Copyright (c) 1995 Danny Gasparovski.
36  *
37  * Please read the file COPYRIGHT for the
38  * terms and conditions of the copyright.
39  */
40
41 #include "qemu/osdep.h"
42 #include <slirp.h>
43 #include "ip_icmp.h"
44
/*
 * NOTE(review): presumably the duplicate-ACK count that triggers fast
 * retransmit; its use is outside this view -- confirm.
 */
#define TCPREXMTTHRESH 3

/*
 * 24 * 24 * 60 * 60 (the number of seconds in 24 days) scaled by PR_SLOWHZ.
 * NOTE(review): assumes PR_SLOWHZ is slow-timer ticks per second -- confirm.
 */
#define TCP_PAWS_IDLE   (24 * 24 * 60 * 60 * PR_SLOWHZ)

/*
 * Modulo comparisons of timestamps: the difference is converted to int
 * so that its sign gives the ordering.
 */
#define TSTMP_LT(a,b)   ((int)((a)-(b)) < 0)
#define TSTMP_GEQ(a,b)  ((int)((a)-(b)) >= 0)
52
/*
 * TCP_REASS(tp, ti, m, so, flags)
 *
 * Inline front end to tcp_reass().  The common case -- segment ti is the
 * next one expected (ti_seq == rcv_nxt), the reassembly queue of tp is
 * empty and the connection is ESTABLISHED -- is handled in place:
 * rcv_nxt is advanced by ti_len, flags receives the TH_FIN bit of the
 * segment and the mbuf m is appended to the socket so (through tcp_emu()
 * first when so->so_emu is set), avoiding linkage into and removal from
 * the queue.
 *
 * Any other segment is passed to tcp_reass(), whose return value is
 * stored in flags, and TF_ACKNOW is set so the ack goes out immediately
 * (so fast retransmit can work).
 *
 * In-order segments set TF_DELACK; with TCP_ACK_HACK defined, an in-order
 * segment carrying TH_PUSH sets TF_ACKNOW instead.
 *
 * The macro expands to a brace-enclosed block, not an expression, and
 * evaluates its arguments more than once: pass side-effect-free
 * arguments only.  flags must be an lvalue.
 */
#ifdef TCP_ACK_HACK
#define TCP_REASS(tp, ti, m, so, flags) { \
        if ((ti)->ti_seq == (tp)->rcv_nxt && \
            tcpfrag_list_empty(tp) && \
            (tp)->t_state == TCPS_ESTABLISHED) { \
                if ((ti)->ti_flags & TH_PUSH) \
                        (tp)->t_flags |= TF_ACKNOW; \
                else \
                        (tp)->t_flags |= TF_DELACK; \
                (tp)->rcv_nxt += (ti)->ti_len; \
                (flags) = (ti)->ti_flags & TH_FIN; \
                if ((so)->so_emu) { \
                        if (tcp_emu((so), (m))) sbappend((so), (m)); \
                } else \
                        sbappend((so), (m)); \
        } else { \
                (flags) = tcp_reass((tp), (ti), (m)); \
                (tp)->t_flags |= TF_ACKNOW; \
        } \
}
#else
#define TCP_REASS(tp, ti, m, so, flags) { \
        if ((ti)->ti_seq == (tp)->rcv_nxt && \
            tcpfrag_list_empty(tp) && \
            (tp)->t_state == TCPS_ESTABLISHED) { \
                (tp)->t_flags |= TF_DELACK; \
                (tp)->rcv_nxt += (ti)->ti_len; \
                (flags) = (ti)->ti_flags & TH_FIN; \
                if ((so)->so_emu) { \
                        if (tcp_emu((so), (m))) sbappend((so), (m)); \
                } else \
                        sbappend((so), (m)); \
        } else { \
                (flags) = tcp_reass((tp), (ti), (m)); \
                (tp)->t_flags |= TF_ACKNOW; \
        } \
}
#endif
101 static void tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt,
102                           struct tcpiphdr *ti);
103 static void tcp_xmit_timer(register struct tcpcb *tp, int rtt);
104
105 static int
106 tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti,
107           struct mbuf *m)
108 {
109         register struct tcpiphdr *q;
110         struct socket *so = tp->t_socket;
111         int flags;
112
113         /*
114          * Call with ti==NULL after become established to
115          * force pre-ESTABLISHED data up to user socket.
116          */
117         if (ti == NULL)
118                 goto present;
119
120         /*
121          * Find a segment which begins after this one does.
122          */
123         for (q = tcpfrag_list_first(tp); !tcpfrag_list_end(q, tp);
124             q = tcpiphdr_next(q))
125                 if (SEQ_GT(q->ti_seq, ti->ti_seq))
126                         break;
127
128         /*
129          * If there is a preceding segment, it may provide some of
130          * our data already.  If so, drop the data from the incoming
131          * segment.  If it provides all of our data, drop us.
132          */
133         if (!tcpfrag_list_end(tcpiphdr_prev(q), tp)) {
134                 register int i;
135                 q = tcpiphdr_prev(q);
136                 /* conversion to int (in i) handles seq wraparound */
137                 i = q->ti_seq + q->ti_len - ti->ti_seq;
138                 if (i > 0) {
139                         if (i >= ti->ti_len) {
140                                 m_free(m);
141                                 /*
142                                  * Try to present any queued data
143                                  * at the left window edge to the user.
144                                  * This is needed after the 3-WHS
145                                  * completes.
146                                  */
147                                 goto present;   /* ??? */
148                         }
149                         m_adj(m, i);
150                         ti->ti_len -= i;
151                         ti->ti_seq += i;
152                 }
153                 q = tcpiphdr_next(q);
154         }
155         ti->ti_mbuf = m;
156
157         /*
158          * While we overlap succeeding segments trim them or,
159          * if they are completely covered, dequeue them.
160          */
161         while (!tcpfrag_list_end(q, tp)) {
162                 register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
163                 if (i <= 0)
164                         break;
165                 if (i < q->ti_len) {
166                         q->ti_seq += i;
167                         q->ti_len -= i;
168                         m_adj(q->ti_mbuf, i);
169                         break;
170                 }
171                 q = tcpiphdr_next(q);
172                 m = tcpiphdr_prev(q)->ti_mbuf;
173                 remque(tcpiphdr2qlink(tcpiphdr_prev(q)));
174                 m_free(m);
175         }
176
177         /*
178          * Stick new segment in its place.
179          */
180         insque(tcpiphdr2qlink(ti), tcpiphdr2qlink(tcpiphdr_prev(q)));
181
182 present:
183         /*
184          * Present data to user, advancing rcv_nxt through
185          * completed sequence space.
186          */
187         if (!TCPS_HAVEESTABLISHED(tp->t_state))
188                 return (0);
189         ti = tcpfrag_list_first(tp);
190         if (tcpfrag_list_end(ti, tp) || ti->ti_seq != tp->rcv_nxt)
191                 return (0);
192         if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)
193                 return (0);
194         do {
195                 tp->rcv_nxt += ti->ti_len;
196                 flags = ti->ti_flags & TH_FIN;
197                 remque(tcpiphdr2qlink(ti));
198                 m = ti->ti_mbuf;
199                 ti = tcpiphdr_next(ti);
200                 if (so->so_state & SS_FCANTSENDMORE)
201                         m_free(m);
202                 else {
203                         if (so->so_emu) {
204                                 if (tcp_emu(so,m)) sbappend(so, m);
205                         } else
206                                 sbappend(so, m);
207                 }
208         } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);
209         return (flags);
210 }
211
212 /*
213  * TCP input routine, follows pages 65-76 of the
214  * protocol specification dated September, 1981 very closely.
215  */
216 void
217 tcp_input(struct mbuf *m, int iphlen, struct socket *inso, unsigned short af)
218 {
219         struct ip save_ip, *ip;
220         register struct tcpiphdr *ti;
221         caddr_t optp = NULL;
222         int optlen = 0;
223         int len, tlen, off;
224         register struct tcpcb *tp = NULL;
225         register int tiflags;
226         struct socket *so = NULL;
227         int todrop, acked, ourfinisacked, needoutput = 0;
228         int iss = 0;
229         u_long tiwin;
230         int ret;
231         struct sockaddr_storage lhost, fhost;
232         struct sockaddr_in *lhost4, *fhost4;
233     struct ex_list *ex_ptr;
234     Slirp *slirp;
235
236         DEBUG_CALL("tcp_input");
237         DEBUG_ARGS((dfd, " m = %p  iphlen = %2d  inso = %p\n",
238                     m, iphlen, inso));
239
240         /*
241          * If called with m == 0, then we're continuing the connect
242          */
243         if (m == NULL) {
244                 so = inso;
245                 slirp = so->slirp;
246
247                 /* Re-set a few variables */
248                 tp = sototcpcb(so);
249                 m = so->so_m;
250                 so->so_m = NULL;
251                 ti = so->so_ti;
252                 tiwin = ti->ti_win;
253                 tiflags = ti->ti_flags;
254
255                 goto cont_conn;
256         }
257         slirp = m->slirp;
258
259         switch (af) {
260         case AF_INET:
261             if (iphlen > sizeof(struct ip)) {
262                 ip_stripoptions(m, (struct mbuf *)0);
263                 iphlen = sizeof(struct ip);
264             }
265             /* XXX Check if too short */
266
267
268             /*
269              * Save a copy of the IP header in case we want restore it
270              * for sending an ICMP error message in response.
271              */
272             ip = mtod(m, struct ip *);
273             save_ip = *ip;
274             save_ip.ip_len += iphlen;
275
276             /*
277              * Get IP and TCP header together in first mbuf.
278              * Note: IP leaves IP header in first mbuf.
279              */
280             m->m_data -= sizeof(struct tcpiphdr) - sizeof(struct ip)
281                                                  - sizeof(struct tcphdr);
282             m->m_len += sizeof(struct tcpiphdr) - sizeof(struct ip)
283                                                 - sizeof(struct tcphdr);
284             ti = mtod(m, struct tcpiphdr *);
285
286             /*
287              * Checksum extended TCP header and data.
288              */
289             tlen = ip->ip_len;
290             tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL;
291             memset(&ti->ih_mbuf, 0 , sizeof(struct mbuf_ptr));
292             memset(&ti->ti, 0, sizeof(ti->ti));
293             ti->ti_x0 = 0;
294             ti->ti_src = save_ip.ip_src;
295             ti->ti_dst = save_ip.ip_dst;
296             ti->ti_pr = save_ip.ip_p;
297             ti->ti_len = htons((uint16_t)tlen);
298             len = ((sizeof(struct tcpiphdr) - sizeof(struct tcphdr)) + tlen);
299             if (cksum(m, len)) {
300                 goto drop;
301             }
302             break;
303
304         default:
305             g_assert_not_reached();
306         }
307
308         /*
309          * Check that TCP offset makes sense,
310          * pull out TCP options and adjust length.              XXX
311          */
312         off = ti->ti_off << 2;
313         if (off < sizeof (struct tcphdr) || off > tlen) {
314           goto drop;
315         }
316         tlen -= off;
317         ti->ti_len = tlen;
318         if (off > sizeof (struct tcphdr)) {
319           optlen = off - sizeof (struct tcphdr);
320           optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
321         }
322         tiflags = ti->ti_flags;
323
324         /*
325          * Convert TCP protocol specific fields to host format.
326          */
327         NTOHL(ti->ti_seq);
328         NTOHL(ti->ti_ack);
329         NTOHS(ti->ti_win);
330         NTOHS(ti->ti_urp);
331
332         /*
333          * Drop TCP, IP headers and TCP options.
334          */
335         m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
336         m->m_len  -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
337
338         /*
339          * Locate pcb for segment.
340          */
341 findso:
342         lhost.ss_family = af;
343         fhost.ss_family = af;
344         switch (af) {
345         case AF_INET:
346             lhost4 = (struct sockaddr_in *) &lhost;
347             lhost4->sin_addr = ti->ti_src;
348             lhost4->sin_port = ti->ti_sport;
349             fhost4 = (struct sockaddr_in *) &fhost;
350             fhost4->sin_addr = ti->ti_dst;
351             fhost4->sin_port = ti->ti_dport;
352             break;
353         default:
354             g_assert_not_reached();
355         }
356
357         so = solookup(&slirp->tcp_last_so, &slirp->tcb, &lhost, &fhost);
358
359         /*
360          * If the state is CLOSED (i.e., TCB does not exist) then
361          * all data in the incoming segment is discarded.
362          * If the TCB exists but is in CLOSED state, it is embryonic,
363          * but should either do a listen or a connect soon.
364          *
365          * state == CLOSED means we've done socreate() but haven't
366          * attached it to a protocol yet...
367          *
368          * XXX If a TCB does not exist, and the TH_SYN flag is
369          * the only flag set, then create a session, mark it
370          * as if it was LISTENING, and continue...
371          */
372         if (so == NULL) {
373           if (slirp->restricted) {
374             /* Any hostfwds will have an existing socket, so we only get here
375              * for non-hostfwd connections. These should be dropped, unless it
376              * happens to be a guestfwd.
377              */
378             for (ex_ptr = slirp->exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) {
379                 if (ex_ptr->ex_fport == ti->ti_dport &&
380                     ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) {
381                     break;
382                 }
383             }
384             if (!ex_ptr) {
385                 goto dropwithreset;
386             }
387           }
388
389           if ((tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) != TH_SYN)
390             goto dropwithreset;
391
392           if ((so = socreate(slirp)) == NULL)
393             goto dropwithreset;
394           if (tcp_attach(so) < 0) {
395             free(so); /* Not sofree (if it failed, it's not insqued) */
396             goto dropwithreset;
397           }
398
399           sbreserve(&so->so_snd, TCP_SNDSPACE);
400           sbreserve(&so->so_rcv, TCP_RCVSPACE);
401
402           so->lhost.ss = lhost;
403           so->fhost.ss = fhost;
404
405           so->so_iptos = tcp_tos(so);
406           if (so->so_iptos == 0) {
407               switch (af) {
408               case AF_INET:
409                   so->so_iptos = ((struct ip *)ti)->ip_tos;
410                   break;
411               default:
412                   g_assert_not_reached();
413               }
414           }
415
416           tp = sototcpcb(so);
417           tp->t_state = TCPS_LISTEN;
418         }
419
420         /*
421          * If this is a still-connecting socket, this is probably
422          * a retransmit of the SYN.  Whether it's a retransmit SYN
423          * or something else, we nuke it.
424          */
425         if (so->so_state & SS_ISFCONNECTING)
426                 goto drop;
427
428         tp = sototcpcb(so);
429
430         /* XXX Should never fail */
431         if (tp == NULL)
432                 goto dropwithreset;
433         if (tp->t_state == TCPS_CLOSED)
434                 goto drop;
435
436         tiwin = ti->ti_win;
437
438         /*
439          * Segment received on connection.
440          * Reset idle time and keep-alive timer.
441          */
442         tp->t_idle = 0;
443         if (SO_OPTIONS)
444            tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL;
445         else
446            tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE;
447
448         /*
449          * Process options if not in LISTEN state,
450          * else do it below (after getting remote address).
451          */
452         if (optp && tp->t_state != TCPS_LISTEN)
453                 tcp_dooptions(tp, (u_char *)optp, optlen, ti);
454
455         /*
456          * Header prediction: check for the two common cases
457          * of a uni-directional data xfer.  If the packet has
458          * no control flags, is in-sequence, the window didn't
459          * change and we're not retransmitting, it's a
460          * candidate.  If the length is zero and the ack moved
461          * forward, we're the sender side of the xfer.  Just
462          * free the data acked & wake any higher level process
463          * that was blocked waiting for space.  If the length
464          * is non-zero and the ack didn't move, we're the
465          * receiver side.  If we're getting packets in-order
466          * (the reassembly queue is empty), add the data to
467          * the socket buffer and note that we need a delayed ack.
468          *
469          * XXX Some of these tests are not needed
470          * eg: the tiwin == tp->snd_wnd prevents many more
471          * predictions.. with no *real* advantage..
472          */
473         if (tp->t_state == TCPS_ESTABLISHED &&
474             (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
475             ti->ti_seq == tp->rcv_nxt &&
476             tiwin && tiwin == tp->snd_wnd &&
477             tp->snd_nxt == tp->snd_max) {
478                 if (ti->ti_len == 0) {
479                         if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
480                             SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
481                             tp->snd_cwnd >= tp->snd_wnd) {
482                                 /*
483                                  * this is a pure ack for outstanding data.
484                                  */
485                                 if (tp->t_rtt &&
486                                     SEQ_GT(ti->ti_ack, tp->t_rtseq))
487                                         tcp_xmit_timer(tp, tp->t_rtt);
488                                 acked = ti->ti_ack - tp->snd_una;
489                                 sbdrop(&so->so_snd, acked);
490                                 tp->snd_una = ti->ti_ack;
491                                 m_free(m);
492
493                                 /*
494                                  * If all outstanding data are acked, stop
495                                  * retransmit timer, otherwise restart timer
496                                  * using current (possibly backed-off) value.
497                                  * If process is waiting for space,
498                                  * wakeup/selwakeup/signal.  If data
499                                  * are ready to send, let tcp_output
500                                  * decide between more output or persist.
501                                  */
502                                 if (tp->snd_una == tp->snd_max)
503                                         tp->t_timer[TCPT_REXMT] = 0;
504                                 else if (tp->t_timer[TCPT_PERSIST] == 0)
505                                         tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
506
507                                 /*
508                                  * This is called because sowwakeup might have
509                                  * put data into so_snd.  Since we don't do sowwakeup,
510                                  * we don't need this.. XXX???
511                                  */
512                                 if (so->so_snd.sb_cc)
513                                         (void) tcp_output(tp);
514
515                                 return;
516                         }
517                 } else if (ti->ti_ack == tp->snd_una &&
518                     tcpfrag_list_empty(tp) &&
519                     ti->ti_len <= sbspace(&so->so_rcv)) {
520                         /*
521                          * this is a pure, in-sequence data packet
522                          * with nothing on the reassembly queue and
523                          * we have enough buffer space to take it.
524                          */
525                         tp->rcv_nxt += ti->ti_len;
526                         /*
527                          * Add data to socket buffer.
528                          */
529                         if (so->so_emu) {
530                                 if (tcp_emu(so,m)) sbappend(so, m);
531                         } else
532                                 sbappend(so, m);
533
534                         /*
535                          * If this is a short packet, then ACK now - with Nagle
536                          *      congestion avoidance sender won't send more until
537                          *      he gets an ACK.
538                          *
539                          * It is better to not delay acks at all to maximize
540                          * TCP throughput.  See RFC 2581.
541                          */
542                         tp->t_flags |= TF_ACKNOW;
543                         tcp_output(tp);
544                         return;
545                 }
546         } /* header prediction */
547         /*
548          * Calculate amount of space in receive window,
549          * and then do TCP input processing.
550          * Receive window is amount of space in rcv queue,
551          * but not less than advertised window.
552          */
553         { int win;
554           win = sbspace(&so->so_rcv);
555           if (win < 0)
556             win = 0;
557           tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
558         }
559
560         switch (tp->t_state) {
561
562         /*
563          * If the state is LISTEN then ignore segment if it contains an RST.
564          * If the segment contains an ACK then it is bad and send a RST.
565          * If it does not contain a SYN then it is not interesting; drop it.
566          * Don't bother responding if the destination was a broadcast.
567          * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
568          * tp->iss, and send a segment:
569          *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
570          * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
571          * Fill in remote peer address fields if not previously specified.
572          * Enter SYN_RECEIVED state, and process any other fields of this
573          * segment in this state.
574          */
575         case TCPS_LISTEN: {
576
577           if (tiflags & TH_RST)
578             goto drop;
579           if (tiflags & TH_ACK)
580             goto dropwithreset;
581           if ((tiflags & TH_SYN) == 0)
582             goto drop;
583
584           /*
585            * This has way too many gotos...
586            * But a bit of spaghetti code never hurt anybody :)
587            */
588
589           /*
590            * If this is destined for the control address, then flag to
591            * tcp_ctl once connected, otherwise connect
592            */
593           if (af == AF_INET &&
594                  (so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) ==
595                  slirp->vnetwork_addr.s_addr) {
596             if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr &&
597                 so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) {
598                 /* May be an add exec */
599                 for (ex_ptr = slirp->exec_list; ex_ptr;
600                      ex_ptr = ex_ptr->ex_next) {
601                   if(ex_ptr->ex_fport == so->so_fport &&
602                      so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) {
603                     so->so_state |= SS_CTL;
604                     break;
605                   }
606                 }
607                 if (so->so_state & SS_CTL) {
608                     goto cont_input;
609                 }
610             }
611             /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */
612           }
613
614           if (so->so_emu & EMU_NOCONNECT) {
615             so->so_emu &= ~EMU_NOCONNECT;
616             goto cont_input;
617           }
618
619           if ((tcp_fconnect(so, so->so_ffamily) == -1) &&
620               (errno != EINPROGRESS) && (errno != EWOULDBLOCK)
621           ) {
622             uint8_t code;
623             DEBUG_MISC((dfd, " tcp fconnect errno = %d-%s\n",
624                         errno,strerror(errno)));
625             if(errno == ECONNREFUSED) {
626               /* ACK the SYN, send RST to refuse the connection */
627               tcp_respond(tp, ti, m, ti->ti_seq + 1, (tcp_seq) 0,
628                           TH_RST | TH_ACK, af);
629             } else {
630               switch (af) {
631               case AF_INET:
632                 code = ICMP_UNREACH_NET;
633                 if (errno == EHOSTUNREACH) {
634                   code = ICMP_UNREACH_HOST;
635                 }
636                 break;
637               default:
638                 g_assert_not_reached();
639               }
640               HTONL(ti->ti_seq);             /* restore tcp header */
641               HTONL(ti->ti_ack);
642               HTONS(ti->ti_win);
643               HTONS(ti->ti_urp);
644               m->m_data -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
645               m->m_len  += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
646               switch (af) {
647               case AF_INET:
648                 m->m_data += sizeof(struct tcpiphdr) - sizeof(struct ip)
649                                                      - sizeof(struct tcphdr);
650                 m->m_len  -= sizeof(struct tcpiphdr) - sizeof(struct ip)
651                                                      - sizeof(struct tcphdr);
652                 *ip = save_ip;
653                 icmp_send_error(m, ICMP_UNREACH, code, 0, strerror(errno));
654                 break;
655               default:
656                 g_assert_not_reached();
657               }
658             }
659             tcp_close(tp);
660             m_free(m);
661           } else {
662             /*
663              * Haven't connected yet, save the current mbuf
664              * and ti, and return
665              * XXX Some OS's don't tell us whether the connect()
666              * succeeded or not.  So we must time it out.
667              */
668             so->so_m = m;
669             so->so_ti = ti;
670             tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
671             tp->t_state = TCPS_SYN_RECEIVED;
672             tcp_template(tp);
673           }
674           return;
675
676         cont_conn:
677           /* m==NULL
678            * Check if the connect succeeded
679            */
680           if (so->so_state & SS_NOFDREF) {
681             tp = tcp_close(tp);
682             goto dropwithreset;
683           }
684         cont_input:
685           tcp_template(tp);
686
687           if (optp)
688             tcp_dooptions(tp, (u_char *)optp, optlen, ti);
689
690           if (iss)
691             tp->iss = iss;
692           else
693             tp->iss = slirp->tcp_iss;
694           slirp->tcp_iss += TCP_ISSINCR/2;
695           tp->irs = ti->ti_seq;
696           tcp_sendseqinit(tp);
697           tcp_rcvseqinit(tp);
698           tp->t_flags |= TF_ACKNOW;
699           tp->t_state = TCPS_SYN_RECEIVED;
700           tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
701           goto trimthenstep6;
702         } /* case TCPS_LISTEN */
703
704         /*
705          * If the state is SYN_SENT:
706          *      if seg contains an ACK, but not for our SYN, drop the input.
707          *      if seg contains a RST, then drop the connection.
708          *      if seg does not contain SYN, then drop it.
709          * Otherwise this is an acceptable SYN segment
710          *      initialize tp->rcv_nxt and tp->irs
711          *      if seg contains ack then advance tp->snd_una
712          *      if SYN has been acked change to ESTABLISHED else SYN_RCVD state
713          *      arrange for segment to be acked (eventually)
714          *      continue processing rest of data/controls, beginning with URG
715          */
716         case TCPS_SYN_SENT:
717                 if ((tiflags & TH_ACK) &&
718                     (SEQ_LEQ(ti->ti_ack, tp->iss) ||
719                      SEQ_GT(ti->ti_ack, tp->snd_max)))
720                         goto dropwithreset;
721
722                 if (tiflags & TH_RST) {
723                         if (tiflags & TH_ACK) {
724                                 tcp_drop(tp, 0); /* XXX Check t_softerror! */
725                         }
726                         goto drop;
727                 }
728
729                 if ((tiflags & TH_SYN) == 0)
730                         goto drop;
731                 if (tiflags & TH_ACK) {
732                         tp->snd_una = ti->ti_ack;
733                         if (SEQ_LT(tp->snd_nxt, tp->snd_una))
734                                 tp->snd_nxt = tp->snd_una;
735                 }
736
737                 tp->t_timer[TCPT_REXMT] = 0;
738                 tp->irs = ti->ti_seq;
739                 tcp_rcvseqinit(tp);
740                 tp->t_flags |= TF_ACKNOW;
741                 if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
742                         soisfconnected(so);
743                         tp->t_state = TCPS_ESTABLISHED;
744
745                         (void) tcp_reass(tp, (struct tcpiphdr *)0,
746                                 (struct mbuf *)0);
747                         /*
748                          * if we didn't have to retransmit the SYN,
749                          * use its rtt as our initial srtt & rtt var.
750                          */
751                         if (tp->t_rtt)
752                                 tcp_xmit_timer(tp, tp->t_rtt);
753                 } else
754                         tp->t_state = TCPS_SYN_RECEIVED;
755
756 trimthenstep6:
757                 /*
758                  * Advance ti->ti_seq to correspond to first data byte.
759                  * If data, trim to stay within window,
760                  * dropping FIN if necessary.
761                  */
762                 ti->ti_seq++;
763                 if (ti->ti_len > tp->rcv_wnd) {
764                         todrop = ti->ti_len - tp->rcv_wnd;
765                         m_adj(m, -todrop);
766                         ti->ti_len = tp->rcv_wnd;
767                         tiflags &= ~TH_FIN;
768                 }
769                 tp->snd_wl1 = ti->ti_seq - 1;
770                 tp->rcv_up = ti->ti_seq;
771                 goto step6;
772         } /* switch tp->t_state */
773         /*
774          * States other than LISTEN or SYN_SENT.
775          * Check that at least some bytes of segment are within
776          * receive window.  If segment begins before rcv_nxt,
777          * drop leading data (and SYN); if nothing left, just ack.
778          */
779         todrop = tp->rcv_nxt - ti->ti_seq;
780         if (todrop > 0) {
781                 if (tiflags & TH_SYN) {
782                         tiflags &= ~TH_SYN;
783                         ti->ti_seq++;
784                         if (ti->ti_urp > 1)
785                                 ti->ti_urp--;
786                         else
787                                 tiflags &= ~TH_URG;
788                         todrop--;
789                 }
790                 /*
791                  * Following if statement from Stevens, vol. 2, p. 960.
792                  */
793                 if (todrop > ti->ti_len
794                     || (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) {
795                         /*
796                          * Any valid FIN must be to the left of the window.
797                          * At this point the FIN must be a duplicate or out
798                          * of sequence; drop it.
799                          */
800                         tiflags &= ~TH_FIN;
801
802                         /*
803                          * Send an ACK to resynchronize and drop any data.
804                          * But keep on processing for RST or ACK.
805                          */
806                         tp->t_flags |= TF_ACKNOW;
807                         todrop = ti->ti_len;
808                 }
809                 m_adj(m, todrop);
810                 ti->ti_seq += todrop;
811                 ti->ti_len -= todrop;
812                 if (ti->ti_urp > todrop)
813                         ti->ti_urp -= todrop;
814                 else {
815                         tiflags &= ~TH_URG;
816                         ti->ti_urp = 0;
817                 }
818         }
819         /*
820          * If new data are received on a connection after the
821          * user processes are gone, then RST the other end.
822          */
823         if ((so->so_state & SS_NOFDREF) &&
824             tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
825                 tp = tcp_close(tp);
826                 goto dropwithreset;
827         }
828
829         /*
830          * If segment ends after window, drop trailing data
831          * (and PUSH and FIN); if nothing left, just ACK.
832          */
833         todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
834         if (todrop > 0) {
835                 if (todrop >= ti->ti_len) {
836                         /*
837                          * If a new connection request is received
838                          * while in TIME_WAIT, drop the old connection
839                          * and start over if the sequence numbers
840                          * are above the previous ones.
841                          */
842                         if (tiflags & TH_SYN &&
843                             tp->t_state == TCPS_TIME_WAIT &&
844                             SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
845                                 iss = tp->rcv_nxt + TCP_ISSINCR;
846                                 tp = tcp_close(tp);
847                                 goto findso;
848                         }
849                         /*
850                          * If window is closed can only take segments at
851                          * window edge, and have to drop data and PUSH from
852                          * incoming segments.  Continue processing, but
853                          * remember to ack.  Otherwise, drop segment
854                          * and ack.
855                          */
856                         if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
857                                 tp->t_flags |= TF_ACKNOW;
858                         } else {
859                                 goto dropafterack;
860                         }
861                 }
862                 m_adj(m, -todrop);
863                 ti->ti_len -= todrop;
864                 tiflags &= ~(TH_PUSH|TH_FIN);
865         }
866
867         /*
868          * If the RST bit is set examine the state:
869          *    SYN_RECEIVED STATE:
870          *      If passive open, return to LISTEN state.
871          *      If active open, inform user that connection was refused.
872          *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
873          *      Inform user that connection was reset, and close tcb.
874          *    CLOSING, LAST_ACK, TIME_WAIT STATES
875          *      Close the tcb.
876          */
877         if (tiflags&TH_RST) switch (tp->t_state) {
878
879         case TCPS_SYN_RECEIVED:
880         case TCPS_ESTABLISHED:
881         case TCPS_FIN_WAIT_1:
882         case TCPS_FIN_WAIT_2:
883         case TCPS_CLOSE_WAIT:
884                 tp->t_state = TCPS_CLOSED;
885                 tcp_close(tp);
886                 goto drop;
887
888         case TCPS_CLOSING:
889         case TCPS_LAST_ACK:
890         case TCPS_TIME_WAIT:
891                 tcp_close(tp);
892                 goto drop;
893         }
894
895         /*
896          * If a SYN is in the window, then this is an
897          * error and we send an RST and drop the connection.
898          */
899         if (tiflags & TH_SYN) {
900                 tp = tcp_drop(tp,0);
901                 goto dropwithreset;
902         }
903
904         /*
905          * If the ACK bit is off we drop the segment and return.
906          */
907         if ((tiflags & TH_ACK) == 0) goto drop;
908
909         /*
910          * Ack processing.
911          */
912         switch (tp->t_state) {
913         /*
914          * In SYN_RECEIVED state if the ack ACKs our SYN then enter
915          * ESTABLISHED state and continue processing, otherwise
916          * send an RST.  una<=ack<=max
917          */
918         case TCPS_SYN_RECEIVED:
919
920                 if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
921                     SEQ_GT(ti->ti_ack, tp->snd_max))
922                         goto dropwithreset;
923                 tp->t_state = TCPS_ESTABLISHED;
924                 /*
925                  * The sent SYN is ack'ed with our sequence number +1
926                  * The first data byte already in the buffer will get
927                  * lost if no correction is made.  This is only needed for
928                  * SS_CTL since the buffer is empty otherwise.
929                  * tp->snd_una++; or:
930                  */
931                 tp->snd_una=ti->ti_ack;
932                 if (so->so_state & SS_CTL) {
933                   /* So tcp_ctl reports the right state */
934                   ret = tcp_ctl(so);
935                   if (ret == 1) {
936                     soisfconnected(so);
937                     so->so_state &= ~SS_CTL;   /* success XXX */
938                   } else if (ret == 2) {
939                     so->so_state &= SS_PERSISTENT_MASK;
940                     so->so_state |= SS_NOFDREF; /* CTL_CMD */
941                   } else {
942                     needoutput = 1;
943                     tp->t_state = TCPS_FIN_WAIT_1;
944                   }
945                 } else {
946                   soisfconnected(so);
947                 }
948
949                 (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
950                 tp->snd_wl1 = ti->ti_seq - 1;
951                 /* Avoid ack processing; snd_una==ti_ack  =>  dup ack */
952                 goto synrx_to_est;
953                 /* fall into ... */
954
955         /*
956          * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
957          * ACKs.  If the ack is in the range
958          *      tp->snd_una < ti->ti_ack <= tp->snd_max
959          * then advance tp->snd_una to ti->ti_ack and drop
960          * data from the retransmission queue.  If this ACK reflects
961          * more up to date window information we update our window information.
962          */
963         case TCPS_ESTABLISHED:
964         case TCPS_FIN_WAIT_1:
965         case TCPS_FIN_WAIT_2:
966         case TCPS_CLOSE_WAIT:
967         case TCPS_CLOSING:
968         case TCPS_LAST_ACK:
969         case TCPS_TIME_WAIT:
970
971                 if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
972                         if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
973                           DEBUG_MISC((dfd, " dup ack  m = %p  so = %p\n",
974                                       m, so));
975                                 /*
976                                  * If we have outstanding data (other than
977                                  * a window probe), this is a completely
978                                  * duplicate ack (ie, window info didn't
979                                  * change), the ack is the biggest we've
980                                  * seen and we've seen exactly our rexmt
981                                  * threshold of them, assume a packet
982                                  * has been dropped and retransmit it.
983                                  * Kludge snd_nxt & the congestion
984                                  * window so we send only this one
985                                  * packet.
986                                  *
987                                  * We know we're losing at the current
988                                  * window size so do congestion avoidance
989                                  * (set ssthresh to half the current window
990                                  * and pull our congestion window back to
991                                  * the new ssthresh).
992                                  *
993                                  * Dup acks mean that packets have left the
994                                  * network (they're now cached at the receiver)
995                                  * so bump cwnd by the amount in the receiver
996                                  * to keep a constant cwnd packets in the
997                                  * network.
998                                  */
999                                 if (tp->t_timer[TCPT_REXMT] == 0 ||
1000                                     ti->ti_ack != tp->snd_una)
1001                                         tp->t_dupacks = 0;
1002                                 else if (++tp->t_dupacks == TCPREXMTTHRESH) {
1003                                         tcp_seq onxt = tp->snd_nxt;
1004                                         u_int win =
1005                                             min(tp->snd_wnd, tp->snd_cwnd) / 2 /
1006                                                 tp->t_maxseg;
1007
1008                                         if (win < 2)
1009                                                 win = 2;
1010                                         tp->snd_ssthresh = win * tp->t_maxseg;
1011                                         tp->t_timer[TCPT_REXMT] = 0;
1012                                         tp->t_rtt = 0;
1013                                         tp->snd_nxt = ti->ti_ack;
1014                                         tp->snd_cwnd = tp->t_maxseg;
1015                                         (void) tcp_output(tp);
1016                                         tp->snd_cwnd = tp->snd_ssthresh +
1017                                                tp->t_maxseg * tp->t_dupacks;
1018                                         if (SEQ_GT(onxt, tp->snd_nxt))
1019                                                 tp->snd_nxt = onxt;
1020                                         goto drop;
1021                                 } else if (tp->t_dupacks > TCPREXMTTHRESH) {
1022                                         tp->snd_cwnd += tp->t_maxseg;
1023                                         (void) tcp_output(tp);
1024                                         goto drop;
1025                                 }
1026                         } else
1027                                 tp->t_dupacks = 0;
1028                         break;
1029                 }
1030         synrx_to_est:
1031                 /*
1032                  * If the congestion window was inflated to account
1033                  * for the other side's cached packets, retract it.
1034                  */
1035                 if (tp->t_dupacks > TCPREXMTTHRESH &&
1036                     tp->snd_cwnd > tp->snd_ssthresh)
1037                         tp->snd_cwnd = tp->snd_ssthresh;
1038                 tp->t_dupacks = 0;
1039                 if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
1040                         goto dropafterack;
1041                 }
1042                 acked = ti->ti_ack - tp->snd_una;
1043
1044                 /*
1045                  * If transmit timer is running and timed sequence
1046                  * number was acked, update smoothed round trip time.
1047                  * Since we now have an rtt measurement, cancel the
1048                  * timer backoff (cf., Phil Karn's retransmit alg.).
1049                  * Recompute the initial retransmit timer.
1050                  */
1051                 if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
1052                         tcp_xmit_timer(tp,tp->t_rtt);
1053
1054                 /*
1055                  * If all outstanding data is acked, stop retransmit
1056                  * timer and remember to restart (more output or persist).
1057                  * If there is more data to be acked, restart retransmit
1058                  * timer, using current (possibly backed-off) value.
1059                  */
1060                 if (ti->ti_ack == tp->snd_max) {
1061                         tp->t_timer[TCPT_REXMT] = 0;
1062                         needoutput = 1;
1063                 } else if (tp->t_timer[TCPT_PERSIST] == 0)
1064                         tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
1065                 /*
1066                  * When new data is acked, open the congestion window.
1067                  * If the window gives us less than ssthresh packets
1068                  * in flight, open exponentially (maxseg per packet).
1069                  * Otherwise open linearly: maxseg per window
1070                  * (maxseg^2 / cwnd per packet).
1071                  */
1072                 {
1073                   register u_int cw = tp->snd_cwnd;
1074                   register u_int incr = tp->t_maxseg;
1075
1076                   if (cw > tp->snd_ssthresh)
1077                     incr = incr * incr / cw;
1078                   tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
1079                 }
1080                 if (acked > so->so_snd.sb_cc) {
1081                         tp->snd_wnd -= so->so_snd.sb_cc;
1082                         sbdrop(&so->so_snd, (int )so->so_snd.sb_cc);
1083                         ourfinisacked = 1;
1084                 } else {
1085                         sbdrop(&so->so_snd, acked);
1086                         tp->snd_wnd -= acked;
1087                         ourfinisacked = 0;
1088                 }
1089                 tp->snd_una = ti->ti_ack;
1090                 if (SEQ_LT(tp->snd_nxt, tp->snd_una))
1091                         tp->snd_nxt = tp->snd_una;
1092
1093                 switch (tp->t_state) {
1094
1095                 /*
1096                  * In FIN_WAIT_1 STATE in addition to the processing
1097                  * for the ESTABLISHED state if our FIN is now acknowledged
1098                  * then enter FIN_WAIT_2.
1099                  */
1100                 case TCPS_FIN_WAIT_1:
1101                         if (ourfinisacked) {
1102                                 /*
1103                                  * If we can't receive any more
1104                                  * data, then closing user can proceed.
1105                                  * Starting the timer is contrary to the
1106                                  * specification, but if we don't get a FIN
1107                                  * we'll hang forever.
1108                                  */
1109                                 if (so->so_state & SS_FCANTRCVMORE) {
1110                                         tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE;
1111                                 }
1112                                 tp->t_state = TCPS_FIN_WAIT_2;
1113                         }
1114                         break;
1115
1116                 /*
1117                  * In CLOSING STATE in addition to the processing for
1118                  * the ESTABLISHED state if the ACK acknowledges our FIN
1119                  * then enter the TIME-WAIT state, otherwise ignore
1120                  * the segment.
1121                  */
1122                 case TCPS_CLOSING:
1123                         if (ourfinisacked) {
1124                                 tp->t_state = TCPS_TIME_WAIT;
1125                                 tcp_canceltimers(tp);
1126                                 tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1127                         }
1128                         break;
1129
1130                 /*
1131                  * In LAST_ACK, we may still be waiting for data to drain
1132                  * and/or to be acked, as well as for the ack of our FIN.
1133                  * If our FIN is now acknowledged, delete the TCB,
1134                  * enter the closed state and return.
1135                  */
1136                 case TCPS_LAST_ACK:
1137                         if (ourfinisacked) {
1138                                 tcp_close(tp);
1139                                 goto drop;
1140                         }
1141                         break;
1142
1143                 /*
1144                  * In TIME_WAIT state the only thing that should arrive
1145                  * is a retransmission of the remote FIN.  Acknowledge
1146                  * it and restart the finack timer.
1147                  */
1148                 case TCPS_TIME_WAIT:
1149                         tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1150                         goto dropafterack;
1151                 }
1152         } /* switch(tp->t_state) */
1153
1154 step6:
1155         /*
1156          * Update window information.
1157          * Don't look at window if no ACK: TAC's send garbage on first SYN.
1158          */
1159         if ((tiflags & TH_ACK) &&
1160             (SEQ_LT(tp->snd_wl1, ti->ti_seq) ||
1161             (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
1162             (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) {
1163                 tp->snd_wnd = tiwin;
1164                 tp->snd_wl1 = ti->ti_seq;
1165                 tp->snd_wl2 = ti->ti_ack;
1166                 if (tp->snd_wnd > tp->max_sndwnd)
1167                         tp->max_sndwnd = tp->snd_wnd;
1168                 needoutput = 1;
1169         }
1170
1171         /*
1172          * Process segments with URG.
1173          */
1174         if ((tiflags & TH_URG) && ti->ti_urp &&
1175             TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1176                 /*
1177                  * This is a kludge, but if we receive and accept
1178                  * random urgent pointers, we'll crash in
1179                  * soreceive.  It's hard to imagine someone
1180                  * actually wanting to send this much urgent data.
1181                  */
1182                 if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) {
1183                         ti->ti_urp = 0;
1184                         tiflags &= ~TH_URG;
1185                         goto dodata;
1186                 }
1187                 /*
1188                  * If this segment advances the known urgent pointer,
1189                  * then mark the data stream.  This should not happen
1190                  * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
1191                  * a FIN has been received from the remote side.
1192                  * In these states we ignore the URG.
1193                  *
1194                  * According to RFC961 (Assigned Protocols),
1195                  * the urgent pointer points to the last octet
1196                  * of urgent data.  We continue, however,
1197                  * to consider it to indicate the first octet
1198                  * of data past the urgent section as the original
1199                  * spec states (in one of two places).
1200                  */
1201                 if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
1202                         tp->rcv_up = ti->ti_seq + ti->ti_urp;
1203                         so->so_urgc =  so->so_rcv.sb_cc +
1204                                 (tp->rcv_up - tp->rcv_nxt); /* -1; */
1205                         tp->rcv_up = ti->ti_seq + ti->ti_urp;
1206
1207                 }
1208         } else
1209                 /*
1210                  * If no out of band data is expected,
1211                  * pull receive urgent pointer along
1212                  * with the receive window.
1213                  */
1214                 if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
1215                         tp->rcv_up = tp->rcv_nxt;
1216 dodata:
1217
1218         /*
1219          * If this is a small packet, then ACK now - with Nagel
1220          *      congestion avoidance sender won't send more until
1221          *      he gets an ACK.
1222          */
1223         if (ti->ti_len && (unsigned)ti->ti_len <= 5 &&
1224             ((struct tcpiphdr_2 *)ti)->first_char == (char)27) {
1225                 tp->t_flags |= TF_ACKNOW;
1226         }
1227
1228         /*
1229          * Process the segment text, merging it into the TCP sequencing queue,
1230          * and arranging for acknowledgment of receipt if necessary.
1231          * This process logically involves adjusting tp->rcv_wnd as data
1232          * is presented to the user (this happens in tcp_usrreq.c,
1233          * case PRU_RCVD).  If a FIN has already been received on this
1234          * connection then we just ignore the text.
1235          */
1236         if ((ti->ti_len || (tiflags&TH_FIN)) &&
1237             TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1238                 TCP_REASS(tp, ti, m, so, tiflags);
1239         } else {
1240                 m_free(m);
1241                 tiflags &= ~TH_FIN;
1242         }
1243
1244         /*
1245          * If FIN is received ACK the FIN and let the user know
1246          * that the connection is closing.
1247          */
1248         if (tiflags & TH_FIN) {
1249                 if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
1250                         /*
1251                          * If we receive a FIN we can't send more data,
1252                          * set it SS_FDRAIN
1253                          * Shutdown the socket if there is no rx data in the
1254                          * buffer.
1255                          * soread() is called on completion of shutdown() and
1256                          * will got to TCPS_LAST_ACK, and use tcp_output()
1257                          * to send the FIN.
1258                          */
1259                         sofwdrain(so);
1260
1261                         tp->t_flags |= TF_ACKNOW;
1262                         tp->rcv_nxt++;
1263                 }
1264                 switch (tp->t_state) {
1265
1266                 /*
1267                  * In SYN_RECEIVED and ESTABLISHED STATES
1268                  * enter the CLOSE_WAIT state.
1269                  */
1270                 case TCPS_SYN_RECEIVED:
1271                 case TCPS_ESTABLISHED:
1272                   if(so->so_emu == EMU_CTL)        /* no shutdown on socket */
1273                     tp->t_state = TCPS_LAST_ACK;
1274                   else
1275                     tp->t_state = TCPS_CLOSE_WAIT;
1276                   break;
1277
1278                 /*
1279                  * If still in FIN_WAIT_1 STATE FIN has not been acked so
1280                  * enter the CLOSING state.
1281                  */
1282                 case TCPS_FIN_WAIT_1:
1283                         tp->t_state = TCPS_CLOSING;
1284                         break;
1285
1286                 /*
1287                  * In FIN_WAIT_2 state enter the TIME_WAIT state,
1288                  * starting the time-wait timer, turning off the other
1289                  * standard timers.
1290                  */
1291                 case TCPS_FIN_WAIT_2:
1292                         tp->t_state = TCPS_TIME_WAIT;
1293                         tcp_canceltimers(tp);
1294                         tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1295                         break;
1296
1297                 /*
1298                  * In TIME_WAIT state restart the 2 MSL time_wait timer.
1299                  */
1300                 case TCPS_TIME_WAIT:
1301                         tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
1302                         break;
1303                 }
1304         }
1305
1306         /*
1307          * Return any desired output.
1308          */
1309         if (needoutput || (tp->t_flags & TF_ACKNOW)) {
1310                 (void) tcp_output(tp);
1311         }
1312         return;
1313
1314 dropafterack:
1315         /*
1316          * Generate an ACK dropping incoming segment if it occupies
1317          * sequence space, where the ACK reflects our state.
1318          */
1319         if (tiflags & TH_RST)
1320                 goto drop;
1321         m_free(m);
1322         tp->t_flags |= TF_ACKNOW;
1323         (void) tcp_output(tp);
1324         return;
1325
1326 dropwithreset:
1327         /* reuses m if m!=NULL, m_free() unnecessary */
1328         if (tiflags & TH_ACK)
1329                 tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST, af);
1330         else {
1331                 if (tiflags & TH_SYN) ti->ti_len++;
1332                 tcp_respond(tp, ti, m, ti->ti_seq + ti->ti_len, (tcp_seq) 0,
1333                     TH_RST | TH_ACK, af);
1334         }
1335
1336         return;
1337
1338 drop:
1339         /*
1340          * Drop space held by incoming segment and return.
1341          */
1342         m_free(m);
1343 }
1344
1345 static void
1346 tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt, struct tcpiphdr *ti)
1347 {
1348         uint16_t mss;
1349         int opt, optlen;
1350
1351         DEBUG_CALL("tcp_dooptions");
1352         DEBUG_ARGS((dfd, " tp = %p  cnt=%i\n", tp, cnt));
1353
1354         for (; cnt > 0; cnt -= optlen, cp += optlen) {
1355                 opt = cp[0];
1356                 if (opt == TCPOPT_EOL)
1357                         break;
1358                 if (opt == TCPOPT_NOP)
1359                         optlen = 1;
1360                 else {
1361                         optlen = cp[1];
1362                         if (optlen <= 0)
1363                                 break;
1364                 }
1365                 switch (opt) {
1366
1367                 default:
1368                         continue;
1369
1370                 case TCPOPT_MAXSEG:
1371                         if (optlen != TCPOLEN_MAXSEG)
1372                                 continue;
1373                         if (!(ti->ti_flags & TH_SYN))
1374                                 continue;
1375                         memcpy((char *) &mss, (char *) cp + 2, sizeof(mss));
1376                         NTOHS(mss);
1377                         (void) tcp_mss(tp, mss);        /* sets t_maxseg */
1378                         break;
1379                 }
1380         }
1381 }
1382
1383
1384 /*
1385  * Pull out of band byte out of a segment so
1386  * it doesn't appear in the user's data queue.
1387  * It is still reflected in the segment length for
1388  * sequencing purposes.
1389  */
1390
1391 #ifdef notdef
1392
1393 void
1394 tcp_pulloutofband(so, ti, m)
1395         struct socket *so;
1396         struct tcpiphdr *ti;
1397         register struct mbuf *m;
1398 {
1399         int cnt = ti->ti_urp - 1;
1400
1401         while (cnt >= 0) {
1402                 if (m->m_len > cnt) {
1403                         char *cp = mtod(m, caddr_t) + cnt;
1404                         struct tcpcb *tp = sototcpcb(so);
1405
1406                         tp->t_iobc = *cp;
1407                         tp->t_oobflags |= TCPOOB_HAVEDATA;
1408                         memcpy(sp, cp+1, (unsigned)(m->m_len - cnt - 1));
1409                         m->m_len--;
1410                         return;
1411                 }
1412                 cnt -= m->m_len;
1413                 m = m->m_next; /* XXX WRONG! Fix it! */
1414                 if (m == 0)
1415                         break;
1416         }
1417         panic("tcp_pulloutofband");
1418 }
1419
1420 #endif /* notdef */
1421
1422 /*
1423  * Collect new round-trip time estimate
1424  * and update averages and current timeout.
1425  */
1426
1427 static void
1428 tcp_xmit_timer(register struct tcpcb *tp, int rtt)
1429 {
1430         register short delta;
1431
1432         DEBUG_CALL("tcp_xmit_timer");
1433         DEBUG_ARG("tp = %p", tp);
1434         DEBUG_ARG("rtt = %d", rtt);
1435
1436         if (tp->t_srtt != 0) {
1437                 /*
1438                  * srtt is stored as fixed point with 3 bits after the
1439                  * binary point (i.e., scaled by 8).  The following magic
1440                  * is equivalent to the smoothing algorithm in rfc793 with
1441                  * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
1442                  * point).  Adjust rtt to origin 0.
1443                  */
1444                 delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);
1445                 if ((tp->t_srtt += delta) <= 0)
1446                         tp->t_srtt = 1;
1447                 /*
1448                  * We accumulate a smoothed rtt variance (actually, a
1449                  * smoothed mean difference), then set the retransmit
1450                  * timer to smoothed rtt + 4 times the smoothed variance.
1451                  * rttvar is stored as fixed point with 2 bits after the
1452                  * binary point (scaled by 4).  The following is
1453                  * equivalent to rfc793 smoothing with an alpha of .75
1454                  * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
1455                  * rfc793's wired-in beta.
1456                  */
1457                 if (delta < 0)
1458                         delta = -delta;
1459                 delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
1460                 if ((tp->t_rttvar += delta) <= 0)
1461                         tp->t_rttvar = 1;
1462         } else {
1463                 /*
1464                  * No rtt measurement yet - use the unsmoothed rtt.
1465                  * Set the variance to half the rtt (so our first
1466                  * retransmit happens at 3*rtt).
1467                  */
1468                 tp->t_srtt = rtt << TCP_RTT_SHIFT;
1469                 tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
1470         }
1471         tp->t_rtt = 0;
1472         tp->t_rxtshift = 0;
1473
1474         /*
1475          * the retransmit should happen at rtt + 4 * rttvar.
1476          * Because of the way we do the smoothing, srtt and rttvar
1477          * will each average +1/2 tick of bias.  When we compute
1478          * the retransmit timer, we want 1/2 tick of rounding and
1479          * 1 extra tick because of +-1/2 tick uncertainty in the
1480          * firing of the timer.  The bias will give us exactly the
1481          * 1.5 tick we need.  But, because the bias is
1482          * statistical, we have to test that we don't drop below
1483          * the minimum feasible timer (which is 2 ticks).
1484          */
1485         TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
1486             (short)tp->t_rttmin, TCPTV_REXMTMAX); /* XXX */
1487
1488         /*
1489          * We received an ack for a packet that wasn't retransmitted;
1490          * it is probably safe to discard any error indications we've
1491          * received recently.  This isn't quite right, but close enough
1492          * for now (a route might have failed after we sent a segment,
1493          * and the return path might not be symmetrical).
1494          */
1495         tp->t_softerror = 0;
1496 }
1497
1498 /*
1499  * Determine a reasonable value for maxseg size.
1500  * If the route is known, check route for mtu.
1501  * If none, use an mss that can be handled on the outgoing
1502  * interface without forcing IP to fragment; if bigger than
1503  * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
1504  * to utilize large mbufs.  If no route is found, route has no mtu,
1505  * or the destination isn't local, use a default, hopefully conservative
1506  * size (usually 512 or the default IP max size, but no more than the mtu
1507  * of the interface), as we can't discover anything about intervening
1508  * gateways or networks.  We also initialize the congestion/slow start
1509  * window to be a single segment if the destination isn't local.
1510  * While looking at the routing entry, we also initialize other path-dependent
1511  * parameters from pre-set or cached values in the routing entry.
1512  */
1513
1514 int
1515 tcp_mss(struct tcpcb *tp, u_int offer)
1516 {
1517         struct socket *so = tp->t_socket;
1518         int mss;
1519
1520         DEBUG_CALL("tcp_mss");
1521         DEBUG_ARG("tp = %p", tp);
1522         DEBUG_ARG("offer = %d", offer);
1523
1524         switch (so->so_ffamily) {
1525         case AF_INET:
1526             mss = min(IF_MTU, IF_MRU) - sizeof(struct tcphdr)
1527                                       + sizeof(struct ip);
1528             break;
1529         default:
1530             g_assert_not_reached();
1531         }
1532
1533         if (offer)
1534                 mss = min(mss, offer);
1535         mss = max(mss, 32);
1536         if (mss < tp->t_maxseg || offer != 0)
1537            tp->t_maxseg = mss;
1538
1539         tp->snd_cwnd = mss;
1540
1541         sbreserve(&so->so_snd, TCP_SNDSPACE + ((TCP_SNDSPACE % mss) ?
1542                                                (mss - (TCP_SNDSPACE % mss)) :
1543                                                0));
1544         sbreserve(&so->so_rcv, TCP_RCVSPACE + ((TCP_RCVSPACE % mss) ?
1545                                                (mss - (TCP_RCVSPACE % mss)) :
1546                                                0));
1547
1548         DEBUG_MISC((dfd, " returning mss = %d\n", mss));
1549
1550         return mss;
1551 }
This page took 0.12063 seconds and 4 git commands to generate.