| plan 9 kernel history: overview | file list | diff list |
2000/0102/ip/tcp.c (diff list | history)
| ip/tcp.c on 1997/0327 | ||
| 1997/0327 | #include "u.h" #include "../port/lib.h" #include "mem.h" #include "dat.h" #include "fns.h" #include "../port/error.h" #include "ip.h" enum { QMAX = 64*1024-1, IP_TCPPROTO = 6, TCP_IPLEN = 8, TCP_PHDRSIZE = 12, TCP_HDRSIZE = 20, | |
| 1998/0306 | TCP_TCBPHDRSZ = 40, /* bytes of the prototype header that htontcp copies into each packet */ | |
| 1997/0327 | TCP_PKT = TCP_IPLEN+TCP_PHDRSIZE, TimerOFF = 0, TimerON = 1, TimerDONE = 2, MAX_TIME = (1<<20), /* Forever */ | |
| 1998/1008 | TCP_ACK = 50, /* Timed ack sequence in ms */ | |
| 1997/0327 | URG = 0x20, /* Data marked urgent */ ACK = 0x10, /* Acknowledge is valid */ PSH = 0x08, /* Whole data pipe is pushed */ RST = 0x04, /* Reset connection */ SYN = 0x02, /* Synchronise sequence numbers */ FIN = 0x01, /* Start close down */ EOLOPT = 0, NOOPOPT = 1, | |
| 1999/0529 | MAXBACKMS = 30000, /* longest backoff time (ms) before hangup */ | |
| 1997/0327 | MSSOPT = 2, MSS_LENGTH = 4, /* Length in bytes of the MSS option */ MSL2 = 10, MSPTICK = 50, /* Milliseconds per timer tick */ | |
| 1999/0401 | DEF_MSS = 1460, /* Default maximum segment size */ DEF_RTT = 1000, /* Default round trip */ | |
| 1999/0623 | DEF_KAT = 30000, /* Default time (ms) between keep alives */ | |
| 1997/0327 | TCP_LISTEN = 0, /* Listen connection */ TCP_CONNECT = 1, /* Outgoing connection */ | |
| 1999/0401 | TCPREXMTTHRESH = 3, /* dupack threshold for rxt */ | |
| 1997/0327 | FORCE = 1, CLONE = 2, RETRAN = 4, ACTIVE = 8, SYNACK = 16, LOGAGAIN = 3, LOGDGAIN = 2, Closed = 0, /* Connection states */ Listen, Syn_sent, Syn_received, Established, Finwait1, Finwait2, Close_wait, Closing, Last_ack, Time_wait }; /* Must correspond to the enumeration above */ char *tcpstates[] = { "Closed", "Listen", "Syn_sent", "Syn_received", "Established", "Finwait1", "Finwait2", "Close_wait", "Closing", "Last_ack", "Time_wait" }; typedef struct Timer Timer; struct Timer { Timer *next; Timer *prev; | |
| 1998/0925 | Timer *readynext; /* links timers that expired in the current tcpackproc pass */ | |
| 1997/0327 | int state; int start; int count; void (*func)(void*); void *arg; }; typedef struct Tcphdr Tcphdr; struct Tcphdr { | |
| 1998/0306 | uchar vihl; /* Version and header length */ uchar tos; /* Type of service */ uchar length[2]; /* packet length */ uchar id[2]; /* Identification */ uchar frag[2]; /* Fragment information */ uchar Unused; uchar proto; uchar tcplen[2]; uchar tcpsrc[4]; uchar tcpdst[4]; uchar tcpsport[2]; uchar tcpdport[2]; uchar tcpseq[4]; uchar tcpack[4]; uchar tcpflag[2]; uchar tcpwin[2]; uchar tcpcksum[2]; uchar tcpurg[2]; | |
| 1997/0327 | /* Options segment */ | |
| 1998/0306 | uchar tcpopt[2]; uchar tcpmss[2]; | |
| 1997/0327 | }; typedef struct Tcp Tcp; struct Tcp { ushort source; ushort dest; ulong seq; ulong ack; | |
| 1998/0306 | uchar flags; | |
| 1997/0327 | ushort wnd; ushort urg; ushort mss; | |
| 1999/0401 | ushort len; /* size of data */ | |
| 1997/0327 | }; typedef struct Reseq Reseq; struct Reseq { Reseq *next; Tcp seg; Block *bp; ushort length; }; | |
| 1999/0302 | /* * the qlock in the Conv locks this structure */ | |
| 1997/0327 | typedef struct Tcpctl Tcpctl; struct Tcpctl { | |
| 1998/0306 | uchar state; /* Connection state */ uchar type; /* Listening or active connection */ uchar code; /* Icmp code */ | |
| 1997/0327 | struct { ulong una; /* Unacked data pointer */ ulong nxt; /* Next sequence expected */ ulong ptr; /* Data pointer */ ushort wnd; /* Tcp send window */ ulong urg; /* Urgent data pointer */ ulong wl1; ulong wl2; | |
| 1999/0401 | /* to implement tahoe and reno TCP */ ulong dupacks; /* number of duplicate acks rcvd */ int recovery; /* loss recovery flag */ ulong rxt; /* right window marker for recovery */ | |
| 1997/0327 | } snd; struct { | |
| 1998/0306 | ulong nxt; /* Receive pointer to next uchar slot */ | |
| 1997/0327 | ushort wnd; /* Receive window incoming */ ulong urg; /* Urgent pointer */ int blocked; /* set by tcprcvwin when the receive window reaches 0 */ } rcv; ulong iss; /* Initial sequence number */ ushort cwind; /* Congestion window */ ushort ssthresh; /* Slow start threshold */ int resent; /* Bytes just resent */ int irs; /* Initial received sequence */ ushort mss; /* Maximum segment size */ int rerecv; /* Overlap of data re-received */ ushort window; /* Receive window */ int max_snd; /* Max send */ ulong last_ack; /* Last acknowledge received */ | |
| 1998/0306 | uchar backoff; /* Exponential backoff counter */ | |
| 1999/0607 | int backedoff; /* ms we've backed off for rexmits */ | |
| 1998/0306 | uchar flags; /* State flags */ | |
| 1997/0327 | ulong sndcnt; /* Amount of data in send queue */ Reseq *reseq; /* Resequencing queue */ Timer timer; /* Activity timer */ Timer acktimer; /* Acknowledge timer */ Timer rtt_timer; /* Round trip timer */ | |
| 1998/1118 | Timer katimer; /* keep alive timer */ | |
| 1997/0327 | ulong rttseq; /* Round trip sequence */ int srtt; /* Smoothed round trip time (ms), stored shifted left by LOGAGAIN */ int mdev; /* Mean deviation of round trip (ms), stored shifted left by LOGDGAIN */ int kacounter; /* count down for keep alive */ uint sndsyntime; /* value of msec when the SYN was queued */ | |
| 1998/0306 | Tcphdr protohdr; /* prototype header */ | |
| 1997/0327 | }; int tcp_irtt = DEF_RTT; /* Initial guess at round trip time */ ushort tcp_mss = DEF_MSS; /* Maximum segment size to be sent */ | |
| 1998/0313 | /* MIB II counters */ typedef struct Tcpstats Tcpstats; struct Tcpstats | |
| 1997/0916 | { | |
| 1998/0313 | ulong tcpRtoAlgorithm; ulong tcpRtoMin; ulong tcpRtoMax; ulong tcpMaxConn; ulong tcpActiveOpens; /* bumped by tcpstart for TCP_CONNECT */ ulong tcpPassiveOpens; /* bumped by tcpstart for TCP_LISTEN */ ulong tcpAttemptFails; ulong tcpEstabResets; ulong tcpCurrEstab; /* kept in step with state changes by tcpsetstate */ ulong tcpInSegs; /* bumped once per call to tcpiput */ ulong tcpOutSegs; ulong tcpRetransSegs; | |
| 1999/0401 | ulong tcpRetransTimeouts; | |
| 1998/0313 | ulong InErrs; ulong OutRsts; /* resets generated by sndrst */ }; | |
| 1997/0916 | ||
| 1998/0313 | /* per-protocol private state, reached through Proto.priv */ typedef struct Tcppriv Tcppriv; struct Tcppriv { Timer *timers; /* List of active timers */ QLock tl; /* Protect timer list */ Rendez tcpr; /* used by tcpackproc */ /* MIB stats */ Tcpstats tstats; /* non-MIB stats */ ulong csumerr; /* checksum errors */ ulong hlenerr; /* header length error */ ulong lenerr; /* short packet */ ulong order; /* out of order */ | |
| 1998/0924 | /* for keeping track of tcpackproc */ int ackprocstarted; /* set to 1 by tcpstart once the kproc exists */ QLock apl; /* held by tcpstart while creating the kproc */ | |
| 1998/0313 | }; | |
| 1999/1006 | int addreseq(Tcpctl*, Tcp*, Block*, ushort); | |
| 1997/0327 | void getreseq(Tcpctl*, Tcp*, Block**, ushort*); void localclose(Conv*, char*); void procsyn(Conv*, Tcp*); | |
| 1998/0313 | void tcpiput(Proto*, uchar*, Block*); | |
| 1997/0327 | void tcpoutput(Conv*); int tcptrim(Tcpctl*, Tcp*, Block**, ushort*); void tcpstart(Conv*, int, ushort); void tcptimeout(void*); void tcpsndsyn(Tcpctl*); void tcprcvwin(Conv*); | |
| 1999/0320 | void tcpacktimer(void*); void tcpkeepalive(void*); | |
| 1999/0529 | void tcpsetkacounter(Tcpctl*); | |
| 2000/0102 | void tcprxmit(Conv*); | |
| 1997/0327 | void | |
| 1998/0306 | tcpsetstate(Conv *s, uchar newstate) | |
| 1997/0327 | { Tcpctl *tcb; | |
| 1998/0306 | uchar oldstate; | |
| 1998/0313 | Tcppriv *tpriv; | |
| 1997/0327 | ||
| 1998/0313 | tpriv = s->p->priv; | |
| 1997/0327 | tcb = (Tcpctl*)s->ptcl; oldstate = tcb->state; if(oldstate == newstate) return; | |
| 1998/0313 | if(oldstate == Established) tpriv->tstats.tcpCurrEstab--; if(newstate == Established) tpriv->tstats.tcpCurrEstab++; | |
| 1998/0630 | /** print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport, tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab ); **/ | |
| 1997/0327 | switch(newstate) { case Closed: qclose(s->rq); qclose(s->wq); qclose(s->eq); | |
| 1998/0925 | break; | |
| 1997/0327 | case Close_wait: /* Remote closes */ qhangup(s->rq, nil); break; } | |
| 1998/0925 | tcb->state = newstate; | |
| 1997/0515 | if(oldstate == Syn_sent && newstate != Closed) | |
| 1998/0313 | Fsconnected(s, nil); | |
| 1997/0327 | } static char* tcpconnect(Conv *c, char **argv, int argc) { | |
| 1997/0403 | char *e; | |
| 1997/0327 | ||
| 1997/0403 | e = Fsstdconnect(c, argv, argc); if(e != nil) return e; | |
| 1997/0327 | tcpstart(c, TCP_CONNECT, QMAX); | |
| 1997/0403 | return nil; | |
| 1997/0327 | } | |
| 1998/0306 | static int tcpstate(Conv *c, char *state, int n) | |
| 1997/0327 | { Tcpctl *s; s = (Tcpctl*)(c->ptcl); | |
| 1998/0306 | return snprint(state, n, | |
| 1997/0327 | "%s srtt %d mdev %d timer.start %d timer.count %d\n", tcpstates[s->state], s->srtt, s->mdev, s->timer.start, s->timer.count); | |
| 1998/0306 | } | |
| 1997/0327 | ||
| 1998/0306 | static int tcpinuse(Conv *c) { Tcpctl *s; s = (Tcpctl*)(c->ptcl); return s->state != Closed; | |
| 1997/0327 | } | |
| 1997/0403 | static char* tcpannounce(Conv *c, char **argv, int argc) | |
| 1997/0327 | { | |
| 1997/0403 | char *e; e = Fsstdannounce(c, argv, argc); if(e != nil) return e; | |
| 1997/0327 | tcpstart(c, TCP_LISTEN, QMAX); | |
| 1998/0313 | Fsconnected(c, nil); | |
| 1997/0403 | return nil; | |
| 1997/0327 | } static void tcpclose(Conv *c) { Tcpctl *tcb; tcb = (Tcpctl*)c->ptcl; qhangup(c->rq, nil); qhangup(c->wq, nil); qhangup(c->eq, nil); switch(tcb->state) { case Listen: /* * reset any incoming calls to this listener */ | |
| 1998/0313 | Fsconnected(c, "Hangup"); | |
| 1997/0327 | localclose(c, nil); break; case Closed: case Syn_sent: localclose(c, nil); break; case Syn_received: case Established: tcb->sndcnt++; tcb->snd.nxt++; tcpsetstate(c, Finwait1); tcpoutput(c); break; case Close_wait: tcb->sndcnt++; tcb->snd.nxt++; tcpsetstate(c, Last_ack); tcpoutput(c); break; } | |
| 1999/0302 | qunlock(c); | |
| 1997/0327 | } void tcpkick(Conv *s, int len) { Tcpctl *tcb; tcb = (Tcpctl*)s->ptcl; switch(tcb->state) { case Listen: tcb->flags |= ACTIVE; tcpsndsyn(tcb); tcpsetstate(s, Syn_sent); /* No break */ case Syn_sent: case Syn_received: case Established: case Close_wait: /* * Push data */ | |
| 2000/0102 | if(waserror()){ qunlock(s); nexterror(); } | |
| 1999/0302 | qlock(s); | |
| 1997/0327 | tcb->sndcnt += len; tcprcvwin(s); tcpoutput(s); | |
| 1999/0302 | qunlock(s); | |
| 2000/0102 | poperror(); | |
| 1997/0327 | break; default: | |
| 2000/0102 | qlock(s); | |
| 1997/0327 | localclose(s, "Hangup"); | |
| 2000/0102 | qunlock(s); | |
| 1997/0327 | } } void tcprcvwin(Conv *s) /* Call with tcb locked */ { int w; Tcpctl *tcb; tcb = (Tcpctl*)s->ptcl; w = QMAX - qlen(s->rq); if(w < 0) w = 0; tcb->rcv.wnd = w; if(w == 0) tcb->rcv.blocked = 1; } void | |
| 1999/0320 | tcpacktimer(void *v) | |
| 1997/0327 | { Tcpctl *tcb; | |
| 1999/0320 | Conv *s; | |
| 1997/0327 | ||
| 1999/0320 | s = v; | |
| 1997/0327 | tcb = (Tcpctl*)s->ptcl; | |
| 2000/0102 | if(waserror()){ qunlock(s); nexterror(); } | |
| 1999/0302 | qlock(s); | |
| 2000/0102 | if(tcb->state != Closed){ tcb->flags |= FORCE; tcprcvwin(s); tcpoutput(s); } | |
| 1999/0302 | qunlock(s); | |
| 2000/0102 | poperror(); | |
| 1997/0327 | } static void tcpcreate(Conv *c) { c->rq = qopen(QMAX, 0, tcpacktimer, c); | |
| 1999/0327 | c->wq = qopen(2*QMAX, 0, 0, 0); | |
| 1997/0327 | } | |
| 2000/0101 | static void timerstate(Tcppriv *priv, Timer *t, int newstate) { if(newstate != TimerON){ if(t->state == TimerON){ // unchain if(priv->timers == t){ priv->timers = t->next; if(t->prev != nil) panic("timerstate1"); } if(t->next) t->next->prev = t->prev; if(t->prev) t->prev->next = t->next; t->next = t->prev = nil; } } else { if(t->state != TimerON){ // chain if(t->prev != nil || t->next != nil) panic("timerstate2"); t->prev = nil; t->next = priv->timers; if(t->next) t->next->prev = t; priv->timers = t; } } t->state = newstate; } | |
| 1997/0327 | void | |
| 1998/0313 | tcpackproc(void *a) | |
| 1997/0327 | { Timer *t, *tp, *timeo; | |
| 1998/0313 | Proto *tcp; | |
| 1998/0927 | Tcppriv *priv; | |
| 1998/0925 | int loop; | |
| 1997/0327 | ||
| 1998/0313 | tcp = a; | |
| 1998/0927 | priv = tcp->priv; | |
| 1998/0313 | ||
| 1997/0327 | for(;;) { | |
| 1998/0927 | tsleep(&priv->tcpr, return0, 0, MSPTICK); | |
| 1997/0327 | ||
| 1998/0927 | qlock(&priv->tl); | |
| 1997/0327 | timeo = nil; | |
| 1998/0925 | loop = 0; | |
| 1998/0927 | for(t = priv->timers; t != nil; t = tp) { | |
| 1998/0925 | if(loop++ > 10000) panic("tcpackproc1"); | |
| 1997/0327 | tp = t->next; if(t->state == TimerON) { t->count--; if(t->count == 0) { | |
| 2000/0101 | timerstate(priv, t, TimerDONE); | |
| 1998/0925 | t->readynext = timeo; | |
| 1997/0327 | timeo = t; } } } | |
| 1998/0927 | qunlock(&priv->tl); | |
| 1997/0327 | ||
| 1998/0925 | loop = 0; for(t = timeo; t != nil; t = t->readynext) { if(loop++ > 10000) panic("tcpackproc2"); | |
| 1997/0327 | if(t->state == TimerDONE && t->func != nil) (*t->func)(t->arg); } } } void | |
| 1998/0927 | tcpgo(Tcppriv *priv, Timer *t) | |
| 1997/0327 | { if(t == nil || t->start == 0) return; | |
| 1998/0927 | qlock(&priv->tl); | |
| 1997/0327 | t->count = t->start; | |
| 2000/0101 | timerstate(priv, t, TimerON); | |
| 1998/0927 | qunlock(&priv->tl); | |
| 1997/0327 | } void | |
| 1998/0927 | tcphalt(Tcppriv *priv, Timer *t) | |
| 1997/0327 | { if(t == nil) return; | |
| 1998/0927 | qlock(&priv->tl); | |
| 2000/0101 | timerstate(priv, t, TimerOFF); | |
| 1998/0927 | qunlock(&priv->tl); | |
| 1997/0327 | } int backoff(int n) { if(n < 5) return 1 << n; return 64; } void localclose(Conv *s, char *reason) /* called with tcb locked */ { Tcpctl *tcb; Reseq *rp,*rp1; | |
| 1998/0313 | Tcppriv *tpriv; | |
| 1997/0327 | ||
| 1998/0313 | tpriv = s->p->priv; | |
| 1997/0327 | tcb = (Tcpctl*)s->ptcl; | |
| 1998/0313 | tcphalt(tpriv, &tcb->timer); tcphalt(tpriv, &tcb->rtt_timer); | |
| 1998/0925 | tcphalt(tpriv, &tcb->acktimer); | |
| 1998/1118 | tcphalt(tpriv, &tcb->katimer); | |
| 1997/0327 | /* Flush reassembly queue; nothing more can arrive */ for(rp = tcb->reseq; rp != nil; rp = rp1) { rp1 = rp->next; freeblist(rp->bp); free(rp); } | |
| 2000/0102 | tcb->reseq = nil; | |
| 1997/0327 | if(tcb->state == Syn_sent) | |
| 1998/0313 | Fsconnected(s, reason); | |
| 1998/1127 | if(s->state == Announced) wakeup(&s->listenr); | |
| 1997/0327 | qhangup(s->rq, reason); qhangup(s->wq, reason); tcpsetstate(s, Closed); } | |
| 1999/0401 | /* mtu (- TCP + IP hdr len) of 1st hop */ int tcpmtu(Conv *s) { Ipifc *ifc; int mtu; mtu = 0; ifc = findipifc(s->p->f, s->raddr, 0); if(ifc != nil) mtu = ifc->maxmtu - ifc->m->hsize - (TCP_PKT + TCP_HDRSIZE); if(mtu < 4) mtu = DEF_MSS; return mtu; } | |
| 1997/0327 | void inittcpctl(Conv *s) { Tcpctl *tcb; | |
| 1998/0306 | Tcphdr *h; | |
| 1997/0327 | tcb = (Tcpctl*)s->ptcl; memset(tcb, 0, sizeof(Tcpctl)); tcb->ssthresh = 65535; tcb->timer.start = tcp_irtt / MSPTICK; tcb->timer.func = tcptimeout; tcb->timer.arg = s; tcb->rtt_timer.start = MAX_TIME; tcb->acktimer.start = TCP_ACK / MSPTICK; tcb->acktimer.func = tcpacktimer; tcb->acktimer.arg = s; | |
| 1998/1118 | tcb->katimer.start = DEF_KAT / MSPTICK; tcb->katimer.func = tcpkeepalive; tcb->katimer.arg = s; | |
| 1998/0306 | /* create a prototype(pseudo) header */ if(ipcmp(s->laddr, IPnoaddr) == 0) | |
| 1998/0313 | findlocalip(s->p->f, s->laddr, s->raddr); | |
| 1998/0306 | h = &tcb->protohdr; memset(h, 0, sizeof(*h)); h->proto = IP_TCPPROTO; hnputs(h->tcpsport, s->lport); hnputs(h->tcpdport, s->rport); v6tov4(h->tcpsrc, s->laddr); v6tov4(h->tcpdst, s->raddr); | |
| 1997/0327 | ||
| 1999/0401 | tcb->mss = tcb->cwind = tcpmtu(s); | |
| 1997/0327 | } void tcpstart(Conv *s, int mode, ushort window) { Tcpctl *tcb; | |
| 1998/0313 | Tcppriv *tpriv; | |
| 1998/0924 | char kpname[NAMELEN]; | |
| 1997/0327 | ||
| 1998/0313 | tpriv = s->p->priv; | |
| 1998/0924 | if(tpriv->ackprocstarted == 0){ qlock(&tpriv->apl); if(tpriv->ackprocstarted == 0){ sprint(kpname, "#I%dtcpack", s->p->f->dev); kproc(kpname, tcpackproc, s->p); tpriv->ackprocstarted = 1; } qunlock(&tpriv->apl); } | |
| 1997/0327 | tcb = (Tcpctl*)s->ptcl; inittcpctl(s); tcb->window = window; tcb->rcv.wnd = window; switch(mode) { case TCP_LISTEN: | |
| 1998/0313 | tpriv->tstats.tcpPassiveOpens++; | |
| 1997/0327 | tcb->flags |= CLONE; tcpsetstate(s, Listen); break; case TCP_CONNECT: | |
| 1998/0313 | tpriv->tstats.tcpActiveOpens++; | |
| 1997/0327 | /* Send SYN, go into SYN_SENT state */ | |
| 1999/0302 | qlock(s); | |
| 2000/0102 | if(waserror()){ qunlock(s); nexterror(); } | |
| 1997/0327 | tcb->flags |= ACTIVE; tcpsndsyn(tcb); tcpsetstate(s, Syn_sent); tcpoutput(s); | |
| 1999/0302 | qunlock(s); | |
| 2000/0102 | poperror(); | |
| 1997/0327 | break; } } static char* tcpflag(ushort flag) { static char buf[128]; sprint(buf, "%d", flag>>10); /* Head len */ if(flag & URG) strcat(buf, " URG"); if(flag & ACK) strcat(buf, " ACK"); if(flag & PSH) strcat(buf, " PSH"); if(flag & RST) strcat(buf, " RST"); if(flag & SYN) strcat(buf, " SYN"); if(flag & FIN) strcat(buf, " FIN"); return buf; } Block * htontcp(Tcp *tcph, Block *data, Tcphdr *ph) { int dlen; Tcphdr *h; ushort csum; ushort hdrlen; hdrlen = TCP_HDRSIZE; if(tcph->mss) hdrlen += MSS_LENGTH; if(data) { dlen = blocklen(data); data = padblock(data, hdrlen + TCP_PKT); if(data == nil) return nil; } else { dlen = 0; data = allocb(hdrlen + TCP_PKT); if(data == nil) return nil; data->wp += hdrlen + TCP_PKT; } | |
| 1998/0306 | /* copy in pseudo ip header plus port numbers */ | |
| 1997/0327 | h = (Tcphdr *)(data->rp); | |
| 1998/0306 | memmove(h, ph, TCP_TCBPHDRSZ); /* copy in variable bits */ | |
| 1997/0327 | hnputs(h->tcplen, hdrlen + dlen); hnputl(h->tcpseq, tcph->seq); hnputl(h->tcpack, tcph->ack); hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags); hnputs(h->tcpwin, tcph->wnd); hnputs(h->tcpurg, tcph->urg); if(tcph->mss != 0){ h->tcpopt[0] = MSSOPT; h->tcpopt[1] = MSS_LENGTH; hnputs(h->tcpmss, tcph->mss); } csum = ptclcsum(data, TCP_IPLEN, hdrlen+dlen+TCP_PHDRSIZE); hnputs(h->tcpcksum, csum); | |
| 1998/0313 | /* netlog(f, Logtcpmsg, "%d > %d s %l8.8ux a %8.8lux %s w %.4ux l %d\n", | |
| 1997/0327 | tcph->source, tcph->dest, tcph->seq, tcph->ack, tcpflag((hdrlen<<10)|tcph->flags), | |
| 1998/0313 | tcph->wnd, dlen); */ | |
| 1997/0327 | return data; } int ntohtcp(Tcp *tcph, Block **bpp) { Tcphdr *h; | |
| 1998/0306 | uchar *optr; | |
| 1997/0327 | ushort hdrlen; | |
| 1999/0827 | ushort optlen; int n; | |
| 1997/0327 | *bpp = pullupblock(*bpp, TCP_PKT+TCP_HDRSIZE); if(*bpp == nil) return -1; h = (Tcphdr *)((*bpp)->rp); tcph->source = nhgets(h->tcpsport); tcph->dest = nhgets(h->tcpdport); tcph->seq = nhgetl(h->tcpseq); tcph->ack = nhgetl(h->tcpack); hdrlen = (h->tcpflag[0] & 0xf0)>>2; if(hdrlen < TCP_HDRSIZE) { freeblist(*bpp); return -1; } tcph->flags = h->tcpflag[1]; tcph->wnd = nhgets(h->tcpwin); tcph->urg = nhgets(h->tcpurg); tcph->mss = 0; | |
| 1999/0401 | tcph->len = nhgets(h->length) - (hdrlen + TCP_PKT); | |
| 1997/0327 | *bpp = pullupblock(*bpp, hdrlen+TCP_PKT); if(*bpp == nil) return -1; | |
| 1998/0313 | /* netlog(Logtcpmsg, "%d > %d s %l8.8ux a %8.8lux %s w %.4ux l %d\n", | |
| 1997/0327 | tcph->source, tcph->dest, tcph->seq, tcph->ack, tcpflag((hdrlen<<10)|tcph->flags), | |
| 1998/0313 | tcph->wnd, nhgets(h->length)-hdrlen-TCP_PKT); */ | |
| 1997/0327 | optr = h->tcpopt; | |
| 1999/0827 | n = hdrlen - TCP_HDRSIZE; while(n > 0 && *optr != EOLOPT) { if(*optr == NOOPOPT) { n--; optr++; continue; } optlen = optr[1]; if(optlen < 2 || optlen > n) | |
| 1999/0825 | break; | |
| 1999/0827 | if(0) print("tcpopt %d %d\n", *optr, optlen); switch(*optr) { | |
| 1997/0327 | case MSSOPT: if(optlen == MSS_LENGTH) | |
| 1999/0825 | tcph->mss = nhgets(optr+2); | |
| 1997/0327 | break; } | |
| 1999/0827 | n -= optlen; optr += optlen; | |
| 1997/0327 | } return hdrlen; } /* Generate an initial sequence number and put a SYN on the send queue */ void tcpsndsyn(Tcpctl *tcb) { tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16); tcb->rttseq = tcb->iss; tcb->snd.wl2 = tcb->iss; tcb->snd.una = tcb->iss; tcb->snd.ptr = tcb->rttseq; tcb->snd.nxt = tcb->rttseq; tcb->sndcnt++; tcb->flags |= FORCE; tcb->sndsyntime = msec; } | |
| 1998/0306 | /* * called with v4 (4 byte) addresses */ | |
| 1997/0327 | void | |
| 1998/0313 | sndrst(Proto *tcp, uchar *source, uchar *dest, ushort length, Tcp *seg) | |
| 1997/0327 | { Tcphdr ph; Block *hbp; | |
| 1998/0306 | uchar rflags; | |
| 1998/0313 | Tcppriv *tpriv; | |
| 1997/0327 | ||
| 1998/0313 | tpriv = tcp->priv; | |
| 1997/0327 | if(seg->flags & RST) return; | |
| 1998/0306 | /* make pseudo header */ memset(&ph, 0, sizeof(ph)); v6tov4(ph.tcpsrc, dest); v6tov4(ph.tcpdst, source); | |
| 1997/0327 | ph.proto = IP_TCPPROTO; hnputs(ph.tcplen, TCP_HDRSIZE); | |
| 1998/0306 | hnputs(ph.tcpsport, seg->dest); hnputs(ph.tcpdport, seg->source); | |
| 1997/0327 | ||
| 1998/0313 | tpriv->tstats.OutRsts++; | |
| 1997/0327 | rflags = RST; /* convince the other end that this reset is in band */ if(seg->flags & ACK) { seg->seq = seg->ack; seg->ack = 0; } else { rflags |= ACK; seg->ack = seg->seq; seg->seq = 0; if(seg->flags & SYN) seg->ack++; seg->ack += length; if(seg->flags & FIN) seg->ack++; } seg->flags = rflags; seg->wnd = 0; seg->urg = 0; seg->mss = 0; hbp = htontcp(seg, nil, &ph); if(hbp == nil) return; | |
| 1999/0817 | ipoput(tcp->f, hbp, 0, MAXTTL, DFLTTOS); | |
| 1997/0327 | } /* * send a reset to the remote side and close the conversation */ char* tcphangup(Conv *s) { Tcp seg; Tcpctl *tcb; Tcphdr ph; Block *hbp; tcb = (Tcpctl*)s->ptcl; if(waserror()){ | |
| 1999/0302 | qunlock(s); | |
| 1998/0313 | return commonerror(); | |
| 1997/0327 | } | |
| 1999/0302 | qlock(s); | |
| 1997/0327 | if(s->raddr != 0) { seg.flags = RST | ACK; seg.ack = tcb->rcv.nxt; seg.seq = tcb->snd.ptr; seg.wnd = 0; seg.urg = 0; seg.mss = 0; tcb->last_ack = tcb->rcv.nxt; hnputs(ph.tcplen, TCP_HDRSIZE); | |
| 1998/0306 | hbp = htontcp(&seg, nil, &tcb->protohdr); | |
| 1999/0817 | ipoput(s->p->f, hbp, 0, s->ttl, s->tos); | |
| 1997/0327 | } localclose(s, nil); poperror(); | |
| 1999/0302 | qunlock(s); | |
| 1997/0327 | return nil; } Conv* | |
| 1998/0306 | tcpincoming(Conv *s, Tcp *segp, uchar *src, uchar *dst) | |
| 1997/0327 | { Conv *new; Tcpctl *tcb; | |
| 1998/0306 | Tcphdr *h; | |
| 1997/0327 | ||
| 1998/0313 | new = Fsnewcall(s, src, segp->source, dst, segp->dest); | |
| 1997/0327 | if(new == nil) return nil; memmove(new->ptcl, s->ptcl, sizeof(Tcpctl)); tcb = (Tcpctl*)new->ptcl; tcb->flags &= ~CLONE; tcb->timer.arg = new; tcb->timer.state = TimerOFF; tcb->acktimer.arg = new; tcb->acktimer.state = TimerOFF; | |
| 1998/1204 | tcb->katimer.arg = new; tcb->katimer.state = TimerOFF; tcb->rtt_timer.arg = new; tcb->rtt_timer.state = TimerOFF; | |
| 1997/0327 | ||
| 1998/0306 | h = &tcb->protohdr; memset(h, 0, sizeof(*h)); h->proto = IP_TCPPROTO; hnputs(h->tcpsport, new->lport); hnputs(h->tcpdport, new->rport); v6tov4(h->tcpsrc, dst); v6tov4(h->tcpdst, src); | |
| 1997/0327 | return new; } int seq_within(ulong x, ulong low, ulong high) { if(low <= high){ if(low <= x && x <= high) return 1; } else { | |
| 1997/0802 | if(x >= low || x <= high) | |
| 1997/0327 | return 1; } return 0; } int seq_lt(ulong x, ulong y) { | |
| 1999/0401 | return (int)(x-y) < 0; | |
| 1997/0327 | } int seq_le(ulong x, ulong y) { | |
| 1999/0401 | return (int)(x-y) <= 0; | |
| 1997/0327 | } int seq_gt(ulong x, ulong y) { | |
| 1999/0401 | return (int)(x-y) > 0; | |
| 1997/0327 | } int seq_ge(ulong x, ulong y) { | |
| 1999/0401 | return (int)(x-y) >= 0; | |
| 1997/0327 | } /* * use the time between the first SYN and it's ack as the * initial round trip time */ void tcpsynackrtt(Conv *s) { Tcpctl *tcb; int delta; | |
| 1998/0313 | Tcppriv *tpriv; | |
| 1997/0327 | tcb = (Tcpctl*)s->ptcl; | |
| 1998/0313 | tpriv = s->p->priv; | |
| 1997/0327 | delta = msec - tcb->sndsyntime; tcb->srtt = delta<<LOGAGAIN; tcb->mdev = delta<<LOGDGAIN; /* halt round trip timer */ | |
| 1998/0313 | tcphalt(tpriv, &tcb->rtt_timer); | |
| 1997/0327 | } void update(Conv *s, Tcp *seg) { int rtt, delta; Tcpctl *tcb; ushort acked, expand; | |
| 1998/0313 | Tcppriv *tpriv; | |
| 1997/0327 | ||
| 1998/0313 | tpriv = s->p->priv; | |
| 1997/0327 | tcb = (Tcpctl*)s->ptcl; if(seq_gt(seg->ack, tcb->snd.nxt)) { tcb->flags |= FORCE; return; } | |
| 1999/0401 | /* added by Dong for fast retransmission */ if( seg->ack == tcb->snd.una && seg->len == 0 && seg->wnd == tcb->snd.wnd ) { /* this is a pure ack w/o window update */ // print("dupack %lud ack %lud sndwnd %d advwin %d\n", // tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd); if(++tcb->snd.dupacks == TCPREXMTTHRESH) { /* * tahoe tcp rxt the packet, half sshthresh, * and set cwnd to one packet */ tcb->snd.recovery = 1; tcb->snd.rxt = tcb->snd.nxt; // print("fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt); | |
| 2000/0102 | tcprxmit(s); | |
| 1999/0401 | } else { /* do reno tcp here. */ } } | |
| 1997/0327 | if(seq_ge(seg->ack,tcb->snd.wl2)) if(seq_gt(seg->seq,tcb->snd.wl1) || (seg->seq == tcb->snd.wl1)) { if(seg->wnd != 0 && tcb->snd.wnd == 0) tcb->snd.ptr = tcb->snd.una; tcb->snd.wnd = seg->wnd; tcb->snd.wl1 = seg->seq; tcb->snd.wl2 = seg->ack; } if(!seq_gt(seg->ack, tcb->snd.una)) return; | |
| 1999/0401 | /* * any positive ack turns off fast rxt, * (should we do new-reno on partial acks?) */ if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) { tcb->snd.dupacks = 0; tcb->snd.recovery = 0; } else { // print("rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind); } | |
| 1997/0327 | /* Compute the new send window size */ acked = seg->ack - tcb->snd.una; | |
| 1999/0401 | /* avoid slow start and timers for SYN acks */ if((tcb->flags & SYNACK) == 0) { tcb->flags |= SYNACK; acked--; tcb->sndcnt--; goto done; } /* slow start as long as we're not recovering from lost packets */ if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) { | |
| 1997/0327 | if(tcb->cwind < tcb->ssthresh) { expand = tcb->mss; if(acked < expand) expand = acked; } else expand = ((int)tcb->mss * tcb->mss) / tcb->cwind; if(tcb->cwind + expand < tcb->cwind) expand = 65535 - tcb->cwind; if(tcb->cwind + expand > tcb->snd.wnd) expand = tcb->snd.wnd - tcb->cwind; if(expand != 0) tcb->cwind += expand; } /* Adjust the timers according to the round trip time */ if(tcb->rtt_timer.state == TimerON && seq_ge(seg->ack, tcb->rttseq)) { | |
| 1998/0313 | tcphalt(tpriv, &tcb->rtt_timer); | |
| 1997/0327 | if((tcb->flags&RETRAN) == 0) { tcb->backoff = 0; | |
| 1999/0607 | tcb->backedoff = 0; | |
| 1997/0327 | rtt = tcb->rtt_timer.start - tcb->rtt_timer.count; if(rtt == 0) rtt = 1; /* otherwise all close systems will rexmit in 0 time */ rtt *= MSPTICK; if (tcb->srtt == 0) { tcb->srtt = rtt << LOGAGAIN; tcb->mdev = rtt << LOGDGAIN; } else { delta = rtt - (tcb->srtt>>LOGAGAIN); tcb->srtt += delta; if(tcb->srtt <= 0) tcb->srtt = 1; delta = abs(delta) - (tcb->mdev>>LOGDGAIN); tcb->mdev += delta; if(tcb->mdev <= 0) tcb->mdev = 1; } } } | |
| 1999/0401 | done: | |
| 1997/0327 | qdiscard(s->wq, acked); tcb->sndcnt -= acked; tcb->snd.una = seg->ack; if(seq_gt(seg->ack, tcb->snd.urg)) tcb->snd.urg = seg->ack; | |
| 1998/0313 | tcphalt(tpriv, &tcb->timer); | |
| 1997/0327 | if(tcb->snd.una != tcb->snd.nxt) | |
| 1998/0313 | tcpgo(tpriv, &tcb->timer); | |
| 1997/0327 | if(seq_lt(tcb->snd.ptr, tcb->snd.una)) tcb->snd.ptr = tcb->snd.una; tcb->flags &= ~RETRAN; tcb->backoff = 0; | |
| 1999/0607 | tcb->backedoff = 0; | |
| 1997/0327 | } void | |
| 1998/0313 | tcpiput(Proto *tcp, uchar*, Block *bp) | |
| 1997/0327 | { Tcp seg; Tcphdr *h; int hdrlen; Tcpctl *tcb; ushort length; | |
| 1998/0306 | uchar source[IPaddrlen], dest[IPaddrlen]; | |
| 1997/0327 | Conv *spec, *gen, *s, **p; | |
| 1998/0313 | Fs *f; Tcppriv *tpriv; | |
| 1997/0327 | ||
| 1998/0313 | f = tcp->f; tpriv = tcp->priv; tpriv->tstats.tcpInSegs++; | |
| 1997/0327 | h = (Tcphdr*)(bp->rp); | |
| 1998/0306 | v4tov6(dest, h->tcpdst); v4tov6(source, h->tcpsrc); | |
| 1997/0327 | length = nhgets(h->length); h->Unused = 0; hnputs(h->tcplen, length-TCP_PKT); if(ptclcsum(bp, TCP_IPLEN, length-TCP_IPLEN)) { | |
| 1998/0313 | tpriv->csumerr++; netlog(f, Logtcp, "bad tcp proto cksum\n"); | |
| 1997/0327 | freeblist(bp); return; } hdrlen = ntohtcp(&seg, &bp); if(hdrlen < 0){ | |
| 1998/0313 | tpriv->hlenerr++; netlog(f, Logtcp, "bad tcp hdr len\n"); | |
| 1997/0327 | return; } /* trim the packet to the size claimed by the datagram */ length -= hdrlen+TCP_PKT; bp = trimblock(bp, hdrlen+TCP_PKT, length); if(bp == nil){ | |
| 1998/0313 | tpriv->lenerr++; netlog(f, Logtcp, "tcp len < 0 after trim\n"); | |
| 1997/0327 | return; } | |
| 1999/0302 | /* lock protocol while searching for a conversation */ qlock(tcp); | |
| 1997/0327 | /* Look for a connection. failing that look for a listener. */ | |
| 1998/0313 | for(p = tcp->conv; *p; p++) { | |
| 1997/0327 | s = *p; | |
| 1998/0724 | tcb = (Tcpctl*)s->ptcl; | |
| 1998/0306 | if(s->rport == seg.source) if(s->lport == seg.dest) | |
| 1998/0724 | if(tcb->state != Closed) | |
| 1998/0306 | if(ipcmp(s->raddr, source) == 0) | |
| 1997/0327 | break; } s = *p; if(s){ /* can't send packets to a listener */ tcb = (Tcpctl*)s->ptcl; if(tcb->state == Listen){ | |
| 1999/0302 | qunlock(tcp); | |
| 1997/0327 | freeblist(bp); return; } } if(s == nil && (seg.flags & SYN)) { /* * dump packets with bogus flags */ if(seg.flags & RST){ | |
| 1999/0302 | qunlock(tcp); | |
| 1997/0327 | freeblist(bp); return; } | |
| 1999/0302 | ||
| 1997/0327 | if(seg.flags & ACK) { | |
| 1999/0302 | qunlock(tcp); | |
| 1998/0313 | sndrst(tcp, source, dest, length, &seg); | |
| 1997/0327 | freeblist(bp); return; } /* * find a listener specific to this port (spec) or, * failing that, a general one (gen) */ gen = nil; spec = nil; | |
| 1998/0313 | for(p = tcp->conv; *p; p++) { | |
| 1997/0327 | s = *p; tcb = (Tcpctl*)s->ptcl; if((tcb->flags & CLONE) == 0) continue; if(tcb->state != Listen) continue; | |
| 1998/0306 | if(s->rport == 0 && ipcmp(s->raddr, IPnoaddr) == 0) { | |
| 1997/0327 | if(s->lport == seg.dest){ spec = s; break; } if(s->lport == 0) gen = s; } } s = nil; if(spec != nil) s = tcpincoming(spec, &seg, source, dest); else if(gen != nil) s = tcpincoming(gen, &seg, source, dest); } if(s == nil) { | |
| 1999/0302 | qunlock(tcp); | |
| 1998/0313 | sndrst(tcp, source, dest, length, &seg); | |
| 1999/0302 | freeblist(bp); | |
| 1997/0327 | return; } /* The rest of the input state machine is run with the control block * locked and implements the state machine directly out of the RFC. * Out-of-band data is ignored - it was always a bad idea. */ tcb = (Tcpctl*)s->ptcl; | |
| 2000/0102 | if(waserror()){ qunlock(s); nexterror(); } | |
| 1999/0302 | qlock(s); qunlock(tcp); | |
| 1997/0327 | ||
| 1998/1118 | if(tcb->kacounter > 0) | |
| 1999/0529 | tcb->kacounter = MAXBACKMS / (tcb->katimer.start*MSPTICK); if(tcb->kacounter < 3) tcb->kacounter = 3; | |
| 1998/1118 | ||
| 1997/0327 | switch(tcb->state) { case Closed: | |
| 1998/0313 | sndrst(tcp, source, dest, length, &seg); | |
| 1997/0327 | goto raise; case Listen: if(seg.flags & SYN) { procsyn(s, &seg); tcpsndsyn(tcb); tcpsetstate(s, Syn_received); if(length != 0 || (seg.flags & FIN)) break; } goto raise; case Syn_sent: if(seg.flags & ACK) { if(!seq_within(seg.ack, tcb->iss+1, tcb->snd.nxt)) { | |
| 1998/0313 | sndrst(tcp, source, dest, length, &seg); | |
| 1997/0327 | goto raise; } } if(seg.flags & RST) { if(seg.flags & ACK) localclose(s, Econrefused); goto raise; } if(seg.flags & SYN) { procsyn(s, &seg); if(seg.flags & ACK){ update(s, &seg); tcpsynackrtt(s); tcpsetstate(s, Established); } else tcpsetstate(s, Syn_received); if(length != 0 || (seg.flags & FIN)) break; freeblist(bp); goto output; } else freeblist(bp); | |
| 1999/0302 | qunlock(s); | |
| 2000/0102 | poperror(); | |
| 1997/0327 | return; case Syn_received: /* doesn't matter if it's the correct ack, we're just trying to set timing */ if(seg.flags & ACK) tcpsynackrtt(s); break; } /* Cut the data to fit the receive window */ if(tcptrim(tcb, &seg, &bp, &length) == -1) { | |
| 1998/0313 | netlog(f, Logtcp, "tcp len < 0, %lux\n", seg.seq); | |
| 1997/0327 | update(s, &seg); if(tcb->sndcnt == 0 && tcb->state == Closing) { | |
| 1998/1204 | tcphalt(tpriv, &tcb->rtt_timer); tcphalt(tpriv, &tcb->acktimer); tcphalt(tpriv, &tcb->katimer); | |
| 1997/0327 | tcpsetstate(s, Time_wait); tcb->timer.start = MSL2*(1000 / MSPTICK); | |
| 1998/0313 | tcpgo(tpriv, &tcb->timer); | |
| 1997/0327 | } if(!(seg.flags & RST)) { tcb->flags |= FORCE; goto output; } | |
| 1999/0302 | qunlock(s); | |
| 2000/0102 | poperror(); | |
| 1997/0327 | return; } /* Cannot accept so answer with a rst */ if(length && tcb->state == Closed) { | |
| 1998/0313 | sndrst(tcp, source, dest, length, &seg); | |
| 1997/0327 | goto raise; } /* The segment is beyond the current receive pointer so * queue the data in the resequence queue */ if(seg.seq != tcb->rcv.nxt) if(length != 0 || (seg.flags & (SYN|FIN))) { update(s, &seg); | |
| 1998/0313 | tpriv->order++; | |
| 1999/1006 | if(addreseq(tcb, &seg, bp, length) < 0) print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport); | |
| 1997/0327 | tcb->flags |= FORCE; goto output; } /* * keep looping till we've processed this packet plus any * adjacent packets in the resequence queue */ for(;;) { if(seg.flags & RST) { | |
| 1998/0313 | if(tcb->state == Established) tpriv->tstats.tcpEstabResets++; | |
| 1997/0327 | localclose(s, Econrefused); goto raise; } if((seg.flags&ACK) == 0) goto raise; switch(tcb->state) { case Syn_received: if(!seq_within(seg.ack, tcb->snd.una+1, tcb->snd.nxt)){ | |
| 1998/0313 | sndrst(tcp, source, dest, length, &seg); | |
| 1997/0327 | goto raise; } update(s, &seg); tcpsetstate(s, Established); case Established: case Close_wait: update(s, &seg); break; case Finwait1: update(s, &seg); if(tcb->sndcnt == 0){ | |
| 1998/1204 | tcphalt(tpriv, &tcb->rtt_timer); tcphalt(tpriv, &tcb->acktimer); | |
| 1999/0529 | tcpsetkacounter(tcb); | |
| 1997/0327 | tcpsetstate(s, Finwait2); | |
| 1998/1118 | tcb->katimer.start = MSL2 * (1000 / MSPTICK); | |
| 1998/1202 | tcpgo(tpriv, &tcb->katimer); | |
| 1997/0327 | } break; case Finwait2: update(s, &seg); break; case Closing: update(s, &seg); if(tcb->sndcnt == 0) { | |
| 1998/1204 | tcphalt(tpriv, &tcb->rtt_timer); tcphalt(tpriv, &tcb->acktimer); tcphalt(tpriv, &tcb->katimer); | |
| 1997/0327 | tcpsetstate(s, Time_wait); tcb->timer.start = MSL2*(1000 / MSPTICK); | |
| 1998/0313 | tcpgo(tpriv, &tcb->timer); | |
| 1997/0327 | } break; case Last_ack: update(s, &seg); if(tcb->sndcnt == 0) { localclose(s, nil); goto raise; } case Time_wait: tcb->flags |= FORCE; if(tcb->timer.state != TimerON) | |
| 1998/0313 | tcpgo(tpriv, &tcb->timer); | |
| 1997/0327 | } if((seg.flags&URG) && seg.urg) { if(seq_gt(seg.urg + seg.seq, tcb->rcv.urg)) { tcb->rcv.urg = seg.urg + seg.seq; pullblock(&bp, seg.urg); } } else if(seq_gt(tcb->rcv.nxt, tcb->rcv.urg)) tcb->rcv.urg = tcb->rcv.nxt; if(length == 0) { if(bp != nil) freeblist(bp); } else { switch(tcb->state){ default: /* Ignore segment text */ if(bp != nil) freeblist(bp); break; case Syn_received: case Established: case Finwait1: /* If we still have some data place on * receive queue */ if(bp) { | |
| 1998/0923 | bp = packblock(bp); if(bp == nil) panic("tcp packblock"); qpassnolim(s->rq, bp); | |
| 1997/0327 | bp = nil; } tcb->rcv.nxt += length; tcprcvwin(s); | |
| 1998/0918 | /* * force an ack if we've got 2 segs * and the user isn't backing up */ if(tcb->rcv.nxt - tcb->last_ack >= 2*tcb->mss && | |
| 1998/0925 | qlen(s->rq) < 8*tcb->mss){ | |
| 1997/0327 | tcb->flags |= FORCE; | |
| 1998/0925 | } | |
| 1998/1008 | if(tcb->acktimer.state != TimerON) tcpgo(tpriv, &tcb->acktimer); | |
| 1997/0327 | break; case Finwait2: /* no process to read the data, send a reset */ if(bp != nil) freeblist(bp); | |
| 1998/0313 | sndrst(tcp, source, dest, length, &seg); | |
| 1999/0302 | qunlock(s); | |
| 2000/0102 | poperror(); | |
| 1997/0327 | return; } } if(seg.flags & FIN) { tcb->flags |= FORCE; switch(tcb->state) { case Syn_received: case Established: tcb->rcv.nxt++; tcpsetstate(s, Close_wait); break; case Finwait1: tcb->rcv.nxt++; if(tcb->sndcnt == 0) { | |
| 1998/1204 | tcphalt(tpriv, &tcb->rtt_timer); tcphalt(tpriv, &tcb->acktimer); tcphalt(tpriv, &tcb->katimer); | |
| 1997/0327 | tcpsetstate(s, Time_wait); tcb->timer.start = MSL2*(1000/MSPTICK); | |
| 1998/0313 | tcpgo(tpriv, &tcb->timer); | |
| 1997/0327 | } else tcpsetstate(s, Closing); break; case Finwait2: tcb->rcv.nxt++; | |
| 1998/1204 | tcphalt(tpriv, &tcb->rtt_timer); tcphalt(tpriv, &tcb->acktimer); tcphalt(tpriv, &tcb->katimer); | |
| 1997/0327 | tcpsetstate(s, Time_wait); tcb->timer.start = MSL2 * (1000/MSPTICK); | |
| 1998/0313 | tcpgo(tpriv, &tcb->timer); | |
| 1997/0327 | break; case Close_wait: case Closing: case Last_ack: break; case Time_wait: | |
| 1998/0313 | tcpgo(tpriv, &tcb->timer); | |
| 1997/0327 | break; } } /* * get next adjacent segment from the resequence queue. * dump/trim any overlapping segments */ for(;;) { if(tcb->reseq == nil) goto output; if(seq_ge(tcb->rcv.nxt, tcb->reseq->seg.seq) == 0) goto output; getreseq(tcb, &seg, &bp, &length); if(tcptrim(tcb, &seg, &bp, &length) == 0) break; } } output: tcpoutput(s); | |
| 1999/0302 | qunlock(s); | |
| 2000/0102 | poperror(); | |
| 1997/0327 | return; raise: | |
| 1999/0302 | qunlock(s); | |
| 2000/0102 | poperror(); | |
| 1997/0327 | freeblist(bp); tcpkick(s, 0); } /* * always enters and exits with the tcb locked */ /* * tcpoutput: build and transmit segments for conversation s. * Returns at once in the Listen, Closed and Finwait2 states. * Otherwise loops (at most 100 segments per call), sizing each * segment from the send window, congestion window and mss, and * hands every built segment to ipoput(). */ void tcpoutput(Conv *s) { int x; Tcp seg; int msgs; Tcpctl *tcb; Block *hbp, *bp; | |
| 1999/0401 | int sndcnt, n; | |
| 1997/0327 | ulong ssize, dsize, usable, sent; | |
| 1998/0313 | Fs *f; Tcppriv *tpriv; | |
| 1997/0327 | ||
| 1998/0313 | f = s->p->f; tpriv = s->p->priv; | |
| 1997/0327 | tcb = (Tcpctl*)s->ptcl; switch(tcb->state) { case Listen: case Closed: case Finwait2: return; } /* force an ack when a window has opened up */ if(tcb->rcv.blocked && tcb->rcv.wnd > 0){ tcb->rcv.blocked = 0; tcb->flags |= FORCE; } for(msgs = 0; msgs < 100; msgs++) { sndcnt = tcb->sndcnt; /* sent = amount between the oldest unacked sequence number and the send pointer */ sent = tcb->snd.ptr - tcb->snd.una; /* Don't send anything else until our SYN has been acked */ | |
| 1998/0901 | if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0) | |
| 1997/0327 | break; /* Compute usable segment based on offered window and limit * window probes to one */ if(tcb->snd.wnd == 0){ if(sent != 0) { if ((tcb->flags&FORCE) == 0) break; tcb->snd.ptr = tcb->snd.una; } usable = 1; } else { usable = tcb->cwind; if(tcb->snd.wnd < usable) usable = tcb->snd.wnd; usable -= sent; } /* segment size is the smallest of: what is queued but unsent, the usable window, and mss */ ssize = sndcnt-sent; if(usable < ssize) ssize = usable; if(tcb->mss < ssize) ssize = tcb->mss; dsize = ssize; seg.urg = 0; /* nothing to send and no ack is being forced: stop */ if(ssize == 0) if((tcb->flags&FORCE) == 0) break; | |
| 1999/0401 | /* avoid sending short packets unless... */ if(dsize != 0) { /* ...we have a full segment */ if(dsize != tcb->mss) /* ...the data was just queued */ if((dsize + sent) != sndcnt) /* ...we're being forced */ if(!(tcb->flags&FORCE)) /* ...we have at least half a window's worth to send */ if(dsize < tcb->snd.wnd/2 || tcb->snd.wnd == 0) return; } | |
| 1998/0313 | tcphalt(tpriv, &tcb->acktimer); | |
| 1997/0327 | tcb->flags &= ~FORCE; tcprcvwin(s); /* By default we will generate an ack */ seg.source = s->lport; seg.dest = s->rport; seg.flags = ACK; seg.mss = 0; switch(tcb->state){ case Syn_sent: seg.flags = 0; if(tcb->snd.ptr == tcb->iss){ seg.flags |= SYN; /* NOTE(review): presumably ssize includes the SYN's sequence number, hence one less data byte - confirm */ dsize--; | |
| 1998/0813 | seg.mss = tcpmtu(s); } break; case Syn_received: /* * don't send any data with a SYN/ACK packet * because Linux rejects the packet in its * attempt to solve the SYN attack problem */ if(tcb->snd.ptr == tcb->iss){ seg.flags |= SYN; dsize = 0; | |
| 1998/0831 | /* with dsize = 0 above, snd.ptr advances by exactly one below */ ssize = 1; | |
| 1997/0327 | seg.mss = tcpmtu(s); } break; } tcb->last_ack = tcb->rcv.nxt; seg.seq = tcb->snd.ptr; seg.ack = tcb->rcv.nxt; seg.wnd = tcb->rcv.wnd; /* Pull out data to send */ bp = nil; if(dsize != 0) { bp = qcopy(s->wq, dsize, sent); /* NOTE(review): a length mismatch is taken to mean the last counted unit is the FIN rather than data - confirm against how sndcnt is maintained */ if(BLEN(bp) != dsize) { seg.flags |= FIN; dsize--; } | |
| 1998/0313 | netlog(f, Logtcp, "qcopy: dlen %d blen %d sndcnt %d qlen %d sent %d rp[0] %d\n", | |
| 1997/0327 | dsize, BLEN(bp), sndcnt, qlen(s->wq), sent, bp->rp[0]); } /* this segment carries the last of what is queued: set PSH */ if(sent+dsize == sndcnt) seg.flags |= PSH; /* keep track of balance of resent data */ | |
| 1999/0401 | if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) { | |
| 1997/0327 | n = tcb->snd.nxt - tcb->snd.ptr; if(ssize < n) n = ssize; tcb->resent += n; | |
| 1999/0401 | tpriv->tstats.tcpRetransSegs++; | |
| 1997/0327 | } tcb->snd.ptr += ssize; /* Pull up the send pointer so we can accept acks * for this window */ if(seq_gt(tcb->snd.ptr,tcb->snd.nxt)) tcb->snd.nxt = tcb->snd.ptr; /* Build header, link data and compute cksum */ | |
| 1998/0306 | hbp = htontcp(&seg, bp, &tcb->protohdr); | |
| 1997/0327 | /* no header block: free the data and give up on this call */ if(hbp == nil) { freeblist(bp); return; } /* Start the transmission timers if there is new data and we * expect acknowledges */ if(ssize != 0){ | |
| 1998/0918 | /* round trip dependency */ | |
| 1997/0327 | x = backoff(tcb->backoff) * (tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK; | |
| 1998/0918 | /* take into account delayed ack */ if(sent <= 2*tcb->mss) x += TCP_ACK/MSPTICK; /* sanity check */ | |
| 1997/0327 | /* cap the timer start value at 10000/MSPTICK ticks */ if(x > (10000/MSPTICK)) x = 10000/MSPTICK; tcb->timer.start = x; if(tcb->timer.state != TimerON) | |
| 1998/0313 | tcpgo(tpriv, &tcb->timer); | |
| 1997/0327 | ||
| 1999/0401 | /* If round trip timer isn't running, start it. * measure the longest packet only in case the * transmission time dominates RTT */ if(tcb->rtt_timer.state != TimerON) if(ssize == tcb->mss) { | |
| 1998/0313 | tcpgo(tpriv, &tcb->rtt_timer); | |
| 1997/0327 | tcb->rttseq = tcb->snd.ptr; } } | |
| 1998/0313 | tpriv->tstats.tcpOutSegs++; | |
| 1998/1118 | /* keepalive counter is set: call tcpgo on the keepalive timer for every segment sent */ if(tcb->kacounter > 0) tcpgo(tpriv, &tcb->katimer); | |
| 1999/0817 | ipoput(f, hbp, 0, s->ttl, s->tos); | |
| 1997/0327 | } } /* | |
| 1998/0306 | * the BSD convention (hack?) for keep alives. resend last uchar acked. | |
| 1997/0327 | */ /* * tcpsendka: send one keepalive segment for conversation s. * The segment has ACK and PSH set and seq = snd.una-1. * In Finwait2 it carries FIN and no data; in every other state * it carries a one-byte block. */ void | |
| 1998/1118 | tcpsendka(Conv *s) | |
| 1997/0327 | { Tcp seg; Tcpctl *tcb; Block *hbp,*dbp; tcb = (Tcpctl*)s->ptcl; dbp = nil; seg.urg = 0; seg.source = s->lport; seg.dest = s->rport; seg.flags = ACK|PSH; seg.mss = 0; /* one below the oldest unacked sequence number */ seg.seq = tcb->snd.una-1; seg.ack = tcb->rcv.nxt; seg.wnd = tcb->rcv.wnd; tcb->last_ack = tcb->rcv.nxt; if(tcb->state == Finwait2){ seg.flags |= FIN; } else { /* one-byte payload; only wp is advanced, the byte itself is not written here */ dbp = allocb(1); dbp->wp++; } /* Build header, link data and compute cksum */ | |
| 1998/0306 | hbp = htontcp(&seg, dbp, &tcb->protohdr); | |
| 1997/0327 | /* no header block: free the payload (if any) and send nothing */ if(hbp == nil) { freeblist(dbp); return; } | |
| 1999/0817 | ipoput(s->p->f, hbp, 0, s->ttl, s->tos); | |
| 1997/0327 | } | |
| 1998/1118 | /* * if we've timed out, close the connection * otherwise, send a keepalive and restart the timer */ | |
| 1997/0327 | void | |
| 1999/0529 | tcpsetkacounter(Tcpctl *tcb) { tcb->kacounter = MAXBACKMS / (tcb->katimer.start*MSPTICK);; if(tcb->kacounter < 3) tcb->kacounter = 3; } void | |
| 1999/0320 | tcpkeepalive(void *v) | |
| 1998/1118 | { Tcpctl *tcb; | |
| 1999/0320 | Conv *s; | |
| 1998/1118 | ||
| 1999/0320 | s = v; | |
| 1998/1118 | tcb = (Tcpctl*)s->ptcl; | |
| 2000/0102 | if(waserror()){ | |
| 1999/0302 | qunlock(s); | |
| 2000/0102 | nexterror(); | |
| 1998/1118 | } | |
| 2000/0102 | qlock(s); if(tcb->state != Closed){ if(--(tcb->kacounter) <= 0) { localclose(s, Etimedout); } else { tcpsendka(s); tcpgo(s->p->priv, &tcb->katimer); } } qunlock(s); poperror(); | |
| 1998/1118 | } /* * start keepalive timer */ char* tcpstartka(Conv *s, char **f, int n) { Tcpctl *tcb; int x; tcb = (Tcpctl*)s->ptcl; if(n > 1){ x = atoi(f[1]); if(x >= MSPTICK) tcb->katimer.start = x/MSPTICK; } | |
| 1999/0529 | tcpsetkacounter(tcb); | |
| 1998/1118 | tcpgo(s->p->priv, &tcb->katimer); return nil; } void | |
| 2000/0102 | tcprxmit(Conv *s) | |
| 1997/0327 | { Tcpctl *tcb; tcb = (Tcpctl*)s->ptcl; tcb->flags |= RETRAN|FORCE; tcb->snd.ptr = tcb->snd.una; | |
| 1999/0401 | /* * We should be halving the slow start thershhold (down to one * mss) but leaving it at mss seems to work well enough | |
| 1997/0327 | */ | |
| 1999/0401 | // win = (tcb->cwind<tcb->snd.wnd)?tcb->cwind:tcb->snd.wnd/ tcb->mss; // win = win/2; // if ( win < 2 ) // win = 2; // tcb->ssthresh = win * tcb->mss; tcb->ssthresh = tcb->mss; | |
| 1997/0327 | ||
| 1999/0401 | /* * pull window down to a single packet */ | |
| 1997/0327 | tcb->cwind = tcb->mss; tcpoutput(s); } void tcptimeout(void *arg) { Conv *s; Tcpctl *tcb; | |
| 1999/0607 | int maxback; | |
| 1999/0401 | Tcppriv *tpriv; | |
| 1997/0327 | s = (Conv*)arg; | |
| 1999/0401 | tpriv = s->p->priv; | |
| 1997/0327 | tcb = (Tcpctl*)s->ptcl; | |
| 2000/0102 | if(waserror()){ qunlock(s); nexterror(); } qlock(s); | |
| 1997/0327 | switch(tcb->state){ default: tcb->backoff++; if(tcb->state == Syn_sent) | |
| 1999/0529 | maxback = MAXBACKMS/2; | |
| 1997/0327 | else | |
| 1999/0529 | maxback = MAXBACKMS; | |
| 1999/0607 | tcb->backedoff += tcb->timer.start * MSPTICK; if(tcb->backedoff >= maxback) { | |
| 1997/0327 | localclose(s, Etimedout); break; } | |
| 2000/0102 | tcprxmit(s); | |
| 1999/0401 | tpriv->tstats.tcpRetransTimeouts++; tcb->snd.dupacks = 0; | |
| 1997/0327 | break; case Time_wait: localclose(s, nil); break; | |
| 2000/0102 | case Closed: break; | |
| 1997/0327 | } | |
| 2000/0102 | qunlock(s); poperror(); | |
| 1997/0327 | } int inwindow(Tcpctl *tcb, int seq) { return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1); } void procsyn(Conv *s, Tcp *seg) { Tcpctl *tcb; int mtu; tcb = (Tcpctl*)s->ptcl; tcb->flags |= FORCE; tcb->rcv.nxt = seg->seq + 1; tcb->rcv.urg = tcb->rcv.nxt; tcb->snd.wl1 = seg->seq; tcb->irs = seg->seq; tcb->snd.wnd = seg->wnd; if(seg->mss != 0) tcb->mss = seg->mss; tcb->max_snd = seg->wnd; mtu = tcpmtu(s); if(tcb->mss > mtu) tcb->mss = mtu; tcb->cwind = tcb->mss; } | |
| 1999/1006 | int | |
| 1997/0327 | addreseq(Tcpctl *tcb, Tcp *seg, Block *bp, ushort length) { Reseq *rp, *rp1; | |
| 1999/0811 | int i; | |
| 1999/1006 | static int once; | |
| 1997/0327 | rp = malloc(sizeof(Reseq)); if(rp == nil){ freeblist(bp); /* bp always consumed by add_reseq */ | |
| 1999/1006 | return 0; | |
| 1997/0327 | } rp->seg = *seg; rp->bp = bp; rp->length = length; /* Place on reassembly list sorting by starting seq number */ rp1 = tcb->reseq; if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) { rp->next = rp1; tcb->reseq = rp; | |
| 1999/1006 | return 0; | |
| 1997/0327 | } | |
| 1999/0810 | length = 0; | |
| 1998/0808 | for(i = 0;; i++) { | |
| 1999/0810 | length += rp1->length; | |
| 1997/0327 | if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) { rp->next = rp1->next; rp1->next = rp; break; } rp1 = rp1->next; } | |
| 2000/0101 | if(length > QMAX && once++ == 0){ | |
| 1999/0810 | print("very long tcp resequence queue: %d\n", length); | |
| 1999/1006 | for(rp1 = tcb->reseq, i = 0; i < 10 && rp1 != nil; rp1 = rp1->next, i++) | |
| 1999/1224 | print("0x%lux 0x%lux 0x%ux\n", rp1->seg.seq, rp1->seg.ack, | |
| 1999/1006 | rp1->seg.flags); return -1; } return 0; | |
| 1997/0327 | } void getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length) { Reseq *rp; rp = tcb->reseq; if(rp == nil) return; tcb->reseq = rp->next; *seg = rp->seg; *bp = rp->bp; *length = rp->length; free(rp); } int tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length) { ushort len; Block *nbp; | |
| 1998/0306 | uchar accept; | |
| 1997/0327 | int dupcnt, excess; accept = 0; len = *length; if(seg->flags & SYN) len++; if(seg->flags & FIN) len++; if(tcb->rcv.wnd == 0) { if(len == 0 && seg->seq == tcb->rcv.nxt) return 0; } else { /* Some part of the segment should be in the window */ if(inwindow(tcb,seg->seq)) accept++; else if(len != 0) { if(inwindow(tcb, seg->seq+len-1) || seq_within(tcb->rcv.nxt, seg->seq,seg->seq+len-1)) accept++; } } if(!accept) { freeblist(*bp); return -1; } dupcnt = tcb->rcv.nxt - seg->seq; if(dupcnt > 0){ tcb->rerecv += dupcnt; if(seg->flags & SYN){ seg->flags &= ~SYN; seg->seq++; if (seg->urg > 1) seg->urg--; else seg->flags &= ~URG; dupcnt--; } if(dupcnt > 0){ pullblock(bp, (ushort)dupcnt); seg->seq += dupcnt; *length -= dupcnt; if (seg->urg > dupcnt) seg->urg -= dupcnt; else { seg->flags &= ~URG; seg->urg = 0; } } } excess = seg->seq + *length - (tcb->rcv.nxt + tcb->rcv.wnd); if(excess > 0) { tcb->rerecv += excess; *length -= excess; nbp = copyblock(*bp, *length); freeblist(*bp); *bp = nbp; seg->flags &= ~FIN; } return 0; } void | |
| 1998/0313 | tcpadvise(Proto *tcp, Block *bp, char *msg) | |
| 1997/0327 | { Tcphdr *h; Tcpctl *tcb; | |
| 1998/0306 | uchar source[IPaddrlen]; uchar dest[IPaddrlen]; | |
| 1997/0327 | ushort psource, pdest; Conv *s, **p; h = (Tcphdr*)(bp->rp); | |
| 1998/0306 | v4tov6(dest, h->tcpdst); v4tov6(source, h->tcpsrc); | |
| 1997/0327 | psource = nhgets(h->tcpsport); pdest = nhgets(h->tcpdport); /* Look for a connection */ | |
| 1999/0302 | qlock(tcp); | |
| 1998/0313 | for(p = tcp->conv; *p; p++) { | |
| 1997/0327 | s = *p; | |
| 1998/0724 | tcb = (Tcpctl*)s->ptcl; | |
| 1998/0306 | if(s->rport == pdest) if(s->lport == psource) | |
| 1998/0724 | if(tcb->state != Closed) | |
| 1998/0306 | if(ipcmp(s->raddr, dest) == 0) if(ipcmp(s->laddr, source) == 0){ | |
| 1999/0302 | qlock(s); qunlock(tcp); | |
| 1997/0327 | switch(tcb->state){ case Syn_sent: localclose(s, msg); break; } | |
| 1999/0302 | qunlock(s); freeblist(bp); return; | |
| 1997/0327 | } } | |
| 1999/0302 | qunlock(tcp); | |
| 1997/0327 | freeblist(bp); } | |
| 1998/0306 | /* called with c->car qlocked */ | |
| 1997/0327 | char* tcpctl(Conv* c, char** f, int n) { if(n == 1 && strcmp(f[0], "hangup") == 0) return tcphangup(c); | |
| 1998/1118 | if(n >= 1 && strcmp(f[0], "keepalive") == 0) return tcpstartka(c, f, n); | |
| 1997/0327 | return "unknown control request"; } | |
| 1997/0916 | int | |
| 1998/0313 | tcpstats(Proto *tcp, char *buf, int len) | |
| 1997/0916 | { | |
| 1998/0630 | Tcppriv *tpriv; | |
| 1998/0306 | ||
| 1998/0630 | tpriv = tcp->priv; | |
| 1999/0401 | return snprint(buf, len, "%lud %lud %lud %lud %lud %lud %lud %lud %lud %lud %lud %lud %lud %lud %lud", | |
| 1998/0630 | tpriv->tstats.tcpRtoAlgorithm, tpriv->tstats.tcpRtoMin, tpriv->tstats.tcpRtoMax, tpriv->tstats.tcpMaxConn, tpriv->tstats.tcpActiveOpens, tpriv->tstats.tcpPassiveOpens, tpriv->tstats.tcpAttemptFails, tpriv->tstats.tcpEstabResets, tpriv->tstats.tcpCurrEstab, tpriv->tstats.tcpInSegs, tpriv->tstats.tcpOutSegs, tpriv->tstats.tcpRetransSegs, | |
| 1999/0401 | tpriv->tstats.tcpRetransTimeouts, | |
| 1998/0630 | tpriv->tstats.InErrs, tpriv->tstats.OutRsts); | |
| 1997/0916 | } | |
| 1997/0327 | void tcpinit(Fs *fs) { | |
| 1998/0313 | Proto *tcp; Tcppriv *tpriv; | |
| 1997/0327 | ||
| 1998/0313 | tcp = smalloc(sizeof(Proto)); tpriv = tcp->priv = smalloc(sizeof(Tcppriv)); tcp->name = "tcp"; tcp->kick = tcpkick; tcp->connect = tcpconnect; tcp->announce = tcpannounce; tcp->ctl = tcpctl; tcp->state = tcpstate; tcp->create = tcpcreate; tcp->close = tcpclose; tcp->rcv = tcpiput; tcp->advise = tcpadvise; tcp->stats = tcpstats; tcp->inuse = tcpinuse; tcp->ipproto = IP_TCPPROTO; tcp->nc = Nchans; tcp->ptclsize = sizeof(Tcpctl); tpriv->tstats.tcpMaxConn = Nchans; | |
| 1997/0327 | ||
| 1998/0313 | Fsproto(fs, tcp); } | |