*** ../../vanilla3.0/tcp_input.c	Tue Apr 28 10:28:03 1998
--- tcp_input.c	Thu Sep 24 10:48:29 1998
***************
*** 12,18 ****
  */
  
  /*
!  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
   *	The Regents of the University of California.  All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
--- 12,18 ----
  */
  
  /*
!  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1998
   *	The Regents of the University of California.  All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
***************
*** 560,565 ****
--- 560,569 ----
  	if (tp->t_state >= TCPS_ESTABLISHED)
  		tp->t_timer[TCPT_KEEP] = tcp_keepidle;
  
+ #ifdef SACK
+ 	if (!tp->sack_disable)
+ 		tcp_del_sackholes(tp, ti);	/* Delete stale SACK holes */
+ #endif /* SACK */
  	/*
  	 * Process options if not in LISTEN state,
  	 * else do it below (after getting remote address).
***************
*** 567,572 ****
--- 571,582 ----
  	if (optp && tp->t_state != TCPS_LISTEN)
  		tcp_dooptions(tp, optp, optlen, ti, &opti);
  
+ #ifdef SACK
+ 	if (!tp->sack_disable) {
+ 		tp->rcv_laststart = ti->ti_seq;	/* last rec'vd segment */
+ 		tp->rcv_lastend = ti->ti_seq + ti->ti_len;
+ 	}
+ #endif /* SACK */
  	/*
  	 * Header prediction: check for the two common cases
  	 * of a uni-directional data xfer.  If the packet has
***************
*** 618,623 ****
--- 628,637 ----
  			tcpstat.tcps_rcvackbyte += acked;
  			sbdrop(&so->so_snd, acked);
  			tp->snd_una = ti->ti_ack;
+ #if defined(SACK) && defined(FACK)
+ 			tp->snd_fack = tp->snd_una;
+ 			tp->retran_data = 0;
+ #endif /* FACK */
  			m_freem(m);
  
  			/*
***************
*** 629,635 ****
  			 * are ready to send, let tcp_output
  			 * decide between more output or persist.
  			 */
! 			if (tp->snd_una == tp->snd_max)
  				tp->t_timer[TCPT_REXMT] = 0;
  			else if (tp->t_timer[TCPT_PERSIST] == 0)
  				tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
--- 643,649 ----
  			 * are ready to send, let tcp_output
  			 * decide between more output or persist.
  			 */
! 			if (tp->snd_una == tp->snd_max)
  				tp->t_timer[TCPT_REXMT] = 0;
  			else if (tp->t_timer[TCPT_PERSIST] == 0)
  				tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
***************
*** 648,653 ****
--- 662,672 ----
  		 * with nothing on the reassembly queue and
  		 * we have enough buffer space to take it.
  		 */
+ #ifdef SACK
+ 		/* Clean receiver SACK report if present */
+ 		if (!tp->sack_disable && tp->rcv_numsacks)
+ 			tcp_clean_sackreport(tp);
+ #endif /* SACK */
  		++tcpstat.tcps_preddat;
  		tp->rcv_nxt += ti->ti_len;
  		tcpstat.tcps_rcvpack++;
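
The SACK hunks above and below manipulate per-connection scoreboard state (sackblks[], rcv_numsacks, snd_holes, snd_fack, retran_data, sack_disable) that the companion tcp_var.h diff is expected to add to struct tcpcb. As a reading aid, here is a minimal sketch of the two scoreboard types this file assumes; the field names are taken from the code in this patch, but the authoritative declarations live in the header diff, which is not shown here:

	/* One SACK block reported by the receiver (receive side). */
	struct sackblk {
		tcp_seq start;		/* start seq no. of SACKed block */
		tcp_seq end;		/* end seq no. of SACKed block */
	};

	/* One un-SACKed gap in the sender's outstanding data (send side). */
	struct sackhole {
		tcp_seq start;		/* start seq no. of hole */
		tcp_seq end;		/* end seq no. of hole */
		int	dups;		/* dup (s)acks counted for this hole */
		tcp_seq rxmit;		/* next seq no. in hole to retransmit */
		struct sackhole *next;	/* next hole in the ordered list */
	};
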
***************
*** 760,765 ****
--- 779,795 ----
  			tcp_dooptions(tp, optp, optlen, ti, &opti);
  		else
  			tp->t_flags &= ~(TF_SEND_TSTMP | TF_USE_SCALE);
+ #ifdef SACK
+ 		/*
+ 		 * If peer did not send a SACK_PERMITTED option (i.e., if
+ 		 * tcp_dooptions() did not set TF_SACK_PERMIT), set
+ 		 * sack_disable to 1 if it is currently 0.
+ 		 */
+ 		if (!tp->sack_disable)
+ 			if ((tp->t_flags & TF_SACK_PERMIT) == 0)
+ 				tp->sack_disable = 1;
+ #endif
+ 
  		if (iss)
  			tp->iss = iss;
  		else
***************
*** 767,772 ****
--- 797,810 ----
  			tcp_iss += TCP_ISSINCR/4;
  		tp->irs = ti->ti_seq;
  		tcp_sendseqinit(tp);
+ #if defined(SACK) || defined(NEWRENO)
+ 		tp->snd_recover = tp->snd_una;
+ #endif /* SACK */
+ #if defined(SACK) && defined(FACK)
+ 		tp->snd_fack = tp->snd_una;
+ 		tp->retran_data = 0;
+ 		tp->snd_awnd = 0;
+ #endif /* FACK */
  		tcp_rcvseqinit(tp);
  		tp->t_flags |= TF_ACKNOW;
  		tp->t_state = TCPS_SYN_RECEIVED;
***************
*** 817,822 ****
--- 855,870 ----
  		 */
  		if (optp == NULL)
  			tp->t_flags &= ~(TF_SEND_TSTMP | TF_USE_SCALE);
+ #ifdef SACK
+ 		/*
+ 		 * If we've sent a SACK_PERMITTED option, and the peer
+ 		 * also replied with one, then TF_SACK_PERMIT should have
+ 		 * been set in tcp_dooptions().  If it was not, disable SACKs.
+ 		 */
+ 		if (!tp->sack_disable)
+ 			if ((tp->t_flags & TF_SACK_PERMIT) == 0)
+ 				tp->sack_disable = 1;
+ #endif
  		tcp_peer_mss(tp, opti.maxseg);
  		if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
  			tcpstat.tcps_connects++;
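
Both arms of the handshake above key off TF_SACK_PERMIT, which tcp_dooptions() (patched further below) sets when the peer's SYN carries the SACK-Permitted option. For reference, the RFC 2018 wire formats the code relies on: SACK-Permitted is kind 4, length 2, and may only appear on a SYN; the SACK option proper is kind 5, length 8*n + 2, carrying n blocks (at most four fit in the option space) as pairs of 32-bit sequence numbers in network byte order:

	/*
	 * SACK-Permitted (SYN only):     SACK option, n blocks:
	 *
	 *   +--------+--------+          +--------+--------+-- ... --+
	 *   | kind=4 | len=2  |          | kind=5 |len=8n+2| blocks  |
	 *   +--------+--------+          +--------+--------+-- ... --+
	 *
	 * each block: 4-byte left edge, then 4-byte right edge (first
	 * sequence number past the SACKed data), network byte order.
	 */
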
***************
*** 1188,1214 ****
  		 */
  		if (tp->t_timer[TCPT_REXMT] == 0)
  			tp->t_dupacks = 0;
  		else if (++tp->t_dupacks == tcprexmtthresh) {
  			tcp_seq onxt = tp->snd_nxt;
  			u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 /
! 			    tp->t_maxseg;
! 
  			if (win < 2)
  				win = 2;
  			tp->snd_ssthresh = win * tp->t_maxseg;
  			tp->t_timer[TCPT_REXMT] = 0;
  			tp->t_rtt = 0;
  			tp->snd_nxt = ti->ti_ack;
  			tp->snd_cwnd = tp->t_maxseg;
  			tcpstat.tcps_sndrexmitfast++;
  			(void) tcp_output(tp);
  			tp->snd_cwnd = tp->snd_ssthresh +
  			    tp->t_maxseg * tp->t_dupacks;
  			if (SEQ_GT(onxt, tp->snd_nxt))
  				tp->snd_nxt = onxt;
  			goto drop;
  		} else if (tp->t_dupacks > tcprexmtthresh) {
  			tp->snd_cwnd += tp->t_maxseg;
  			(void) tcp_output(tp);
  			goto drop;
--- 1236,1336 ----
  		 */
  		if (tp->t_timer[TCPT_REXMT] == 0)
  			tp->t_dupacks = 0;
+ #if defined(SACK) && defined(FACK)
+ 		/*
+ 		 * In FACK, can enter fast rec. if the receiver
+ 		 * reports a reass. queue longer than 3 segs.
+ 		 */
+ 		else if (++tp->t_dupacks == tcprexmtthresh ||
+ 		    (SEQ_GT(tp->snd_fack, tcprexmtthresh *
+ 		    tp->t_maxseg + tp->snd_una)) &&
+ 		    SEQ_GT(tp->snd_una, tp->snd_recover)) {
+ #else
  		else if (++tp->t_dupacks == tcprexmtthresh) {
+ #endif /* FACK */
  			tcp_seq onxt = tp->snd_nxt;
  			u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 /
! 			    tp->t_maxseg;
! #if defined(SACK) || defined(NEWRENO)
! 			if (SEQ_LT(ti->ti_ack, tp->snd_recover)) {
! 				/*
! 				 * False fast retx after
! 				 * timeout.  Do not cut window.
! 				 */
! 				tp->snd_cwnd += tp->t_maxseg;
! 				tp->t_dupacks = 0;
! 				(void) tcp_output(tp);
! 				goto drop;
! 			}
! #endif
  			if (win < 2)
  				win = 2;
  			tp->snd_ssthresh = win * tp->t_maxseg;
+ #if defined(SACK) || defined(NEWRENO)
+ 			tp->snd_recover = tp->snd_max;
+ #endif
+ #ifdef SACK
+ 			if (!tp->sack_disable) {
+ 				tp->t_timer[TCPT_REXMT] = 0;
+ 				tp->t_rtt = 0;
+ 				tcpstat.tcps_sndrexmitfast++;
+ #if defined(SACK) && defined(FACK)
+ 				(void) tcp_output(tp);
+ 				/*
+ 				 * During FR, snd_cwnd is held
+ 				 * constant for FACK.
+ 				 */
+ 				tp->snd_cwnd = tp->snd_ssthresh;
+ 				tp->t_dupacks = tcprexmtthresh;
+ #else
+ 				/*
+ 				 * tcp_output() will send
+ 				 * oldest SACK-eligible rtx.
+ 				 */
+ 				(void) tcp_output(tp);
+ 				tp->snd_cwnd = tp->snd_ssthresh +
+ 				    tp->t_maxseg * tp->t_dupacks;
+ #endif /* FACK */
+ 				/*
+ 				 * It is possible for tcp_output to fail
+ 				 * to send a segment.  If so, make sure
+ 				 * that the REXMT timer is set.
+ 				 */
+ 				if (SEQ_GT(tp->snd_max, tp->snd_una) &&
+ 				    tp->t_timer[TCPT_REXMT] == 0 &&
+ 				    tp->t_timer[TCPT_PERSIST] == 0)
+ 					tp->t_timer[TCPT_REXMT] =
+ 					    tp->t_rxtcur;
+ 				goto drop;
+ 			}
+ #endif /* SACK */
  			tp->t_timer[TCPT_REXMT] = 0;
  			tp->t_rtt = 0;
  			tp->snd_nxt = ti->ti_ack;
  			tp->snd_cwnd = tp->t_maxseg;
  			tcpstat.tcps_sndrexmitfast++;
  			(void) tcp_output(tp);
+ 
  			tp->snd_cwnd = tp->snd_ssthresh +
  			    tp->t_maxseg * tp->t_dupacks;
  			if (SEQ_GT(onxt, tp->snd_nxt))
  				tp->snd_nxt = onxt;
  			goto drop;
  		} else if (tp->t_dupacks > tcprexmtthresh) {
+ #if defined(SACK) && defined(FACK)
+ 			/*
+ 			 * while (awnd < cwnd)
+ 			 *	sendsomething();
+ 			 */
+ 			if (!tp->sack_disable) {
+ 				if (tp->snd_awnd < tp->snd_cwnd)
+ 					tcp_output(tp);
+ 				goto drop;
+ 			}
+ #endif /* FACK */
  			tp->snd_cwnd += tp->t_maxseg;
  			(void) tcp_output(tp);
  			goto drop;
***************
*** 1228,1237 ****
--- 1350,1418 ----
  		 * If the congestion window was inflated to account
  		 * for the other side's cached packets, retract it.
  		 */
+ #ifdef NEWRENO
+ 		if (tp->t_dupacks >= tcprexmtthresh && !tcp_newreno(tp, ti)) {
+ 			/* Out of fast recovery */
+ 			tp->snd_cwnd = tp->snd_ssthresh;
+ 			/*
+ 			 * Window inflation should have left us with approx.
+ 			 * snd_ssthresh outstanding data.  But in case we
+ 			 * would be inclined to send a burst, better to do
+ 			 * it via the slow start mechanism.
+ 			 */
+ 			if (tcp_seq_subtract(tp->snd_max, ti->ti_ack) <
+ 			    tp->snd_ssthresh)
+ 				tp->snd_cwnd = tcp_seq_subtract(tp->snd_max,
+ 				    ti->ti_ack) + tp->t_maxseg;
+ 			tp->t_dupacks = 0;
+ 		}
+ #elif defined(SACK)
+ 		if (!tp->sack_disable) {
+ 			if (tp->t_dupacks >= tcprexmtthresh) {
+ 				/* Check for a partial ACK */
+ 				if (tcp_sack_partialack(tp, ti)) {
+ #if defined(SACK) && defined(FACK)
+ 					/* Force call to tcp_output */
+ 					if (tp->snd_awnd < tp->snd_cwnd)
+ 						needoutput = 1;
+ #else
+ 					tp->snd_cwnd += tp->t_maxseg;
+ 					needoutput = 1;
+ #endif /* FACK */
+ 				} else {
+ 					/* Out of fast recovery */
+ 					tp->snd_cwnd = tp->snd_ssthresh;
+ 					if (tcp_seq_subtract(tp->snd_max,
+ 					    ti->ti_ack) < tp->snd_ssthresh)
+ 						tp->snd_cwnd =
+ 						    tcp_seq_subtract(tp->snd_max,
+ 						    ti->ti_ack) + tp->t_maxseg;
+ 					tp->t_dupacks = 0;
+ #if defined(SACK) && defined(FACK)
+ 					if (SEQ_GT(ti->ti_ack, tp->snd_fack))
+ 						tp->snd_fack = ti->ti_ack;
+ #endif /* FACK */
+ 				}
+ 			}
+ 		} else {
+ 			if (tp->t_dupacks >= tcprexmtthresh &&
+ 			    !tcp_newreno(tp, ti)) {
+ 				tp->snd_cwnd = tp->snd_ssthresh;
+ 				/* Out of fast recovery */
+ 				if (tcp_seq_subtract(tp->snd_max, ti->ti_ack) <
+ 				    tp->snd_ssthresh)
+ 					tp->snd_cwnd =
+ 					    tcp_seq_subtract(tp->snd_max,
+ 					    ti->ti_ack) + tp->t_maxseg;
+ 				tp->t_dupacks = 0;
+ 			}
+ 		}
+ #else /* else neither NEWRENO nor SACK */
  		if (tp->t_dupacks >= tcprexmtthresh &&
  		    tp->snd_cwnd > tp->snd_ssthresh)
  			tp->snd_cwnd = tp->snd_ssthresh;
  		tp->t_dupacks = 0;
+ #endif
  		if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
  			tcpstat.tcps_rcvacktoomuch++;
  			goto dropafterack;
***************
*** 1278,1283 ****
--- 1459,1467 ----
  		if (cw > tp->snd_ssthresh)
  			incr = incr * incr / cw;
  
+ #if defined (NEWRENO) || defined (SACK)
+ 		if (SEQ_GEQ(ti->ti_ack, tp->snd_recover))
+ #endif
  		tp->snd_cwnd = min(cw + incr, TCP_MAXWIN << tp->snd_scale);
  	}
  	if (acked > so->so_snd.sb_cc) {
***************
*** 1294,1299 ****
--- 1478,1487 ----
  	tp->snd_una = ti->ti_ack;
  	if (SEQ_LT(tp->snd_nxt, tp->snd_una))
  		tp->snd_nxt = tp->snd_una;
+ #if defined (SACK) && defined (FACK)
+ 	if (SEQ_GT(tp->snd_una, tp->snd_fack))
+ 		tp->snd_fack = tp->snd_una;
+ #endif
  
  	switch (tp->t_state) {
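
The recovery-exit paths above clamp snd_cwnd to the amount of data still outstanding (plus one segment) using tcp_seq_subtract(), defined further below. It relies on unsigned 32-bit modular arithmetic, so the distance stays correct across sequence-number wraparound. A quick illustrative check with hypothetical values (written with fixed-width types so it also behaves on platforms where long is 64 bits):

	#include <assert.h>
	#include <stdint.h>

	/* same arithmetic as the patch's tcp_seq_subtract() */
	static int32_t
	tcp_seq_subtract(uint32_t a, uint32_t b)
	{
		return ((int32_t)(a - b));
	}

	int
	main(void)
	{
		assert(tcp_seq_subtract(5000, 2000) == 3000);	/* no wrap */
		/* across the 2^32 wrap: a = 5, b = 2^32 - 5, distance 10 */
		assert(tcp_seq_subtract(5, 0xfffffffbu) == 10);
		return 0;
	}
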
***************
*** 1370,1377 ****
  	    tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))) {
  		/* keep track of pure window updates */
  		if (ti->ti_len == 0 &&
! 		    tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd)
  			tcpstat.tcps_rcvwinupd++;
  		tp->snd_wnd = tiwin;
  		tp->snd_wl1 = ti->ti_seq;
  		tp->snd_wl2 = ti->ti_ack;
--- 1558,1566 ----
  	    tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))) {
  		/* keep track of pure window updates */
  		if (ti->ti_len == 0 &&
! 		    tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd) {
  			tcpstat.tcps_rcvwinupd++;
+ 		}
  		tp->snd_wnd = tiwin;
  		tp->snd_wl1 = ti->ti_seq;
  		tp->snd_wl2 = ti->ti_ack;
***************
*** 1452,1457 ****
--- 1641,1650 ----
  	if ((ti->ti_len || (tiflags&TH_FIN)) &&
  	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
  		TCP_REASS(tp, ti, m, so, tiflags);
+ #ifdef SACK
+ 		if (!tp->sack_disable)
+ 			tcp_update_sack_list(tp);
+ #endif
  		/*
  		 * Note the amount of data that peer has sent into
  		 * our window, in order to estimate the sender's
***************
*** 1519,1526 ****
  	/*
  	 * Return any desired output.
  	 */
! 	if (needoutput || (tp->t_flags & TF_ACKNOW))
  		(void) tcp_output(tp);
  	return;
  
  dropafterack:
--- 1712,1731 ----
  	/*
  	 * Return any desired output.
  	 */
! 	if (needoutput || (tp->t_flags & TF_ACKNOW)) {
  		(void) tcp_output(tp);
+ #ifdef SACK
+ 		/*
+ 		 * In SACK, it is possible for tcp_output() to fail to send
+ 		 * a segment after the retransmission timer has been turned
+ 		 * off.  Make sure that the retransmission timer is set if
+ 		 * we are in fast recovery.
+ 		 */
+ 		if (needoutput && SEQ_GT(tp->snd_max, tp->snd_una) &&
+ 		    tp->t_timer[TCPT_REXMT] == 0 &&
+ 		    tp->t_timer[TCPT_PERSIST] == 0)
+ 			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+ #endif
+ 	}
  	return;
  
  dropafterack:
***************
*** 1637,1642 ****
--- 1842,1861 ----
  				tp->ts_recent_age = tcp_now;
  			}
  			break;
+ 
+ #ifdef SACK
+ 		case TCPOPT_SACK_PERMITTED:
+ 			if (tp->sack_disable ||
+ 			    optlen != TCPOLEN_SACK_PERMITTED)
+ 				continue;
+ 			if (ti->ti_flags & TH_SYN)
+ 				/* MUST only be set on SYN */
+ 				tp->t_flags |= TF_SACK_PERMIT;
+ 			break;
+ 		case TCPOPT_SACK:
+ 			if (tcp_sack_option(tp, ti, cp, optlen))
+ 				continue;
+ 			break;
+ #endif
  		}
  	}
  	if (ti->ti_flags & TH_SYN) {
***************
*** 1646,1651 ****
--- 1865,2258 ----
  		tp->t_flags &= ~TF_USE_SCALE;
  	}
  }
+ 
+ #if defined(SACK) || defined(NEWRENO)
+ u_long
+ tcp_seq_subtract(a, b)
+ 	u_long a, b;
+ {
+ 	return ((long)(a - b));
+ }
+ #endif
+ 
+ #ifdef SACK
+ /*
+  * This function is called upon receipt of new valid data (while not in
+  * header prediction mode), and it updates the ordered list of sacks.
+  */
+ void
+ tcp_update_sack_list(tp)
+ 	struct tcpcb *tp;
+ {
+ 	/*
+ 	 * First reported block MUST be the most recent one.  Subsequent
+ 	 * blocks SHOULD be in the order in which they arrived at the
+ 	 * receiver.  These two conditions make the implementation fully
+ 	 * compliant with RFC 2018.
+ 	 */
+ 	int i, j = 0, count = 0, lastpos = -1;
+ 	struct sackblk sack, firstsack, temp[MAX_SACK_BLKS];
+ 
+ 	/* First clean up current list of sacks */
+ 	for (i = 0; i < tp->rcv_numsacks; i++) {
+ 		sack = tp->sackblks[i];
+ 		if (sack.start == 0 && sack.end == 0) {
+ 			count++;	/* number of blocks to be discarded */
+ 			continue;
+ 		}
+ 		if (SEQ_LEQ(sack.end, tp->rcv_nxt)) {
+ 			tp->sackblks[i].start = tp->sackblks[i].end = 0;
+ 			count++;
+ 		} else {
+ 			temp[j].start = tp->sackblks[i].start;
+ 			temp[j++].end = tp->sackblks[i].end;
+ 		}
+ 	}
+ 	tp->rcv_numsacks -= count;
+ 	if (tp->rcv_numsacks == 0) { /* no sack blocks currently (fast path) */
+ 		tcp_clean_sackreport(tp);
+ 		if (SEQ_LT(tp->rcv_nxt, tp->rcv_laststart)) {
+ 			/* ==> need first sack block */
+ 			tp->sackblks[0].start = tp->rcv_laststart;
+ 			tp->sackblks[0].end = tp->rcv_lastend;
+ 			tp->rcv_numsacks = 1;
+ 		}
+ 		return;
+ 	}
+ 	/* Otherwise, sack blocks are already present. */
+ 	for (i = 0; i < tp->rcv_numsacks; i++)
+ 		tp->sackblks[i] = temp[i];	/* first copy back sack list */
+ 	if (SEQ_GEQ(tp->rcv_nxt, tp->rcv_lastend))
+ 		return;		/* sack list remains unchanged */
+ 	/*
+ 	 * From here, segment just received should be (part of) the 1st sack.
+ 	 * Go through list, possibly coalescing sack block entries.
+ 	 */
+ 	firstsack.start = tp->rcv_laststart;
+ 	firstsack.end = tp->rcv_lastend;
+ 	for (i = 0; i < tp->rcv_numsacks; i++) {
+ 		sack = tp->sackblks[i];
+ 		if (SEQ_LT(sack.end, firstsack.start) ||
+ 		    SEQ_GT(sack.start, firstsack.end))
+ 			continue;	/* no overlap */
+ 		if (sack.start == firstsack.start && sack.end == firstsack.end) {
+ 			/*
+ 			 * identical block; delete it here since we will
+ 			 * move it to the front of the list.
+ 			 */
+ 			tp->sackblks[i].start = tp->sackblks[i].end = 0;
+ 			lastpos = i;	/* last posn with a zero entry */
+ 			continue;
+ 		}
+ 		if (SEQ_LEQ(sack.start, firstsack.start))
+ 			firstsack.start = sack.start;	/* merge blocks */
+ 		if (SEQ_GEQ(sack.end, firstsack.end))
+ 			firstsack.end = sack.end;	/* merge blocks */
+ 		tp->sackblks[i].start = tp->sackblks[i].end = 0;
+ 		lastpos = i;	/* last posn with a zero entry */
+ 	}
+ 	if (lastpos != -1) {	/* at least one merge */
+ 		for (i = 0, j = 1; i < tp->rcv_numsacks; i++) {
+ 			sack = tp->sackblks[i];
+ 			if (sack.start == 0 && sack.end == 0)
+ 				continue;
+ 			temp[j++] = sack;
+ 		}
+ 		tp->rcv_numsacks = j;	/* including first blk (added later) */
+ 		for (i = 1; i < tp->rcv_numsacks; i++)	/* now copy back */
+ 			tp->sackblks[i] = temp[i];
+ 	} else {	/* no merges -- shift sacks by 1 */
+ 		if (tp->rcv_numsacks < MAX_SACK_BLKS)
+ 			tp->rcv_numsacks++;
+ 		for (i = tp->rcv_numsacks-1; i > 0; i--)
+ 			tp->sackblks[i] = tp->sackblks[i-1];
+ 	}
+ 	tp->sackblks[0] = firstsack;
+ 	return;
+ }
+ 
+ /*
+  * Process the TCP SACK option.  Returns 1 if tcp_dooptions() should
+  * continue, and 0 otherwise, if the option was fine.  tp->snd_holes is
+  * an ordered list of holes (oldest to newest, in terms of the sequence
+  * space).
+  */
+ int
+ tcp_sack_option(tp, ti, cp, optlen)
+ 	struct tcpcb *tp;
+ 	struct tcpiphdr *ti;
+ 	u_char *cp;
+ 	int optlen;
+ {
+ 	int i, tmp_olen, off, len;
+ 	tcp_seq lastack = max(ti->ti_ack, tp->snd_una);
+ 	u_char *tmp_cp;
+ 	struct sackblk sack;
+ 	struct sackhole *cur, *p, *temp;
+ 
+ 	if (tp->sack_disable)
+ 		return 1;
+ 
+ 	/* Note: TCPOLEN_SACK must be 2*sizeof(tcp_seq) */
+ 	if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
+ 		return 1;
+ 	tmp_cp = cp + 2;
+ 	tmp_olen = optlen - 2;
+ 	if (tp->snd_numholes < 0)
+ 		tp->snd_numholes = 0;
+ 	if (tp->t_maxseg == 0)
+ 		panic("tcp_sack_option");	/* Should never happen */
+ 	while (tmp_olen > 0) {
+ 		struct sackblk sack;
+ 
+ 		bcopy((char *)tmp_cp, (char *)&(sack.start), sizeof(tcp_seq));
+ 		NTOHL(sack.start);
+ 		bcopy((char *)tmp_cp + sizeof(tcp_seq),
+ 		    (char *)&(sack.end), sizeof(tcp_seq));
+ 		NTOHL(sack.end);
+ 		tmp_olen -= TCPOLEN_SACK;
+ 		tmp_cp += TCPOLEN_SACK;
+ 		if (SEQ_LEQ(sack.end, sack.start))
+ 			continue;	/* bad SACK fields */
+ 		if (SEQ_LEQ(sack.end, tp->snd_una))
+ 			continue;	/* old block */
+ #if defined(SACK) && defined(FACK)
+ 		/* Updates snd_fack. */
+ 		if (SEQ_GEQ(sack.end, tp->snd_fack))
+ 			tp->snd_fack = sack.end;
+ #endif /* FACK */
+ 		if (tp->snd_holes == 0) {	/* first hole */
+ 			tp->snd_holes = (struct sackhole *)
+ 			    malloc(sizeof(struct sackhole), M_PCB, M_NOWAIT);
+ 			cur = tp->snd_holes;
+ 			cur->start = ti->ti_ack;
+ 			cur->end = sack.start;
+ 			cur->rxmit = cur->start;
+ 			cur->next = 0;
+ 			tp->snd_numholes = 1;
+ 			tp->rcv_lastsack = sack.end;
+ 			/*
+ 			 * dups is at least one.  If more data has been
+ 			 * SACKed, it can be greater than one.
+ 			 */
+ 			cur->dups = min(tcprexmtthresh,
+ 			    ((sack.end - cur->end)/tp->t_maxseg));
+ 			if (cur->dups < 1)
+ 				cur->dups = 1;
+ 			continue;	/* with next sack block */
+ 		}
+ 		/* Go thru list of holes:  p = previous,  cur = current */
+ 		p = cur = tp->snd_holes;
+ 		while (cur) {
+ 			if (SEQ_LEQ(sack.end, cur->start))
+ 				/* SACKs data before the current hole */
+ 				break;	/* no use going through more holes */
+ 			if (SEQ_GEQ(sack.start, cur->end)) {
+ 				/* SACKs data beyond the current hole */
+ 				cur->dups++;
+ 				if (((sack.end - cur->end)/tp->t_maxseg) >=
+ 				    tcprexmtthresh)
+ 					cur->dups = tcprexmtthresh;
+ 				p = cur;
+ 				cur = cur->next;
+ 				continue;
+ 			}
+ 			if (SEQ_LEQ(sack.start, cur->start)) {
+ 				/* Data acks at least the beginning of hole */
+ #if defined(SACK) && defined(FACK)
+ 				if (SEQ_GT(sack.end, cur->rxmit))
+ 					tp->retran_data -=
+ 					    tcp_seq_subtract(cur->rxmit,
+ 					    cur->start);
+ 				else
+ 					tp->retran_data -=
+ 					    tcp_seq_subtract(sack.end,
+ 					    cur->start);
+ #endif /* FACK */
+ 				if (SEQ_GEQ(sack.end, cur->end)) {
+ 					/* Acks entire hole, so delete hole */
+ 					if (p != cur) {
+ 						p->next = cur->next;
+ 						free(cur, M_PCB);
+ 						cur = p->next;
+ 					} else {
+ 						cur = cur->next;
+ 						free(p, M_PCB);
+ 						p = cur;
+ 						tp->snd_holes = p;
+ 					}
+ 					tp->snd_numholes--;
+ 					continue;
+ 				}
+ 				/* otherwise, move start of hole forward */
+ 				cur->start = sack.end;
+ 				cur->rxmit = max(cur->rxmit, cur->start);
+ 				p = cur;
+ 				cur = cur->next;
+ 				continue;
+ 			}
+ 			/* move end of hole backward */
+ 			if (SEQ_GEQ(sack.end, cur->end)) {
+ #if defined(SACK) && defined(FACK)
+ 				if (SEQ_GT(cur->rxmit, sack.start))
+ 					tp->retran_data -=
+ 					    tcp_seq_subtract(cur->rxmit,
+ 					    sack.start);
+ #endif /* FACK */
+ 				cur->end = sack.start;
+ 				cur->rxmit = min(cur->rxmit, cur->end);
+ 				cur->dups++;
+ 				if (((sack.end - cur->end)/tp->t_maxseg) >=
+ 				    tcprexmtthresh)
+ 					cur->dups = tcprexmtthresh;
+ 				p = cur;
+ 				cur = cur->next;
+ 				continue;
+ 			}
+ 			if (SEQ_LT(cur->start, sack.start) &&
+ 			    SEQ_GT(cur->end, sack.end)) {
+ 				/*
+ 				 * ACKs some data in middle of a hole; need to
+ 				 * split current hole
+ 				 */
+ #if defined(SACK) && defined(FACK)
+ 				if (SEQ_GT(cur->rxmit, sack.end))
+ 					tp->retran_data -=
+ 					    tcp_seq_subtract(sack.end,
+ 					    sack.start);
+ 				else if (SEQ_GT(cur->rxmit, sack.start))
+ 					tp->retran_data -=
+ 					    tcp_seq_subtract(cur->rxmit,
+ 					    sack.start);
+ #endif /* FACK */
+ 				temp = (struct sackhole *)malloc(sizeof(*temp),
+ 				    M_PCB, M_NOWAIT);
+ 				temp->next = cur->next;
+ 				temp->start = sack.end;
+ 				temp->end = cur->end;
+ 				temp->dups = cur->dups;
+ 				temp->rxmit = max(cur->rxmit, temp->start);
+ 				cur->end = sack.start;
+ 				cur->rxmit = min(cur->rxmit, cur->end);
+ 				cur->dups++;
+ 				if (((sack.end - cur->end)/tp->t_maxseg) >=
+ 				    tcprexmtthresh)
+ 					cur->dups = tcprexmtthresh;
+ 				cur->next = temp;
+ 				p = temp;
+ 				cur = p->next;
+ 				tp->snd_numholes++;
+ 			}
+ 		}
+ 		/* At this point, p points to the last hole on the list */
+ 		if (SEQ_LT(tp->rcv_lastsack, sack.start)) {
+ 			/*
+ 			 * Need to append new hole at end.
+ 			 * Last hole is p (and it's not NULL).
+ 			 */
+ 			temp = (struct sackhole *)malloc(sizeof(*temp),
+ 			    M_PCB, M_NOWAIT);
+ 			temp->start = tp->rcv_lastsack;
+ 			temp->end = sack.start;
+ 			temp->dups = min(tcprexmtthresh,
+ 			    ((sack.end - sack.start)/tp->t_maxseg));
+ 			if (temp->dups < 1)
+ 				temp->dups = 1;
+ 			temp->rxmit = temp->start;
+ 			temp->next = 0;
+ 			p->next = temp;
+ 			tp->rcv_lastsack = sack.end;
+ 			tp->snd_numholes++;
+ 		}
+ 	}
+ #if defined(SACK) && defined(FACK)
+ 	/*
+ 	 * Update retran_data, snd_fack, and snd_awnd.  Go through the list
+ 	 * of holes.  Increment retran_data by (hole->rxmit - hole->start).
+ 	 * snd_fack gets the highest value of hole->end.
+ 	 */
+ 	tp->retran_data = 0;
+ 	cur = tp->snd_holes;
+ 	while (cur) {
+ 		tp->retran_data += cur->rxmit - cur->start;
+ 		cur = cur->next;
+ 	}
+ 	tp->snd_awnd = tcp_seq_subtract(tp->snd_nxt, tp->snd_fack) +
+ 	    tp->retran_data;
+ #endif /* FACK */
+ }
+ 
+ /*
+  * Delete stale (i.e., cumulatively ack'd) holes.  Hole is deleted only if
+  * it is completely acked; otherwise, tcp_sack_option(), called from
+  * tcp_dooptions(), will fix up the hole.
+  */
+ void
+ tcp_del_sackholes(tp, ti)
+ 	struct tcpcb *tp;
+ 	struct tcpiphdr *ti;
+ {
+ 	if (!tp->sack_disable && tp->t_state != TCPS_LISTEN) {
+ 		/* max because this could be an older ack just arrived */
+ 		tcp_seq lastack = max(ti->ti_ack, tp->snd_una);
+ 		struct sackhole *cur = tp->snd_holes;
+ 		struct sackhole *prev = cur;
+ 
+ 		while (cur)
+ 			if (SEQ_LEQ(cur->end, lastack)) {
+ 				cur = cur->next;
+ 				free(prev, M_PCB);
+ 				prev = cur;
+ 				tp->snd_numholes--;
+ 			} else if (SEQ_LT(cur->start, lastack)) {
+ 				cur->start = lastack;
+ 				break;
+ 			} else
+ 				break;
+ 		tp->snd_holes = cur;
+ 	}
+ }
+ 
+ /*
+  * Delete all receiver-side SACK information.
+  */
+ void
+ tcp_clean_sackreport(tp)
+ 	struct tcpcb *tp;
+ {
+ 	int i;
+ 
+ 	tp->rcv_numsacks = 0;
+ 	for (i = 0; i < MAX_SACK_BLKS; i++)
+ 		tp->sackblks[i].start = tp->sackblks[i].end = 0;
+ }
+ 
+ /*
+  * Checks for partial ack.  If partial ack arrives, turn off retransmission
+  * timer, deflate the window, do not clear tp->t_dupacks, and return 1.
+  * If the ack advances at least to tp->snd_recover, return 0.
+  */
+ int
+ tcp_sack_partialack(tp, ti)
+ 	struct tcpcb *tp;
+ 	struct tcpiphdr *ti;
+ {
+ 	if (SEQ_LT(ti->ti_ack, tp->snd_recover)) {
+ 		/* Turn off retx. timer (will start again next segment) */
+ 		tp->t_timer[TCPT_REXMT] = 0;
+ 		tp->t_rtt = 0;
+ #ifndef FACK
+ 		/*
+ 		 * Partial window deflation.  This statement relies on the
+ 		 * fact that tp->snd_una has not been updated yet.  In FACK
+ 		 * hold snd_cwnd constant during fast recovery.
+ 		 */
+ 		tp->snd_cwnd -= (ti->ti_ack - tp->snd_una - tp->t_maxseg);
+ #endif
+ 		return 1;
+ 	}
+ 	return 0;
+ }
+ #endif /* SACK */
  
  /*
   * Pull out of band byte out of a segment so
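
The FACK block at the top of this hunk maintains snd_awnd = (snd_nxt - snd_fack) + retran_data, the sender's estimate of data actually in flight: everything sent beyond the forward-most SACKed byte, plus whatever has been retransmitted into the holes. A small worked example under assumed values, just to make the accounting concrete:

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Hypothetical snapshot: snd_nxt = 9000, receiver has SACKed
	 * through 8000 (snd_fack = 8000), and 1000 bytes have been
	 * retransmitted into an earlier hole (retran_data = 1000).
	 */
	int
	main(void)
	{
		uint32_t snd_nxt = 9000, snd_fack = 8000, retran_data = 1000;
		uint32_t snd_awnd = (snd_nxt - snd_fack) + retran_data;

		/* 1000 bytes past snd_fack + 1000 retransmitted = 2000 */
		printf("awnd = %u\n", snd_awnd);
		return 0;
	}
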
***************
*** 2300,2305 ****
--- 2907,2920 ----
  	tp->iss = sc->sc_iss;
  	tp->irs = sc->sc_irs;
  	tcp_sendseqinit(tp);
+ #if defined(SACK) || defined(NEWRENO)
+ 	tp->snd_recover = tp->snd_una;
+ #endif
+ #if defined(SACK) && defined(FACK)
+ 	tp->snd_fack = tp->snd_una;
+ 	tp->retran_data = 0;
+ 	tp->snd_awnd = 0;
+ #endif
  	tcp_rcvseqinit(tp);
  	tp->t_timer[TCPT_KEEP] = tcp_conntimeo;
  	tcpstat.tcps_accepts++;
***************
*** 2543,2545 ****
  	return (tcp_respond(NULL, ti, m, sc->sc_irs + 1, sc->sc_iss,
  	    TH_SYN|TH_ACK));
  }
--- 3158,3197 ----
  	return (tcp_respond(NULL, ti, m, sc->sc_irs + 1, sc->sc_iss,
  	    TH_SYN|TH_ACK));
  }
+ 
+ #if defined(NEWRENO) || defined (SACK)
+ /*
+  * Checks for partial ack.  If partial ack arrives, force the retransmission
+  * of the next unacknowledged segment, do not clear tp->t_dupacks, and
+  * return 1.  By setting snd_nxt to ti_ack, this forces the retransmission
+  * timer to be started again.  If the ack advances at least to
+  * tp->snd_recover, return 0.
+  */
+ int
+ tcp_newreno(tp, ti)
+ 	struct tcpcb *tp;
+ 	struct tcpiphdr *ti;
+ {
+ 	if (SEQ_LT(ti->ti_ack, tp->snd_recover)) {
+ 		tcp_seq onxt = tp->snd_nxt;
+ 		tcp_seq ouna = tp->snd_una;  /* Haven't updated snd_una yet */
+ 		u_long ocwnd = tp->snd_cwnd;
+ 
+ 		tp->t_timer[TCPT_REXMT] = 0;
+ 		tp->t_rtt = 0;
+ 		tp->snd_nxt = ti->ti_ack;
+ 		tp->snd_cwnd = tp->t_maxseg;
+ 		tp->snd_una = ti->ti_ack;
+ 		(void) tcp_output(tp);
+ 		tp->snd_cwnd = ocwnd;
+ 		tp->snd_una = ouna;
+ 		if (SEQ_GT(onxt, tp->snd_nxt))
+ 			tp->snd_nxt = onxt;
+ 		/*
+ 		 * Partial window deflation.  Relies on fact that
+ 		 * tp->snd_una not updated yet.
+ 		 */
+ 		tp->snd_cwnd -= (ti->ti_ack - tp->snd_una - tp->t_maxseg);
+ 		return 1;
+ 	}
+ 	return 0;
+ }
+ #endif /* NEWRENO || SACK */
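
tcp_newreno() above retransmits the segment at the partial ACK and then deflates the window by the amount of newly acknowledged data, less one segment, so the partial ACK releases only one new segment's worth of sending capacity in addition to the forced retransmission. A worked example with assumed numbers (t_maxseg = 1000, snd_una = 5000, partial ACK for 8000):

	#include <assert.h>
	#include <stdint.h>

	int
	main(void)
	{
		uint32_t snd_una = 5000, ti_ack = 8000, t_maxseg = 1000;
		uint32_t snd_cwnd = 10000;

		/*
		 * The deflation step from tcp_newreno(), evaluated while
		 * snd_una has not yet advanced past the partial ACK:
		 * 8000 - 5000 - 1000 = 2000 bytes removed from the window.
		 */
		snd_cwnd -= (ti_ack - snd_una - t_maxseg);
		assert(snd_cwnd == 8000);
		return 0;
	}
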