diff options
author | Luigi Rizzo <luigi@FreeBSD.org> | 2002-11-20 18:56:25 +0000 |
---|---|---|
committer | Luigi Rizzo <luigi@FreeBSD.org> | 2002-11-20 18:56:25 +0000 |
commit | b375c9ec2ca23ffdb4e664d705b9664a39f82d06 (patch) | |
tree | 33bcc97feab79c2560a22954253be0f8100516b4 /sys/netinet/ip_output.c | |
parent | 82e5a9a354114ad62ae3b2631ccd50873a7fce8b (diff) | |
download | src-b375c9ec2ca23ffdb4e664d705b9664a39f82d06.tar.gz src-b375c9ec2ca23ffdb4e664d705b9664a39f82d06.zip |
Back out the ip_fragment() code -- it is not urgent to have it in now,
I will put it back in in a better form after 5.0 is out.
Requested by: sam, rwatson, luigi (on second thought)
Approved by: re
Notes:
svn path=/head/; revision=107112
Diffstat (limited to 'sys/netinet/ip_output.c')
-rw-r--r-- | sys/netinet/ip_output.c | 311 |
1 files changed, 143 insertions, 168 deletions
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 1ed5459e02de..4c190900e470 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -116,10 +116,15 @@ extern struct protosw inetsw[]; * The mbuf opt, if present, will not be freed. */ int -ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, - int flags, struct ip_moptions *imo, struct inpcb *inp) +ip_output(m0, opt, ro, flags, imo, inp) + struct mbuf *m0; + struct mbuf *opt; + struct route *ro; + int flags; + struct ip_moptions *imo; + struct inpcb *inp; { - struct ip *ip; + struct ip *ip, *mhip; struct ifnet *ifp = NULL; /* keep compiler happy */ struct mbuf *m; int hlen = sizeof (struct ip); @@ -453,7 +458,7 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, goto bad; } /* don't allow broadcast messages to be fragmented */ - if (ip->ip_len > ifp->if_mtu) { + if ((u_short)ip->ip_len > ifp->if_mtu) { error = EMSGSIZE; goto bad; } @@ -980,7 +985,8 @@ pass: * If small enough for interface, or the interface will take * care of the fragmentation for us, can just send directly. */ - if (ip->ip_len <= ifp->if_mtu || ifp->if_hwassist & CSUM_FRAGMENT) { + if ((u_short)ip->ip_len <= ifp->if_mtu || + ifp->if_hwassist & CSUM_FRAGMENT) { ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; @@ -1002,6 +1008,10 @@ pass: (struct sockaddr *)dst, ro->ro_rt); goto done; } + /* + * Too large for interface; fragment if possible. + * Must be able to put at least 8 bytes per fragment. + */ if (ip->ip_off & IP_DF) { error = EMSGSIZE; /* @@ -1019,122 +1029,38 @@ pass: ipstat.ips_cantfrag++; goto bad; } - /* - * Too large for interface; fragment if possible. If successful, - * on return m will point to a list of packets to be sent. 
- */ - error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum); - if (error) + len = (ifp->if_mtu - hlen) &~ 7; + if (len < 8) { + error = EMSGSIZE; goto bad; - for (; m; m = m0) { - m0 = m->m_nextpkt; - m->m_nextpkt = NULL; -#ifdef IPSEC - /* clean ipsec history once it goes out of the node */ - ipsec_delaux(m); -#endif - if (error == 0) { - /* Record statistics for this interface address. */ - if (ia != NULL) { - ia->ia_ifa.if_opackets++; - ia->ia_ifa.if_obytes += m->m_pkthdr.len; - } - - error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)dst, ro->ro_rt); - } else - m_freem(m); - } - - if (error == 0) - ipstat.ips_fragmented++; - -done: -#ifdef IPSEC - if (ro == &iproute && ro->ro_rt) { - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; } - if (sp != NULL) { - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ip_output call free SP:%p\n", sp)); - key_freesp(sp); - } -#endif /* IPSEC */ -#ifdef FAST_IPSEC - if (ro == &iproute && ro->ro_rt) { - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; - } - if (sp != NULL) - KEY_FREESP(&sp); -#endif /* FAST_IPSEC */ - return (error); -bad: - m_freem(m); - goto done; -} - -/* - * Create a chain of fragments which fit the given mtu. m_frag points to the - * mbuf to be fragmented; on return it points to the chain with the fragments. - * Return 0 if no error. - * - * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) - * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP). - */ -int -ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, - u_long if_hwassist_flags, int sw_csum) -{ - int error = 0; - int hlen = ip->ip_hl << 2; - int len = (mtu - hlen) &~ 7; /* size of payload in each fragment */ - int off; - struct mbuf *m0 = *m_frag; /* the original packet */ - int firstlen; - struct mbuf **mnext; - int nfrags; - - if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */ - ipstat.ips_cantfrag++; - return EMSGSIZE; - } - - /* - * Must be able to put at least 8 bytes per fragment. 
- */ - if (len < 8) - return EMSGSIZE; /* * if the interface will not calculate checksums on * fragmented packets, then do it here. */ - if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA && - (if_hwassist_flags & CSUM_IP_FRAGS) == 0) { - in_delayed_cksum(m0); - m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA && + (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } if (len > PAGE_SIZE) { /* - * Fragment large datagrams such that each segment + * Fragement large datagrams such that each segment * contains a multiple of PAGE_SIZE amount of data, * plus headers. This enables a receiver to perform * page-flipping zero-copy optimizations. - * - * XXX When does this help given that sender and receiver - * could have different page sizes, and also mtu could - * be less than the receiver's page size ? */ int newlen; - struct mbuf *m; + struct mbuf *mtmp; - for (m = m0, off = 0; m && ((off + m->m_len) <= mtu); - m = m->m_next) - off += m->m_len; + for (mtmp = m, off = 0; + mtmp && ((off + mtmp->m_len) <= ifp->if_mtu); + mtmp = mtmp->m_next) { + off += mtmp->m_len; + } /* * firstlen (off - hlen) must be aligned on an * 8-byte boundary @@ -1142,46 +1068,44 @@ ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, if (off < hlen) goto smart_frag_failure; off = ((off - hlen) & ~7) + hlen; - newlen = (~PAGE_MASK) & mtu; - if ((newlen + sizeof (struct ip)) > mtu) { + newlen = (~PAGE_MASK) & ifp->if_mtu; + if ((newlen + sizeof (struct ip)) > ifp->if_mtu) { /* we failed, go back the default */ smart_frag_failure: newlen = len; off = hlen + len; } + +/* printf("ipfrag: len = %d, hlen = %d, mhlen = %d, newlen = %d, off = %d\n", + len, hlen, sizeof (struct ip), newlen, off);*/ + len = newlen; } else { off = hlen + len; } - firstlen = off - hlen; - mnext = &m0->m_nextpkt; /* pointer to next packet */ + + + { + int mhlen, firstlen = off - hlen; + struct mbuf **mnext = &m->m_nextpkt; + 
int nfrags = 1; /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto chain. - * Here, m0 is the original packet, m is the fragment being created. - * The fragments are linked off the m_nextpkt of the original - * packet, which after processing serves as the first fragment. */ - for (nfrags=1; off < ip->ip_len; off += len, nfrags++) { - struct ip *mhip; /* ip header on the fragment */ - struct mbuf *m; - int mhlen = sizeof (struct ip); - + m0 = m; + mhlen = sizeof (struct ip); + for (; off < (u_short)ip->ip_len; off += len) { MGETHDR(m, M_DONTWAIT, MT_HEADER); if (m == 0) { error = ENOBUFS; ipstat.ips_odropped++; - goto done; + goto sendorfree; } m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; - /* - * In the first mbuf, leave room for the link header, then - * copy the original IP header including options. The payload - * goes into an additional mbuf chain returned by m_copy(). - */ m->m_data += max_linkhdr; mhip = mtod(m, struct ip *); *mhip = *ip; @@ -1191,20 +1115,18 @@ smart_frag_failure: mhip->ip_hl = mhlen >> 2; } m->m_len = mhlen; - /* XXX do we need to add ip->ip_off below ? */ mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; - if (off + len >= ip->ip_len) { /* last fragment */ - len = ip->ip_len - off; - m->m_flags |= M_LASTFRAG; - } else + if (off + len >= (u_short)ip->ip_len) + len = (u_short)ip->ip_len - off; + else mhip->ip_off |= IP_MF; mhip->ip_len = htons((u_short)(len + mhlen)); m->m_next = m_copy(m0, off, len); - if (m->m_next == 0) { /* copy failed */ - m_free(m); + if (m->m_next == 0) { + (void) m_free(m); error = ENOBUFS; /* ??? 
*/ ipstat.ips_odropped++; - goto done; + goto sendorfree; } m->m_pkthdr.len = mhlen + len; m->m_pkthdr.rcvif = (struct ifnet *)0; @@ -1218,42 +1140,76 @@ smart_frag_failure: mhip->ip_sum = in_cksum(m, mhlen); *mnext = m; mnext = &m->m_nextpkt; + nfrags++; } ipstat.ips_ofragments += nfrags; - /* set first markers for fragment chain */ + /* set first/last markers for fragment chain */ + m->m_flags |= M_LASTFRAG; m0->m_flags |= M_FIRSTFRAG | M_FRAG; m0->m_pkthdr.csum_data = nfrags; /* - * Update first fragment by trimming what has been copied out - * and updating header. + * Update first fragment by trimming what's been copied out + * and updating header, then send each fragment (in order). */ - m_adj(m0, hlen + firstlen - ip->ip_len); - m0->m_pkthdr.len = hlen + firstlen; - ip->ip_len = htons((u_short)m0->m_pkthdr.len); + m = m0; + m_adj(m, hlen + firstlen - (u_short)ip->ip_len); + m->m_pkthdr.len = hlen + firstlen; + ip->ip_len = htons((u_short)m->m_pkthdr.len); ip->ip_off |= IP_MF; ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) - ip->ip_sum = in_cksum(m0, hlen); - *m_frag = m0; - -done: - if (error) { - struct mbuf *m; - - for (m = m0; m; m = m0) { - m0 = m->m_nextpkt; - m->m_nextpkt = 0; + ip->ip_sum = in_cksum(m, hlen); +sendorfree: + for (m = m0; m; m = m0) { + m0 = m->m_nextpkt; + m->m_nextpkt = 0; #ifdef IPSEC - /* clean ipsec history */ - ipsec_delaux(m); + /* clean ipsec history once it goes out of the node */ + ipsec_delaux(m); #endif + if (error == 0) { + /* Record statistics for this interface address. 
*/ + if (ia != NULL) { + ia->ia_ifa.if_opackets++; + ia->ia_ifa.if_obytes += m->m_pkthdr.len; + } + + error = (*ifp->if_output)(ifp, m, + (struct sockaddr *)dst, ro->ro_rt); + } else m_freem(m); - } } - return error; + + if (error == 0) + ipstat.ips_fragmented++; + } +done: +#ifdef IPSEC + if (ro == &iproute && ro->ro_rt) { + RTFREE(ro->ro_rt); + ro->ro_rt = NULL; + } + if (sp != NULL) { + KEYDEBUG(KEYDEBUG_IPSEC_STAMP, + printf("DP ip_output call free SP:%p\n", sp)); + key_freesp(sp); + } +#endif /* IPSEC */ +#ifdef FAST_IPSEC + if (ro == &iproute && ro->ro_rt) { + RTFREE(ro->ro_rt); + ro->ro_rt = NULL; + } + if (sp != NULL) + KEY_FREESP(&sp); +#endif /* FAST_IPSEC */ + return (error); +bad: + m_freem(m); + goto done; } void @@ -1291,15 +1247,18 @@ in_delayed_cksum(struct mbuf *m) * XXX This routine assumes that the packet has no options in place. */ static struct mbuf * -ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) +ip_insertoptions(m, opt, phlen) + register struct mbuf *m; + struct mbuf *opt; + int *phlen; { - struct ipoption *p = mtod(opt, struct ipoption *); + register struct ipoption *p = mtod(opt, struct ipoption *); struct mbuf *n; - struct ip *ip = mtod(m, struct ip *); + register struct ip *ip = mtod(m, struct ip *); unsigned optlen; optlen = opt->m_len - sizeof(p->ipopt_dst); - if (optlen + ip->ip_len > IP_MAXPACKET) { + if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) { *phlen = 0; return (m); /* XXX should fail */ } @@ -1343,9 +1302,10 @@ ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen) * omitting those not copied during fragmentation. */ int -ip_optcopy(struct ip *ip, struct ip *jp) +ip_optcopy(ip, jp) + struct ip *ip, *jp; { - u_char *cp, *dp; + register u_char *cp, *dp; int opt, optlen, cnt; cp = (u_char *)(ip + 1); @@ -1385,7 +1345,9 @@ ip_optcopy(struct ip *ip, struct ip *jp) * IP socket option processing. 
*/ int -ip_ctloutput(struct socket *so, struct sockopt *sopt) +ip_ctloutput(so, sopt) + struct socket *so; + struct sockopt *sopt; { struct inpcb *inp = sotoinpcb(so); int error, optval; @@ -1646,10 +1608,13 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) * with destination address if source routed. */ static int -ip_pcbopts(int optname, struct mbuf **pcbopt, struct mbuf *m) +ip_pcbopts(optname, pcbopt, m) + int optname; + struct mbuf **pcbopt; + register struct mbuf *m; { - int cnt, optlen; - u_char *cp; + register int cnt, optlen; + register u_char *cp; u_char opt; /* turn off any old options */ @@ -1752,7 +1717,9 @@ bad: * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. */ static struct ifnet * -ip_multicast_if(struct in_addr *a, int *ifindexp) +ip_multicast_if(a, ifindexp) + struct in_addr *a; + int *ifindexp; { int ifindex; struct ifnet *ifp; @@ -1776,7 +1743,9 @@ ip_multicast_if(struct in_addr *a, int *ifindexp) * Set the IP multicast options in response to user setsockopt(). */ static int -ip_setmoptions(struct sockopt *sopt, struct ip_moptions **imop) +ip_setmoptions(sopt, imop) + struct sockopt *sopt; + struct ip_moptions **imop; { int error = 0; int i; @@ -2072,7 +2041,9 @@ ip_setmoptions(struct sockopt *sopt, struct ip_moptions **imop) * Return the IP multicast options in response to user getsockopt(). */ static int -ip_getmoptions(struct sockopt *sopt, struct ip_moptions *imo) +ip_getmoptions(sopt, imo) + struct sockopt *sopt; + register struct ip_moptions *imo; { struct in_addr addr; struct in_ifaddr *ia; @@ -2136,9 +2107,10 @@ ip_getmoptions(struct sockopt *sopt, struct ip_moptions *imo) * Discard the IP multicast options. */ void -ip_freemoptions(struct ip_moptions *imo) +ip_freemoptions(imo) + register struct ip_moptions *imo; { - int i; + register int i; if (imo != NULL) { for (i = 0; i < imo->imo_num_memberships; ++i) @@ -2155,13 +2127,16 @@ ip_freemoptions(struct ip_moptions *imo) * replicating that code here. 
*/ static void -ip_mloopback(struct ifnet *ifp, struct mbuf *m, - struct sockaddr_in *dst, int hlen) +ip_mloopback(ifp, m, dst, hlen) + struct ifnet *ifp; + register struct mbuf *m; + register struct sockaddr_in *dst; + int hlen; { - struct ip *ip; + register struct ip *ip; struct mbuf *copym; - copym = m_copypacket(m, M_DONTWAIT); + copym = m_copy(m, 0, M_COPYALL); if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen)) copym = m_pullup(copym, hlen); if (copym != NULL) { |