diff options
author | Jeffrey Hsu <hsu@FreeBSD.org> | 2003-08-07 18:16:59 +0000 |
---|---|---|
committer | Jeffrey Hsu <hsu@FreeBSD.org> | 2003-08-07 18:16:59 +0000 |
commit | 1e78ac216edcdcb63645d59c37b2df81b907260a (patch) | |
tree | 19b3069300529d15575ed4e6985a8ef6a20ac290 /sys/netinet/ip_output.c | |
parent | 8285491e78ad3a831f8fc124448b5e2be546f0aa (diff) | |
download | src-1e78ac216edcdcb63645d59c37b2df81b907260a.tar.gz src-1e78ac216edcdcb63645d59c37b2df81b907260a.zip |
1. Basic PIM kernel support
Disabled by default. To enable it, the new "options PIM" must be
added to the kernel configuration file (in addition to MROUTING):
options MROUTING # Multicast routing
options PIM # Protocol Independent Multicast
2. Add support for advanced multicast API setup/configuration and
extensibility.
3. Add support for kernel-level PIM Register encapsulation.
Disabled by default. Can be enabled by the advanced multicast API.
4. Implement a mechanism for "multicast bandwidth monitoring and upcalls".
Submitted by: Pavlin Radoslavov <pavlin@icir.org>
Notes
Notes:
svn path=/head/; revision=118622
Diffstat (limited to 'sys/netinet/ip_output.c')
-rw-r--r-- | sys/netinet/ip_output.c | 257 |
1 files changed, 145 insertions, 112 deletions
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 1c6d8942b48a..60d5b1ddaf8d 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -124,15 +124,10 @@ extern struct protosw inetsw[]; * The mbuf opt, if present, will not be freed. */ int -ip_output(m0, opt, ro, flags, imo, inp) - struct mbuf *m0; - struct mbuf *opt; - struct route *ro; - int flags; - struct ip_moptions *imo; - struct inpcb *inp; +ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, + int flags, struct ip_moptions *imo, struct inpcb *inp) { - struct ip *ip, *mhip; + struct ip *ip; struct ifnet *ifp = NULL; /* keep compiler happy */ struct mbuf *m; int hlen = sizeof (struct ip); @@ -478,7 +473,7 @@ ip_output(m0, opt, ro, flags, imo, inp) goto bad; } /* don't allow broadcast messages to be fragmented */ - if ((u_short)ip->ip_len > ifp->if_mtu) { + if (ip->ip_len > ifp->if_mtu) { error = EMSGSIZE; goto bad; } @@ -1014,8 +1009,7 @@ pass: * If small enough for interface, or the interface will take * care of the fragmentation for us, can just send directly. */ - if ((u_short)ip->ip_len <= ifp->if_mtu || - ifp->if_hwassist & CSUM_FRAGMENT) { + if (ip->ip_len <= ifp->if_mtu || ifp->if_hwassist & CSUM_FRAGMENT) { ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; @@ -1057,10 +1051,7 @@ pass: (struct sockaddr *)dst, ro->ro_rt); goto done; } - /* - * Too large for interface; fragment if possible. - * Must be able to put at least 8 bytes per fragment. - */ + if (ip->ip_off & IP_DF) { error = EMSGSIZE; /* @@ -1070,46 +1061,131 @@ pass: * them, there is no way for one to update all its * routes when the MTU is changed. */ - if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) - && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) - && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { + if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) && + !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) && + (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) { ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu; } ipstat.ips_cantfrag++; goto bad; } - len = (ifp->if_mtu - hlen) &~ 7; - if (len < 8) { - error = EMSGSIZE; + + /* + * Too large for interface; fragment if possible. If successful, + * on return, m will point to a list of packets to be sent. + */ + error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum); + if (error) goto bad; + for (; m; m = m0) { + m0 = m->m_nextpkt; + m->m_nextpkt = 0; +#ifdef IPSEC + /* clean ipsec history once it goes out of the node */ + ipsec_delaux(m); +#endif + if (error == 0) { + /* Record statistics for this interface address. */ + if (ia != NULL) { + ia->ia_ifa.if_opackets++; + ia->ia_ifa.if_obytes += m->m_pkthdr.len; + } + + error = (*ifp->if_output)(ifp, m, + (struct sockaddr *)dst, ro->ro_rt); + } else + m_freem(m); + } + + if (error == 0) + ipstat.ips_fragmented++; + +done: +#ifdef IPSEC + if (ro == &iproute && ro->ro_rt) { + RTFREE(ro->ro_rt); + ro->ro_rt = NULL; + } + if (sp != NULL) { + KEYDEBUG(KEYDEBUG_IPSEC_STAMP, + printf("DP ip_output call free SP:%p\n", sp)); + key_freesp(sp); + } +#endif +#ifdef FAST_IPSEC + if (ro == &iproute && ro->ro_rt) { + RTFREE(ro->ro_rt); + ro->ro_rt = NULL; + } + if (sp != NULL) + KEY_FREESP(&sp); +#endif + return (error); +bad: + m_freem(m); + goto done; +} + +/* + * Create a chain of fragments which fit the given mtu. m_frag points to the + * mbuf to be fragmented; on return it points to the chain with the fragments. + * Return 0 if no error. If error, m_frag may contain a partially built + * chain of fragments that should be freed by the caller. + * + * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist) + * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP). + */ +int +ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, + u_long if_hwassist_flags, int sw_csum) +{ + int error = 0; + int hlen = ip->ip_hl << 2; + int len = (mtu - hlen) & ~7; /* size of payload in each fragment */ + int off; + struct mbuf *m0 = *m_frag; /* the original packet */ + int firstlen; + struct mbuf **mnext; + int nfrags; + + if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */ + ipstat.ips_cantfrag++; + return EMSGSIZE; } /* - * if the interface will not calculate checksums on + * Must be able to put at least 8 bytes per fragment. + */ + if (len < 8) + return EMSGSIZE; + + /* + * If the interface will not calculate checksums on * fragmented packets, then do it here. */ - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA && - (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) { - in_delayed_cksum(m); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA && + (if_hwassist_flags & CSUM_IP_FRAGS) == 0) { + in_delayed_cksum(m0); + m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } if (len > PAGE_SIZE) { /* - * Fragement large datagrams such that each segment + * Fragment large datagrams such that each segment * contains a multiple of PAGE_SIZE amount of data, * plus headers. This enables a receiver to perform * page-flipping zero-copy optimizations. + * + * XXX When does this help given that sender and receiver + * could have different page sizes, and also mtu could + * be less than the receiver's page size ? */ - int newlen; - struct mbuf *mtmp; + struct mbuf *m; + + for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next) + off += m->m_len; - for (mtmp = m, off = 0; - mtmp && ((off + mtmp->m_len) <= ifp->if_mtu); - mtmp = mtmp->m_next) { - off += mtmp->m_len; - } /* * firstlen (off - hlen) must be aligned on an * 8-byte boundary @@ -1117,44 +1193,46 @@ pass: if (off < hlen) goto smart_frag_failure; off = ((off - hlen) & ~7) + hlen; - newlen = (~PAGE_MASK) & ifp->if_mtu; - if ((newlen + sizeof (struct ip)) > ifp->if_mtu) { + newlen = (~PAGE_MASK) & mtu; + if ((newlen + sizeof (struct ip)) > mtu) { /* we failed, go back the default */ smart_frag_failure: newlen = len; off = hlen + len; } - -/* printf("ipfrag: len = %d, hlen = %d, mhlen = %d, newlen = %d, off = %d\n", - len, hlen, sizeof (struct ip), newlen, off);*/ - len = newlen; } else { off = hlen + len; } - - - { - int mhlen, firstlen = off - hlen; - struct mbuf **mnext = &m->m_nextpkt; - int nfrags = 1; + firstlen = off - hlen; + mnext = &m0->m_nextpkt; /* pointer to next packet */ /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto chain. + * Here, m0 is the original packet, m is the fragment being created. + * The fragments are linked off the m_nextpkt of the original + * packet, which after processing serves as the first fragment. */ - m0 = m; - mhlen = sizeof (struct ip); - for (; off < (u_short)ip->ip_len; off += len) { + for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) { + struct ip *mhip; /* ip header on the fragment */ + struct mbuf *m; + int mhlen = sizeof (struct ip); + MGETHDR(m, M_DONTWAIT, MT_HEADER); if (m == 0) { error = ENOBUFS; ipstat.ips_odropped++; - goto sendorfree; + goto done; } m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG; + /* + * In the first mbuf, leave room for the link header, then + * copy the original IP header including options. The payload + * goes into an additional mbuf chain returned by m_copy(). + */ m->m_data += max_linkhdr; mhip = mtod(m, struct ip *); *mhip = *ip; @@ -1164,18 +1242,20 @@ smart_frag_failure: mhip->ip_hl = mhlen >> 2; } m->m_len = mhlen; + /* XXX do we need to add ip->ip_off below ? */ mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; - if (off + len >= (u_short)ip->ip_len) - len = (u_short)ip->ip_len - off; - else + if (off + len >= ip->ip_len) { /* last fragment */ + len = ip->ip_len - off; + m->m_flags |= M_LASTFRAG; + } else mhip->ip_off |= IP_MF; mhip->ip_len = htons((u_short)(len + mhlen)); m->m_next = m_copy(m0, off, len); - if (m->m_next == 0) { - (void) m_free(m); + if (m->m_next == 0) { /* copy failed */ + m_free(m); error = ENOBUFS; /* ??? */ ipstat.ips_odropped++; - goto sendorfree; + goto done; } m->m_pkthdr.len = mhlen + len; m->m_pkthdr.rcvif = (struct ifnet *)0; @@ -1189,76 +1269,29 @@ smart_frag_failure: mhip->ip_sum = in_cksum(m, mhlen); *mnext = m; mnext = &m->m_nextpkt; - nfrags++; } ipstat.ips_ofragments += nfrags; - /* set first/last markers for fragment chain */ - m->m_flags |= M_LASTFRAG; + /* set first marker for fragment chain */ m0->m_flags |= M_FIRSTFRAG | M_FRAG; m0->m_pkthdr.csum_data = nfrags; /* * Update first fragment by trimming what's been copied out - * and updating header, then send each fragment (in order). + * and updating header. */ - m = m0; - m_adj(m, hlen + firstlen - (u_short)ip->ip_len); - m->m_pkthdr.len = hlen + firstlen; - ip->ip_len = htons((u_short)m->m_pkthdr.len); + m_adj(m0, hlen + firstlen - ip->ip_len); + m0->m_pkthdr.len = hlen + firstlen; + ip->ip_len = htons((u_short)m0->m_pkthdr.len); ip->ip_off |= IP_MF; ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) - ip->ip_sum = in_cksum(m, hlen); -sendorfree: - for (m = m0; m; m = m0) { - m0 = m->m_nextpkt; - m->m_nextpkt = 0; -#ifdef IPSEC - /* clean ipsec history once it goes out of the node */ - ipsec_delaux(m); -#endif - if (error == 0) { - /* Record statistics for this interface address. */ - if (ia != NULL) { - ia->ia_ifa.if_opackets++; - ia->ia_ifa.if_obytes += m->m_pkthdr.len; - } - - error = (*ifp->if_output)(ifp, m, - (struct sockaddr *)dst, ro->ro_rt); - } else - m_freem(m); - } + ip->ip_sum = in_cksum(m0, hlen); - if (error == 0) - ipstat.ips_fragmented++; - } done: -#ifdef IPSEC - if (ro == &iproute && ro->ro_rt) { - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; - } - if (sp != NULL) { - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP ip_output call free SP:%p\n", sp)); - key_freesp(sp); - } -#endif /* IPSEC */ -#ifdef FAST_IPSEC - if (ro == &iproute && ro->ro_rt) { - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; - } - if (sp != NULL) - KEY_FREESP(&sp); -#endif /* FAST_IPSEC */ - return (error); -bad: - m_freem(m); - goto done; + *m_frag = m0; + return error; } void @@ -1307,7 +1340,7 @@ ip_insertoptions(m, opt, phlen) unsigned optlen; optlen = opt->m_len - sizeof(p->ipopt_dst); - if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) { + if (optlen + ip->ip_len > IP_MAXPACKET) { *phlen = 0; return (m); /* XXX should fail */ } |