diff options
author | Max Laier <mlaier@FreeBSD.org> | 2008-12-10 21:24:31 +0000 |
---|---|---|
committer | Max Laier <mlaier@FreeBSD.org> | 2008-12-10 21:24:31 +0000 |
commit | bbe43470868206ce791124974947c85b485b8005 (patch) | |
tree | 8788aee247dba7e1bacacbad57a9bb2deecbf425 | |
parent | 17f418e29d3ae329309cbe068d302f7e12e15b55 (diff) |
Import OPENBSD_4_4_BASE (tag: vendor/pf-sys/4.4)
Notes
Notes:
svn path=/vendor-sys/pf/dist/; revision=185888
svn path=/vendor-sys/pf/4.4/; revision=185889; tag=vendor/pf-sys/4.4
-rw-r--r-- | net/if_pfsync.c | 310 | ||||
-rw-r--r-- | net/if_pfsync.h | 78 | ||||
-rw-r--r-- | net/pf.c | 2829 | ||||
-rw-r--r-- | net/pf_if.c | 8 | ||||
-rw-r--r-- | net/pf_ioctl.c | 342 | ||||
-rw-r--r-- | net/pf_norm.c | 26 | ||||
-rw-r--r-- | net/pf_osfp.c | 14 | ||||
-rw-r--r-- | net/pf_table.c | 55 | ||||
-rw-r--r-- | net/pfvar.h | 219 |
9 files changed, 2011 insertions, 1870 deletions
diff --git a/net/if_pfsync.c b/net/if_pfsync.c index 2d3d3e4443a5..68d3ac40877b 100644 --- a/net/if_pfsync.c +++ b/net/if_pfsync.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pfsync.c,v 1.89 2008/01/12 17:08:33 mpf Exp $ */ +/* $OpenBSD: if_pfsync.c,v 1.98 2008/06/29 08:42:15 mcbride Exp $ */ /* * Copyright (c) 2002 Michael Shalayeff @@ -89,7 +89,6 @@ int pfsync_clone_destroy(struct ifnet *); void pfsync_setmtu(struct pfsync_softc *, int); int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, struct pf_state_peer *); -int pfsync_insert_net_state(struct pfsync_state *, u_int8_t); void pfsync_update_net_tdb(struct pfsync_tdb *); int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); @@ -224,117 +223,218 @@ pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, struct pf_state_peer *d) { if (s->scrub.scrub_flag && d->scrub == NULL) { - d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); + d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); if (d->scrub == NULL) return (ENOMEM); - bzero(d->scrub, sizeof(*d->scrub)); } return (0); } +void +pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) +{ + bzero(sp, sizeof(struct pfsync_state)); + + /* copy from state key */ + sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; + sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; + sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; + sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; + sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; + sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; + sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; + sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; + sp->proto = st->key[PF_SK_WIRE]->proto; + sp->af = st->key[PF_SK_WIRE]->af; + + /* copy from state */ + strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); + bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); + sp->creation = htonl(time_second - 
st->creation); + sp->expire = pf_state_expires(st); + if (sp->expire <= time_second) + sp->expire = htonl(0); + else + sp->expire = htonl(sp->expire - time_second); + + sp->direction = st->direction; + sp->log = st->log; + sp->timeout = st->timeout; + sp->state_flags = st->state_flags; + if (st->src_node) + sp->sync_flags |= PFSYNC_FLAG_SRCNODE; + if (st->nat_src_node) + sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; + + bcopy(&st->id, &sp->id, sizeof(sp->id)); + sp->creatorid = st->creatorid; + pf_state_peer_hton(&st->src, &sp->src); + pf_state_peer_hton(&st->dst, &sp->dst); + + if (st->rule.ptr == NULL) + sp->rule = htonl(-1); + else + sp->rule = htonl(st->rule.ptr->nr); + if (st->anchor.ptr == NULL) + sp->anchor = htonl(-1); + else + sp->anchor = htonl(st->anchor.ptr->nr); + if (st->nat_rule.ptr == NULL) + sp->nat_rule = htonl(-1); + else + sp->nat_rule = htonl(st->nat_rule.ptr->nr); + + pf_state_counter_hton(st->packets[0], sp->packets[0]); + pf_state_counter_hton(st->packets[1], sp->packets[1]); + pf_state_counter_hton(st->bytes[0], sp->bytes[0]); + pf_state_counter_hton(st->bytes[1], sp->bytes[1]); + +} + int -pfsync_insert_net_state(struct pfsync_state *sp, u_int8_t chksum_flag) +pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) { struct pf_state *st = NULL; - struct pf_state_key *sk = NULL; + struct pf_state_key *skw = NULL, *sks = NULL; struct pf_rule *r = NULL; struct pfi_kif *kif; + int pool_flags; + int error; if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) { - printf("pfsync_insert_net_state: invalid creator id:" + printf("pfsync_state_import: invalid creator id:" " %08x\n", ntohl(sp->creatorid)); return (EINVAL); } - kif = pfi_kif_get(sp->ifname); - if (kif == NULL) { + if ((kif = pfi_kif_get(sp->ifname)) == NULL) { if (pf_status.debug >= PF_DEBUG_MISC) - printf("pfsync_insert_net_state: " + printf("pfsync_state_import: " "unknown interface: %s\n", sp->ifname); - /* skip this state */ - return (0); + if (flags & PFSYNC_SI_IOCTL) + 
return (EINVAL); + return (0); /* skip this state */ } /* - * If the ruleset checksums match, it's safe to associate the state - * with the rule of that number. + * If the ruleset checksums match or the state is coming from the ioctl, + * it's safe to associate the state with the rule of that number. */ - if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && chksum_flag && - ntohl(sp->rule) < + if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && + (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) r = pf_main_ruleset.rules[ PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; else r = &pf_default_rule; - if (!r->max_states || r->states < r->max_states) - st = pool_get(&pf_state_pl, PR_NOWAIT); - if (st == NULL) { - pfi_kif_unref(kif, PFI_KIF_REF_NONE); - return (ENOMEM); - } - bzero(st, sizeof(*st)); + if ((r->max_states && r->states_cur >= r->max_states)) + goto cleanup; - if ((sk = pf_alloc_state_key(st)) == NULL) { - pool_put(&pf_state_pl, st); - pfi_kif_unref(kif, PFI_KIF_REF_NONE); - return (ENOMEM); - } + if (flags & PFSYNC_SI_IOCTL) + pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO; + else + pool_flags = PR_LIMITFAIL | PR_ZERO; + + if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL) + goto cleanup; + + if ((skw = pf_alloc_state_key(pool_flags)) == NULL) + goto cleanup; + + if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], + &sp->key[PF_SK_STACK].addr[0], sp->af) || + PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], + &sp->key[PF_SK_STACK].addr[1], sp->af) || + sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || + sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) { + if ((sks = pf_alloc_state_key(pool_flags)) == NULL) + goto cleanup; + } else + sks = skw; /* allocate memory for scrub info */ if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || - pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) { - pfi_kif_unref(kif, PFI_KIF_REF_NONE); - if (st->src.scrub) - 
pool_put(&pf_state_scrub_pl, st->src.scrub); - pool_put(&pf_state_pl, st); - pool_put(&pf_state_key_pl, sk); - return (ENOMEM); + pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) + goto cleanup; + + /* copy to state key(s) */ + skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; + skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; + skw->port[0] = sp->key[PF_SK_WIRE].port[0]; + skw->port[1] = sp->key[PF_SK_WIRE].port[1]; + skw->proto = sp->proto; + skw->af = sp->af; + if (sks != skw) { + sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; + sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; + sks->port[0] = sp->key[PF_SK_STACK].port[0]; + sks->port[1] = sp->key[PF_SK_STACK].port[1]; + sks->proto = sp->proto; + sks->af = sp->af; } - st->rule.ptr = r; - /* XXX get pointers to nat_rule and anchor */ - - /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ - r->states++; - - /* fill in the rest of the state entry */ - pf_state_host_ntoh(&sp->lan, &sk->lan); - pf_state_host_ntoh(&sp->gwy, &sk->gwy); - pf_state_host_ntoh(&sp->ext, &sk->ext); - - pf_state_peer_ntoh(&sp->src, &st->src); - pf_state_peer_ntoh(&sp->dst, &st->dst); - + /* copy to state */ bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); st->creation = time_second - ntohl(sp->creation); - st->expire = ntohl(sp->expire) + time_second; + st->expire = time_second; + if (sp->expire) { + /* XXX No adaptive scaling. 
*/ + st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire); + } - sk->af = sp->af; - sk->proto = sp->proto; - sk->direction = sp->direction; + st->expire = ntohl(sp->expire) + time_second; + st->direction = sp->direction; st->log = sp->log; st->timeout = sp->timeout; - st->allow_opts = sp->allow_opts; + st->state_flags = sp->state_flags; + if (!(flags & PFSYNC_SI_IOCTL)) + st->sync_flags = PFSTATE_FROMSYNC; bcopy(sp->id, &st->id, sizeof(st->id)); st->creatorid = sp->creatorid; - st->sync_flags = PFSTATE_FROMSYNC; + pf_state_peer_ntoh(&sp->src, &st->src); + pf_state_peer_ntoh(&sp->dst, &st->dst); + + st->rule.ptr = r; + st->nat_rule.ptr = NULL; + st->anchor.ptr = NULL; + st->rt_kif = NULL; + + st->pfsync_time = 0; - if (pf_insert_state(kif, st)) { - pfi_kif_unref(kif, PFI_KIF_REF_NONE); + + /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ + r->states_cur++; + r->states_tot++; + + if ((error = pf_state_insert(kif, skw, sks, st)) != 0) { /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ - r->states--; + r->states_cur--; + goto cleanup_state; + } + + return (0); + + cleanup: + error = ENOMEM; + if (skw == sks) + sks = NULL; + if (skw != NULL) + pool_put(&pf_state_key_pl, skw); + if (sks != NULL) + pool_put(&pf_state_key_pl, sks); + + cleanup_state: /* pf_state_insert frees the state keys */ + if (st) { if (st->dst.scrub) pool_put(&pf_state_scrub_pl, st->dst.scrub); if (st->src.scrub) pool_put(&pf_state_scrub_pl, st->src.scrub); pool_put(&pf_state_pl, st); - return (EINVAL); } - - return (0); + return (error); } void @@ -345,6 +445,7 @@ pfsync_input(struct mbuf *m, ...) struct pfsync_softc *sc = pfsyncif; struct pf_state *st; struct pf_state_key *sk; + struct pf_state_item *si; struct pf_state_cmp id_key; struct pfsync_state *sp; struct pfsync_state_upd *up; @@ -358,7 +459,7 @@ pfsync_input(struct mbuf *m, ...) 
struct in_addr src; struct mbuf *mp; int iplen, action, error, i, s, count, offp, sfail, stale = 0; - u_int8_t chksum_flag = 0; + u_int8_t flags = 0; pfsyncstats.pfsyncs_ipackets++; @@ -413,7 +514,7 @@ pfsync_input(struct mbuf *m, ...) src = ip->ip_src; if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) - chksum_flag++; + flags |= PFSYNC_SI_CKSUM; switch (action) { case PFSYNC_ACT_CLR: { @@ -444,15 +545,16 @@ pfsync_input(struct mbuf *m, ...) splx(s); return; } - for (sk = RB_MIN(pf_state_tree_lan_ext, - &pf_statetbl_lan_ext); sk; sk = nextsk) { - nextsk = RB_NEXT(pf_state_tree_lan_ext, - &pf_statetbl_lan_ext, sk); - TAILQ_FOREACH(st, &sk->states, next) { - if (st->creatorid == creatorid) { - st->sync_flags |= + /* XXX correct? */ + for (sk = RB_MIN(pf_state_tree, + &pf_statetbl); sk; sk = nextsk) { + nextsk = RB_NEXT(pf_state_tree, + &pf_statetbl, sk); + TAILQ_FOREACH(si, &sk->states, entry) { + if (si->s->creatorid == creatorid) { + si->s->sync_flags |= PFSTATE_FROMSYNC; - pf_unlink_state(st); + pf_unlink_state(si->s); } } } @@ -484,8 +586,7 @@ pfsync_input(struct mbuf *m, ...) continue; } - if ((error = pfsync_insert_net_state(sp, - chksum_flag))) { + if ((error = pfsync_state_import(sp, flags))) { if (error == ENOMEM) { splx(s); goto done; @@ -524,11 +625,11 @@ pfsync_input(struct mbuf *m, ...) st = pf_find_state_byid(&id_key); if (st == NULL) { /* insert the update */ - if (pfsync_insert_net_state(sp, chksum_flag)) + if (pfsync_state_import(sp, flags)) pfsyncstats.pfsyncs_badstate++; continue; } - sk = st->state_key; + sk = st->key[PF_SK_WIRE]; /* XXX right one? */ sfail = 0; if (sk->proto == IPPROTO_TCP) { /* @@ -589,7 +690,7 @@ pfsync_input(struct mbuf *m, ...) 
} continue; } - pfsync_alloc_scrub_memory(&sp->dst, &st->dst); + pfsync_alloc_scrub_memory(&sp->dst, &st->dst); pf_state_peer_ntoh(&sp->src, &st->src); pf_state_peer_ntoh(&sp->dst, &st->dst); st->expire = ntohl(sp->expire) + time_second; @@ -665,7 +766,7 @@ pfsync_input(struct mbuf *m, ...) pfsyncstats.pfsyncs_badstate++; continue; } - sk = st->state_key; + sk = st->key[PF_SK_WIRE]; /* XXX right one? */ sfail = 0; if (sk->proto == IPPROTO_TCP) { /* @@ -716,7 +817,7 @@ pfsync_input(struct mbuf *m, ...) PFSYNC_FLAG_STALE); continue; } - pfsync_alloc_scrub_memory(&up->dst, &st->dst); + pfsync_alloc_scrub_memory(&up->dst, &st->dst); pf_state_peer_ntoh(&up->src, &st->src); pf_state_peer_ntoh(&up->dst, &st->dst); st->expire = ntohl(up->expire) + time_second; @@ -1117,9 +1218,6 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) struct pfsync_state *sp = NULL; struct pfsync_state_upd *up = NULL; struct pfsync_state_del *dp = NULL; - struct pf_state_key *sk = st->state_key; - struct pf_rule *r; - u_long secs; int s, ret = 0; u_int8_t i = 255, newaction = 0; @@ -1186,8 +1284,6 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) } } - secs = time_second; - st->pfsync_time = time_uptime; if (sp == NULL) { @@ -1199,47 +1295,19 @@ pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) h->count++; bzero(sp, sizeof(*sp)); - bcopy(&st->id, sp->id, sizeof(sp->id)); - sp->creatorid = st->creatorid; - - strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); - pf_state_host_hton(&sk->lan, &sp->lan); - pf_state_host_hton(&sk->gwy, &sp->gwy); - pf_state_host_hton(&sk->ext, &sp->ext); - - bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); - - sp->creation = htonl(secs - st->creation); - pf_state_counter_hton(st->packets[0], sp->packets[0]); - pf_state_counter_hton(st->packets[1], sp->packets[1]); - pf_state_counter_hton(st->bytes[0], sp->bytes[0]); - pf_state_counter_hton(st->bytes[1], sp->bytes[1]); - if ((r = st->rule.ptr) == 
NULL) - sp->rule = htonl(-1); - else - sp->rule = htonl(r->nr); - if ((r = st->anchor.ptr) == NULL) - sp->anchor = htonl(-1); - else - sp->anchor = htonl(r->nr); - sp->af = sk->af; - sp->proto = sk->proto; - sp->direction = sk->direction; - sp->log = st->log; - sp->allow_opts = st->allow_opts; - sp->timeout = st->timeout; + pfsync_state_export(sp, st); if (flags & PFSYNC_FLAG_STALE) sp->sync_flags |= PFSTATE_STALE; - } - - pf_state_peer_hton(&st->src, &sp->src); - pf_state_peer_hton(&st->dst, &sp->dst); + } else { + pf_state_peer_hton(&st->src, &sp->src); + pf_state_peer_hton(&st->dst, &sp->dst); - if (st->expire <= secs) - sp->expire = htonl(0); - else - sp->expire = htonl(st->expire - secs); + if (st->expire <= time_second) + sp->expire = htonl(0); + else + sp->expire = htonl(st->expire - time_second); + } /* do we need to build "compressed" actions for network transfer? */ if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) { @@ -1715,7 +1783,7 @@ pfsync_update_tdb(struct tdb *tdb, int output) for (i = 0; !pt && i < h->count; i++) { if (tdb->tdb_spi == u->spi && tdb->tdb_sproto == u->sproto && - !bcmp(&tdb->tdb_dst, &u->dst, + !bcmp(&tdb->tdb_dst, &u->dst, SA_LEN(&u->dst.sa))) { pt = u; pt->updates++; diff --git a/net/if_pfsync.h b/net/if_pfsync.h index e94dad5fa46a..1fa562c9590d 100644 --- a/net/if_pfsync.h +++ b/net/if_pfsync.h @@ -1,4 +1,4 @@ -/* $OpenBSD: if_pfsync.h,v 1.32 2007/12/14 18:33:37 deraadt Exp $ */ +/* $OpenBSD: if_pfsync.h,v 1.35 2008/06/29 08:42:15 mcbride Exp $ */ /* * Copyright (c) 2001 Michael Shalayeff @@ -146,7 +146,7 @@ extern struct pfsync_softc *pfsyncif; struct pfsync_header { u_int8_t version; -#define PFSYNC_VERSION 3 +#define PFSYNC_VERSION 4 u_int8_t af; u_int8_t action; #define PFSYNC_ACT_CLR 0 /* clear all states */ @@ -205,72 +205,22 @@ struct pfsyncreq { int pfsyncr_authlevel; }; - -/* for copies to/from network */ -#define pf_state_peer_hton(s,d) do { \ - (d)->seqlo = htonl((s)->seqlo); \ - (d)->seqhi = htonl((s)->seqhi); \ - 
(d)->seqdiff = htonl((s)->seqdiff); \ - (d)->max_win = htons((s)->max_win); \ - (d)->mss = htons((s)->mss); \ - (d)->state = (s)->state; \ - (d)->wscale = (s)->wscale; \ - if ((s)->scrub) { \ - (d)->scrub.pfss_flags = \ - htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \ - (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ - (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\ - (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \ - } \ -} while (0) - -#define pf_state_peer_ntoh(s,d) do { \ - (d)->seqlo = ntohl((s)->seqlo); \ - (d)->seqhi = ntohl((s)->seqhi); \ - (d)->seqdiff = ntohl((s)->seqdiff); \ - (d)->max_win = ntohs((s)->max_win); \ - (d)->mss = ntohs((s)->mss); \ - (d)->state = (s)->state; \ - (d)->wscale = (s)->wscale; \ - if ((s)->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && \ - (d)->scrub != NULL) { \ - (d)->scrub->pfss_flags = \ - ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ - (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ - (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\ - } \ -} while (0) - -#define pf_state_host_hton(s,d) do { \ - bcopy(&(s)->addr, &(d)->addr, sizeof((d)->addr)); \ - (d)->port = (s)->port; \ -} while (0) - -#define pf_state_host_ntoh(s,d) do { \ - bcopy(&(s)->addr, &(d)->addr, sizeof((d)->addr)); \ - (d)->port = (s)->port; \ -} while (0) - -#define pf_state_counter_hton(s,d) do { \ - d[0] = htonl((s>>32)&0xffffffff); \ - d[1] = htonl(s&0xffffffff); \ -} while (0) - -#define pf_state_counter_ntoh(s,d) do { \ - d = ntohl(s[0]); \ - d = d<<32; \ - d += ntohl(s[1]); \ -} while (0) - #ifdef _KERNEL -void pfsync_input(struct mbuf *, ...); -int pfsync_clear_states(u_int32_t, char *); -int pfsync_pack_state(u_int8_t, struct pf_state *, int); -int pfsync_sysctl(int *, u_int, void *, size_t *, void *, size_t); +void pfsync_input(struct mbuf *, ...); +int pfsync_clear_states(u_int32_t, char *); +int pfsync_pack_state(u_int8_t, struct pf_state *, int); +int pfsync_sysctl(int *, u_int, void *, size_t *, + void *, size_t); +void 
pfsync_state_export(struct pfsync_state *, + struct pf_state *); + +#define PFSYNC_SI_IOCTL 0x01 +#define PFSYNC_SI_CKSUM 0x02 +int pfsync_state_import(struct pfsync_state *, u_int8_t); #define pfsync_insert_state(st) do { \ if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || \ - (st->state_key->proto == IPPROTO_PFSYNC)) \ + (st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC)) \ st->sync_flags |= PFSTATE_NOSYNC; \ else if (!st->sync_flags) \ pfsync_pack_state(PFSYNC_ACT_INS, (st), \ @@ -1,8 +1,8 @@ -/* $OpenBSD: pf.c,v 1.567 2008/02/20 23:40:13 henning Exp $ */ +/* $OpenBSD: pf.c,v 1.614 2008/08/02 12:34:37 henning Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier - * Copyright (c) 2002,2003 Henning Brauer + * Copyright (c) 2002 - 2008 Henning Brauer * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -98,8 +98,7 @@ */ /* state tables */ -struct pf_state_tree_lan_ext pf_statetbl_lan_ext; -struct pf_state_tree_ext_gwy pf_statetbl_ext_gwy; +struct pf_state_tree pf_statetbl; struct pf_altqqueue pf_altqs[2]; struct pf_palist pf_pabuf; @@ -125,7 +124,7 @@ struct pf_anchor_stackframe { } pf_anchor_stack[64]; struct pool pf_src_tree_pl, pf_rule_pl, pf_pooladdr_pl; -struct pool pf_state_pl, pf_state_key_pl; +struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl; struct pool pf_altq_pl; void pf_print_host(struct pf_addr *, u_int16_t, u_int8_t); @@ -161,21 +160,41 @@ struct pf_rule *pf_match_translation(struct pf_pdesc *, struct mbuf *, u_int16_t, int); struct pf_rule *pf_get_translation(struct pf_pdesc *, struct mbuf *, int, int, struct pfi_kif *, struct pf_src_node **, - struct pf_addr *, u_int16_t, - struct pf_addr *, u_int16_t, - struct pf_addr *, u_int16_t *); -void pf_attach_state(struct pf_state_key *, - struct pf_state *, int); -void pf_detach_state(struct pf_state *, int); + struct pf_state_key **, struct pf_state_key **, + struct pf_state_key **, struct pf_state_key **, + struct pf_addr *, struct pf_addr *, + u_int16_t, 
u_int16_t); +void pf_detach_state(struct pf_state *); +int pf_state_key_setup(struct pf_pdesc *, struct pf_rule *, + struct pf_state_key **, struct pf_state_key **, + struct pf_state_key **, struct pf_state_key **, + struct pf_addr *, struct pf_addr *, + u_int16_t, u_int16_t); +void pf_state_key_detach(struct pf_state *, int); u_int32_t pf_tcp_iss(struct pf_pdesc *); int pf_test_rule(struct pf_rule **, struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, struct pf_rule **, struct pf_ruleset **, struct ifqueue *); +static __inline int pf_create_state(struct pf_rule *, struct pf_rule *, + struct pf_rule *, struct pf_pdesc *, + struct pf_src_node *, struct pf_state_key *, + struct pf_state_key *, struct pf_state_key *, + struct pf_state_key *, struct mbuf *, int, + u_int16_t, u_int16_t, int *, struct pfi_kif *, + struct pf_state **, int, u_int16_t, u_int16_t, + int); int pf_test_fragment(struct pf_rule **, int, struct pfi_kif *, struct mbuf *, void *, struct pf_pdesc *, struct pf_rule **, struct pf_ruleset **); +int pf_tcp_track_full(struct pf_state_peer *, + struct pf_state_peer *, struct pf_state **, + struct pfi_kif *, struct mbuf *, int, + struct pf_pdesc *, u_short *, int *); +int pf_tcp_track_sloppy(struct pf_state_peer *, + struct pf_state_peer *, struct pf_state **, + struct pf_pdesc *, u_short *); int pf_test_state_tcp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); @@ -186,10 +205,9 @@ int pf_test_state_icmp(struct pf_state **, int, struct pfi_kif *, struct mbuf *, int, void *, struct pf_pdesc *, u_short *); int pf_test_state_other(struct pf_state **, int, - struct pfi_kif *, struct pf_pdesc *); -int pf_match_tag(struct mbuf *, struct pf_rule *, int *); + struct pfi_kif *, struct mbuf *, struct pf_pdesc *); void pf_step_into_anchor(int *, struct pf_ruleset **, int, - struct pf_rule **, struct pf_rule **, int *); + struct pf_rule **, struct pf_rule **, int *); int 
pf_step_out_of_anchor(int *, struct pf_ruleset **, int, struct pf_rule **, struct pf_rule **, int *); @@ -219,13 +237,14 @@ void pf_set_rt_ifp(struct pf_state *, struct pf_addr *); int pf_check_proto_cksum(struct mbuf *, int, int, u_int8_t, sa_family_t); +struct pf_divert *pf_get_divert(struct mbuf *); +void pf_print_state_parts(struct pf_state *, + struct pf_state_key *, struct pf_state_key *); int pf_addr_wrap_neq(struct pf_addr_wrap *, struct pf_addr_wrap *); struct pf_state *pf_find_state(struct pfi_kif *, - struct pf_state_key_cmp *, u_int); + struct pf_state_key_cmp *, u_int, struct mbuf *); int pf_src_connlimit(struct pf_state **); -void pf_stateins_err(const char *, struct pf_state *, - struct pfi_kif *); int pf_check_congestion(struct ifqueue *); extern struct pool pfr_ktable_pl; @@ -239,54 +258,49 @@ struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { { &pfr_kentry_pl, PFR_KENTRY_HIWAT } }; -#define STATE_LOOKUP() \ +#define STATE_LOOKUP(i, k, d, s, m) \ do { \ - *state = pf_find_state(kif, &key, direction); \ - if (*state == NULL || (*state)->timeout == PFTM_PURGE) \ + s = pf_find_state(i, k, d, m); \ + if (s == NULL || (s)->timeout == PFTM_PURGE) \ return (PF_DROP); \ - if (direction == PF_OUT && \ - (((*state)->rule.ptr->rt == PF_ROUTETO && \ - (*state)->rule.ptr->direction == PF_OUT) || \ - ((*state)->rule.ptr->rt == PF_REPLYTO && \ - (*state)->rule.ptr->direction == PF_IN)) && \ - (*state)->rt_kif != NULL && \ - (*state)->rt_kif != kif) \ + if (d == PF_OUT && \ + (((s)->rule.ptr->rt == PF_ROUTETO && \ + (s)->rule.ptr->direction == PF_OUT) || \ + ((s)->rule.ptr->rt == PF_REPLYTO && \ + (s)->rule.ptr->direction == PF_IN)) && \ + (s)->rt_kif != NULL && \ + (s)->rt_kif != i) \ return (PF_PASS); \ } while (0) -#define STATE_TRANSLATE(sk) \ - (sk)->lan.addr.addr32[0] != (sk)->gwy.addr.addr32[0] || \ - ((sk)->af == AF_INET6 && \ - ((sk)->lan.addr.addr32[1] != (sk)->gwy.addr.addr32[1] || \ - (sk)->lan.addr.addr32[2] != (sk)->gwy.addr.addr32[2] || \ - 
(sk)->lan.addr.addr32[3] != (sk)->gwy.addr.addr32[3])) || \ - (sk)->lan.port != (sk)->gwy.port - #define BOUND_IFACE(r, k) \ ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all #define STATE_INC_COUNTERS(s) \ do { \ - s->rule.ptr->states++; \ - if (s->anchor.ptr != NULL) \ - s->anchor.ptr->states++; \ - if (s->nat_rule.ptr != NULL) \ - s->nat_rule.ptr->states++; \ + s->rule.ptr->states_cur++; \ + s->rule.ptr->states_tot++; \ + if (s->anchor.ptr != NULL) { \ + s->anchor.ptr->states_cur++; \ + s->anchor.ptr->states_tot++; \ + } \ + if (s->nat_rule.ptr != NULL) { \ + s->nat_rule.ptr->states_cur++; \ + s->nat_rule.ptr->states_tot++; \ + } \ } while (0) #define STATE_DEC_COUNTERS(s) \ do { \ if (s->nat_rule.ptr != NULL) \ - s->nat_rule.ptr->states--; \ + s->nat_rule.ptr->states_cur--; \ if (s->anchor.ptr != NULL) \ - s->anchor.ptr->states--; \ - s->rule.ptr->states--; \ + s->anchor.ptr->states_cur--; \ + s->rule.ptr->states_cur--; \ } while (0) static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *); -static __inline int pf_state_compare_lan_ext(struct pf_state_key *, - struct pf_state_key *); -static __inline int pf_state_compare_ext_gwy(struct pf_state_key *, +static __inline int pf_state_compare_key(struct pf_state_key *, struct pf_state_key *); static __inline int pf_state_compare_id(struct pf_state *, struct pf_state *); @@ -297,16 +311,10 @@ struct pf_state_tree_id tree_id; struct pf_state_queue state_list; RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); -RB_GENERATE(pf_state_tree_lan_ext, pf_state_key, - entry_lan_ext, pf_state_compare_lan_ext); -RB_GENERATE(pf_state_tree_ext_gwy, pf_state_key, - entry_ext_gwy, pf_state_compare_ext_gwy); +RB_GENERATE(pf_state_tree, pf_state_key, entry, pf_state_compare_key); RB_GENERATE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id); -#define PF_DT_SKIP_LANEXT 0x01 -#define PF_DT_SKIP_EXTGWY 0x02 - static __inline int pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) { @@ 
-351,157 +359,6 @@ pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) return (0); } -static __inline int -pf_state_compare_lan_ext(struct pf_state_key *a, struct pf_state_key *b) -{ - int diff; - - if ((diff = a->proto - b->proto) != 0) - return (diff); - if ((diff = a->af - b->af) != 0) - return (diff); - switch (a->af) { -#ifdef INET - case AF_INET: - if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0]) - return (1); - if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0]) - return (-1); - if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) - return (1); - if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) - return (-1); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (a->lan.addr.addr32[3] > b->lan.addr.addr32[3]) - return (1); - if (a->lan.addr.addr32[3] < b->lan.addr.addr32[3]) - return (-1); - if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) - return (1); - if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) - return (-1); - if (a->lan.addr.addr32[2] > b->lan.addr.addr32[2]) - return (1); - if (a->lan.addr.addr32[2] < b->lan.addr.addr32[2]) - return (-1); - if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) - return (1); - if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) - return (-1); - if (a->lan.addr.addr32[1] > b->lan.addr.addr32[1]) - return (1); - if (a->lan.addr.addr32[1] < b->lan.addr.addr32[1]) - return (-1); - if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) - return (1); - if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) - return (-1); - if (a->lan.addr.addr32[0] > b->lan.addr.addr32[0]) - return (1); - if (a->lan.addr.addr32[0] < b->lan.addr.addr32[0]) - return (-1); - if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) - return (1); - if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) - return (-1); - break; -#endif /* INET6 */ - } - - if ((diff = a->lan.port - b->lan.port) != 0) - return (diff); - if ((diff = a->ext.port - b->ext.port) != 0) - return (diff); - - return (0); -} - -static __inline int 
-pf_state_compare_ext_gwy(struct pf_state_key *a, struct pf_state_key *b) -{ - int diff; - - if ((diff = a->proto - b->proto) != 0) - return (diff); - if ((diff = a->af - b->af) != 0) - return (diff); - switch (a->af) { -#ifdef INET - case AF_INET: - if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) - return (1); - if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) - return (-1); - if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) - return (1); - if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) - return (-1); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - if (a->ext.addr.addr32[3] > b->ext.addr.addr32[3]) - return (1); - if (a->ext.addr.addr32[3] < b->ext.addr.addr32[3]) - return (-1); - if (a->gwy.addr.addr32[3] > b->gwy.addr.addr32[3]) - return (1); - if (a->gwy.addr.addr32[3] < b->gwy.addr.addr32[3]) - return (-1); - if (a->ext.addr.addr32[2] > b->ext.addr.addr32[2]) - return (1); - if (a->ext.addr.addr32[2] < b->ext.addr.addr32[2]) - return (-1); - if (a->gwy.addr.addr32[2] > b->gwy.addr.addr32[2]) - return (1); - if (a->gwy.addr.addr32[2] < b->gwy.addr.addr32[2]) - return (-1); - if (a->ext.addr.addr32[1] > b->ext.addr.addr32[1]) - return (1); - if (a->ext.addr.addr32[1] < b->ext.addr.addr32[1]) - return (-1); - if (a->gwy.addr.addr32[1] > b->gwy.addr.addr32[1]) - return (1); - if (a->gwy.addr.addr32[1] < b->gwy.addr.addr32[1]) - return (-1); - if (a->ext.addr.addr32[0] > b->ext.addr.addr32[0]) - return (1); - if (a->ext.addr.addr32[0] < b->ext.addr.addr32[0]) - return (-1); - if (a->gwy.addr.addr32[0] > b->gwy.addr.addr32[0]) - return (1); - if (a->gwy.addr.addr32[0] < b->gwy.addr.addr32[0]) - return (-1); - break; -#endif /* INET6 */ - } - - if ((diff = a->ext.port - b->ext.port) != 0) - return (diff); - if ((diff = a->gwy.port - b->gwy.port) != 0) - return (diff); - - return (0); -} - -static __inline int -pf_state_compare_id(struct pf_state *a, struct pf_state *b) -{ - if (a->id > b->id) - return (1); - if (a->id < b->id) - return (-1); - 
if (a->creatorid > b->creatorid) - return (1); - if (a->creatorid < b->creatorid) - return (-1); - - return (0); -} - #ifdef INET6 void pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) @@ -522,77 +379,6 @@ pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) } #endif /* INET6 */ -struct pf_state * -pf_find_state_byid(struct pf_state_cmp *key) -{ - pf_status.fcounters[FCNT_STATE_SEARCH]++; - - return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); -} - -struct pf_state * -pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir) -{ - struct pf_state_key *sk; - struct pf_state *s; - - pf_status.fcounters[FCNT_STATE_SEARCH]++; - - switch (dir) { - case PF_OUT: - sk = RB_FIND(pf_state_tree_lan_ext, &pf_statetbl_lan_ext, - (struct pf_state_key *)key); - break; - case PF_IN: - sk = RB_FIND(pf_state_tree_ext_gwy, &pf_statetbl_ext_gwy, - (struct pf_state_key *)key); - break; - default: - panic("pf_find_state"); - } - - /* list is sorted, if-bound states before floating ones */ - if (sk != NULL) - TAILQ_FOREACH(s, &sk->states, next) - if (s->kif == pfi_all || s->kif == kif) - return (s); - - return (NULL); -} - -struct pf_state * -pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) -{ - struct pf_state_key *sk; - struct pf_state *s, *ret = NULL; - - pf_status.fcounters[FCNT_STATE_SEARCH]++; - - switch (dir) { - case PF_OUT: - sk = RB_FIND(pf_state_tree_lan_ext, - &pf_statetbl_lan_ext, (struct pf_state_key *)key); - break; - case PF_IN: - sk = RB_FIND(pf_state_tree_ext_gwy, - &pf_statetbl_ext_gwy, (struct pf_state_key *)key); - break; - default: - panic("pf_find_state_all"); - } - - if (sk != NULL) { - ret = TAILQ_FIRST(&sk->states); - if (more == NULL) - return (ret); - - TAILQ_FOREACH(s, &sk->states, next) - (*more)++; - } - - return (ret); -} - void pf_init_threshold(struct pf_threshold *threshold, u_int32_t limit, u_int32_t seconds) @@ -656,12 +442,12 @@ pf_src_connlimit(struct 
pf_state **state) if (pf_status.debug >= PF_DEBUG_MISC) { printf("pf_src_connlimit: blocking address "); pf_print_host(&(*state)->src_node->addr, 0, - (*state)->state_key->af); + (*state)->key[PF_SK_WIRE]->af); } bzero(&p, sizeof(p)); - p.pfra_af = (*state)->state_key->af; - switch ((*state)->state_key->af) { + p.pfra_af = (*state)->key[PF_SK_WIRE]->af; + switch ((*state)->key[PF_SK_WIRE]->af) { #ifdef INET case AF_INET: p.pfra_net = 32; @@ -686,21 +472,20 @@ pf_src_connlimit(struct pf_state **state) pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; RB_FOREACH(st, pf_state_tree_id, &tree_id) { - sk = st->state_key; + sk = st->key[PF_SK_WIRE]; /* * Kill states from this source. (Only those * from the same rule if PF_FLUSH_GLOBAL is not * set) */ if (sk->af == - (*state)->state_key->af && - (((*state)->state_key->direction == - PF_OUT && + (*state)->key[PF_SK_WIRE]->af && + (((*state)->direction == PF_OUT && PF_AEQ(&(*state)->src_node->addr, - &sk->lan.addr, sk->af)) || - ((*state)->state_key->direction == PF_IN && + &sk->addr[0], sk->af)) || + ((*state)->direction == PF_IN && PF_AEQ(&(*state)->src_node->addr, - &sk->ext.addr, sk->af))) && + &sk->addr[1], sk->af))) && ((*state)->rule.ptr->flush & PF_FLUSH_GLOBAL || (*state)->rule.ptr == st->rule.ptr)) { @@ -743,12 +528,11 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, if (*sn == NULL) { if (!rule->max_src_nodes || rule->src_nodes < rule->max_src_nodes) - (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT); + (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO); else pf_status.lcounters[LCNT_SRCNODES]++; if ((*sn) == NULL) return (-1); - bzero(*sn, sizeof(struct pf_src_node)); pf_init_threshold(&(*sn)->conn_rate, rule->max_src_conn_rate.limit, @@ -787,59 +571,251 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, return (0); } -void -pf_stateins_err(const char *tree, struct pf_state *s, struct pfi_kif *kif) +/* state table stuff */ + +static __inline int +pf_state_compare_key(struct 
pf_state_key *a, struct pf_state_key *b) { - struct pf_state_key *sk = s->state_key; - - if (pf_status.debug >= PF_DEBUG_MISC) { - printf("pf: state insert failed: %s %s", tree, kif->pfik_name); - printf(" lan: "); - pf_print_host(&sk->lan.addr, sk->lan.port, - sk->af); - printf(" gwy: "); - pf_print_host(&sk->gwy.addr, sk->gwy.port, - sk->af); - printf(" ext: "); - pf_print_host(&sk->ext.addr, sk->ext.port, - sk->af); - if (s->sync_flags & PFSTATE_FROMSYNC) - printf(" (from sync)"); - printf("\n"); + int diff; + + if ((diff = a->proto - b->proto) != 0) + return (diff); + if ((diff = a->af - b->af) != 0) + return (diff); + switch (a->af) { +#ifdef INET + case AF_INET: + if (a->addr[0].addr32[0] > b->addr[0].addr32[0]) + return (1); + if (a->addr[0].addr32[0] < b->addr[0].addr32[0]) + return (-1); + if (a->addr[1].addr32[0] > b->addr[1].addr32[0]) + return (1); + if (a->addr[1].addr32[0] < b->addr[1].addr32[0]) + return (-1); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (a->addr[0].addr32[3] > b->addr[0].addr32[3]) + return (1); + if (a->addr[0].addr32[3] < b->addr[0].addr32[3]) + return (-1); + if (a->addr[1].addr32[3] > b->addr[1].addr32[3]) + return (1); + if (a->addr[1].addr32[3] < b->addr[1].addr32[3]) + return (-1); + if (a->addr[0].addr32[2] > b->addr[0].addr32[2]) + return (1); + if (a->addr[0].addr32[2] < b->addr[0].addr32[2]) + return (-1); + if (a->addr[1].addr32[2] > b->addr[1].addr32[2]) + return (1); + if (a->addr[1].addr32[2] < b->addr[1].addr32[2]) + return (-1); + if (a->addr[0].addr32[1] > b->addr[0].addr32[1]) + return (1); + if (a->addr[0].addr32[1] < b->addr[0].addr32[1]) + return (-1); + if (a->addr[1].addr32[1] > b->addr[1].addr32[1]) + return (1); + if (a->addr[1].addr32[1] < b->addr[1].addr32[1]) + return (-1); + if (a->addr[0].addr32[0] > b->addr[0].addr32[0]) + return (1); + if (a->addr[0].addr32[0] < b->addr[0].addr32[0]) + return (-1); + if (a->addr[1].addr32[0] > b->addr[1].addr32[0]) + return (1); + if 
(a->addr[1].addr32[0] < b->addr[1].addr32[0]) + return (-1); + break; +#endif /* INET6 */ } + + if ((diff = a->port[0] - b->port[0]) != 0) + return (diff); + if ((diff = a->port[1] - b->port[1]) != 0) + return (diff); + + return (0); +} + +static __inline int +pf_state_compare_id(struct pf_state *a, struct pf_state *b) +{ + if (a->id > b->id) + return (1); + if (a->id < b->id) + return (-1); + if (a->creatorid > b->creatorid) + return (1); + if (a->creatorid < b->creatorid) + return (-1); + + return (0); } int -pf_insert_state(struct pfi_kif *kif, struct pf_state *s) +pf_state_key_attach(struct pf_state_key *sk, struct pf_state *s, int idx) { - struct pf_state_key *cur; - struct pf_state *sp; + struct pf_state_item *si; + struct pf_state_key *cur; - KASSERT(s->state_key != NULL); - s->kif = kif; + KASSERT(s->key[idx] == NULL); /* XXX handle this? */ - if ((cur = RB_INSERT(pf_state_tree_lan_ext, &pf_statetbl_lan_ext, - s->state_key)) != NULL) { + if ((cur = RB_INSERT(pf_state_tree, &pf_statetbl, sk)) != NULL) { /* key exists. check for same kif, if none, add to key */ - TAILQ_FOREACH(sp, &cur->states, next) - if (sp->kif == kif) { /* collision! */ - pf_stateins_err("tree_lan_ext", s, kif); - pf_detach_state(s, - PF_DT_SKIP_LANEXT|PF_DT_SKIP_EXTGWY); - return (-1); + TAILQ_FOREACH(si, &cur->states, entry) + if (si->s->kif == s->kif && + si->s->direction == s->direction) { + if (pf_status.debug >= PF_DEBUG_MISC) { + printf( + "pf: %s key attach failed on %s: ", + (idx == PF_SK_WIRE) ? + "wire" : "stack", + s->kif->pfik_name); + pf_print_state_parts(s, + (idx == PF_SK_WIRE) ? sk : NULL, + (idx == PF_SK_STACK) ? sk : NULL); + printf("\n"); + } + pool_put(&pf_state_key_pl, sk); + return (-1); /* collision! */ } - pf_detach_state(s, PF_DT_SKIP_LANEXT|PF_DT_SKIP_EXTGWY); - pf_attach_state(cur, s, kif == pfi_all ? 
1 : 0); - } + pool_put(&pf_state_key_pl, sk); + s->key[idx] = cur; + } else + s->key[idx] = sk; - /* if cur != NULL, we already found a state key and attached to it */ - if (cur == NULL && (cur = RB_INSERT(pf_state_tree_ext_gwy, - &pf_statetbl_ext_gwy, s->state_key)) != NULL) { - /* must not happen. we must have found the sk above! */ - pf_stateins_err("tree_ext_gwy", s, kif); - pf_detach_state(s, PF_DT_SKIP_EXTGWY); + if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) { + pf_state_key_detach(s, idx); return (-1); } + si->s = s; + + /* list is sorted, if-bound states before floating */ + if (s->kif == pfi_all) + TAILQ_INSERT_TAIL(&s->key[idx]->states, si, entry); + else + TAILQ_INSERT_HEAD(&s->key[idx]->states, si, entry); + return (0); +} + +void +pf_detach_state(struct pf_state *s) +{ + if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK]) + s->key[PF_SK_WIRE] = NULL; + + if (s->key[PF_SK_STACK] != NULL) + pf_state_key_detach(s, PF_SK_STACK); + + if (s->key[PF_SK_WIRE] != NULL) + pf_state_key_detach(s, PF_SK_WIRE); +} + +void +pf_state_key_detach(struct pf_state *s, int idx) +{ + struct pf_state_item *si; + + si = TAILQ_FIRST(&s->key[idx]->states); + while (si && si->s != s) + si = TAILQ_NEXT(si, entry); + + if (si) { + TAILQ_REMOVE(&s->key[idx]->states, si, entry); + pool_put(&pf_state_item_pl, si); + } + + if (TAILQ_EMPTY(&s->key[idx]->states)) { + RB_REMOVE(pf_state_tree, &pf_statetbl, s->key[idx]); + if (s->key[idx]->reverse) + s->key[idx]->reverse->reverse = NULL; + if (s->key[idx]->inp) + s->key[idx]->inp->inp_pf_sk = NULL; + pool_put(&pf_state_key_pl, s->key[idx]); + } + s->key[idx] = NULL; +} + +struct pf_state_key * +pf_alloc_state_key(int pool_flags) +{ + struct pf_state_key *sk; + + if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL) + return (NULL); + TAILQ_INIT(&sk->states); + + return (sk); +} + +int +pf_state_key_setup(struct pf_pdesc *pd, struct pf_rule *nr, + struct pf_state_key **skw, struct pf_state_key **sks, + struct pf_state_key 
**skp, struct pf_state_key **nkp, + struct pf_addr *saddr, struct pf_addr *daddr, + u_int16_t sport, u_int16_t dport) +{ + KASSERT((*skp == NULL && *nkp == NULL)); + + if ((*skp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) + return (ENOMEM); + + PF_ACPY(&(*skp)->addr[pd->sidx], saddr, pd->af); + PF_ACPY(&(*skp)->addr[pd->didx], daddr, pd->af); + (*skp)->port[pd->sidx] = sport; + (*skp)->port[pd->didx] = dport; + (*skp)->proto = pd->proto; + (*skp)->af = pd->af; + + if (nr != NULL) { + if ((*nkp = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) + return (ENOMEM); /* caller must handle cleanup */ + + /* XXX maybe just bcopy and TAILQ_INIT(&(*nkp)->states) */ + PF_ACPY(&(*nkp)->addr[0], &(*skp)->addr[0], pd->af); + PF_ACPY(&(*nkp)->addr[1], &(*skp)->addr[1], pd->af); + (*nkp)->port[0] = (*skp)->port[0]; + (*nkp)->port[1] = (*skp)->port[1]; + (*nkp)->proto = pd->proto; + (*nkp)->af = pd->af; + } else + *nkp = *skp; + + if (pd->dir == PF_IN) { + *skw = *skp; + *sks = *nkp; + } else { + *sks = *skp; + *skw = *nkp; + } + return (0); +} + + +int +pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw, + struct pf_state_key *sks, struct pf_state *s) +{ + s->kif = kif; + + if (skw == sks) { + if (pf_state_key_attach(skw, s, PF_SK_WIRE)) + return (-1); + s->key[PF_SK_STACK] = s->key[PF_SK_WIRE]; + } else { + if (pf_state_key_attach(skw, s, PF_SK_WIRE)) { + pool_put(&pf_state_key_pl, sks); + return (-1); + } + if (pf_state_key_attach(sks, s, PF_SK_STACK)) { + pf_state_key_detach(s, PF_SK_WIRE); + return (-1); + } + } if (s->id == 0 && s->creatorid == 0) { s->id = htobe64(pf_status.stateid++); @@ -854,7 +830,7 @@ pf_insert_state(struct pfi_kif *kif, struct pf_state *s) printf(" (from sync)"); printf("\n"); } - pf_detach_state(s, 0); + pf_detach_state(s); return (-1); } TAILQ_INSERT_TAIL(&state_list, s, entry_list); @@ -867,6 +843,80 @@ pf_insert_state(struct pfi_kif *kif, struct pf_state *s) return (0); } +struct pf_state * +pf_find_state_byid(struct 
pf_state_cmp *key) +{ + pf_status.fcounters[FCNT_STATE_SEARCH]++; + + return (RB_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); +} + +struct pf_state * +pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir, + struct mbuf *m) +{ + struct pf_state_key *sk; + struct pf_state_item *si; + + pf_status.fcounters[FCNT_STATE_SEARCH]++; + + if (dir == PF_OUT && m->m_pkthdr.pf.statekey && + ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse) + sk = ((struct pf_state_key *)m->m_pkthdr.pf.statekey)->reverse; + else { + if ((sk = RB_FIND(pf_state_tree, &pf_statetbl, + (struct pf_state_key *)key)) == NULL) + return (NULL); + if (dir == PF_OUT && m->m_pkthdr.pf.statekey) { + ((struct pf_state_key *) + m->m_pkthdr.pf.statekey)->reverse = sk; + sk->reverse = m->m_pkthdr.pf.statekey; + } + } + + if (dir == PF_OUT) + m->m_pkthdr.pf.statekey = NULL; + + /* list is sorted, if-bound states before floating ones */ + TAILQ_FOREACH(si, &sk->states, entry) + if ((si->s->kif == pfi_all || si->s->kif == kif) && + sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] : + si->s->key[PF_SK_STACK])) + return (si->s); + + return (NULL); +} + +struct pf_state * +pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) +{ + struct pf_state_key *sk; + struct pf_state_item *si, *ret = NULL; + + pf_status.fcounters[FCNT_STATE_SEARCH]++; + + sk = RB_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key); + + if (sk != NULL) { + TAILQ_FOREACH(si, &sk->states, entry) + if (dir == PF_INOUT || + (sk == (dir == PF_IN ? si->s->key[PF_SK_WIRE] : + si->s->key[PF_SK_STACK]))) { + if (more == NULL) + return (si->s); + + if (ret) + (*more)++; + else + ret = si; + } + } + return (ret ? 
ret->s : NULL); +} + +/* END state table stuff */ + + void pf_purge_thread(void *v) { @@ -913,7 +963,7 @@ pf_state_expires(const struct pf_state *state) start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START]; if (start) { end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END]; - states = state->rule.ptr->states; + states = state->rule.ptr->states_cur; } else { start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; @@ -932,33 +982,33 @@ pf_state_expires(const struct pf_state *state) void pf_purge_expired_src_nodes(int waslocked) { - struct pf_src_node *cur, *next; - int locked = waslocked; - - for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { - next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); - - if (cur->states <= 0 && cur->expire <= time_second) { - if (! locked) { - rw_enter_write(&pf_consistency_lock); - next = RB_NEXT(pf_src_tree, - &tree_src_tracking, cur); - locked = 1; - } - if (cur->rule.ptr != NULL) { - cur->rule.ptr->src_nodes--; - if (cur->rule.ptr->states <= 0 && - cur->rule.ptr->max_src_nodes <= 0) - pf_rm_rule(NULL, cur->rule.ptr); - } - RB_REMOVE(pf_src_tree, &tree_src_tracking, cur); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, cur); - } - } - - if (locked && !waslocked) + struct pf_src_node *cur, *next; + int locked = waslocked; + + for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { + next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); + + if (cur->states <= 0 && cur->expire <= time_second) { + if (! 
locked) { + rw_enter_write(&pf_consistency_lock); + next = RB_NEXT(pf_src_tree, + &tree_src_tracking, cur); + locked = 1; + } + if (cur->rule.ptr != NULL) { + cur->rule.ptr->src_nodes--; + if (cur->rule.ptr->states_cur <= 0 && + cur->rule.ptr->max_src_nodes <= 0) + pf_rm_rule(NULL, cur->rule.ptr); + } + RB_REMOVE(pf_src_tree, &tree_src_tracking, cur); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, cur); + } + } + + if (locked && !waslocked) rw_exit_write(&pf_consistency_lock); } @@ -995,9 +1045,12 @@ void pf_unlink_state(struct pf_state *cur) { if (cur->src.state == PF_TCPS_PROXY_DST) { - pf_send_tcp(cur->rule.ptr, cur->state_key->af, - &cur->state_key->ext.addr, &cur->state_key->lan.addr, - cur->state_key->ext.port, cur->state_key->lan.port, + /* XXX wire key the right one? */ + pf_send_tcp(cur->rule.ptr, cur->key[PF_SK_WIRE]->af, + &cur->key[PF_SK_WIRE]->addr[1], + &cur->key[PF_SK_WIRE]->addr[0], + cur->key[PF_SK_WIRE]->port[1], + cur->key[PF_SK_WIRE]->port[0], cur->src.seqhi, cur->src.seqlo + 1, TH_RST|TH_ACK, 0, 0, 0, 1, cur->tag, NULL, NULL); } @@ -1008,7 +1061,7 @@ pf_unlink_state(struct pf_state *cur) #endif cur->timeout = PFTM_UNLINKED; pf_src_tree_remove_state(cur); - pf_detach_state(cur, 0); + pf_detach_state(cur); } /* callers should be at splsoftnet and hold the @@ -1023,15 +1076,15 @@ pf_free_state(struct pf_state *cur) return; #endif KASSERT(cur->timeout == PFTM_UNLINKED); - if (--cur->rule.ptr->states <= 0 && + if (--cur->rule.ptr->states_cur <= 0 && cur->rule.ptr->src_nodes <= 0) pf_rm_rule(NULL, cur->rule.ptr); if (cur->nat_rule.ptr != NULL) - if (--cur->nat_rule.ptr->states <= 0 && + if (--cur->nat_rule.ptr->states_cur <= 0 && cur->nat_rule.ptr->src_nodes <= 0) pf_rm_rule(NULL, cur->nat_rule.ptr); if (cur->anchor.ptr != NULL) - if (--cur->anchor.ptr->states <= 0) + if (--cur->anchor.ptr->states_cur <= 0) pf_rm_rule(NULL, cur->anchor.ptr); pf_normalize_tcp_cleanup(cur); pfi_kif_unref(cur->kif, 
PFI_KIF_REF_STATE); @@ -1048,7 +1101,7 @@ pf_purge_expired_states(u_int32_t maxcheck) { static struct pf_state *cur = NULL; struct pf_state *next; - int locked = 0; + int locked = 0; while (maxcheck--) { /* wrap to start of list when we hit the end */ @@ -1184,40 +1237,81 @@ pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) void pf_print_state(struct pf_state *s) { - struct pf_state_key *sk = s->state_key; - switch (sk->proto) { + pf_print_state_parts(s, NULL, NULL); +} + +void +pf_print_state_parts(struct pf_state *s, + struct pf_state_key *skwp, struct pf_state_key *sksp) +{ + struct pf_state_key *skw, *sks; + u_int8_t proto, dir; + + /* Do our best to fill these, but they're skipped if NULL */ + skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL); + sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL); + proto = skw ? skw->proto : (sks ? sks->proto : 0); + dir = s ? s->direction : 0; + + switch (proto) { case IPPROTO_TCP: - printf("TCP "); + printf("TCP"); break; case IPPROTO_UDP: - printf("UDP "); + printf("UDP"); break; case IPPROTO_ICMP: - printf("ICMP "); + printf("ICMP"); break; case IPPROTO_ICMPV6: - printf("ICMPV6 "); + printf("ICMPV6"); break; default: - printf("%u ", sk->proto); + printf("%u", skw->proto); break; } - pf_print_host(&sk->lan.addr, sk->lan.port, sk->af); - printf(" "); - pf_print_host(&sk->gwy.addr, sk->gwy.port, sk->af); - printf(" "); - pf_print_host(&sk->ext.addr, sk->ext.port, sk->af); - printf(" [lo=%u high=%u win=%u modulator=%u", s->src.seqlo, - s->src.seqhi, s->src.max_win, s->src.seqdiff); - if (s->src.wscale && s->dst.wscale) - printf(" wscale=%u", s->src.wscale & PF_WSCALE_MASK); - printf("]"); - printf(" [lo=%u high=%u win=%u modulator=%u", s->dst.seqlo, - s->dst.seqhi, s->dst.max_win, s->dst.seqdiff); - if (s->src.wscale && s->dst.wscale) - printf(" wscale=%u", s->dst.wscale & PF_WSCALE_MASK); - printf("]"); - printf(" %u:%u", s->src.state, s->dst.state); + switch (dir) { + case PF_IN: + printf(" in"); + break; 
+ case PF_OUT: + printf(" out"); + break; + } + if (skw) { + printf(" wire: "); + pf_print_host(&skw->addr[0], skw->port[0], skw->af); + printf(" "); + pf_print_host(&skw->addr[1], skw->port[1], skw->af); + } + if (sks) { + printf(" stack: "); + if (sks != skw) { + pf_print_host(&sks->addr[0], sks->port[0], sks->af); + printf(" "); + pf_print_host(&sks->addr[1], sks->port[1], sks->af); + } else + printf("-"); + } + if (s) { + if (proto == IPPROTO_TCP) { + printf(" [lo=%u high=%u win=%u modulator=%u", + s->src.seqlo, s->src.seqhi, + s->src.max_win, s->src.seqdiff); + if (s->src.wscale && s->dst.wscale) + printf(" wscale=%u", + s->src.wscale & PF_WSCALE_MASK); + printf("]"); + printf(" [lo=%u high=%u win=%u modulator=%u", + s->dst.seqlo, s->dst.seqhi, + s->dst.max_win, s->dst.seqdiff); + if (s->src.wscale && s->dst.wscale) + printf(" wscale=%u", + s->dst.wscale & PF_WSCALE_MASK); + printf("]"); + } + printf(" %u:%u", s->src.state, s->dst.state); + } } void @@ -1424,7 +1518,8 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, struct pf_addr oia, ooa; PF_ACPY(&oia, ia, af); - PF_ACPY(&ooa, oa, af); + if (oa) + PF_ACPY(&ooa, oa, af); /* Change inner protocol port, fix inner protocol checksum. */ if (ip != NULL) { @@ -1473,31 +1568,33 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, break; #endif /* INET6 */ } - /* Change outer ip address, fix outer ip or icmpv6 checksum. */ - PF_ACPY(oa, na, af); - switch (af) { + /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. 
*/ + if (oa) { + PF_ACPY(oa, na, af); + switch (af) { #ifdef INET - case AF_INET: - *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, - ooa.addr16[0], oa->addr16[0], 0), - ooa.addr16[1], oa->addr16[1], 0); - break; + case AF_INET: + *hc = pf_cksum_fixup(pf_cksum_fixup(*hc, + ooa.addr16[0], oa->addr16[0], 0), + ooa.addr16[1], oa->addr16[1], 0); + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: - *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( - pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( - pf_cksum_fixup(pf_cksum_fixup(*ic, - ooa.addr16[0], oa->addr16[0], u), - ooa.addr16[1], oa->addr16[1], u), - ooa.addr16[2], oa->addr16[2], u), - ooa.addr16[3], oa->addr16[3], u), - ooa.addr16[4], oa->addr16[4], u), - ooa.addr16[5], oa->addr16[5], u), - ooa.addr16[6], oa->addr16[6], u), - ooa.addr16[7], oa->addr16[7], u); - break; + case AF_INET6: + *ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( + pf_cksum_fixup(pf_cksum_fixup(*ic, + ooa.addr16[0], oa->addr16[0], u), + ooa.addr16[1], oa->addr16[1], u), + ooa.addr16[2], oa->addr16[2], u), + ooa.addr16[3], oa->addr16[3], u), + ooa.addr16[4], oa->addr16[4], u), + ooa.addr16[5], oa->addr16[5], u), + ooa.addr16[6], oa->addr16[6], u), + ooa.addr16[7], oa->addr16[7], u); + break; #endif /* INET6 */ + } } } @@ -1722,7 +1819,9 @@ pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af, { struct mbuf *m0; - m0 = m_copy(m, 0, M_COPYALL); + if ((m0 = m_copy(m, 0, M_COPYALL)) == NULL) + return; + m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; if (r->rtableid >= 0) @@ -1911,7 +2010,7 @@ pf_tag_packet(struct mbuf *m, int tag, int rtableid) void pf_step_into_anchor(int *depth, struct pf_ruleset **rs, int n, - struct pf_rule **r, struct pf_rule **a, int *match) + struct pf_rule **r, struct pf_rule **a, int *match) { struct pf_anchor_stackframe *f; @@ -1975,7 +2074,7 @@ pf_step_out_of_anchor(int *depth, struct pf_ruleset **rs, int n, if (*depth == 0 && a != NULL) *a = NULL; *rs = 
f->rs; - if (f->r->anchor->match || (match != NULL && *match)) + if (f->r->anchor->match || (match != NULL && *match)) quick = f->r->quick; *r = TAILQ_NEXT(f->r, entries); } while (*r == NULL); @@ -2300,9 +2399,9 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, do { key.af = af; key.proto = proto; - PF_ACPY(&key.ext.addr, daddr, key.af); - PF_ACPY(&key.gwy.addr, naddr, key.af); - key.ext.port = dport; + PF_ACPY(&key.addr[1], daddr, key.af); + PF_ACPY(&key.addr[0], naddr, key.af); + key.port[1] = dport; /* * port search; start random, step; @@ -2310,15 +2409,15 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, */ if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP || proto == IPPROTO_ICMP)) { - key.gwy.port = dport; + key.port[0] = dport; if (pf_find_state_all(&key, PF_IN, NULL) == NULL) return (0); } else if (low == 0 && high == 0) { - key.gwy.port = *nport; + key.port[0] = *nport; if (pf_find_state_all(&key, PF_IN, NULL) == NULL) return (0); } else if (low == high) { - key.gwy.port = htons(low); + key.port[0] = htons(low); if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { *nport = htons(low); return (0); @@ -2332,20 +2431,20 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, high = tmp; } /* low < high */ - cut = htonl(arc4random()) % (1 + high - low) + low; + cut = arc4random_uniform(1 + high - low) + low; /* low <= cut <= high */ for (tmp = cut; tmp <= high; ++(tmp)) { - key.gwy.port = htons(tmp); + key.port[0] = htons(tmp); if (pf_find_state_all(&key, PF_IN, NULL) == - NULL) { + NULL && !in_baddynamic(tmp, proto)) { *nport = htons(tmp); return (0); } } for (tmp = cut - 1; tmp >= low; --(tmp)) { - key.gwy.port = htons(tmp); + key.port[0] = htons(tmp); if (pf_find_state_all(&key, PF_IN, NULL) == - NULL) { + NULL && !in_baddynamic(tmp, proto)) { *nport = htons(tmp); return (0); } @@ -2365,7 +2464,6 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, return (1); } } while (! 
PF_AEQ(&init_addr, naddr, af) ); - return (1); /* none available */ } @@ -2453,12 +2551,14 @@ pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off, struct pf_rule * pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, struct pfi_kif *kif, struct pf_src_node **sn, - struct pf_addr *saddr, u_int16_t sport, - struct pf_addr *daddr, u_int16_t dport, - struct pf_addr *naddr, u_int16_t *nport) + struct pf_state_key **skw, struct pf_state_key **sks, + struct pf_state_key **skp, struct pf_state_key **nkp, + struct pf_addr *saddr, struct pf_addr *daddr, + u_int16_t sport, u_int16_t dport) { struct pf_rule *r = NULL; + if (direction == PF_OUT) { r = pf_match_translation(pd, m, off, direction, kif, saddr, sport, daddr, dport, PF_RULESET_BINAT); @@ -2474,6 +2574,17 @@ pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off, int direction, } if (r != NULL) { + struct pf_addr *naddr; + u_int16_t *nport; + + if (pf_state_key_setup(pd, r, skw, sks, skp, nkp, + saddr, daddr, sport, dport)) + return r; + + /* XXX We only modify one side for now. 
*/ + naddr = &(*nkp)->addr[1]; + nport = &(*nkp)->port[1]; + switch (r->action) { case PF_NONAT: case PF_NOBINAT: @@ -2649,7 +2760,8 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd) case AF_INET: inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport); if (inp == NULL) { - inp = in_pcblookup_listen(tb, daddr->v4, dport, 0); + inp = in_pcblookup_listen(tb, daddr->v4, dport, 0, + NULL); if (inp == NULL) return (-1); } @@ -2660,7 +2772,8 @@ pf_socket_lookup(int direction, struct pf_pdesc *pd) inp = in6_pcbhashlookup(tb, &saddr->v6, sport, &daddr->v6, dport); if (inp == NULL) { - inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0); + inp = in6_pcblookup_listen(tb, &daddr->v6, dport, 0, + NULL); if (inp == NULL) return (-1); } @@ -2814,7 +2927,7 @@ pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) s->rt_kif = NULL; if (!r->rt || r->rt == PF_FASTROUTE) return; - switch (s->state_key->af) { + switch (s->key[PF_SK_WIRE]->af) { #ifdef INET case AF_INET: pf_map_addr(AF_INET, r, saddr, &s->rt_addr, NULL, @@ -2832,54 +2945,6 @@ pf_set_rt_ifp(struct pf_state *s, struct pf_addr *saddr) } } -void -pf_attach_state(struct pf_state_key *sk, struct pf_state *s, int tail) -{ - s->state_key = sk; - sk->refcnt++; - - /* list is sorted, if-bound states before floating */ - if (tail) - TAILQ_INSERT_TAIL(&sk->states, s, next); - else - TAILQ_INSERT_HEAD(&sk->states, s, next); -} - -void -pf_detach_state(struct pf_state *s, int flags) -{ - struct pf_state_key *sk = s->state_key; - - if (sk == NULL) - return; - - s->state_key = NULL; - TAILQ_REMOVE(&sk->states, s, next); - if (--sk->refcnt == 0) { - if (!(flags & PF_DT_SKIP_EXTGWY)) - RB_REMOVE(pf_state_tree_ext_gwy, - &pf_statetbl_ext_gwy, sk); - if (!(flags & PF_DT_SKIP_LANEXT)) - RB_REMOVE(pf_state_tree_lan_ext, - &pf_statetbl_lan_ext, sk); - pool_put(&pf_state_key_pl, sk); - } -} - -struct pf_state_key * -pf_alloc_state_key(struct pf_state *s) -{ - struct pf_state_key *sk; - - if ((sk = pool_get(&pf_state_key_pl, 
PR_NOWAIT)) == NULL) - return (NULL); - bzero(sk, sizeof(*sk)); - TAILQ_INIT(&sk->states); - pf_attach_state(sk, s, 0); - - return (sk); -} - u_int32_t pf_tcp_iss(struct pf_pdesc *pd) { @@ -2887,9 +2952,10 @@ pf_tcp_iss(struct pf_pdesc *pd) u_int32_t digest[4]; if (pf_tcp_secret_init == 0) { - arc4random_bytes(pf_tcp_secret, sizeof(pf_tcp_secret)); + arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); MD5Init(&pf_tcp_secret_ctx); - MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret, sizeof(pf_tcp_secret)); + MD5Update(&pf_tcp_secret_ctx, pf_tcp_secret, + sizeof(pf_tcp_secret)); pf_tcp_secret_init = 1; } ctx = pf_tcp_secret_ctx; @@ -2916,29 +2982,30 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, { struct pf_rule *nr = NULL; struct pf_addr *saddr = pd->src, *daddr = pd->dst; - u_int16_t bport, nport = 0; sa_family_t af = pd->af; struct pf_rule *r, *a = NULL; struct pf_ruleset *ruleset = NULL; struct pf_src_node *nsn = NULL; struct tcphdr *th = pd->hdr.tcp; + struct pf_state_key *skw = NULL, *sks = NULL; + struct pf_state_key *sk = NULL, *nk = NULL; u_short reason; int rewrite = 0, hdrlen = 0; int tag = -1, rtableid = -1; int asd = 0; int match = 0; int state_icmp = 0; - u_int16_t mss = tcp_mssdflt; u_int16_t sport, dport; + u_int16_t nport = 0, bport = 0; + u_int16_t bproto_sum = 0, bip_sum; u_int8_t icmptype = 0, icmpcode = 0; + if (direction == PF_IN && pf_check_congestion(ifq)) { REASON_SET(&reason, PFRES_CONGEST); return (PF_DROP); } - sport = dport = hdrlen = 0; - switch (pd->proto) { case IPPROTO_TCP: sport = th->th_sport; @@ -2955,6 +3022,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, if (pd->af != AF_INET) break; sport = dport = pd->hdr.icmp->icmp_id; + hdrlen = sizeof(*pd->hdr.icmp); icmptype = pd->hdr.icmp->icmp_type; icmpcode = pd->hdr.icmp->icmp_code; @@ -2968,7 +3036,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, #endif /* INET */ #ifdef INET6 case IPPROTO_ICMPV6: - if 
(pd->af != AF_INET6) + if (af != AF_INET6) break; sport = dport = pd->hdr.icmp6->icmp6_id; hdrlen = sizeof(*pd->hdr.icmp6); @@ -2982,122 +3050,142 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, state_icmp++; break; #endif /* INET6 */ + default: + sport = dport = hdrlen = 0; + break; } r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr); - if (direction == PF_OUT) { - bport = nport = sport; - /* check outgoing packet for BINAT/NAT */ - if ((nr = pf_get_translation(pd, m, off, PF_OUT, kif, &nsn, - saddr, sport, daddr, dport, &pd->naddr, &nport)) != NULL) { - PF_ACPY(&pd->baddr, saddr, af); - switch (pd->proto) { - case IPPROTO_TCP: + bport = nport = sport; + /* check packet for BINAT/NAT/RDR */ + if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, + &skw, &sks, &sk, &nk, saddr, daddr, sport, dport)) != NULL) { + if (nk == NULL || sk == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + goto cleanup; + } + + if (pd->ip_sum) + bip_sum = *pd->ip_sum; + + switch (pd->proto) { + case IPPROTO_TCP: + bproto_sum = th->th_sum; + pd->proto_sum = &th->th_sum; + + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || + nk->port[pd->sidx] != sport) { pf_change_ap(saddr, &th->th_sport, pd->ip_sum, - &th->th_sum, &pd->naddr, nport, 0, af); + &th->th_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 0, af); + pd->sport = &th->th_sport; sport = th->th_sport; - rewrite++; - break; - case IPPROTO_UDP: + } + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || + nk->port[pd->didx] != dport) { + pf_change_ap(daddr, &th->th_dport, pd->ip_sum, + &th->th_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 0, af); + dport = th->th_dport; + pd->dport = &th->th_dport; + } + rewrite++; + break; + case IPPROTO_UDP: + bproto_sum = pd->hdr.udp->uh_sum; + pd->proto_sum = &pd->hdr.udp->uh_sum; + + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) || + nk->port[pd->sidx] != sport) { pf_change_ap(saddr, &pd->hdr.udp->uh_sport, pd->ip_sum, &pd->hdr.udp->uh_sum, - &pd->naddr, 
nport, 1, af); + &nk->addr[pd->sidx], + nk->port[pd->sidx], 1, af); sport = pd->hdr.udp->uh_sport; - rewrite++; - break; -#ifdef INET - case IPPROTO_ICMP: - pf_change_a(&saddr->v4.s_addr, pd->ip_sum, - pd->naddr.v4.s_addr, 0); - pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( - pd->hdr.icmp->icmp_cksum, sport, nport, 0); - pd->hdr.icmp->icmp_id = nport; - m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); - break; -#endif /* INET */ -#ifdef INET6 - case IPPROTO_ICMPV6: - pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, - &pd->naddr, 0); - rewrite++; - break; -#endif /* INET */ - default: - switch (af) { -#ifdef INET - case AF_INET: - pf_change_a(&saddr->v4.s_addr, - pd->ip_sum, pd->naddr.v4.s_addr, 0); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - PF_ACPY(saddr, &pd->naddr, af); - break; -#endif /* INET */ - } - break; + pd->sport = &pd->hdr.udp->uh_sport; } - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; - } - } else { - bport = nport = dport; - /* check incoming packet for BINAT/RDR */ - if ((nr = pf_get_translation(pd, m, off, PF_IN, kif, &nsn, - saddr, sport, daddr, dport, &pd->naddr, &nport)) != NULL) { - PF_ACPY(&pd->baddr, daddr, af); - switch (pd->proto) { - case IPPROTO_TCP: - pf_change_ap(daddr, &th->th_dport, pd->ip_sum, - &th->th_sum, &pd->naddr, nport, 0, af); - dport = th->th_dport; - rewrite++; - break; - case IPPROTO_UDP: + if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) || + nk->port[pd->didx] != dport) { pf_change_ap(daddr, &pd->hdr.udp->uh_dport, pd->ip_sum, &pd->hdr.udp->uh_sum, - &pd->naddr, nport, 1, af); + &nk->addr[pd->didx], + nk->port[pd->didx], 1, af); dport = pd->hdr.udp->uh_dport; - rewrite++; - break; + pd->dport = &pd->hdr.udp->uh_dport; + } + rewrite++; + break; #ifdef INET - case IPPROTO_ICMP: + case IPPROTO_ICMP: + nk->port[0] = nk->port[1]; + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET)) + pf_change_a(&saddr->v4.s_addr, pd->ip_sum, + nk->addr[pd->sidx].v4.s_addr, 0); + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET)) 
pf_change_a(&daddr->v4.s_addr, pd->ip_sum, - pd->naddr.v4.s_addr, 0); - break; + nk->addr[pd->didx].v4.s_addr, 0); + + if (nk->port[1] != pd->hdr.icmp->icmp_id) { + pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( + pd->hdr.icmp->icmp_cksum, sport, + nk->port[1], 0); + pd->hdr.icmp->icmp_id = nk->port[1]; + pd->sport = &pd->hdr.icmp->icmp_id; + } + m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); + break; #endif /* INET */ #ifdef INET6 - case IPPROTO_ICMPV6: + case IPPROTO_ICMPV6: + nk->port[0] = nk->port[1]; + if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6)) + pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum, + &nk->addr[pd->sidx], 0); + + if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6)) pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, - &pd->naddr, 0); - rewrite++; - break; -#endif /* INET6 */ - default: - switch (af) { + &nk->addr[pd->didx], 0); + rewrite++; + break; +#endif /* INET */ + default: + switch (af) { #ifdef INET - case AF_INET: + case AF_INET: + if (PF_ANEQ(saddr, + &nk->addr[pd->sidx], AF_INET)) + pf_change_a(&saddr->v4.s_addr, + pd->ip_sum, + nk->addr[pd->sidx].v4.s_addr, 0); + + if (PF_ANEQ(daddr, + &nk->addr[pd->didx], AF_INET)) pf_change_a(&daddr->v4.s_addr, - pd->ip_sum, pd->naddr.v4.s_addr, 0); - break; + pd->ip_sum, + nk->addr[pd->didx].v4.s_addr, 0); + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: - PF_ACPY(daddr, &pd->naddr, af); - break; -#endif /* INET */ - } + case AF_INET6: + if (PF_ANEQ(saddr, + &nk->addr[pd->sidx], AF_INET6)) + PF_ACPY(saddr, &nk->addr[pd->sidx], af); + + if (PF_ANEQ(daddr, + &nk->addr[pd->didx], AF_INET6)) + PF_ACPY(saddr, &nk->addr[pd->didx], af); break; +#endif /* INET */ } - - if (nr->natpass) - r = NULL; - pd->nat_rule = nr; + break; } + if (nr->natpass) + r = NULL; + pd->nat_rule = nr; } while (r != NULL) { @@ -3149,8 +3237,8 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1], pd->lookup.gid)) r = TAILQ_NEXT(r, entries); - else if 
(r->prob && r->prob <= - (arc4random() % (UINT_MAX - 1) + 1)) + else if (r->prob && + r->prob <= arc4random_uniform(UINT_MAX - 1) + 1) r = TAILQ_NEXT(r, entries); else if (r->match_tag && !pf_match_tag(m, r, &tag)) r = TAILQ_NEXT(r, entries); @@ -3199,77 +3287,17 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, (r->rule_flag & PFRULE_RETURN))) { /* undo NAT changes, if they have taken place */ if (nr != NULL) { - if (direction == PF_OUT) { - switch (pd->proto) { - case IPPROTO_TCP: - pf_change_ap(saddr, &th->th_sport, - pd->ip_sum, &th->th_sum, - &pd->baddr, bport, 0, af); - sport = th->th_sport; - rewrite++; - break; - case IPPROTO_UDP: - pf_change_ap(saddr, - &pd->hdr.udp->uh_sport, pd->ip_sum, - &pd->hdr.udp->uh_sum, &pd->baddr, - bport, 1, af); - sport = pd->hdr.udp->uh_sport; - rewrite++; - break; - case IPPROTO_ICMP: -#ifdef INET6 - case IPPROTO_ICMPV6: -#endif - /* nothing! */ - break; - default: - switch (af) { - case AF_INET: - pf_change_a(&saddr->v4.s_addr, - pd->ip_sum, - pd->baddr.v4.s_addr, 0); - break; - case AF_INET6: - PF_ACPY(saddr, &pd->baddr, af); - break; - } - } - } else { - switch (pd->proto) { - case IPPROTO_TCP: - pf_change_ap(daddr, &th->th_dport, - pd->ip_sum, &th->th_sum, - &pd->baddr, bport, 0, af); - dport = th->th_dport; - rewrite++; - break; - case IPPROTO_UDP: - pf_change_ap(daddr, - &pd->hdr.udp->uh_dport, pd->ip_sum, - &pd->hdr.udp->uh_sum, &pd->baddr, - bport, 1, af); - dport = pd->hdr.udp->uh_dport; - rewrite++; - break; - case IPPROTO_ICMP: -#ifdef INET6 - case IPPROTO_ICMPV6: -#endif - /* nothing! 
*/ - break; - default: - switch (af) { - case AF_INET: - pf_change_a(&daddr->v4.s_addr, - pd->ip_sum, - pd->baddr.v4.s_addr, 0); - break; - case AF_INET6: - PF_ACPY(daddr, &pd->baddr, af); - break; - } - } - } + PF_ACPY(saddr, &sk->addr[pd->sidx], af); + PF_ACPY(daddr, &sk->addr[pd->didx], af); + if (pd->sport) + *pd->sport = sk->port[pd->sidx]; + if (pd->dport) + *pd->dport = sk->port[pd->didx]; + if (pd->proto_sum) + *pd->proto_sum = bproto_sum; + if (pd->ip_sum) + *pd->ip_sum = bip_sum; + m_copyback(m, off, hdrlen, pd->hdr.any); } if (pd->proto == IPPROTO_TCP && ((r->rule_flag & PFRULE_RETURNRST) || @@ -3314,268 +3342,250 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, } if (r->action == PF_DROP) - return (PF_DROP); + goto cleanup; if (pf_tag_packet(m, tag, rtableid)) { REASON_SET(&reason, PFRES_MEMORY); - return (PF_DROP); + goto cleanup; } if (!state_icmp && (r->keep_state || nr != NULL || (pd->flags & PFDESC_TCP_NORM))) { - /* create new state */ - struct pf_state *s = NULL; - struct pf_state_key *sk = NULL; - struct pf_src_node *sn = NULL; - - /* check maximums */ - if (r->max_states && (r->states >= r->max_states)) { - pf_status.lcounters[LCNT_STATES]++; - REASON_SET(&reason, PFRES_MAXSTATES); - goto cleanup; - } - /* src node for filter rule */ - if ((r->rule_flag & PFRULE_SRCTRACK || - r->rpool.opts & PF_POOL_STICKYADDR) && - pf_insert_src_node(&sn, r, saddr, af) != 0) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - /* src node for translation rule */ - if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && - ((direction == PF_OUT && - pf_insert_src_node(&nsn, nr, &pd->baddr, af) != 0) || - (pf_insert_src_node(&nsn, nr, saddr, af) != 0))) { - REASON_SET(&reason, PFRES_SRCLIMIT); - goto cleanup; - } - s = pool_get(&pf_state_pl, PR_NOWAIT); - if (s == NULL) { - REASON_SET(&reason, PFRES_MEMORY); + int action; + action = pf_create_state(r, nr, a, pd, nsn, skw, sks, nk, sk, m, + off, sport, dport, &rewrite, kif, 
sm, tag, bproto_sum, + bip_sum, hdrlen); + if (action != PF_PASS) + return (action); + } + + /* copy back packet headers if we performed NAT operations */ + if (rewrite) + m_copyback(m, off, hdrlen, pd->hdr.any); + + return (PF_PASS); + cleanup: - if (sn != NULL && sn->states == 0 && sn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, sn); - } - if (nsn != sn && nsn != NULL && nsn->states == 0 && - nsn->expire == 0) { - RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); - pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; - pf_status.src_nodes--; - pool_put(&pf_src_tree_pl, nsn); - } - if (sk != NULL) { - pool_put(&pf_state_key_pl, sk); - } - return (PF_DROP); - } - bzero(s, sizeof(*s)); - s->rule.ptr = r; - s->nat_rule.ptr = nr; - s->anchor.ptr = a; - STATE_INC_COUNTERS(s); - s->allow_opts = r->allow_opts; - s->log = r->log & PF_LOG_ALL; - if (nr != NULL) - s->log |= nr->log & PF_LOG_ALL; - switch (pd->proto) { - case IPPROTO_TCP: - s->src.seqlo = ntohl(th->th_seq); - s->src.seqhi = s->src.seqlo + pd->p_len + 1; - if ((th->th_flags & (TH_SYN|TH_ACK)) == - TH_SYN && r->keep_state == PF_STATE_MODULATE) { - /* Generate sequence number modulator */ - if ((s->src.seqdiff = pf_tcp_iss(pd) - - s->src.seqlo) == 0) - s->src.seqdiff = 1; - pf_change_a(&th->th_seq, &th->th_sum, - htonl(s->src.seqlo + s->src.seqdiff), 0); - rewrite = 1; - } else - s->src.seqdiff = 0; - if (th->th_flags & TH_SYN) { - s->src.seqhi++; - s->src.wscale = pf_get_wscale(m, off, - th->th_off, af); - } - s->src.max_win = MAX(ntohs(th->th_win), 1); - if (s->src.wscale & PF_WSCALE_MASK) { - /* Remove scale factor from initial window */ - int win = s->src.max_win; - win += 1 << (s->src.wscale & PF_WSCALE_MASK); - s->src.max_win = (win - 1) >> - (s->src.wscale & PF_WSCALE_MASK); - } - if (th->th_flags & TH_FIN) - s->src.seqhi++; - s->dst.seqhi = 1; - s->dst.max_win = 1; - s->src.state = 
TCPS_SYN_SENT; - s->dst.state = TCPS_CLOSED; - s->timeout = PFTM_TCP_FIRST_PACKET; - break; - case IPPROTO_UDP: - s->src.state = PFUDPS_SINGLE; - s->dst.state = PFUDPS_NO_TRAFFIC; - s->timeout = PFTM_UDP_FIRST_PACKET; - break; - case IPPROTO_ICMP: -#ifdef INET6 - case IPPROTO_ICMPV6: -#endif - s->timeout = PFTM_ICMP_FIRST_PACKET; - break; - default: - s->src.state = PFOTHERS_SINGLE; - s->dst.state = PFOTHERS_NO_TRAFFIC; - s->timeout = PFTM_OTHER_FIRST_PACKET; - } + if (sk != NULL) + pool_put(&pf_state_key_pl, sk); + if (nk != NULL) + pool_put(&pf_state_key_pl, nk); + return (PF_DROP); +} - s->creation = time_second; - s->expire = time_second; +static __inline int +pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a, + struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *skw, + struct pf_state_key *sks, struct pf_state_key *nk, struct pf_state_key *sk, + struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite, + struct pfi_kif *kif, struct pf_state **sm, int tag, u_int16_t bproto_sum, + u_int16_t bip_sum, int hdrlen) +{ + struct pf_state *s = NULL; + struct pf_src_node *sn = NULL; + struct tcphdr *th = pd->hdr.tcp; + u_int16_t mss = tcp_mssdflt; + u_short reason; - if (sn != NULL) { - s->src_node = sn; - s->src_node->states++; - } - if (nsn != NULL) { - PF_ACPY(&nsn->raddr, &pd->naddr, af); - s->nat_src_node = nsn; - s->nat_src_node->states++; - } - if (pd->proto == IPPROTO_TCP) { - if ((pd->flags & PFDESC_TCP_NORM) && - pf_normalize_tcp_init(m, off, pd, th, &s->src, - &s->dst)) { - REASON_SET(&reason, PFRES_MEMORY); - pf_src_tree_remove_state(s); - STATE_DEC_COUNTERS(s); - pool_put(&pf_state_pl, s); - return (PF_DROP); - } - if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && - pf_normalize_tcp_stateful(m, off, pd, &reason, - th, s, &s->src, &s->dst, &rewrite)) { - /* This really shouldn't happen!!! 
*/ - DPFPRINTF(PF_DEBUG_URGENT, - ("pf_normalize_tcp_stateful failed on " - "first pkt")); - pf_normalize_tcp_cleanup(s); - pf_src_tree_remove_state(s); - STATE_DEC_COUNTERS(s); - pool_put(&pf_state_pl, s); - return (PF_DROP); - } + /* check maximums */ + if (r->max_states && (r->states_cur >= r->max_states)) { + pf_status.lcounters[LCNT_STATES]++; + REASON_SET(&reason, PFRES_MAXSTATES); + return (PF_DROP); + } + /* src node for filter rule */ + if ((r->rule_flag & PFRULE_SRCTRACK || + r->rpool.opts & PF_POOL_STICKYADDR) && + pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) { + REASON_SET(&reason, PFRES_SRCLIMIT); + goto csfailed; + } + /* src node for translation rule */ + if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) && + pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) { + REASON_SET(&reason, PFRES_SRCLIMIT); + goto csfailed; + } + s = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO); + if (s == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + goto csfailed; + } + s->rule.ptr = r; + s->nat_rule.ptr = nr; + s->anchor.ptr = a; + STATE_INC_COUNTERS(s); + if (r->allow_opts) + s->state_flags |= PFSTATE_ALLOWOPTS; + if (r->rule_flag & PFRULE_STATESLOPPY) + s->state_flags |= PFSTATE_SLOPPY; + s->log = r->log & PF_LOG_ALL; + if (nr != NULL) + s->log |= nr->log & PF_LOG_ALL; + switch (pd->proto) { + case IPPROTO_TCP: + s->src.seqlo = ntohl(th->th_seq); + s->src.seqhi = s->src.seqlo + pd->p_len + 1; + if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && + r->keep_state == PF_STATE_MODULATE) { + /* Generate sequence number modulator */ + if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) == + 0) + s->src.seqdiff = 1; + pf_change_a(&th->th_seq, &th->th_sum, + htonl(s->src.seqlo + s->src.seqdiff), 0); + *rewrite = 1; + } else + s->src.seqdiff = 0; + if (th->th_flags & TH_SYN) { + s->src.seqhi++; + s->src.wscale = pf_get_wscale(m, off, + th->th_off, pd->af); } - - if ((sk = pf_alloc_state_key(s)) == NULL) { - REASON_SET(&reason, PFRES_MEMORY); - goto cleanup; + 
s->src.max_win = MAX(ntohs(th->th_win), 1); + if (s->src.wscale & PF_WSCALE_MASK) { + /* Remove scale factor from initial window */ + int win = s->src.max_win; + win += 1 << (s->src.wscale & PF_WSCALE_MASK); + s->src.max_win = (win - 1) >> + (s->src.wscale & PF_WSCALE_MASK); } - - sk->proto = pd->proto; - sk->direction = direction; - sk->af = af; - if (direction == PF_OUT) { - PF_ACPY(&sk->gwy.addr, saddr, af); - PF_ACPY(&sk->ext.addr, daddr, af); - switch (pd->proto) { - case IPPROTO_ICMP: -#ifdef INET6 - case IPPROTO_ICMPV6: -#endif - sk->gwy.port = nport; - sk->ext.port = 0; - break; - default: - sk->gwy.port = sport; - sk->ext.port = dport; - } - if (nr != NULL) { - PF_ACPY(&sk->lan.addr, &pd->baddr, af); - sk->lan.port = bport; - } else { - PF_ACPY(&sk->lan.addr, &sk->gwy.addr, af); - sk->lan.port = sk->gwy.port; - } - } else { - PF_ACPY(&sk->lan.addr, daddr, af); - PF_ACPY(&sk->ext.addr, saddr, af); - switch (pd->proto) { - case IPPROTO_ICMP: + if (th->th_flags & TH_FIN) + s->src.seqhi++; + s->dst.seqhi = 1; + s->dst.max_win = 1; + s->src.state = TCPS_SYN_SENT; + s->dst.state = TCPS_CLOSED; + s->timeout = PFTM_TCP_FIRST_PACKET; + break; + case IPPROTO_UDP: + s->src.state = PFUDPS_SINGLE; + s->dst.state = PFUDPS_NO_TRAFFIC; + s->timeout = PFTM_UDP_FIRST_PACKET; + break; + case IPPROTO_ICMP: #ifdef INET6 - case IPPROTO_ICMPV6: + case IPPROTO_ICMPV6: #endif - sk->lan.port = nport; - sk->ext.port = 0; - break; - default: - sk->lan.port = dport; - sk->ext.port = sport; - } - if (nr != NULL) { - PF_ACPY(&sk->gwy.addr, &pd->baddr, af); - sk->gwy.port = bport; - } else { - PF_ACPY(&sk->gwy.addr, &sk->lan.addr, af); - sk->gwy.port = sk->lan.port; - } - } + s->timeout = PFTM_ICMP_FIRST_PACKET; + break; + default: + s->src.state = PFOTHERS_SINGLE; + s->dst.state = PFOTHERS_NO_TRAFFIC; + s->timeout = PFTM_OTHER_FIRST_PACKET; + } - pf_set_rt_ifp(s, saddr); /* needs s->state_key set */ + s->creation = time_second; + s->expire = time_second; - if 
(pf_insert_state(BOUND_IFACE(r, kif), s)) { - if (pd->proto == IPPROTO_TCP) - pf_normalize_tcp_cleanup(s); - REASON_SET(&reason, PFRES_STATEINS); + if (sn != NULL) { + s->src_node = sn; + s->src_node->states++; + } + if (nsn != NULL) { + /* XXX We only modify one side for now. */ + PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af); + s->nat_src_node = nsn; + s->nat_src_node->states++; + } + if (pd->proto == IPPROTO_TCP) { + if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m, + off, pd, th, &s->src, &s->dst)) { + REASON_SET(&reason, PFRES_MEMORY); pf_src_tree_remove_state(s); STATE_DEC_COUNTERS(s); pool_put(&pf_state_pl, s); return (PF_DROP); - } else - *sm = s; - if (tag > 0) { - pf_tag_ref(tag); - s->tag = tag; } - if (pd->proto == IPPROTO_TCP && - (th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && - r->keep_state == PF_STATE_SYNPROXY) { - s->src.state = PF_TCPS_PROXY_SRC; - if (nr != NULL) { - if (direction == PF_OUT) { - pf_change_ap(saddr, &th->th_sport, - pd->ip_sum, &th->th_sum, &pd->baddr, - bport, 0, af); - sport = th->th_sport; - } else { - pf_change_ap(daddr, &th->th_dport, - pd->ip_sum, &th->th_sum, &pd->baddr, - bport, 0, af); - sport = th->th_dport; - } - } - s->src.seqhi = htonl(arc4random()); - /* Find mss option */ - mss = pf_get_mss(m, off, th->th_off, af); - mss = pf_calc_mss(saddr, af, mss); - mss = pf_calc_mss(daddr, af, mss); - s->src.mss = mss; - pf_send_tcp(r, af, daddr, saddr, th->th_dport, - th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL); - REASON_SET(&reason, PFRES_SYNPROXY); - return (PF_SYNPROXY_DROP); + if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub && + pf_normalize_tcp_stateful(m, off, pd, &reason, th, s, + &s->src, &s->dst, rewrite)) { + /* This really shouldn't happen!!! 
*/ + DPFPRINTF(PF_DEBUG_URGENT, + ("pf_normalize_tcp_stateful failed on first pkt")); + pf_normalize_tcp_cleanup(s); + pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); + pool_put(&pf_state_pl, s); + return (PF_DROP); } } + s->direction = pd->dir; - /* copy back packet headers if we performed NAT operations */ - if (rewrite) - m_copyback(m, off, hdrlen, pd->hdr.any); + if (sk == NULL && pf_state_key_setup(pd, nr, &skw, &sks, &sk, &nk, + pd->src, pd->dst, sport, dport)) + goto csfailed; + + if (pf_state_insert(BOUND_IFACE(r, kif), skw, sks, s)) { + if (pd->proto == IPPROTO_TCP) + pf_normalize_tcp_cleanup(s); + REASON_SET(&reason, PFRES_STATEINS); + pf_src_tree_remove_state(s); + STATE_DEC_COUNTERS(s); + pool_put(&pf_state_pl, s); + return (PF_DROP); + } else + *sm = s; + + pf_set_rt_ifp(s, pd->src); /* needs s->state_key set */ + if (tag > 0) { + pf_tag_ref(tag); + s->tag = tag; + } + if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == + TH_SYN && r->keep_state == PF_STATE_SYNPROXY) { + s->src.state = PF_TCPS_PROXY_SRC; + /* undo NAT changes, if they have taken place */ + if (nr != NULL) { + struct pf_state_key *skt = s->key[PF_SK_WIRE]; + if (pd->dir == PF_OUT) + skt = s->key[PF_SK_STACK]; + PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af); + PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af); + if (pd->sport) + *pd->sport = skt->port[pd->sidx]; + if (pd->dport) + *pd->dport = skt->port[pd->didx]; + if (pd->proto_sum) + *pd->proto_sum = bproto_sum; + if (pd->ip_sum) + *pd->ip_sum = bip_sum; + m_copyback(m, off, hdrlen, pd->hdr.any); + } + s->src.seqhi = htonl(arc4random()); + /* Find mss option */ + mss = pf_get_mss(m, off, th->th_off, pd->af); + mss = pf_calc_mss(pd->src, pd->af, mss); + mss = pf_calc_mss(pd->dst, pd->af, mss); + s->src.mss = mss; + pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, + th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, + TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL, NULL); + REASON_SET(&reason, PFRES_SYNPROXY); + 
return (PF_SYNPROXY_DROP); + } return (PF_PASS); + +csfailed: + if (sk != NULL) + pool_put(&pf_state_key_pl, sk); + if (nk != NULL) + pool_put(&pf_state_key_pl, nk); + + if (sn != NULL && sn->states == 0 && sn->expire == 0) { + RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, sn); + } + if (nsn != sn && nsn != NULL && nsn->states == 0 && nsn->expire == 0) { + RB_REMOVE(pf_src_tree, &tree_src_tracking, nsn); + pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; + pf_status.src_nodes--; + pool_put(&pf_src_tree_pl, nsn); + } + return (PF_DROP); } int @@ -3666,147 +3676,15 @@ pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif, } int -pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, - struct mbuf *m, int off, void *h, struct pf_pdesc *pd, - u_short *reason) +pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst, + struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off, + struct pf_pdesc *pd, u_short *reason, int *copyback) { - struct pf_state_key_cmp key; struct tcphdr *th = pd->hdr.tcp; u_int16_t win = ntohs(th->th_win); u_int32_t ack, end, seq, orig_seq; u_int8_t sws, dws; int ackskew; - int copyback = 0; - struct pf_state_peer *src, *dst; - - key.af = pd->af; - key.proto = IPPROTO_TCP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.port = th->th_sport; - key.gwy.port = th->th_dport; - } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); - key.lan.port = th->th_sport; - key.ext.port = th->th_dport; - } - - STATE_LOOKUP(); - - if (direction == (*state)->state_key->direction) { - src = &(*state)->src; - dst = &(*state)->dst; - } else { - src = &(*state)->dst; - dst = &(*state)->src; - } - - if ((*state)->src.state == PF_TCPS_PROXY_SRC) { - if (direction != 
(*state)->state_key->direction) { - REASON_SET(reason, PFRES_SYNPROXY); - return (PF_SYNPROXY_DROP); - } - if (th->th_flags & TH_SYN) { - if (ntohl(th->th_seq) != (*state)->src.seqlo) { - REASON_SET(reason, PFRES_SYNPROXY); - return (PF_DROP); - } - pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, - pd->src, th->th_dport, th->th_sport, - (*state)->src.seqhi, ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, - 0, NULL, NULL); - REASON_SET(reason, PFRES_SYNPROXY); - return (PF_SYNPROXY_DROP); - } else if (!(th->th_flags & TH_ACK) || - (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || - (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { - REASON_SET(reason, PFRES_SYNPROXY); - return (PF_DROP); - } else if ((*state)->src_node != NULL && - pf_src_connlimit(state)) { - REASON_SET(reason, PFRES_SRCLIMIT); - return (PF_DROP); - } else - (*state)->src.state = PF_TCPS_PROXY_DST; - } - if ((*state)->src.state == PF_TCPS_PROXY_DST) { - struct pf_state_host *src, *dst; - - if (direction == PF_OUT) { - src = &(*state)->state_key->gwy; - dst = &(*state)->state_key->ext; - } else { - src = &(*state)->state_key->ext; - dst = &(*state)->state_key->lan; - } - if (direction == (*state)->state_key->direction) { - if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || - (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || - (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { - REASON_SET(reason, PFRES_SYNPROXY); - return (PF_DROP); - } - (*state)->src.max_win = MAX(ntohs(th->th_win), 1); - if ((*state)->dst.seqhi == 1) - (*state)->dst.seqhi = htonl(arc4random()); - pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, - &dst->addr, src->port, dst->port, - (*state)->dst.seqhi, 0, TH_SYN, 0, - (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL); - REASON_SET(reason, PFRES_SYNPROXY); - return (PF_SYNPROXY_DROP); - } else if (((th->th_flags & (TH_SYN|TH_ACK)) != - (TH_SYN|TH_ACK)) || - (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { - REASON_SET(reason, PFRES_SYNPROXY); - return 
(PF_DROP); - } else { - (*state)->dst.max_win = MAX(ntohs(th->th_win), 1); - (*state)->dst.seqlo = ntohl(th->th_seq); - pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, - pd->src, th->th_dport, th->th_sport, - ntohl(th->th_ack), ntohl(th->th_seq) + 1, - TH_ACK, (*state)->src.max_win, 0, 0, 0, - (*state)->tag, NULL, NULL); - pf_send_tcp((*state)->rule.ptr, pd->af, &src->addr, - &dst->addr, src->port, dst->port, - (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, - TH_ACK, (*state)->dst.max_win, 0, 0, 1, - 0, NULL, NULL); - (*state)->src.seqdiff = (*state)->dst.seqhi - - (*state)->src.seqlo; - (*state)->dst.seqdiff = (*state)->src.seqhi - - (*state)->dst.seqlo; - (*state)->src.seqhi = (*state)->src.seqlo + - (*state)->dst.max_win; - (*state)->dst.seqhi = (*state)->dst.seqlo + - (*state)->src.max_win; - (*state)->src.wscale = (*state)->dst.wscale = 0; - (*state)->src.state = (*state)->dst.state = - TCPS_ESTABLISHED; - REASON_SET(reason, PFRES_SYNPROXY); - return (PF_SYNPROXY_DROP); - } - } - - if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && - dst->state >= TCPS_FIN_WAIT_2 && - src->state >= TCPS_FIN_WAIT_2) { - if (pf_status.debug >= PF_DEBUG_MISC) { - printf("pf: state reuse "); - pf_print_state(*state); - pf_print_flags(th->th_flags); - printf("\n"); - } - /* XXX make sure it's the same direction ?? 
*/ - (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; - pf_unlink_state(*state); - *state = NULL; - return (PF_DROP); - } if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { sws = src->wscale & PF_WSCALE_MASK; @@ -3841,7 +3719,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + src->seqdiff), 0); pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); - copyback = 1; + *copyback = 1; } else { ack = ntohl(th->th_ack); } @@ -3893,7 +3771,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, pf_change_a(&th->th_seq, &th->th_sum, htonl(seq + src->seqdiff), 0); pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0); - copyback = 1; + *copyback = 1; } end = seq + pd->p_len; if (th->th_flags & TH_SYN) @@ -3939,7 +3817,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, */ if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { if (pf_modulate_sack(m, off, pd, th, dst)) - copyback = 1; + *copyback = 1; } @@ -3959,7 +3837,7 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, if (dst->scrub || src->scrub) { if (pf_normalize_tcp_stateful(m, off, pd, reason, th, - *state, src, dst, &copyback)) + *state, src, dst, copyback)) return (PF_DROP); } @@ -4052,14 +3930,13 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len, ackskew, (*state)->packets[0], (*state)->packets[1], - direction == PF_IN ? "in" : "out", - direction == (*state)->state_key->direction ? - "fwd" : "rev"); + pd->dir == PF_IN ? "in" : "out", + pd->dir == (*state)->direction ? 
"fwd" : "rev"); } if (dst->scrub || src->scrub) { if (pf_normalize_tcp_stateful(m, off, pd, reason, th, - *state, src, dst, &copyback)) + *state, src, dst, copyback)) return (PF_DROP); } @@ -4108,9 +3985,8 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, pd->p_len, ackskew, (*state)->packets[0], (*state)->packets[1], - direction == PF_IN ? "in" : "out", - direction == (*state)->state_key->direction ? - "fwd" : "rev"); + pd->dir == PF_IN ? "in" : "out", + pd->dir == (*state)->direction ? "fwd" : "rev"); printf("pf: State failure on: %c %c %c %c | %c %c\n", SEQ_GEQ(src->seqhi, end) ? ' ' : '1', SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? @@ -4124,24 +4000,246 @@ pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, return (PF_DROP); } - /* Any packets which have gotten here are to be passed */ + return (PF_PASS); +} + +int +pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst, + struct pf_state **state, struct pf_pdesc *pd, u_short *reason) +{ + struct tcphdr *th = pd->hdr.tcp; + + if (th->th_flags & TH_SYN) + if (src->state < TCPS_SYN_SENT) + src->state = TCPS_SYN_SENT; + if (th->th_flags & TH_FIN) + if (src->state < TCPS_CLOSING) + src->state = TCPS_CLOSING; + if (th->th_flags & TH_ACK) { + if (dst->state == TCPS_SYN_SENT) { + dst->state = TCPS_ESTABLISHED; + if (src->state == TCPS_ESTABLISHED && + (*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } + } else if (dst->state == TCPS_CLOSING) { + dst->state = TCPS_FIN_WAIT_2; + } else if (src->state == TCPS_SYN_SENT && + dst->state < TCPS_SYN_SENT) { + /* + * Handle a special sloppy case where we only see one + * half of the connection. If there is a ACK after + * the initial SYN without ever seeing a packet from + * the destination, set the connection to established. 
+ */ + dst->state = src->state = TCPS_ESTABLISHED; + if ((*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } + } else if (src->state == TCPS_CLOSING && + dst->state == TCPS_ESTABLISHED && + dst->seqlo == 0) { + /* + * Handle the closing of half connections where we + * don't see the full bidirectional FIN/ACK+ACK + * handshake. + */ + dst->state = TCPS_CLOSING; + } + } + if (th->th_flags & TH_RST) + src->state = dst->state = TCPS_TIME_WAIT; + + /* update expire time */ + (*state)->expire = time_second; + if (src->state >= TCPS_FIN_WAIT_2 && + dst->state >= TCPS_FIN_WAIT_2) + (*state)->timeout = PFTM_TCP_CLOSED; + else if (src->state >= TCPS_CLOSING && + dst->state >= TCPS_CLOSING) + (*state)->timeout = PFTM_TCP_FIN_WAIT; + else if (src->state < TCPS_ESTABLISHED || + dst->state < TCPS_ESTABLISHED) + (*state)->timeout = PFTM_TCP_OPENING; + else if (src->state >= TCPS_CLOSING || + dst->state >= TCPS_CLOSING) + (*state)->timeout = PFTM_TCP_CLOSING; + else + (*state)->timeout = PFTM_TCP_ESTABLISHED; + + return (PF_PASS); +} + +int +pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif, + struct mbuf *m, int off, void *h, struct pf_pdesc *pd, + u_short *reason) +{ + struct pf_state_key_cmp key; + struct tcphdr *th = pd->hdr.tcp; + int copyback = 0; + struct pf_state_peer *src, *dst; + struct pf_state_key *sk; + + key.af = pd->af; + key.proto = IPPROTO_TCP; + if (direction == PF_IN) { /* wire side, straight */ + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + key.port[0] = th->th_sport; + key.port[1] = th->th_dport; + } else { /* stack side, reverse */ + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); + key.port[1] = th->th_sport; + key.port[0] = th->th_dport; + } + + STATE_LOOKUP(kif, &key, direction, *state, m); + + if (direction == (*state)->direction) { + src = &(*state)->src; + dst = &(*state)->dst; + } 
else { + src = &(*state)->dst; + dst = &(*state)->src; + } + + sk = (*state)->key[pd->didx]; + + if ((*state)->src.state == PF_TCPS_PROXY_SRC) { + if (direction != (*state)->direction) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } + if (th->th_flags & TH_SYN) { + if (ntohl(th->th_seq) != (*state)->src.seqlo) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_DROP); + } + pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, + pd->src, th->th_dport, th->th_sport, + (*state)->src.seqhi, ntohl(th->th_seq) + 1, + TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, + 0, NULL, NULL); + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } else if (!(th->th_flags & TH_ACK) || + (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || + (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_DROP); + } else if ((*state)->src_node != NULL && + pf_src_connlimit(state)) { + REASON_SET(reason, PFRES_SRCLIMIT); + return (PF_DROP); + } else + (*state)->src.state = PF_TCPS_PROXY_DST; + } + if ((*state)->src.state == PF_TCPS_PROXY_DST) { + if (direction == (*state)->direction) { + if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || + (ntohl(th->th_ack) != (*state)->src.seqhi + 1) || + (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_DROP); + } + (*state)->src.max_win = MAX(ntohs(th->th_win), 1); + if ((*state)->dst.seqhi == 1) + (*state)->dst.seqhi = htonl(arc4random()); + pf_send_tcp((*state)->rule.ptr, pd->af, + &sk->addr[pd->sidx], &sk->addr[pd->didx], + sk->port[pd->sidx], sk->port[pd->didx], + (*state)->dst.seqhi, 0, TH_SYN, 0, + (*state)->src.mss, 0, 0, (*state)->tag, NULL, NULL); + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } else if (((th->th_flags & (TH_SYN|TH_ACK)) != + (TH_SYN|TH_ACK)) || + (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) { + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_DROP); + } else { + 
(*state)->dst.max_win = MAX(ntohs(th->th_win), 1); + (*state)->dst.seqlo = ntohl(th->th_seq); + pf_send_tcp((*state)->rule.ptr, pd->af, pd->dst, + pd->src, th->th_dport, th->th_sport, + ntohl(th->th_ack), ntohl(th->th_seq) + 1, + TH_ACK, (*state)->src.max_win, 0, 0, 0, + (*state)->tag, NULL, NULL); + pf_send_tcp((*state)->rule.ptr, pd->af, + &sk->addr[pd->sidx], &sk->addr[pd->didx], + sk->port[pd->sidx], sk->port[pd->didx], + (*state)->src.seqhi + 1, (*state)->src.seqlo + 1, + TH_ACK, (*state)->dst.max_win, 0, 0, 1, + 0, NULL, NULL); + (*state)->src.seqdiff = (*state)->dst.seqhi - + (*state)->src.seqlo; + (*state)->dst.seqdiff = (*state)->src.seqhi - + (*state)->dst.seqlo; + (*state)->src.seqhi = (*state)->src.seqlo + + (*state)->dst.max_win; + (*state)->dst.seqhi = (*state)->dst.seqlo + + (*state)->src.max_win; + (*state)->src.wscale = (*state)->dst.wscale = 0; + (*state)->src.state = (*state)->dst.state = + TCPS_ESTABLISHED; + REASON_SET(reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); + } + } + + if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) && + dst->state >= TCPS_FIN_WAIT_2 && + src->state >= TCPS_FIN_WAIT_2) { + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: state reuse "); + pf_print_state(*state); + pf_print_flags(th->th_flags); + printf("\n"); + } + /* XXX make sure it's the same direction ?? 
*/ + (*state)->src.state = (*state)->dst.state = TCPS_CLOSED; + pf_unlink_state(*state); + *state = NULL; + return (PF_DROP); + } + + if ((*state)->state_flags & PFSTATE_SLOPPY) { + if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP) + return (PF_DROP); + } else { + if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason, + &copyback) == PF_DROP) + return (PF_DROP); + } /* translate source/destination address, if necessary */ - if (STATE_TRANSLATE((*state)->state_key)) { - if (direction == PF_OUT) + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || + nk->port[pd->sidx] != th->th_sport) pf_change_ap(pd->src, &th->th_sport, pd->ip_sum, - &th->th_sum, &(*state)->state_key->gwy.addr, - (*state)->state_key->gwy.port, 0, pd->af); - else + &th->th_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 0, pd->af); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || + nk->port[pd->didx] != th->th_dport) pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum, - &th->th_sum, &(*state)->state_key->lan.addr, - (*state)->state_key->lan.port, 0, pd->af); - m_copyback(m, off, sizeof(*th), th); - } else if (copyback) { - /* Copyback sequence modulation or stateful scrub changes */ - m_copyback(m, off, sizeof(*th), th); + &th->th_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 0, pd->af); + copyback = 1; } + /* Copyback sequence modulation or stateful scrub changes if needed */ + if (copyback) + m_copyback(m, off, sizeof(*th), th); + return (PF_PASS); } @@ -4155,21 +4253,21 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, key.af = pd->af; key.proto = IPPROTO_UDP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.port = uh->uh_sport; - key.gwy.port = uh->uh_dport; - } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, 
key.af); - key.lan.port = uh->uh_sport; - key.ext.port = uh->uh_dport; + if (direction == PF_IN) { /* wire side, straight */ + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + key.port[0] = uh->uh_sport; + key.port[1] = uh->uh_dport; + } else { /* stack side, reverse */ + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); + key.port[1] = uh->uh_sport; + key.port[0] = uh->uh_dport; } - STATE_LOOKUP(); + STATE_LOOKUP(kif, &key, direction, *state, m); - if (direction == (*state)->state_key->direction) { + if (direction == (*state)->direction) { src = &(*state)->src; dst = &(*state)->dst; } else { @@ -4191,15 +4289,20 @@ pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->timeout = PFTM_UDP_SINGLE; /* translate source/destination address, if necessary */ - if (STATE_TRANSLATE((*state)->state_key)) { - if (direction == PF_OUT) + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) || + nk->port[pd->sidx] != uh->uh_sport) pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum, - &uh->uh_sum, &(*state)->state_key->gwy.addr, - (*state)->state_key->gwy.port, 1, pd->af); - else + &uh->uh_sum, &nk->addr[pd->sidx], + nk->port[pd->sidx], 1, pd->af); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) || + nk->port[pd->didx] != uh->uh_dport) pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum, - &uh->uh_sum, &(*state)->state_key->lan.addr, - (*state)->state_key->lan.port, 1, pd->af); + &uh->uh_sum, &nk->addr[pd->didx], + nk->port[pd->didx], 1, pd->af); m_copyback(m, off, sizeof(*uh), uh); } @@ -4210,7 +4313,7 @@ int pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason) { - struct pf_addr *saddr = pd->src, *daddr = pd->dst; + struct pf_addr *saddr = pd->src, *daddr = pd->dst; 
u_int16_t icmpid, *icmpsum; u_int8_t icmptype; int state_icmp = 0; @@ -4254,84 +4357,74 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, */ key.af = pd->af; key.proto = pd->proto; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.port = 0; - key.gwy.port = icmpid; - } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); - key.lan.port = icmpid; - key.ext.port = 0; + key.port[0] = key.port[1] = icmpid; + if (direction == PF_IN) { /* wire side, straight */ + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + } else { /* stack side, reverse */ + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); } - STATE_LOOKUP(); + STATE_LOOKUP(kif, &key, direction, *state, m); (*state)->expire = time_second; (*state)->timeout = PFTM_ICMP_ERROR_REPLY; /* translate source/destination address, if necessary */ - if (STATE_TRANSLATE((*state)->state_key)) { - if (direction == PF_OUT) { - switch (pd->af) { + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + + switch (pd->af) { #ifdef INET - case AF_INET: + case AF_INET: + if (PF_ANEQ(pd->src, + &nk->addr[pd->sidx], AF_INET)) pf_change_a(&saddr->v4.s_addr, pd->ip_sum, - (*state)->state_key->gwy.addr.v4.s_addr, 0); - pd->hdr.icmp->icmp_cksum = - pf_cksum_fixup( - pd->hdr.icmp->icmp_cksum, icmpid, - (*state)->state_key->gwy.port, 0); - pd->hdr.icmp->icmp_id = - (*state)->state_key->gwy.port; - m_copyback(m, off, ICMP_MINLEN, - pd->hdr.icmp); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - pf_change_a6(saddr, - &pd->hdr.icmp6->icmp6_cksum, - &(*state)->state_key->gwy.addr, 0); - m_copyback(m, off, - sizeof(struct icmp6_hdr), - pd->hdr.icmp6); - break; -#endif /* INET6 */ - } - } else { - switch (pd->af) { -#ifdef INET - case AF_INET: + 
nk->addr[pd->sidx].v4.s_addr, 0); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], + AF_INET)) pf_change_a(&daddr->v4.s_addr, pd->ip_sum, - (*state)->state_key->lan.addr.v4.s_addr, 0); + nk->addr[pd->didx].v4.s_addr, 0); + + if (nk->port[0] != + pd->hdr.icmp->icmp_id) { pd->hdr.icmp->icmp_cksum = pf_cksum_fixup( pd->hdr.icmp->icmp_cksum, icmpid, - (*state)->state_key->lan.port, 0); + nk->port[pd->sidx], 0); pd->hdr.icmp->icmp_id = - (*state)->state_key->lan.port; - m_copyback(m, off, ICMP_MINLEN, - pd->hdr.icmp); - break; + nk->port[pd->sidx]; + } + + m_copyback(m, off, ICMP_MINLEN, + pd->hdr.icmp); + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: + case AF_INET6: + if (PF_ANEQ(pd->src, + &nk->addr[pd->sidx], AF_INET6)) + pf_change_a6(saddr, + &pd->hdr.icmp6->icmp6_cksum, + &nk->addr[pd->sidx], 0); + + if (PF_ANEQ(pd->dst, + &nk->addr[pd->didx], AF_INET6)) pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum, - &(*state)->state_key->lan.addr, 0); - m_copyback(m, off, - sizeof(struct icmp6_hdr), - pd->hdr.icmp6); - break; + &nk->addr[pd->didx], 0); + + m_copyback(m, off, + sizeof(struct icmp6_hdr), + pd->hdr.icmp6); + break; #endif /* INET6 */ - } } } - return (PF_PASS); } else { @@ -4352,6 +4445,9 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, int off2; pd2.af = pd->af; + /* Payload packet is from the opposite direction. */ + pd2.sidx = (direction == PF_IN) ? 1 : 0; + pd2.didx = (direction == PF_IN) ? 
0 : 1; switch (pd->af) { #ifdef INET case AF_INET: @@ -4462,21 +4558,14 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, key.af = pd2.af; key.proto = IPPROTO_TCP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = th.th_dport; - key.gwy.port = th.th_sport; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.port = th.th_dport; - key.ext.port = th.th_sport; - } + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[pd2.sidx] = th.th_sport; + key.port[pd2.didx] = th.th_dport; - STATE_LOOKUP(); + STATE_LOOKUP(kif, &key, direction, *state, m); - if (direction == (*state)->state_key->direction) { + if (direction == (*state)->direction) { src = &(*state)->dst; dst = &(*state)->src; } else { @@ -4497,8 +4586,9 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, copyback = 1; } - if (!SEQ_GEQ(src->seqhi, seq) || - !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws))) { + if (!((*state)->state_flags & PFSTATE_SLOPPY) && + (!SEQ_GEQ(src->seqhi, seq) || + !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) { if (pf_status.debug >= PF_DEBUG_MISC) { printf("pf: BAD ICMP %d:%d ", icmptype, pd->hdr.icmp->icmp_code); @@ -4511,22 +4601,43 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } REASON_SET(reason, PFRES_BADSTATE); return (PF_DROP); + } else { + if (pf_status.debug >= PF_DEBUG_MISC) { + printf("pf: OK ICMP %d:%d ", + icmptype, pd->hdr.icmp->icmp_code); + pf_print_host(pd->src, 0, pd->af); + printf(" -> "); + pf_print_host(pd->dst, 0, pd->af); + printf(" state: "); + pf_print_state(*state); + printf(" seq=%u\n", seq); + } } - if (STATE_TRANSLATE((*state)->state_key)) { - if (direction == PF_IN) { + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + 
(*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; + + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af) || + nk->port[pd2.sidx] != th.th_sport) pf_change_icmp(pd2.src, &th.th_sport, - daddr, &(*state)->state_key->lan.addr, - (*state)->state_key->lan.port, NULL, + daddr, &nk->addr[pd2.sidx], + nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); - } else { + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af) || + nk->port[pd2.didx] != th.th_dport) pf_change_icmp(pd2.dst, &th.th_dport, - saddr, &(*state)->state_key->gwy.addr, - (*state)->state_key->gwy.port, NULL, + NULL, /* XXX Inbound NAT? */ + &nk->addr[pd2.didx], + nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); - } copyback = 1; } @@ -4569,37 +4680,38 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, key.af = pd2.af; key.proto = IPPROTO_UDP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = uh.uh_dport; - key.gwy.port = uh.uh_sport; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.port = uh.uh_dport; - key.ext.port = uh.uh_sport; - } - - STATE_LOOKUP(); - - if (STATE_TRANSLATE((*state)->state_key)) { - if (direction == PF_IN) { + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[pd2.sidx] = uh.uh_sport; + key.port[pd2.didx] = uh.uh_dport; + + STATE_LOOKUP(kif, &key, direction, *state, m); + + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; + + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af) || + nk->port[pd2.sidx] != uh.uh_sport) pf_change_icmp(pd2.src, &uh.uh_sport, - daddr, - &(*state)->state_key->lan.addr, - (*state)->state_key->lan.port, - &uh.uh_sum, + daddr, 
&nk->addr[pd2.sidx], + nk->port[pd2.sidx], &uh.uh_sum, pd2.ip_sum, icmpsum, pd->ip_sum, 1, pd2.af); - } else { + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af) || + nk->port[pd2.didx] != uh.uh_dport) pf_change_icmp(pd2.dst, &uh.uh_dport, - saddr, - &(*state)->state_key->gwy.addr, - (*state)->state_key->gwy.port, &uh.uh_sum, + NULL, /* XXX Inbound NAT? */ + &nk->addr[pd2.didx], + nk->port[pd2.didx], &uh.uh_sum, pd2.ip_sum, icmpsum, pd->ip_sum, 1, pd2.af); - } + switch (pd2.af) { #ifdef INET case AF_INET: @@ -4620,7 +4732,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, } m_copyback(m, off2, sizeof(uh), &uh); } - return (PF_PASS); break; } @@ -4638,41 +4749,41 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, key.af = pd2.af; key.proto = IPPROTO_ICMP; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = 0; - key.gwy.port = iih.icmp_id; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.port = iih.icmp_id; - key.ext.port = 0; - } + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[0] = key.port[1] = iih.icmp_id; + + STATE_LOOKUP(kif, &key, direction, *state, m); - STATE_LOOKUP(); + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; - if (STATE_TRANSLATE((*state)->state_key)) { - if (direction == PF_IN) { + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af) || + nk->port[pd2.sidx] != iih.icmp_id) pf_change_icmp(pd2.src, &iih.icmp_id, - daddr, - &(*state)->state_key->lan.addr, - (*state)->state_key->lan.port, NULL, + daddr, &nk->addr[pd2.sidx], + nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); - } else { + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], 
pd2.af) || + nk->port[pd2.didx] != iih.icmp_id) pf_change_icmp(pd2.dst, &iih.icmp_id, - saddr, - &(*state)->state_key->gwy.addr, - (*state)->state_key->gwy.port, NULL, + NULL, /* XXX Inbound NAT? */ + &nk->addr[pd2.didx], + nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET); - } + m_copyback(m, off, ICMP_MINLEN, pd->hdr.icmp); m_copyback(m, ipoff2, sizeof(h2), &h2); m_copyback(m, off2, ICMP_MINLEN, &iih); } - return (PF_PASS); break; } @@ -4691,42 +4802,43 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, key.af = pd2.af; key.proto = IPPROTO_ICMPV6; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = 0; - key.gwy.port = iih.icmp6_id; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.port = iih.icmp6_id; - key.ext.port = 0; - } + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[0] = key.port[1] = iih.icmp6_id; + + STATE_LOOKUP(kif, &key, direction, *state, m); - STATE_LOOKUP(); + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; - if (STATE_TRANSLATE((*state)->state_key)) { - if (direction == PF_IN) { + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af) || + nk->port[pd2.sidx] != iih.icmp6_id) pf_change_icmp(pd2.src, &iih.icmp6_id, - daddr, - &(*state)->state_key->lan.addr, - (*state)->state_key->lan.port, NULL, + daddr, &nk->addr[pd2.sidx], + nk->port[pd2.sidx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); - } else { + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af) || + nk->port[pd2.didx] != iih.icmp6_id) pf_change_icmp(pd2.dst, &iih.icmp6_id, - saddr, &(*state)->state_key->gwy.addr, - (*state)->state_key->gwy.port, NULL, + NULL, /* XXX Inbound NAT? 
*/ + &nk->addr[pd2.didx], + nk->port[pd2.didx], NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, AF_INET6); - } + m_copyback(m, off, sizeof(struct icmp6_hdr), pd->hdr.icmp6); m_copyback(m, ipoff2, sizeof(h2_6), &h2_6); m_copyback(m, off2, sizeof(struct icmp6_hdr), &iih); } - return (PF_PASS); break; } @@ -4734,36 +4846,33 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, default: { key.af = pd2.af; key.proto = pd2.proto; - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd2.dst, key.af); - PF_ACPY(&key.gwy.addr, pd2.src, key.af); - key.ext.port = 0; - key.gwy.port = 0; - } else { - PF_ACPY(&key.lan.addr, pd2.dst, key.af); - PF_ACPY(&key.ext.addr, pd2.src, key.af); - key.lan.port = 0; - key.ext.port = 0; - } - - STATE_LOOKUP(); - - if (STATE_TRANSLATE((*state)->state_key)) { - if (direction == PF_IN) { - pf_change_icmp(pd2.src, NULL, - daddr, - &(*state)->state_key->lan.addr, - 0, NULL, + PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); + PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + key.port[0] = key.port[1] = 0; + + STATE_LOOKUP(kif, &key, direction, *state, m); + + /* translate source/destination address, if necessary */ + if ((*state)->key[PF_SK_WIRE] != + (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = + (*state)->key[pd->didx]; + + if (PF_ANEQ(pd2.src, + &nk->addr[pd2.sidx], pd2.af)) + pf_change_icmp(pd2.src, NULL, daddr, + &nk->addr[pd2.sidx], 0, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); - } else { - pf_change_icmp(pd2.dst, NULL, - saddr, - &(*state)->state_key->gwy.addr, - 0, NULL, + + if (PF_ANEQ(pd2.dst, + &nk->addr[pd2.didx], pd2.af)) + pf_change_icmp(pd2.src, NULL, + NULL, /* XXX Inbound NAT? 
*/ + &nk->addr[pd2.didx], 0, NULL, pd2.ip_sum, icmpsum, pd->ip_sum, 0, pd2.af); - } + switch (pd2.af) { #ifdef INET case AF_INET: @@ -4783,7 +4892,6 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, #endif /* INET6 */ } } - return (PF_PASS); break; } @@ -4793,7 +4901,7 @@ pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif, int pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, - struct pf_pdesc *pd) + struct mbuf *m, struct pf_pdesc *pd) { struct pf_state_peer *src, *dst; struct pf_state_key_cmp key; @@ -4801,20 +4909,18 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, key.af = pd->af; key.proto = pd->proto; if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, pd->src, key.af); - PF_ACPY(&key.gwy.addr, pd->dst, key.af); - key.ext.port = 0; - key.gwy.port = 0; + PF_ACPY(&key.addr[0], pd->src, key.af); + PF_ACPY(&key.addr[1], pd->dst, key.af); + key.port[0] = key.port[1] = 0; } else { - PF_ACPY(&key.lan.addr, pd->src, key.af); - PF_ACPY(&key.ext.addr, pd->dst, key.af); - key.lan.port = 0; - key.ext.port = 0; + PF_ACPY(&key.addr[1], pd->src, key.af); + PF_ACPY(&key.addr[0], pd->dst, key.af); + key.port[1] = key.port[0] = 0; } - STATE_LOOKUP(); + STATE_LOOKUP(kif, &key, direction, *state, m); - if (direction == (*state)->state_key->direction) { + if (direction == (*state)->direction) { src = &(*state)->src; dst = &(*state)->dst; } else { @@ -4836,43 +4942,41 @@ pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif, (*state)->timeout = PFTM_OTHER_SINGLE; /* translate source/destination address, if necessary */ - if (STATE_TRANSLATE((*state)->state_key)) { - if (direction == PF_OUT) - switch (pd->af) { + if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) { + struct pf_state_key *nk = (*state)->key[pd->didx]; + + KASSERT(nk); + KASSERT(pd); + KASSERT(pd->src); + KASSERT(pd->dst); + switch (pd->af) { #ifdef INET - 
case AF_INET: + case AF_INET: + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) pf_change_a(&pd->src->v4.s_addr, pd->ip_sum, - (*state)->state_key->gwy.addr.v4.s_addr, + nk->addr[pd->sidx].v4.s_addr, 0); - break; -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - PF_ACPY(pd->src, - &(*state)->state_key->gwy.addr, pd->af); - break; -#endif /* INET6 */ - } - else - switch (pd->af) { -#ifdef INET - case AF_INET: + + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum, - (*state)->state_key->lan.addr.v4.s_addr, + nk->addr[pd->didx].v4.s_addr, 0); + break; #endif /* INET */ #ifdef INET6 - case AF_INET6: - PF_ACPY(pd->dst, - &(*state)->state_key->lan.addr, pd->af); - break; + case AF_INET6: + if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET)) + PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); + + if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET)) + PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); #endif /* INET6 */ - } + } } - return (PF_PASS); } @@ -5170,13 +5274,13 @@ pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp, if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) || ifp->if_bridge != NULL) { in_delayed_cksum(m0); - m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clear */ + m0->m_pkthdr.csum_flags &= ~M_TCPV4_CSUM_OUT; /* Clr */ } } else if (m0->m_pkthdr.csum_flags & M_UDPV4_CSUM_OUT) { if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) || ifp->if_bridge != NULL) { in_delayed_cksum(m0); - m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clear */ + m0->m_pkthdr.csum_flags &= ~M_UDPV4_CSUM_OUT; /* Clr */ } } @@ -5453,6 +5557,34 @@ pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, return (0); } +struct pf_divert * +pf_find_divert(struct mbuf *m) +{ + struct m_tag *mtag; + + if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) + return (NULL); + + return ((struct pf_divert *)(mtag + 1)); +} + +struct pf_divert * +pf_get_divert(struct mbuf *m) +{ + struct m_tag *mtag; + + if 
((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { + mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), + M_NOWAIT); + if (mtag == NULL) + return (NULL); + bzero(mtag + 1, sizeof(struct pf_divert)); + m_tag_prepend(m, mtag); + } + + return ((struct pf_divert *)(mtag + 1)); +} + #ifdef INET int pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, @@ -5464,7 +5596,6 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct ip *h; struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; struct pf_state *s = NULL; - struct pf_state_key *sk = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; int off, dirndx, pqid = 0; @@ -5519,9 +5650,13 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, pd.src = (struct pf_addr *)&h->ip_src; pd.dst = (struct pf_addr *)&h->ip_dst; - PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET); + pd.sport = pd.dport = NULL; pd.ip_sum = &h->ip_sum; + pd.proto_sum = NULL; pd.proto = h->ip_p; + pd.dir = dir; + pd.sidx = (dir == PF_IN) ? 0 : 1; + pd.didx = (dir == PF_IN) ? 1 : 0; pd.af = AF_INET; pd.tos = h->ip_tos; pd.tot_len = ntohs(h->ip_len); @@ -5621,7 +5756,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, } default: - action = pf_test_state_other(&s, dir, kif, &pd); + action = pf_test_state_other(&s, dir, kif, m, &pd); if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); @@ -5637,7 +5772,7 @@ pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, done: if (action == PF_PASS && h->ip_hl > 5 && - !((s && s->allow_opts) || r->allow_opts)) { + !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); log = 1; @@ -5648,6 +5783,11 @@ done: if ((s && s->tag) || r->rtableid) pf_tag_packet(m, s ? 
s->tag : 0, r->rtableid); +#if 0 + if (dir == PF_IN && s && s->key[PF_SK_STACK]) + m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; +#endif + #ifdef ALTQ if (action == PF_PASS && r->qid) { if (pqid || (pd.tos & IPTOS_LOWDELAY)) @@ -5671,6 +5811,16 @@ done: (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; + if (dir == PF_IN && action == PF_PASS && r->divert.port) { + struct pf_divert *divert; + + if ((divert = pf_get_divert(m))) { + m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; + divert->port = r->divert.port; + divert->addr.ipv4 = r->divert.addr.v4; + } + } + if (log) { struct pf_rule *lr; @@ -5695,7 +5845,6 @@ done: a->bytes[dirndx] += pd.tot_len; } if (s != NULL) { - sk = s->state_key; if (s->nat_rule.ptr != NULL) { s->nat_rule.ptr->packets[dirndx]++; s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; @@ -5708,45 +5857,28 @@ done: s->nat_src_node->packets[dirndx]++; s->nat_src_node->bytes[dirndx] += pd.tot_len; } - dirndx = (dir == sk->direction) ? 0 : 1; + dirndx = (dir == s->direction) ? 0 : 1; s->packets[dirndx]++; s->bytes[dirndx] += pd.tot_len; } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; - if (nr != NULL) { - struct pf_addr *x; - /* - * XXX: we need to make sure that the addresses - * passed to pfr_update_stats() are the same than - * the addresses used during matching (pfr_match) - */ - if (r == &pf_default_rule) { - tr = nr; - x = (sk == NULL || sk->direction == dir) ? - &pd.baddr : &pd.naddr; - } else - x = (sk == NULL || sk->direction == dir) ? - &pd.naddr : &pd.baddr; - if (x == &pd.baddr || s == NULL) { - /* we need to change the address */ - if (dir == PF_OUT) - pd.src = x; - else - pd.dst = x; - } - } + if (nr != NULL && r == &pf_default_rule) + tr = nr; if (tr->src.addr.type == PF_ADDR_TABLE) - pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL || - sk->direction == dir) ? 
- pd.src : pd.dst, pd.af, - pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->src.neg); + pfr_update_stats(tr->src.addr.p.tbl, + (s == NULL) ? pd.src : + &s->key[(s->direction == PF_IN)]-> + addr[(s->direction == PF_OUT)], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) - pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL || - sk->direction == dir) ? pd.dst : pd.src, pd.af, - pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->dst.neg); + pfr_update_stats(tr->dst.addr.p.tbl, + (s == NULL) ? pd.dst : + &s->key[(s->direction == PF_IN)]-> + addr[(s->direction == PF_IN)], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->dst.neg); } @@ -5773,7 +5905,6 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct ip6_hdr *h; struct pf_rule *a = NULL, *r = &pf_default_rule, *tr, *nr; struct pf_state *s = NULL; - struct pf_state_key *sk = NULL; struct pf_ruleset *ruleset = NULL; struct pf_pdesc pd; int off, terminal = 0, dirndx, rh_cnt = 0; @@ -5832,8 +5963,12 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, pd.src = (struct pf_addr *)&h->ip6_src; pd.dst = (struct pf_addr *)&h->ip6_dst; - PF_ACPY(&pd.baddr, dir == PF_OUT ? pd.src : pd.dst, AF_INET6); + pd.sport = pd.dport = NULL; pd.ip_sum = NULL; + pd.proto_sum = NULL; + pd.dir = dir; + pd.sidx = (dir == PF_IN) ? 0 : 1; + pd.didx = (dir == PF_IN) ? 1 : 0; pd.af = AF_INET6; pd.tos = 0; pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); @@ -5996,7 +6131,7 @@ pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, } default: - action = pf_test_state_other(&s, dir, kif, &pd); + action = pf_test_state_other(&s, dir, kif, m, &pd); if (action == PF_PASS) { #if NPFSYNC pfsync_update_state(s); @@ -6018,7 +6153,7 @@ done: /* handle dangerous IPv6 extension headers. 
*/ if (action == PF_PASS && rh_cnt && - !((s && s->allow_opts) || r->allow_opts)) { + !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) { action = PF_DROP; REASON_SET(&reason, PFRES_IPOPTIONS); log = 1; @@ -6029,6 +6164,11 @@ done: if ((s && s->tag) || r->rtableid) pf_tag_packet(m, s ? s->tag : 0, r->rtableid); +#if 0 + if (dir == PF_IN && s && s->key[PF_SK_STACK]) + m->m_pkthdr.pf.statekey = s->key[PF_SK_STACK]; +#endif + #ifdef ALTQ if (action == PF_PASS && r->qid) { if (pd.tos & IPTOS_LOWDELAY) @@ -6047,6 +6187,16 @@ done: IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)) m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST; + if (dir == PF_IN && action == PF_PASS && r->divert.port) { + struct pf_divert *divert; + + if ((divert = pf_get_divert(m))) { + m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED; + divert->port = r->divert.port; + divert->addr.ipv6 = r->divert.addr.v6; + } + } + if (log) { struct pf_rule *lr; @@ -6071,7 +6221,6 @@ done: a->bytes[dirndx] += pd.tot_len; } if (s != NULL) { - sk = s->state_key; if (s->nat_rule.ptr != NULL) { s->nat_rule.ptr->packets[dirndx]++; s->nat_rule.ptr->bytes[dirndx] += pd.tot_len; @@ -6084,44 +6233,26 @@ done: s->nat_src_node->packets[dirndx]++; s->nat_src_node->bytes[dirndx] += pd.tot_len; } - dirndx = (dir == sk->direction) ? 0 : 1; + dirndx = (dir == s->direction) ? 0 : 1; s->packets[dirndx]++; s->bytes[dirndx] += pd.tot_len; } tr = r; nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule; - if (nr != NULL) { - struct pf_addr *x; - /* - * XXX: we need to make sure that the addresses - * passed to pfr_update_stats() are the same than - * the addresses used during matching (pfr_match) - */ - if (r == &pf_default_rule) { - tr = nr; - x = (s == NULL || sk->direction == dir) ? - &pd.baddr : &pd.naddr; - } else { - x = (s == NULL || sk->direction == dir) ? 
- &pd.naddr : &pd.baddr; - } - if (x == &pd.baddr || s == NULL) { - if (dir == PF_OUT) - pd.src = x; - else - pd.dst = x; - } - } + if (nr != NULL && r == &pf_default_rule) + tr = nr; if (tr->src.addr.type == PF_ADDR_TABLE) - pfr_update_stats(tr->src.addr.p.tbl, (sk == NULL || - sk->direction == dir) ? pd.src : pd.dst, pd.af, - pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->src.neg); + pfr_update_stats(tr->src.addr.p.tbl, + (s == NULL) ? pd.src : + &s->key[(s->direction == PF_IN)]->addr[0], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->src.neg); if (tr->dst.addr.type == PF_ADDR_TABLE) - pfr_update_stats(tr->dst.addr.p.tbl, (sk == NULL || - sk->direction == dir) ? pd.dst : pd.src, pd.af, - pd.tot_len, dir == PF_OUT, r->action == PF_PASS, - tr->dst.neg); + pfr_update_stats(tr->dst.addr.p.tbl, + (s == NULL) ? pd.dst : + &s->key[(s->direction == PF_IN)]->addr[1], + pd.af, pd.tot_len, dir == PF_OUT, + r->action == PF_PASS, tr->dst.neg); } diff --git a/net/pf_if.c b/net/pf_if.c index a3f62e1ea526..e39a9d6501e6 100644 --- a/net/pf_if.c +++ b/net/pf_if.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_if.c,v 1.51 2007/11/07 17:28:40 mpf Exp $ */ +/* $OpenBSD: pf_if.c,v 1.54 2008/06/14 16:55:28 mk Exp $ */ /* * Copyright 2005 Henning Brauer <henning@openbsd.org> @@ -332,9 +332,9 @@ pfi_dynaddr_setup(struct pf_addr_wrap *aw, sa_family_t af) if (aw->type != PF_ADDR_DYNIFTL) return (0); - if ((dyn = pool_get(&pfi_addr_pl, PR_NOWAIT)) == NULL) + if ((dyn = pool_get(&pfi_addr_pl, PR_WAITOK | PR_LIMITFAIL | PR_ZERO)) + == NULL) return (1); - bzero(dyn, sizeof(*dyn)); s = splsoftnet(); if (!strcmp(aw->v.ifname, "self")) @@ -606,7 +606,7 @@ void pfi_update_status(const char *name, struct pf_status *pfs) { struct pfi_kif *p; - struct pfi_kif_cmp key; + struct pfi_kif_cmp key; struct ifg_member p_member, *ifgm; TAILQ_HEAD(, ifg_member) ifg_members; int i, j, k, s; diff --git a/net/pf_ioctl.c b/net/pf_ioctl.c index 9226258352bf..4c1e18c89d6e 100644 --- a/net/pf_ioctl.c +++ 
b/net/pf_ioctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_ioctl.c,v 1.193 2007/12/02 12:08:04 pascoe Exp $ */ +/* $OpenBSD: pf_ioctl.c,v 1.209 2008/06/29 08:42:15 mcbride Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -36,6 +36,7 @@ */ #include "pfsync.h" +#include "pflog.h" #include <sys/param.h> #include <sys/systm.h> @@ -109,10 +110,9 @@ int pf_setup_pfsync_matching(struct pf_ruleset *); void pf_hash_rule(MD5_CTX *, struct pf_rule *); void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); int pf_commit_rules(u_int32_t, int, char *); -void pf_state_export(struct pfsync_state *, - struct pf_state_key *, struct pf_state *); -void pf_state_import(struct pfsync_state *, - struct pf_state_key *, struct pf_state *); +int pf_addr_setup(struct pf_ruleset *, + struct pf_addr_wrap *, sa_family_t); +void pf_addr_copyout(struct pf_addr_wrap *); struct pf_rule pf_default_rule; struct rwlock pf_consistency_lock = RWLOCK_INITIALIZER("pfcnslk"); @@ -147,8 +147,10 @@ pfattach(int num) "pfsrctrpl", NULL); pool_init(&pf_state_pl, sizeof(struct pf_state), 0, 0, 0, "pfstatepl", NULL); - pool_init(&pf_state_key_pl, sizeof(struct pf_state_key), 0, 0, 0, + pool_init(&pf_state_key_pl, sizeof(struct pf_state_key), 0, 0, 0, "pfstatekeypl", NULL); + pool_init(&pf_state_item_pl, sizeof(struct pf_state_item), 0, 0, 0, + "pfstateitempl", NULL); pool_init(&pf_altq_pl, sizeof(struct pf_altq), 0, 0, 0, "pfaltqpl", &pool_allocator_nointr); pool_init(&pf_pooladdr_pl, sizeof(struct pf_pooladdr), 0, 0, 0, @@ -160,7 +162,7 @@ pfattach(int num) pool_sethardlimit(pf_pool_limits[PF_LIMIT_STATES].pp, pf_pool_limits[PF_LIMIT_STATES].limit, NULL, 0); - if (ptoa(physmem) <= 100*1024*1024) + if (physmem <= atop(100*1024*1024)) pf_pool_limits[PF_LIMIT_TABLE_ENTRIES].limit = PFR_KENTRY_HIWAT_SMALL; @@ -309,7 +311,7 @@ void pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) { if (rulequeue != NULL) { - if (rule->states <= 0) { + if (rule->states_cur <= 0) { /* * XXX - we need to remove the table 
*before* detaching * the rule to make sure the table code does not delete @@ -325,7 +327,7 @@ pf_rm_rule(struct pf_rulequeue *rulequeue, struct pf_rule *rule) rule->nr = -1; } - if (rule->states > 0 || rule->src_nodes > 0 || + if (rule->states_cur > 0 || rule->src_nodes > 0 || rule->entries.tqe_prev != NULL) return; pf_tag_unref(rule->tag); @@ -841,91 +843,6 @@ pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) return (0); } -void -pf_state_export(struct pfsync_state *sp, struct pf_state_key *sk, - struct pf_state *s) -{ - int secs = time_second; - bzero(sp, sizeof(struct pfsync_state)); - - /* copy from state key */ - sp->lan.addr = sk->lan.addr; - sp->lan.port = sk->lan.port; - sp->gwy.addr = sk->gwy.addr; - sp->gwy.port = sk->gwy.port; - sp->ext.addr = sk->ext.addr; - sp->ext.port = sk->ext.port; - sp->proto = sk->proto; - sp->af = sk->af; - sp->direction = sk->direction; - - /* copy from state */ - memcpy(&sp->id, &s->id, sizeof(sp->id)); - sp->creatorid = s->creatorid; - strlcpy(sp->ifname, s->kif->pfik_name, sizeof(sp->ifname)); - pf_state_peer_to_pfsync(&s->src, &sp->src); - pf_state_peer_to_pfsync(&s->dst, &sp->dst); - - sp->rule = s->rule.ptr->nr; - sp->nat_rule = (s->nat_rule.ptr == NULL) ? -1 : s->nat_rule.ptr->nr; - sp->anchor = (s->anchor.ptr == NULL) ? 
-1 : s->anchor.ptr->nr; - - pf_state_counter_to_pfsync(s->bytes[0], sp->bytes[0]); - pf_state_counter_to_pfsync(s->bytes[1], sp->bytes[1]); - pf_state_counter_to_pfsync(s->packets[0], sp->packets[0]); - pf_state_counter_to_pfsync(s->packets[1], sp->packets[1]); - sp->creation = secs - s->creation; - sp->expire = pf_state_expires(s); - sp->log = s->log; - sp->allow_opts = s->allow_opts; - sp->timeout = s->timeout; - - if (s->src_node) - sp->sync_flags |= PFSYNC_FLAG_SRCNODE; - if (s->nat_src_node) - sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; - - if (sp->expire > secs) - sp->expire -= secs; - else - sp->expire = 0; - -} - -void -pf_state_import(struct pfsync_state *sp, struct pf_state_key *sk, - struct pf_state *s) -{ - /* copy to state key */ - sk->lan.addr = sp->lan.addr; - sk->lan.port = sp->lan.port; - sk->gwy.addr = sp->gwy.addr; - sk->gwy.port = sp->gwy.port; - sk->ext.addr = sp->ext.addr; - sk->ext.port = sp->ext.port; - sk->proto = sp->proto; - sk->af = sp->af; - sk->direction = sp->direction; - - /* copy to state */ - memcpy(&s->id, &sp->id, sizeof(sp->id)); - s->creatorid = sp->creatorid; - pf_state_peer_from_pfsync(&sp->src, &s->src); - pf_state_peer_from_pfsync(&sp->dst, &s->dst); - - s->rule.ptr = &pf_default_rule; - s->nat_rule.ptr = NULL; - s->anchor.ptr = NULL; - s->rt_kif = NULL; - s->creation = time_second; - s->expire = time_second; - if (sp->expire > 0) - s->expire -= pf_default_rule.timeout[sp->timeout] - sp->expire; - s->pfsync_time = 0; - s->packets[0] = s->packets[1] = 0; - s->bytes[0] = s->bytes[1] = 0; -} - int pf_setup_pfsync_matching(struct pf_ruleset *rs) { @@ -967,6 +884,25 @@ pf_setup_pfsync_matching(struct pf_ruleset *rs) } int +pf_addr_setup(struct pf_ruleset *ruleset, struct pf_addr_wrap *addr, + sa_family_t af) +{ + if (pfi_dynaddr_setup(addr, af) || + pf_tbladdr_setup(ruleset, addr)) + return (EINVAL); + + return (0); +} + +void +pf_addr_copyout(struct pf_addr_wrap *addr) +{ + pfi_dynaddr_copyout(addr); + pf_tbladdr_copyout(addr); + 
pf_rtlabel_copyout(addr); +} + +int pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) { struct pf_pooladdr *pa = NULL; @@ -1067,7 +1003,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } return (EACCES); case DIOCGETRULE: - if (((struct pfioc_rule *)addr)->action == PF_GET_CLR_CNTR) + if (((struct pfioc_rule *)addr)->action == + PF_GET_CLR_CNTR) return (EACCES); break; default: @@ -1136,7 +1073,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; break; } - rule = pool_get(&pf_rule_pl, PR_NOWAIT); + rule = pool_get(&pf_rule_pl, PR_WAITOK|PR_LIMITFAIL); if (rule == NULL) { error = ENOMEM; break; @@ -1148,7 +1085,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) rule->kif = NULL; TAILQ_INIT(&rule->rpool.list); /* initialize refcounting */ - rule->states = 0; + rule->states_cur = 0; rule->src_nodes = 0; rule->entries.tqe_prev = NULL; #ifndef INET @@ -1215,13 +1152,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (pf_rtlabel_add(&rule->src.addr) || pf_rtlabel_add(&rule->dst.addr)) error = EBUSY; - if (pfi_dynaddr_setup(&rule->src.addr, rule->af)) - error = EINVAL; - if (pfi_dynaddr_setup(&rule->dst.addr, rule->af)) + if (pf_addr_setup(ruleset, &rule->src.addr, rule->af)) error = EINVAL; - if (pf_tbladdr_setup(ruleset, &rule->src.addr)) - error = EINVAL; - if (pf_tbladdr_setup(ruleset, &rule->dst.addr)) + if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af)) error = EINVAL; if (pf_anchor_setup(rule, ruleset, pr->anchor_call)) error = EINVAL; @@ -1318,12 +1251,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; break; } - pfi_dynaddr_copyout(&pr->rule.src.addr); - pfi_dynaddr_copyout(&pr->rule.dst.addr); - pf_tbladdr_copyout(&pr->rule.src.addr); - pf_tbladdr_copyout(&pr->rule.dst.addr); - pf_rtlabel_copyout(&pr->rule.src.addr); - pf_rtlabel_copyout(&pr->rule.dst.addr); + 
pf_addr_copyout(&pr->rule.src.addr); + pf_addr_copyout(&pr->rule.dst.addr); for (i = 0; i < PF_SKIP_COUNT; ++i) if (rule->skip[i].ptr == NULL) pr->rule.skip[i].nr = -1; @@ -1335,6 +1264,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) rule->evaluations = 0; rule->packets[0] = rule->packets[1] = 0; rule->bytes[0] = rule->bytes[1] = 0; + rule->states_tot = 0; } break; } @@ -1385,7 +1315,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } if (pcr->action != PF_CHANGE_REMOVE) { - newrule = pool_get(&pf_rule_pl, PR_NOWAIT); + newrule = pool_get(&pf_rule_pl, PR_WAITOK|PR_LIMITFAIL); if (newrule == NULL) { error = ENOMEM; break; @@ -1395,7 +1325,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) newrule->cpid = p->p_pid; TAILQ_INIT(&newrule->rpool.list); /* initialize refcounting */ - newrule->states = 0; + newrule->states_cur = 0; newrule->entries.tqe_prev = NULL; #ifndef INET if (newrule->af == AF_INET) { @@ -1459,13 +1389,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (pf_rtlabel_add(&newrule->src.addr) || pf_rtlabel_add(&newrule->dst.addr)) error = EBUSY; - if (pfi_dynaddr_setup(&newrule->src.addr, newrule->af)) - error = EINVAL; - if (pfi_dynaddr_setup(&newrule->dst.addr, newrule->af)) + if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af)) error = EINVAL; - if (pf_tbladdr_setup(ruleset, &newrule->src.addr)) - error = EINVAL; - if (pf_tbladdr_setup(ruleset, &newrule->dst.addr)) + if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af)) error = EINVAL; if (pf_anchor_setup(newrule, ruleset, pcr->anchor_call)) error = EINVAL; @@ -1556,7 +1482,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCCLRSTATES: { struct pf_state *s, *nexts; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; - int killed = 0; + u_int killed = 0; for (s = RB_MIN(pf_state_tree_id, &tree_id); s; s = nexts) { nexts = 
RB_NEXT(pf_state_tree_id, &tree_id, s); @@ -1571,7 +1497,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) killed++; } } - psk->psk_af = killed; + psk->psk_killed = killed; #if NPFSYNC pfsync_clear_states(pf_status.hostid, psk->psk_ifname); #endif @@ -1581,21 +1507,41 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCKILLSTATES: { struct pf_state *s, *nexts; struct pf_state_key *sk; - struct pf_state_host *src, *dst; + struct pf_addr *srcaddr, *dstaddr; + u_int16_t srcport, dstport; struct pfioc_state_kill *psk = (struct pfioc_state_kill *)addr; - int killed = 0; + u_int killed = 0; + + if (psk->psk_pfcmp.id) { + if (psk->psk_pfcmp.creatorid == 0) + psk->psk_pfcmp.creatorid = pf_status.hostid; + if ((s = pf_find_state_byid(&psk->psk_pfcmp))) { +#if NPFSYNC > 0 + /* send immediate delete of state */ + pfsync_delete_state(s); + s->sync_flags |= PFSTATE_NOSYNC; +#endif + pf_unlink_state(s); + psk->psk_killed = 1; + } + break; + } for (s = RB_MIN(pf_state_tree_id, &tree_id); s; s = nexts) { nexts = RB_NEXT(pf_state_tree_id, &tree_id, s); - sk = s->state_key; + sk = s->key[PF_SK_WIRE]; - if (sk->direction == PF_OUT) { - src = &sk->lan; - dst = &sk->ext; + if (s->direction == PF_OUT) { + srcaddr = &sk->addr[1]; + dstaddr = &sk->addr[0]; + srcport = sk->port[0]; + dstport = sk->port[0]; } else { - src = &sk->ext; - dst = &sk->lan; + srcaddr = &sk->addr[0]; + dstaddr = &sk->addr[1]; + srcport = sk->port[0]; + dstport = sk->port[0]; } if ((!psk->psk_af || sk->af == psk->psk_af) && (!psk->psk_proto || psk->psk_proto == @@ -1603,19 +1549,21 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr, &psk->psk_src.addr.v.a.mask, - &src->addr, sk->af) && + srcaddr, sk->af) && PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr, &psk->psk_dst.addr.v.a.mask, - &dst->addr, sk->af) && + dstaddr, sk->af) && (psk->psk_src.port_op == 0 || 
pf_match_port(psk->psk_src.port_op, psk->psk_src.port[0], psk->psk_src.port[1], - src->port)) && + srcport)) && (psk->psk_dst.port_op == 0 || pf_match_port(psk->psk_dst.port_op, psk->psk_dst.port[0], psk->psk_dst.port[1], - dst->port)) && + dstport)) && + (!psk->psk_label[0] || (s->rule.ptr->label[0] && + !strcmp(psk->psk_label, s->rule.ptr->label))) && (!psk->psk_ifname[0] || !strcmp(psk->psk_ifname, s->kif->pfik_name))) { #if NPFSYNC > 0 @@ -1627,48 +1575,20 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) killed++; } } - psk->psk_af = killed; + psk->psk_killed = killed; break; } case DIOCADDSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; - struct pfsync_state *sp = &ps->state; - struct pf_state *s; - struct pf_state_key *sk; - struct pfi_kif *kif; + struct pfsync_state *sp = &ps->state; if (sp->timeout >= PFTM_MAX && sp->timeout != PFTM_UNTIL_PACKET) { error = EINVAL; break; } - s = pool_get(&pf_state_pl, PR_NOWAIT); - if (s == NULL) { - error = ENOMEM; - break; - } - bzero(s, sizeof(struct pf_state)); - if ((sk = pf_alloc_state_key(s)) == NULL) { - pool_put(&pf_state_pl, s); - error = ENOMEM; - break; - } - pf_state_import(sp, sk, s); - kif = pfi_kif_get(sp->ifname); - if (kif == NULL) { - pool_put(&pf_state_pl, s); - pool_put(&pf_state_key_pl, sk); - error = ENOENT; - break; - } - if (pf_insert_state(kif, s)) { - pfi_kif_unref(kif, PFI_KIF_REF_NONE); - pool_put(&pf_state_pl, s); - error = EEXIST; - break; - } - pf_default_rule.states++; + error = pfsync_state_import(sp, PFSYNC_SI_IOCTL); break; } @@ -1686,7 +1606,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } - pf_state_export(&ps->state, s->state_key, s); + pfsync_state_export(&ps->state, s); break; } @@ -1711,9 +1631,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (state->timeout != PFTM_UNLINKED) { if ((nr+1) * sizeof(*p) > (unsigned)ps->ps_len) break; - - pf_state_export(pstore, - 
state->state_key, state); + pfsync_state_export(pstore, state); error = copyout(pstore, p, sizeof(*p)); if (error) { free(pstore, M_TEMP); @@ -1765,9 +1683,11 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) struct pf_state *state; struct pf_state_key_cmp key; int m = 0, direction = pnl->direction; + int sidx, didx; - key.af = pnl->af; - key.proto = pnl->proto; + /* NATLOOK src and dst are reversed, so reverse sidx/didx */ + sidx = (direction == PF_IN) ? 1 : 0; + didx = (direction == PF_IN) ? 0 : 1; if (!pnl->proto || PF_AZERO(&pnl->saddr, pnl->af) || @@ -1777,44 +1697,23 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) (!pnl->dport || !pnl->sport))) error = EINVAL; else { - /* - * userland gives us source and dest of connection, - * reverse the lookup so we ask for what happens with - * the return traffic, enabling us to find it in the - * state tree. - */ - if (direction == PF_IN) { - PF_ACPY(&key.ext.addr, &pnl->daddr, pnl->af); - key.ext.port = pnl->dport; - PF_ACPY(&key.gwy.addr, &pnl->saddr, pnl->af); - key.gwy.port = pnl->sport; - state = pf_find_state_all(&key, PF_IN, &m); - } else { - PF_ACPY(&key.lan.addr, &pnl->daddr, pnl->af); - key.lan.port = pnl->dport; - PF_ACPY(&key.ext.addr, &pnl->saddr, pnl->af); - key.ext.port = pnl->sport; - state = pf_find_state_all(&key, PF_OUT, &m); - } + key.af = pnl->af; + key.proto = pnl->proto; + PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af); + key.port[sidx] = pnl->sport; + PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af); + key.port[didx] = pnl->dport; + + state = pf_find_state_all(&key, direction, &m); + if (m > 1) error = E2BIG; /* more than one state */ else if (state != NULL) { - sk = state->state_key; - if (direction == PF_IN) { - PF_ACPY(&pnl->rsaddr, &sk->lan.addr, - sk->af); - pnl->rsport = sk->lan.port; - PF_ACPY(&pnl->rdaddr, &pnl->daddr, - pnl->af); - pnl->rdport = pnl->dport; - } else { - PF_ACPY(&pnl->rdaddr, &sk->gwy.addr, - sk->af); - pnl->rdport = 
sk->gwy.port; - PF_ACPY(&pnl->rsaddr, &pnl->saddr, - pnl->af); - pnl->rsport = pnl->sport; - } + sk = state->key[sidx]; + PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af); + pnl->rsport = sk->port[sidx]; + PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af); + pnl->rdport = sk->port[didx]; } else error = ENOENT; } @@ -1946,7 +1845,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EBUSY; break; } - altq = pool_get(&pf_altq_pl, PR_NOWAIT); + altq = pool_get(&pf_altq_pl, PR_WAITOK|PR_LIMITFAIL); if (altq == NULL) { error = ENOMEM; break; @@ -2086,7 +1985,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) error = EINVAL; break; } - pa = pool_get(&pf_pooladdr_pl, PR_NOWAIT); + pa = pool_get(&pf_pooladdr_pl, PR_WAITOK|PR_LIMITFAIL); if (pa == NULL) { error = ENOMEM; break; @@ -2147,9 +2046,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } bcopy(pa, &pp->addr, sizeof(struct pf_pooladdr)); - pfi_dynaddr_copyout(&pp->addr.addr); - pf_tbladdr_copyout(&pp->addr.addr); - pf_rtlabel_copyout(&pp->addr.addr); + pf_addr_copyout(&pp->addr.addr); break; } @@ -2182,7 +2079,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } if (pca->action != PF_CHANGE_REMOVE) { - newpa = pool_get(&pf_pooladdr_pl, PR_NOWAIT); + newpa = pool_get(&pf_pooladdr_pl, + PR_WAITOK|PR_LIMITFAIL); if (newpa == NULL) { error = ENOMEM; break; @@ -2849,22 +2747,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) case DIOCKILLSRCNODES: { struct pf_src_node *sn; struct pf_state *s; - struct pfioc_src_node_kill *psnk = \ - (struct pfioc_src_node_kill *) addr; - int killed = 0; + struct pfioc_src_node_kill *psnk = + (struct pfioc_src_node_kill *)addr; + u_int killed = 0; RB_FOREACH(sn, pf_src_tree, &tree_src_tracking) { - if (PF_MATCHA(psnk->psnk_src.neg, \ - &psnk->psnk_src.addr.v.a.addr, \ - &psnk->psnk_src.addr.v.a.mask, \ - &sn->addr, sn->af) && - 
PF_MATCHA(psnk->psnk_dst.neg, \ - &psnk->psnk_dst.addr.v.a.addr, \ - &psnk->psnk_dst.addr.v.a.mask, \ - &sn->raddr, sn->af)) { + if (PF_MATCHA(psnk->psnk_src.neg, + &psnk->psnk_src.addr.v.a.addr, + &psnk->psnk_src.addr.v.a.mask, + &sn->addr, sn->af) && + PF_MATCHA(psnk->psnk_dst.neg, + &psnk->psnk_dst.addr.v.a.addr, + &psnk->psnk_dst.addr.v.a.mask, + &sn->raddr, sn->af)) { /* Handle state to src_node linkage */ if (sn->states != 0) { - RB_FOREACH(s, pf_state_tree_id, + RB_FOREACH(s, pf_state_tree_id, &tree_id) { if (s->src_node == sn) s->src_node = NULL; @@ -2881,7 +2779,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (killed > 0) pf_purge_expired_src_nodes(1); - psnk->psnk_af = killed; + psnk->psnk_killed = killed; break; } diff --git a/net/pf_norm.c b/net/pf_norm.c index 524f8c16469f..a6837a209fc0 100644 --- a/net/pf_norm.c +++ b/net/pf_norm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_norm.c,v 1.111 2007/12/30 10:32:24 mglocker Exp $ */ +/* $OpenBSD: pf_norm.c,v 1.113 2008/05/07 07:07:29 markus Exp $ */ /* * Copyright 2001 Niels Provos <provos@citi.umich.edu> @@ -827,6 +827,7 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, u_int16_t max; int ip_len; int ip_off; + int tag = -1; r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr); while (r != NULL) { @@ -847,6 +848,8 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, (struct pf_addr *)&h->ip_dst.s_addr, AF_INET, r->dst.neg, NULL)) r = r->skip[PF_SKIP_DST_ADDR].ptr; + else if (r->match_tag && !pf_match_tag(m, r, &tag)) + r = TAILQ_NEXT(r, entries); else break; } @@ -989,6 +992,17 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); } + /* Enforce tos */ + if (r->rule_flag & PFRULE_SET_TOS) { + u_int16_t ov, nv; + + ov = *(u_int16_t *)h; + h->ip_tos = r->set_tos; + nv = *(u_int16_t *)h; + + h->ip_sum = 
pf_cksum_fixup(h->ip_sum, ov, nv, 0); + } + if (r->rule_flag & PFRULE_RANDOMID) { u_int16_t ip_id = h->ip_id; @@ -1008,6 +1022,16 @@ pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif, u_short *reason, h->ip_ttl = r->min_ttl; h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0); } + /* Enforce tos */ + if (r->rule_flag & PFRULE_SET_TOS) { + u_int16_t ov, nv; + + ov = *(u_int16_t *)h; + h->ip_tos = r->set_tos; + nv = *(u_int16_t *)h; + + h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0); + } if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) pd->flags |= PFDESC_IP_REAS; return (PF_PASS); diff --git a/net/pf_osfp.c b/net/pf_osfp.c index d05a7be63810..75f33032e1a2 100644 --- a/net/pf_osfp.c +++ b/net/pf_osfp.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_osfp.c,v 1.13 2007/09/01 18:49:27 henning Exp $ */ +/* $OpenBSD: pf_osfp.c,v 1.15 2008/06/14 02:22:13 henning Exp $ */ /* * Copyright (c) 2003 Mike Frantzen <frantzen@w4g.org> @@ -335,6 +335,7 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) fpadd.fp_wscale = fpioc->fp_wscale; fpadd.fp_ttl = fpioc->fp_ttl; +#if 0 /* XXX RYAN wants to fix logging */ DPFPRINTF("adding osfp %s %s %s = %s%d:%d:%d:%s%d:0x%llx %d " "(TS=%s,M=%s%d,W=%s%d) %x\n", fpioc->fp_os.fp_class_nm, fpioc->fp_os.fp_version_nm, @@ -358,17 +359,19 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) (fpadd.fp_flags & PF_OSFP_WSCALE_DC) ? 
"*" : "", fpadd.fp_wscale, fpioc->fp_os.fp_os); - +#endif if ((fp = pf_osfp_find_exact(&pf_osfp_list, &fpadd))) { SLIST_FOREACH(entry, &fp->fp_oses, fp_entry) { if (PF_OSFP_ENTRY_EQ(entry, &fpioc->fp_os)) return (EEXIST); } - if ((entry = pool_get(&pf_osfp_entry_pl, PR_NOWAIT)) == NULL) + if ((entry = pool_get(&pf_osfp_entry_pl, + PR_WAITOK|PR_LIMITFAIL)) == NULL) return (ENOMEM); } else { - if ((fp = pool_get(&pf_osfp_pl, PR_NOWAIT)) == NULL) + if ((fp = pool_get(&pf_osfp_pl, + PR_WAITOK|PR_LIMITFAIL)) == NULL) return (ENOMEM); memset(fp, 0, sizeof(*fp)); fp->fp_tcpopts = fpioc->fp_tcpopts; @@ -380,7 +383,8 @@ pf_osfp_add(struct pf_osfp_ioctl *fpioc) fp->fp_wscale = fpioc->fp_wscale; fp->fp_ttl = fpioc->fp_ttl; SLIST_INIT(&fp->fp_oses); - if ((entry = pool_get(&pf_osfp_entry_pl, PR_NOWAIT)) == NULL) { + if ((entry = pool_get(&pf_osfp_entry_pl, + PR_WAITOK|PR_LIMITFAIL)) == NULL) { pool_put(&pf_osfp_pl, fp); return (ENOMEM); } diff --git a/net/pf_table.c b/net/pf_table.c index eebe03bcb7ec..c8ac76fc53da 100644 --- a/net/pf_table.c +++ b/net/pf_table.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_table.c,v 1.72 2007/12/20 20:07:41 reyk Exp $ */ +/* $OpenBSD: pf_table.c,v 1.78 2008/06/14 03:50:14 art Exp $ */ /* * Copyright (c) 2002 Cedric Berger @@ -127,6 +127,7 @@ struct pfr_walktree { struct pool pfr_ktable_pl; struct pool pfr_kentry_pl; struct pool pfr_kentry_pl2; +struct pool pfr_kcounters_pl; struct sockaddr_in pfr_sin; struct sockaddr_in6 pfr_sin6; union sockaddr_union pfr_mask; @@ -190,11 +191,13 @@ void pfr_initialize(void) { pool_init(&pfr_ktable_pl, sizeof(struct pfr_ktable), 0, 0, 0, - "pfrktable", &pool_allocator_oldnointr); + "pfrktable", NULL); pool_init(&pfr_kentry_pl, sizeof(struct pfr_kentry), 0, 0, 0, - "pfrkentry", &pool_allocator_oldnointr); + "pfrkentry", NULL); pool_init(&pfr_kentry_pl2, sizeof(struct pfr_kentry), 0, 0, 0, "pfrkentry2", NULL); + pool_init(&pfr_kcounters_pl, sizeof(struct pfr_kcounters), 0, 0, 0, + "pfrkcounters", NULL); pfr_sin.sin_len = 
sizeof(pfr_sin); pfr_sin.sin_family = AF_INET; @@ -808,12 +811,11 @@ pfr_create_kentry(struct pfr_addr *ad, int intr) struct pfr_kentry *ke; if (intr) - ke = pool_get(&pfr_kentry_pl2, PR_NOWAIT); + ke = pool_get(&pfr_kentry_pl2, PR_NOWAIT | PR_ZERO); else - ke = pool_get(&pfr_kentry_pl, PR_NOWAIT); + ke = pool_get(&pfr_kentry_pl, PR_WAITOK|PR_ZERO|PR_LIMITFAIL); if (ke == NULL) return (NULL); - bzero(ke, sizeof(*ke)); if (ad->pfra_af == AF_INET) FILLIN_SIN(ke->pfrke_sa.sin, ad->pfra_ip4addr); @@ -840,6 +842,8 @@ pfr_destroy_kentries(struct pfr_kentryworkq *workq) void pfr_destroy_kentry(struct pfr_kentry *ke) { + if (ke->pfrke_counters) + pool_put(&pfr_kcounters_pl, ke->pfrke_counters); if (ke->pfrke_intrpool) pool_put(&pfr_kentry_pl2, ke); else @@ -924,8 +928,10 @@ pfr_clstats_kentries(struct pfr_kentryworkq *workq, long tzero, int negchange) s = splsoftnet(); if (negchange) p->pfrke_not = !p->pfrke_not; - bzero(p->pfrke_packets, sizeof(p->pfrke_packets)); - bzero(p->pfrke_bytes, sizeof(p->pfrke_bytes)); + if (p->pfrke_counters) { + pool_put(&pfr_kcounters_pl, p->pfrke_counters); + p->pfrke_counters = NULL; + } splx(s); p->pfrke_tzero = tzero; } @@ -988,9 +994,9 @@ pfr_route_kentry(struct pfr_ktable *kt, struct pfr_kentry *ke) s = splsoftnet(); if (KENTRY_NETWORK(ke)) { pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net); - rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node); + rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node, 0); } else - rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node); + rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node, 0); splx(s); return (rn == NULL ? 
-1 : 0); @@ -1075,10 +1081,16 @@ pfr_walktree(struct radix_node *rn, void *arg) pfr_copyout_addr(&as.pfras_a, ke); s = splsoftnet(); - bcopy(ke->pfrke_packets, as.pfras_packets, - sizeof(as.pfras_packets)); - bcopy(ke->pfrke_bytes, as.pfras_bytes, - sizeof(as.pfras_bytes)); + if (ke->pfrke_counters) { + bcopy(ke->pfrke_counters->pfrkc_packets, + as.pfras_packets, sizeof(as.pfras_packets)); + bcopy(ke->pfrke_counters->pfrkc_bytes, + as.pfras_bytes, sizeof(as.pfras_bytes)); + } else { + bzero(as.pfras_packets, sizeof(as.pfras_packets)); + bzero(as.pfras_bytes, sizeof(as.pfras_bytes)); + as.pfras_a.pfra_fback = PFR_FB_NOCOUNT; + } splx(s); as.pfras_tzero = ke->pfrke_tzero; @@ -1888,10 +1900,9 @@ pfr_create_ktable(struct pfr_table *tbl, long tzero, int attachruleset) struct pfr_ktable *kt; struct pf_ruleset *rs; - kt = pool_get(&pfr_ktable_pl, PR_NOWAIT); + kt = pool_get(&pfr_ktable_pl, PR_WAITOK | PR_ZERO | PR_LIMITFAIL); if (kt == NULL) return (NULL); - bzero(kt, sizeof(*kt)); kt->pfrkt_t = *tbl; if (attachruleset) { @@ -2043,9 +2054,15 @@ pfr_update_stats(struct pfr_ktable *kt, struct pf_addr *a, sa_family_t af, } kt->pfrkt_packets[dir_out][op_pass]++; kt->pfrkt_bytes[dir_out][op_pass] += len; - if (ke != NULL && op_pass != PFR_OP_XPASS) { - ke->pfrke_packets[dir_out][op_pass]++; - ke->pfrke_bytes[dir_out][op_pass] += len; + if (ke != NULL && op_pass != PFR_OP_XPASS && + (kt->pfrkt_flags & PFR_TFLAG_COUNTERS)) { + if (ke->pfrke_counters == NULL) + ke->pfrke_counters = pool_get(&pfr_kcounters_pl, + PR_NOWAIT | PR_ZERO); + if (ke->pfrke_counters != NULL) { + ke->pfrke_counters->pfrkc_packets[dir_out][op_pass]++; + ke->pfrke_counters->pfrkc_bytes[dir_out][op_pass] += len; + } } } diff --git a/net/pfvar.h b/net/pfvar.h index 7f2bf5e88ab8..f8103d88976e 100644 --- a/net/pfvar.h +++ b/net/pfvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pfvar.h,v 1.259 2007/12/02 12:08:04 pascoe Exp $ */ +/* $OpenBSD: pfvar.h,v 1.276 2008/07/03 15:46:23 henning Exp $ */ /* * Copyright (c) 2001 Daniel 
Hartmeier @@ -69,6 +69,7 @@ enum { PF_CHANGE_NONE, PF_CHANGE_ADD_HEAD, PF_CHANGE_ADD_TAIL, PF_CHANGE_ADD_BEFORE, PF_CHANGE_ADD_AFTER, PF_CHANGE_REMOVE, PF_CHANGE_GET_TICKET }; enum { PF_GET_NONE, PF_GET_CLR_CNTR }; +enum { PF_SK_WIRE, PF_SK_STACK, PF_SK_BOTH }; /* * Note about PFTM_*: real indices into pf_rule.timeout[] come before @@ -328,10 +329,10 @@ struct pfi_dynaddr { ((aw)->type == PF_ADDR_TABLE && \ !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ ((aw)->type == PF_ADDR_DYNIFTL && \ - !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ + !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ ((aw)->type == PF_ADDR_RANGE && \ !pf_match_addr_range(&(aw)->v.a.addr, \ - &(aw)->v.a.mask, (x), (af))) || \ + &(aw)->v.a.mask, (x), (af))) || \ ((aw)->type == PF_ADDR_ADDRMASK && \ !PF_AZERO(&(aw)->v.a.mask, (af)) && \ !PF_MATCHA(0, &(aw)->v.a.addr, \ @@ -538,7 +539,8 @@ struct pf_rule { int rtableid; u_int32_t timeout[PFTM_MAX]; - u_int32_t states; + u_int32_t states_cur; + u_int32_t states_tot; u_int32_t max_states; u_int32_t src_nodes; u_int32_t max_src_nodes; @@ -590,12 +592,18 @@ struct pf_rule { u_int8_t rt; u_int8_t return_ttl; u_int8_t tos; + u_int8_t set_tos; u_int8_t anchor_relative; u_int8_t anchor_wildcard; #define PF_FLUSH 0x01 #define PF_FLUSH_GLOBAL 0x02 u_int8_t flush; + + struct { + struct pf_addr addr; + u_int16_t port; + } divert; }; /* rule flags */ @@ -614,9 +622,11 @@ struct pf_rule { #define PFRULE_FRAGDROP 0x0400 /* drop funny fragments */ #define PFRULE_RANDOMID 0x0800 #define PFRULE_REASSEMBLE_TCP 0x1000 +#define PFRULE_SET_TOS 0x2000 /* rule flags again */ #define PFRULE_IFBOUND 0x00010000 /* if-bound */ +#define PFRULE_STATESLOPPY 0x00020000 /* sloppy state tracking */ #define PFSTATE_HIWAT 10000 /* default state table size */ #define PFSTATE_ADAPT_START 6000 /* default adaptive timeout start */ @@ -674,63 +684,64 @@ struct pf_state_host { }; struct pf_state_peer { + struct pf_state_scrub *scrub; /* state is scrubbed */ u_int32_t seqlo; /* Max sequence 
number sent */ u_int32_t seqhi; /* Max the other end ACKd + win */ u_int32_t seqdiff; /* Sequence number modulator */ u_int16_t max_win; /* largest window (pre scaling) */ + u_int16_t mss; /* Maximum segment size option */ u_int8_t state; /* active state level */ u_int8_t wscale; /* window scaling factor */ - u_int16_t mss; /* Maximum segment size option */ u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */ - struct pf_state_scrub *scrub; /* state is scrubbed */ - u_int8_t pad[3]; + u_int8_t pad[1]; }; TAILQ_HEAD(pf_state_queue, pf_state); /* keep synced with struct pf_state_key, used in RB_FIND */ struct pf_state_key_cmp { - struct pf_state_host lan; - struct pf_state_host gwy; - struct pf_state_host ext; + struct pf_addr addr[2]; + u_int16_t port[2]; sa_family_t af; u_int8_t proto; - u_int8_t direction; - u_int8_t pad; + u_int8_t pad[2]; }; -TAILQ_HEAD(pf_statelist, pf_state); +struct pf_state_item { + TAILQ_ENTRY(pf_state_item) entry; + struct pf_state *s; +}; + +TAILQ_HEAD(pf_statelisthead, pf_state_item); struct pf_state_key { - struct pf_state_host lan; - struct pf_state_host gwy; - struct pf_state_host ext; + struct pf_addr addr[2]; + u_int16_t port[2]; sa_family_t af; u_int8_t proto; - u_int8_t direction; - u_int8_t pad; + u_int8_t pad[2]; - RB_ENTRY(pf_state_key) entry_lan_ext; - RB_ENTRY(pf_state_key) entry_ext_gwy; - struct pf_statelist states; - u_short refcnt; /* same size as if_index */ + RB_ENTRY(pf_state_key) entry; + struct pf_statelisthead states; + struct pf_state_key *reverse; + struct inpcb *inp; }; - /* keep synced with struct pf_state, used in RB_FIND */ struct pf_state_cmp { - u_int64_t id; - u_int32_t creatorid; - u_int32_t pad; + u_int64_t id; + u_int32_t creatorid; + u_int8_t direction; + u_int8_t pad[3]; }; struct pf_state { u_int64_t id; u_int32_t creatorid; - u_int32_t pad; + u_int8_t direction; + u_int8_t pad[3]; TAILQ_ENTRY(pf_state) entry_list; - TAILQ_ENTRY(pf_state) next; RB_ENTRY(pf_state) entry_id; struct pf_state_peer src; 
struct pf_state_peer dst; @@ -738,7 +749,7 @@ struct pf_state { union pf_rule_ptr anchor; union pf_rule_ptr nat_rule; struct pf_addr rt_addr; - struct pf_state_key *state_key; + struct pf_state_key *key[2]; /* addresses stack and wire */ struct pfi_kif *kif; struct pfi_kif *rt_kif; struct pf_src_node *src_node; @@ -746,11 +757,13 @@ struct pf_state { u_int64_t packets[2]; u_int64_t bytes[2]; u_int32_t creation; - u_int32_t expire; + u_int32_t expire; u_int32_t pfsync_time; u_int16_t tag; u_int8_t log; - u_int8_t allow_opts; + u_int8_t state_flags; +#define PFSTATE_ALLOWOPTS 0x01 +#define PFSTATE_SLOPPY 0x02 u_int8_t timeout; u_int8_t sync_flags; #define PFSTATE_NOSYNC 0x01 @@ -765,17 +778,11 @@ struct pf_state { struct pfsync_state_scrub { u_int16_t pfss_flags; u_int8_t pfss_ttl; /* stashed TTL */ -#define PFSYNC_SCRUB_FLAG_VALID 0x01 +#define PFSYNC_SCRUB_FLAG_VALID 0x01 u_int8_t scrub_flag; u_int32_t pfss_ts_mod; /* timestamp modulation */ } __packed; -struct pfsync_state_host { - struct pf_addr addr; - u_int16_t port; - u_int16_t pad[3]; -} __packed; - struct pfsync_state_peer { struct pfsync_state_scrub scrub; /* state is scrubbed */ u_int32_t seqlo; /* Max sequence number sent */ @@ -788,12 +795,15 @@ struct pfsync_state_peer { u_int8_t pad[6]; } __packed; +struct pfsync_state_key { + struct pf_addr addr[2]; + u_int16_t port[2]; +}; + struct pfsync_state { u_int32_t id[2]; char ifname[IFNAMSIZ]; - struct pfsync_state_host lan; - struct pfsync_state_host gwy; - struct pfsync_state_host ext; + struct pfsync_state_key key[2]; struct pfsync_state_peer src; struct pfsync_state_peer dst; struct pf_addr rt_addr; @@ -809,40 +819,41 @@ struct pfsync_state { u_int8_t proto; u_int8_t direction; u_int8_t log; - u_int8_t allow_opts; + u_int8_t state_flags; u_int8_t timeout; u_int8_t sync_flags; u_int8_t updates; } __packed; -#define PFSYNC_FLAG_COMPRESS 0x01 +#define PFSYNC_FLAG_COMPRESS 0x01 #define PFSYNC_FLAG_STALE 0x02 #define PFSYNC_FLAG_SRCNODE 0x04 #define 
PFSYNC_FLAG_NATSRCNODE 0x08 -/* for copies to/from userland via pf_ioctl() */ -#define pf_state_peer_to_pfsync(s,d) do { \ - (d)->seqlo = (s)->seqlo; \ - (d)->seqhi = (s)->seqhi; \ - (d)->seqdiff = (s)->seqdiff; \ - (d)->max_win = (s)->max_win; \ - (d)->mss = (s)->mss; \ +/* for copies to/from network byte order */ +/* ioctl interface also uses network byte order */ +#define pf_state_peer_hton(s,d) do { \ + (d)->seqlo = htonl((s)->seqlo); \ + (d)->seqhi = htonl((s)->seqhi); \ + (d)->seqdiff = htonl((s)->seqdiff); \ + (d)->max_win = htons((s)->max_win); \ + (d)->mss = htons((s)->mss); \ (d)->state = (s)->state; \ (d)->wscale = (s)->wscale; \ if ((s)->scrub) { \ (d)->scrub.pfss_flags = \ - (s)->scrub->pfss_flags & PFSS_TIMESTAMP; \ + htons((s)->scrub->pfss_flags & PFSS_TIMESTAMP); \ (d)->scrub.pfss_ttl = (s)->scrub->pfss_ttl; \ - (d)->scrub.pfss_ts_mod = (s)->scrub->pfss_ts_mod; \ + (d)->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);\ (d)->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; \ } \ } while (0) -#define pf_state_peer_from_pfsync(s,d) do { \ - (d)->seqlo = (s)->seqlo; \ - (d)->seqhi = (s)->seqhi; \ - (d)->seqdiff = (s)->seqdiff; \ - (d)->max_win = (s)->max_win; \ +#define pf_state_peer_ntoh(s,d) do { \ + (d)->seqlo = ntohl((s)->seqlo); \ + (d)->seqhi = ntohl((s)->seqhi); \ + (d)->seqdiff = ntohl((s)->seqdiff); \ + (d)->max_win = ntohs((s)->max_win); \ (d)->mss = ntohs((s)->mss); \ (d)->state = (s)->state; \ (d)->wscale = (s)->wscale; \ @@ -851,19 +862,23 @@ struct pfsync_state { (d)->scrub->pfss_flags = \ ntohs((s)->scrub.pfss_flags) & PFSS_TIMESTAMP; \ (d)->scrub->pfss_ttl = (s)->scrub.pfss_ttl; \ - (d)->scrub->pfss_ts_mod = (s)->scrub.pfss_ts_mod; \ + (d)->scrub->pfss_ts_mod = ntohl((s)->scrub.pfss_ts_mod);\ } \ } while (0) -#define pf_state_counter_to_pfsync(s,d) do { \ - d[0] = (s>>32)&0xffffffff; \ - d[1] = s&0xffffffff; \ +#define pf_state_counter_hton(s,d) do { \ + d[0] = htonl((s>>32)&0xffffffff); \ + d[1] = htonl(s&0xffffffff); \ } while (0) 
-#define pf_state_counter_from_pfsync(s) \ +#define pf_state_counter_from_pfsync(s) \ (((u_int64_t)(s[0])<<32) | (u_int64_t)(s[1])) - +#define pf_state_counter_ntoh(s,d) do { \ + d = ntohl(s[0]); \ + d = d<<32; \ + d += ntohl(s[1]); \ +} while (0) TAILQ_HEAD(pf_rulequeue, pf_rule); @@ -910,9 +925,11 @@ RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare); #define PFR_TFLAG_INACTIVE 0x00000008 #define PFR_TFLAG_REFERENCED 0x00000010 #define PFR_TFLAG_REFDANCHOR 0x00000020 -#define PFR_TFLAG_USRMASK 0x00000003 +#define PFR_TFLAG_COUNTERS 0x00000040 +/* Adjust masks below when adding flags. */ +#define PFR_TFLAG_USRMASK 0x00000043 #define PFR_TFLAG_SETMASK 0x0000003C -#define PFR_TFLAG_ALLMASK 0x0000003F +#define PFR_TFLAG_ALLMASK 0x0000007F struct pfr_table { char pfrt_anchor[MAXPATHLEN]; @@ -923,7 +940,7 @@ struct pfr_table { enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED, PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE, - PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_MAX }; + PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_NOCOUNT, PFR_FB_MAX }; struct pfr_addr { union { @@ -964,13 +981,23 @@ struct pfr_tstats { #define pfrts_name pfrts_t.pfrt_name #define pfrts_flags pfrts_t.pfrt_flags +struct pfr_kcounters { + u_int64_t pfrkc_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; + u_int64_t pfrkc_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; +}; + SLIST_HEAD(pfr_kentryworkq, pfr_kentry); struct pfr_kentry { struct radix_node pfrke_node[2]; union sockaddr_union pfrke_sa; - u_int64_t pfrke_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; - u_int64_t pfrke_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; SLIST_ENTRY(pfr_kentry) pfrke_workq; + union { + + struct pfr_kcounters *pfrke_counters; +#if 0 + struct pfr_kroute *pfrke_route; +#endif + } u; long pfrke_tzero; u_int8_t pfrke_af; u_int8_t pfrke_net; @@ -978,6 +1005,9 @@ struct pfr_kentry { u_int8_t pfrke_mark; u_int8_t pfrke_intrpool; }; +#define pfrke_counters u.pfrke_counters +#define pfrke_route u.pfrke_route + 
SLIST_HEAD(pfr_ktableworkq, pfr_ktable); RB_HEAD(pfr_ktablehead, pfr_ktable); @@ -1006,9 +1036,8 @@ struct pfr_ktable { #define pfrkt_nomatch pfrkt_ts.pfrts_nomatch #define pfrkt_tzero pfrkt_ts.pfrts_tzero -RB_HEAD(pf_state_tree_lan_ext, pf_state_key); -RB_PROTOTYPE(pf_state_tree_lan_ext, pf_state_key, - entry_lan_ext, pf_state_compare_lan_ext); +RB_HEAD(pf_state_tree, pf_state_key); +RB_PROTOTYPE(pf_state_tree, pf_state_key, entry, pf_state_compare_key); RB_HEAD(pf_state_tree_ext_gwy, pf_state_key); RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state_key, @@ -1017,8 +1046,7 @@ RB_PROTOTYPE(pf_state_tree_ext_gwy, pf_state_key, RB_HEAD(pfi_ifhead, pfi_kif); /* state tables */ -extern struct pf_state_tree_lan_ext pf_statetbl_lan_ext; -extern struct pf_state_tree_ext_gwy pf_statetbl_ext_gwy; +extern struct pf_state_tree pf_statetbl; /* keep synced with pfi_kif, used in RB_FIND */ struct pfi_kif_cmp { @@ -1065,15 +1093,19 @@ struct pf_pdesc { #endif /* INET6 */ void *any; } hdr; - struct pf_addr baddr; /* address before translation */ - struct pf_addr naddr; /* address after translation */ + struct pf_rule *nat_rule; /* nat/rdr rule applied to packet */ - struct pf_addr *src; - struct pf_addr *dst; struct ether_header *eh; - u_int16_t *ip_sum; + struct pf_addr *src; /* src address */ + struct pf_addr *dst; /* dst address */ + u_int16_t *sport; + u_int16_t *dport; + u_int32_t p_len; /* total length of payload */ + + u_int16_t *ip_sum; + u_int16_t *proto_sum; u_int16_t flags; /* Let SCRUB trigger behavior in * state code. 
Easier than tags */ #define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */ @@ -1081,6 +1113,9 @@ struct pf_pdesc { sa_family_t af; u_int8_t proto; u_int8_t tos; + u_int8_t dir; /* direction */ + u_int8_t sidx; /* key index for source */ + u_int8_t didx; /* key index for destination */ }; /* flags for RDR options */ @@ -1282,6 +1317,14 @@ struct pf_tagname { int ref; }; +struct pf_divert { + union { + struct in_addr ipv4; + struct in6_addr ipv6; + } addr; + u_int16_t port; +}; + #define PFFRAG_FRENT_HIWAT 5000 /* Number of fragment entries */ #define PFFRAG_FRAG_HIWAT 1000 /* Number of fragmented packets */ #define PFFRAG_FRCENT_HIWAT 50000 /* Number of fragment cache entries */ @@ -1336,19 +1379,21 @@ struct pfioc_state { }; struct pfioc_src_node_kill { - /* XXX returns the number of src nodes killed in psnk_af */ sa_family_t psnk_af; struct pf_rule_addr psnk_src; struct pf_rule_addr psnk_dst; + u_int psnk_killed; }; struct pfioc_state_kill { - /* XXX returns the number of states killed in psk_af */ + struct pf_state_cmp psk_pfcmp; sa_family_t psk_af; int psk_proto; struct pf_rule_addr psk_src; struct pf_rule_addr psk_dst; char psk_ifname[IFNAMSIZ]; + char psk_label[PF_RULE_LABEL_SIZE]; + u_int psk_killed; }; struct pfioc_states { @@ -1562,15 +1607,17 @@ extern void pf_tbladdr_remove(struct pf_addr_wrap *); extern void pf_tbladdr_copyout(struct pf_addr_wrap *); extern void pf_calc_skip_steps(struct pf_rulequeue *); extern struct pool pf_src_tree_pl, pf_rule_pl; -extern struct pool pf_state_pl, pf_state_key_pl, pf_altq_pl, - pf_pooladdr_pl; +extern struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl, + pf_altq_pl, pf_pooladdr_pl; extern struct pool pf_state_scrub_pl; extern void pf_purge_thread(void *); extern void pf_purge_expired_src_nodes(int); extern void pf_purge_expired_states(u_int32_t); extern void pf_unlink_state(struct pf_state *); extern void pf_free_state(struct pf_state *); -extern int pf_insert_state(struct pfi_kif *, +extern int 
pf_state_insert(struct pfi_kif *, + struct pf_state_key *, + struct pf_state_key *, struct pf_state *); extern int pf_insert_src_node(struct pf_src_node **, struct pf_rule *, struct pf_addr *, @@ -1590,6 +1637,7 @@ extern void pf_addrcpy(struct pf_addr *, struct pf_addr *, u_int8_t); void pf_rm_rule(struct pf_rulequeue *, struct pf_rule *); +struct pf_divert *pf_find_divert(struct mbuf *); #ifdef INET int pf_test(int, struct ifnet *, struct mbuf **, struct ether_header *); @@ -1636,8 +1684,8 @@ void pf_purge_expired_fragments(void); int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *); int pf_rtlabel_match(struct pf_addr *, sa_family_t, struct pf_addr_wrap *); int pf_socket_lookup(int, struct pf_pdesc *); -struct pf_state_key * - pf_alloc_state_key(struct pf_state *); +struct pf_state_key *pf_alloc_state_key(int); +int pf_state_key_attach(struct pf_state_key *, struct pf_state *, int); void pfr_initialize(void); int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, @@ -1697,6 +1745,7 @@ int pfi_get_ifaces(const char *, struct pfi_kif *, int *); int pfi_set_flags(const char *, int); int pfi_clear_flags(const char *, int); +int pf_match_tag(struct mbuf *, struct pf_rule *, int *); u_int16_t pf_tagname2tag(char *); void pf_tag2tagname(u_int16_t, char *); void pf_tag_ref(u_int16_t); |