aboutsummaryrefslogtreecommitdiff
path: root/sys/netinet
diff options
context:
space:
mode:
author Gleb Smirnoff <glebius@FreeBSD.org> 2011-12-20 13:53:31 +0000
committer Gleb Smirnoff <glebius@FreeBSD.org> 2011-12-20 13:53:31 +0000
commit f08535f8727db100757115023ae19d0bc1eac194 (patch)
tree 5763c925081c47070f96599b2dbbbeee072455c2 /sys/netinet
parent 73889c808a4d1867f75a87d7e8d78e66120356e8 (diff)
Restore a feature that was present in 5.x and 6.x, and was cleared in
7.x, 8.x and 9.x with pf(4) imports: pfsync(4) should suppress CARP
preemption while it is running its bulk update.

However, reimplement the feature in a more elegant manner, partially
inspired by newer OpenBSD:

- Rename the term "suppression" to "demotion", to match OpenBSD.
- Keep a global demotion factor that can be raised by several
  conditions; for now these are:
  - interface goes down
  - carp(4) has problems with ip_output() or ip6_output()
  - pfsync performs bulk update
- Unlike in OpenBSD, the demotion factor isn't a counter, but is the
  actual value added to advskew. The adjustment values for particular
  error conditions are also configurable, and their defaults are the
  maximum advskew value, so a single failure bumps demotion to the
  maximum. This is for POLA compatibility, and should satisfy most users.
- The demotion factor is a writable sysctl, so the user can do
  foot shooting, if he desires to.
Notes
Notes: svn path=/head/; revision=228736
Diffstat (limited to 'sys/netinet')
-rw-r--r-- sys/netinet/ip_carp.c 126
-rw-r--r-- sys/netinet/ip_carp.h 19
2 files changed, 72 insertions, 73 deletions
diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c
index 2875537228d5..a5e0cb7882d4 100644
--- a/sys/netinet/ip_carp.c
+++ b/sys/netinet/ip_carp.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
+#include <sys/taskqueue.h>
#include <net/ethernet.h>
#include <net/fddi.h>
@@ -185,22 +186,30 @@ static int proto_reg[] = {-1, -1};
* dereferencing our function pointers.
*/
-int carp_suppress_preempt = 0;
-int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, };
+static int carp_allow = 1; /* Accept incoming CARP packets. */
+static int carp_preempt = 0; /* Preempt slower nodes. */
+static int carp_log = 1; /* Log level. */
+static int carp_demotion = 0; /* Global advskew demotion. */
+static int carp_senderr_adj = CARP_MAXSKEW; /* Send error demotion factor */
+static int carp_ifdown_adj = CARP_MAXSKEW; /* Iface down demotion factor */
+
SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP");
-SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
- &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
-SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
- &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
-SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
- &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
-SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
- &carp_suppress_preempt, 0, "Preemption is suppressed");
-
-struct carpstats carpstats;
-SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
- &carpstats, carpstats,
- "CARP statistics (struct carpstats, netinet/ip_carp.h)");
+SYSCTL_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW, &carp_allow, 0,
+ "Accept incoming CARP packets");
+SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW, &carp_preempt, 0,
+ "High-priority backup preemption mode");
+SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW, &carp_log, 0,
+ "CARP log level");
+SYSCTL_INT(_net_inet_carp, OID_AUTO, demotion, CTLFLAG_RW, &carp_demotion, 0,
+ "Demotion factor (skew of advskew)");
+SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW,
+ &carp_senderr_adj, 0, "Send error demotion factor adjustment");
+SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW,
+ &carp_ifdown_adj, 0, "Interface down demotion factor adjustment");
+
+static struct carpstats carpstats;
+SYSCTL_STRUCT(_net_inet_carp, OID_AUTO, stats, CTLFLAG_RW, &carpstats,
+ carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)");
#define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \
NULL, MTX_DEF)
@@ -216,12 +225,12 @@ SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
#define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx)
#define CARP_LOG(...) do { \
- if (carp_opts[CARPCTL_LOG] > 0) \
+ if (carp_log > 0) \
log(LOG_INFO, "carp: " __VA_ARGS__); \
} while (0)
#define CARP_DEBUG(...) do { \
- if (carp_opts[CARPCTL_LOG] > 1) \
+ if (carp_log > 1) \
log(LOG_DEBUG, __VA_ARGS__); \
} while (0)
@@ -241,6 +250,10 @@ SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
CIF_LOCK_ASSERT(ifp->if_carp); \
TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)
+#define DEMOTE_ADVSKEW(sc) \
+ (((sc)->sc_advskew + carp_demotion > CARP_MAXSKEW) ? \
+ CARP_MAXSKEW : ((sc)->sc_advskew + carp_demotion))
+
static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
static struct carp_softc
*carp_alloc(struct ifnet *);
@@ -257,9 +270,13 @@ static void carp_send_ad(void *);
static void carp_send_ad_locked(struct carp_softc *);
static void carp_addroute(struct carp_softc *);
static void carp_delroute(struct carp_softc *);
+static void carp_send_ad_all(void *, int);
+static void carp_demote_adj(int, char *);
static LIST_HEAD(, carp_softc) carp_list;
static struct mtx carp_mtx;
+static struct task carp_sendall_task =
+ TASK_INITIALIZER(0, carp_send_ad_all, NULL);
static __inline uint16_t
carp_cksum(struct mbuf *m, int len)
@@ -390,7 +407,7 @@ carp_input(struct mbuf *m, int hlen)
CARPSTATS_INC(carps_ipackets);
- if (!carp_opts[CARPCTL_ALLOW]) {
+ if (!carp_allow) {
m_freem(m);
return;
}
@@ -473,7 +490,7 @@ carp6_input(struct mbuf **mp, int *offp, int proto)
CARPSTATS_INC(carps_ipackets6);
- if (!carp_opts[CARPCTL_ALLOW]) {
+ if (!carp_allow) {
m_freem(m);
return (IPPROTO_DONE);
}
@@ -578,10 +595,7 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
sc->sc_counter = tmp_counter;
sc_tv.tv_sec = sc->sc_advbase;
- if (carp_suppress_preempt && sc->sc_advskew < 240)
- sc_tv.tv_usec = 240 * 1000000 / 256;
- else
- sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+ sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
ch_tv.tv_sec = ch->carp_advbase;
ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
@@ -610,8 +624,7 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
* If we're pre-empting masters who advertise slower than us,
* and this one claims to be slower, treat him as down.
*/
- if (carp_opts[CARPCTL_PREEMPT] &&
- timevalcmp(&sc_tv, &ch_tv, <)) {
+ if (carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
CARP_LOG("VHID %u@%s: BACKUP -> MASTER "
"(preempting a slower master)\n",
sc->sc_vhid,
@@ -679,26 +692,23 @@ carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
return (0);
}
+/*
+ * To avoid LORs and possible recursions this function shouldn't
+ * be called directly, but scheduled via taskqueue.
+ */
static void
-carp_send_ad_all(struct carp_softc *badsc)
+carp_send_ad_all(void *ctx __unused, int pending __unused)
{
struct carp_softc *sc;
- /*
- * Avoid LOR and recursive call to carp_send_ad_locked().
- */
- CARP_UNLOCK(badsc);
-
mtx_lock(&carp_mtx);
LIST_FOREACH(sc, &carp_list, sc_next)
- if (sc != badsc && sc->sc_state == MASTER) {
+ if (sc->sc_state == MASTER) {
CARP_LOCK(sc);
carp_send_ad_locked(sc);
CARP_UNLOCK(sc);
}
mtx_unlock(&carp_mtx);
-
- CARP_LOCK(badsc);
}
static void
@@ -724,10 +734,7 @@ carp_send_ad_locked(struct carp_softc *sc)
CARP_LOCK_ASSERT(sc);
- if (!carp_suppress_preempt || sc->sc_advskew > 240)
- advskew = sc->sc_advskew;
- else
- advskew = 240;
+ advskew = DEMOTE_ADVSKEW(sc);
tv.tv_sec = sc->sc_advbase;
tv.tv_usec = advskew * 1000000 / 256;
@@ -797,17 +804,15 @@ carp_send_ad_locked(struct carp_softc *sc)
&sc->sc_carpdev->if_carp->cif_imo, NULL)) {
if (sc->sc_sendad_errors < INT_MAX)
sc->sc_sendad_errors++;
- if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
- carp_suppress_preempt++;
- if (carp_suppress_preempt == 1)
- carp_send_ad_all(sc);
- }
+ if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS)
+ carp_demote_adj(carp_senderr_adj, "send error");
sc->sc_sendad_success = 0;
} else {
if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
if (++sc->sc_sendad_success >=
CARP_SENDAD_MIN_SUCCESS) {
- carp_suppress_preempt--;
+ carp_demote_adj(-carp_senderr_adj,
+ "send ok");
sc->sc_sendad_errors = 0;
}
} else
@@ -875,17 +880,16 @@ carp_send_ad_locked(struct carp_softc *sc)
&sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)) {
if (sc->sc_sendad_errors < INT_MAX)
sc->sc_sendad_errors++;
- if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
- carp_suppress_preempt++;
- if (carp_suppress_preempt == 1)
- carp_send_ad_all(sc);
- }
+ if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS)
+ carp_demote_adj(carp_senderr_adj,
+ "send6 error");
sc->sc_sendad_success = 0;
} else {
if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
if (++sc->sc_sendad_success >=
CARP_SENDAD_MIN_SUCCESS) {
- carp_suppress_preempt--;
+ carp_demote_adj(-carp_senderr_adj,
+ "send6 ok");
sc->sc_sendad_errors = 0;
}
} else
@@ -1479,6 +1483,8 @@ carp_destroy(struct carp_softc *sc)
mtx_unlock(&carp_mtx);
CARP_LOCK(sc);
+ if (sc->sc_suppress)
+ carp_demote_adj(-carp_ifdown_adj, "vhid removed");
callout_drain(&sc->sc_ad_tmo);
#ifdef INET
callout_drain(&sc->sc_md_tmo);
@@ -1914,21 +1920,25 @@ carp_sc_state(struct carp_softc *sc)
#endif
carp_set_state(sc, INIT);
carp_setrun(sc, 0);
- if (!sc->sc_suppress) {
- carp_suppress_preempt++;
- if (carp_suppress_preempt == 1)
- carp_send_ad_all(sc);
- }
+ if (!sc->sc_suppress)
+ carp_demote_adj(carp_ifdown_adj, "interface down");
sc->sc_suppress = 1;
} else {
carp_set_state(sc, INIT);
carp_setrun(sc, 0);
if (sc->sc_suppress)
- carp_suppress_preempt--;
+ carp_demote_adj(-carp_ifdown_adj, "interface up");
sc->sc_suppress = 0;
}
}
+static void
+carp_demote_adj(int adj, char *reason)
+{
+ carp_demotion += adj;
+ CARP_LOG("demoted by %d to %d (%s)\n", adj, carp_demotion, reason);
+ taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
+}
#ifdef INET
extern struct domain inetdomain;
@@ -1986,6 +1996,9 @@ carp_mod_cleanup(void)
carp_linkstate_p = NULL;
carp_forus_p = NULL;
carp_output_p = NULL;
+ carp_demote_adj_p = NULL;
+ mtx_unlock(&carp_mtx);
+ taskqueue_drain(taskqueue_swi, &carp_sendall_task);
mtx_destroy(&carp_mtx);
}
@@ -2003,6 +2016,7 @@ carp_mod_load(void)
carp_ioctl_p = carp_ioctl;
carp_attach_p = carp_attach;
carp_detach_p = carp_detach;
+ carp_demote_adj_p = carp_demote_adj;
#ifdef INET6
carp_iamatch6_p = carp_iamatch6;
carp_macmatch6_p = carp_macmatch6;
diff --git a/sys/netinet/ip_carp.h b/sys/netinet/ip_carp.h
index d8b82a8d00fc..7be91c015e6e 100644
--- a/sys/netinet/ip_carp.h
+++ b/sys/netinet/ip_carp.h
@@ -133,29 +133,13 @@ struct carpreq {
#define CARP_STATES "INIT", "BACKUP", "MASTER"
#define CARP_MAXSTATE 2
int carpr_advskew;
+#define CARP_MAXSKEW 240
int carpr_advbase;
unsigned char carpr_key[CARP_KEY_LEN];
};
#define SIOCSVH _IOWR('i', 245, struct ifreq)
#define SIOCGVH _IOWR('i', 246, struct ifreq)
-/*
- * Names for CARP sysctl objects
- */
-#define CARPCTL_ALLOW 1 /* accept incoming CARP packets */
-#define CARPCTL_PREEMPT 2 /* high-pri backup preemption mode */
-#define CARPCTL_LOG 3 /* log bad packets */
-#define CARPCTL_STATS 4 /* statistics (read-only) */
-#define CARPCTL_MAXID 5
-
-#define CARPCTL_NAMES { \
- { 0, 0 }, \
- { "allow", CTLTYPE_INT }, \
- { "preempt", CTLTYPE_INT }, \
- { "log", CTLTYPE_INT }, \
- { "stats", CTLTYPE_STRUCT }, \
-}
-
#ifdef _KERNEL
int carp_ioctl(struct ifreq *, u_long, struct thread *);
int carp_attach(struct ifaddr *, int);
@@ -175,6 +159,7 @@ extern int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
extern int (*carp_attach_p)(struct ifaddr *, int);
extern void (*carp_detach_p)(struct ifaddr *);
extern void (*carp_linkstate_p)(struct ifnet *);
+extern void (*carp_demote_adj_p)(int, char *);
/* net/if_bridge.c net/if_ethersubr.c */
extern int (*carp_forus_p)(struct ifnet *, u_char *);
/* net/if_ethersubr.c */