diff options
Diffstat (limited to 'lib/kadm5/ipropd_slave.c')
-rw-r--r-- | lib/kadm5/ipropd_slave.c | 725 |
1 files changed, 519 insertions, 206 deletions
diff --git a/lib/kadm5/ipropd_slave.c b/lib/kadm5/ipropd_slave.c index 38e9a7b7108c..868dc35cbb28 100644 --- a/lib/kadm5/ipropd_slave.c +++ b/lib/kadm5/ipropd_slave.c @@ -37,6 +37,8 @@ RCSID("$Id$"); static const char *config_name = "ipropd-slave"; +static int verbose; + static krb5_log_facility *log_facility; static char five_min[] = "5 min"; static char *server_time_lost = five_min; @@ -51,9 +53,10 @@ connect_to_master (krb5_context context, const char *master, struct addrinfo *ai, *a; struct addrinfo hints; int error; + int one = 1; int s = -1; - memset (&hints, 0, sizeof(hints)); + memset(&hints, 0, sizeof(hints)); hints.ai_socktype = SOCK_STREAM; if (port_str == NULL) { @@ -61,7 +64,7 @@ connect_to_master (krb5_context context, const char *master, port_str = port; } - error = getaddrinfo (master, port_str, &hints, &ai); + error = getaddrinfo(master, port_str, &hints, &ai); if (error) { krb5_warnx(context, "Failed to get address of to %s: %s", master, gai_strerror(error)); @@ -75,24 +78,27 @@ connect_to_master (krb5_context context, const char *master, if (error) strlcpy(node, "[unknown-addr]", sizeof(node)); - s = socket (a->ai_family, a->ai_socktype, a->ai_protocol); + s = socket(a->ai_family, a->ai_socktype, a->ai_protocol); if (s < 0) continue; - if (connect (s, a->ai_addr, a->ai_addrlen) < 0) { + if (connect(s, a->ai_addr, a->ai_addrlen) < 0) { krb5_warn(context, errno, "connection failed to %s[%s]", master, node); - close (s); + close(s); continue; } krb5_warnx(context, "connection successful " "to master: %s[%s]", master, node); break; } - freeaddrinfo (ai); + freeaddrinfo(ai); if (a == NULL) return -1; + if (setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)) < 0) + krb5_warn(context, errno, "setsockopt(SO_KEEPALIVE) failed"); + return s; } @@ -107,6 +113,7 @@ get_creds(krb5_context context, const char *keytab_str, krb5_creds creds; char *server; char keytab_buf[256]; + int aret; if (keytab_str == NULL) { ret = krb5_kt_default_name (context, keytab_buf, sizeof(keytab_buf)); @@ -127,8 +134,8 @@ get_creds(krb5_context context, const char *keytab_str, ret = krb5_get_init_creds_opt_alloc(context, &init_opts); if (ret) krb5_err(context, 1, ret, "krb5_get_init_creds_opt_alloc"); - asprintf (&server, "%s/%s", IPROP_NAME, serverhost); - if (server == NULL) + aret = asprintf (&server, "%s/%s", IPROP_NAME, serverhost); + if (aret == -1 || server == NULL) krb5_errx (context, 1, "malloc: no memory"); ret = krb5_get_init_creds_keytab(context, &creds, client, keytab, @@ -143,7 +150,7 @@ get_creds(krb5_context context, const char *keytab_str, ret = krb5_cc_new_unique(context, krb5_cc_type_memory, NULL, cache); if(ret) krb5_err(context, 1, ret, "krb5_cc_new_unique"); - ret = krb5_cc_initialize(context, *cache, client); + ret = krb5_cc_initialize(context, *cache, creds.client); if(ret) krb5_err(context, 1, ret, "krb5_cc_initialize"); ret = krb5_cc_store_cred(context, *cache, &creds); @@ -154,200 +161,331 @@ get_creds(krb5_context context, const char *keytab_str, } static krb5_error_code -ihave (krb5_context context, krb5_auth_context auth_context, - int fd, uint32_t version) +ihave(krb5_context context, krb5_auth_context auth_context, + int fd, uint32_t version) { int ret; u_char buf[8]; krb5_storage *sp; krb5_data data; - sp = krb5_storage_from_mem (buf, 8); - krb5_store_int32 (sp, I_HAVE); - krb5_store_int32 (sp, version); - krb5_storage_free (sp); + sp = krb5_storage_from_mem(buf, 8); + ret = krb5_store_uint32(sp, I_HAVE); + if (ret == 0) + ret = krb5_store_uint32(sp, version); + krb5_storage_free(sp); data.length = 8; data.data = buf; - ret = krb5_write_priv_message(context, auth_context, &fd, &data); - if (ret) - krb5_warn (context, ret, "krb5_write_message"); + if (ret == 0) { + if (verbose) + krb5_warnx(context, "telling master we are at %u", version); + + ret = krb5_write_priv_message(context, auth_context, &fd, &data); + if (ret) + krb5_warn(context, ret, "krb5_write_message"); + } return ret; } -static void +#ifndef EDQUOT +/* There's no EDQUOT on WIN32, for example */ +#define EDQUOT ENOSPC +#endif + +static int +append_to_log_file(krb5_context context, + kadm5_server_context *server_context, + krb5_storage *sp, off_t start, ssize_t slen) +{ + size_t len; + ssize_t sret; + off_t log_off; + int ret, ret2; + void *buf; + + if (verbose) + krb5_warnx(context, "appending diffs to log"); + + if (slen == 0) + return 0; + if (slen < 0) + return EINVAL; + len = slen; + if (len != slen) + return EOVERFLOW; + + buf = malloc(len); + if (buf == NULL && len != 0) { + krb5_warn(context, errno, "malloc: no memory"); + return ENOMEM; + } + + if (krb5_storage_seek(sp, start, SEEK_SET) != start) { + krb5_errx(context, IPROPD_RESTART, + "krb5_storage_seek() failed"); /* can't happen */ + } + sret = krb5_storage_read(sp, buf, len); + if (sret < 0) + return errno; + if (len != (size_t)sret) { + /* Can't happen */ + krb5_errx(context, IPROPD_RESTART, + "short krb5_storage_read() from memory buffer"); + } + log_off = lseek(server_context->log_context.log_fd, 0, SEEK_CUR); + if (log_off == -1) + return errno; + + /* + * Use net_write() so we get an errno if less that len bytes were + * written. + */ + sret = net_write(server_context->log_context.log_fd, buf, len); + free(buf); + if (sret != slen) + ret = errno; + else + ret = fsync(server_context->log_context.log_fd); + if (ret == 0) + return 0; + + /* + * Attempt to recover from this. First, truncate the log file + * and reset the fd offset. Failure to do this -> unlink the + * log file and re-create it. Since we're the slave, we ought to be + * able to recover from the log being unlinked... + */ + if (ftruncate(server_context->log_context.log_fd, log_off) == -1 || + lseek(server_context->log_context.log_fd, log_off, SEEK_SET) == -1) { + (void) kadm5_log_end(server_context); + if (unlink(server_context->log_context.log_file) == -1) { + krb5_err(context, IPROPD_FATAL, errno, + "Failed to recover from failure to write log " + "entries from master to disk"); + } + ret2 = kadm5_log_init(server_context); + if (ret2) { + krb5_err(context, IPROPD_RESTART_SLOW, ret2, + "Failed to initialize log to recover from " + "failure to write log entries from master to disk"); + } + } + if (ret == ENOSPC || ret == EDQUOT || ret == EFBIG) { + /* Unlink the file in these cases. */ + krb5_warn(context, IPROPD_RESTART_SLOW, + "Failed to write log entries from master to disk"); + (void) kadm5_log_end(server_context); + if (unlink(server_context->log_context.log_file) == -1) { + krb5_err(context, IPROPD_FATAL, errno, + "Failed to recover from failure to write log " + "entries from master to disk"); + } + ret2 = kadm5_log_init(server_context); + if (ret2) { + krb5_err(context, IPROPD_RESTART_SLOW, ret2, + "Failed to initialize log to recover from " + "failure to write log entries from master to disk"); + } + return ret; + } + /* + * All other errors we treat as fatal here. This includes, for + * example, EIO and EPIPE (sorry, can't log to pipes nor sockets). + */ + krb5_err(context, IPROPD_FATAL, ret, + "Failed to write log entries from master to disk"); +} + +static int receive_loop (krb5_context context, krb5_storage *sp, kadm5_server_context *server_context) { int ret; - off_t left, right; - void *buf; - int32_t vers, vers2; - ssize_t sret; + off_t left, right, off; + uint32_t len, vers; + + if (verbose) + krb5_warnx(context, "receiving diffs"); /* - * Seek to the current version of the local database. + * Seek to the first entry in the message from the master that is + * past the current version of the local database. */ do { - int32_t len, timestamp, tmp; - enum kadm_ops op; - - if(krb5_ret_int32 (sp, &vers) != 0) - return; - krb5_ret_int32 (sp, ×tamp); - krb5_ret_int32 (sp, &tmp); - op = tmp; - krb5_ret_int32 (sp, &len); - if ((uint32_t)vers <= server_context->log_context.version) - krb5_storage_seek(sp, len + 8, SEEK_CUR); - } while((uint32_t)vers <= server_context->log_context.version); + uint32_t timestamp; + uint32_t op; + + if ((ret = krb5_ret_uint32(sp, &vers)) == HEIM_ERR_EOF) { + krb5_warnx(context, "master sent no new iprop entries"); + return 0; + } + + /* + * TODO We could do more to validate the entries from the master + * here. And we could use/reuse more kadm5_log_*() code here. + * + * Alternatively we should trust that the master sent us exactly + * what we needed and just write this to the log file and let + * kadm5_log_recover() do the rest. + */ + if (ret || krb5_ret_uint32(sp, ×tamp) != 0 || + krb5_ret_uint32(sp, &op) != 0 || + krb5_ret_uint32(sp, &len) != 0) { + + /* + * This shouldn't happen. Reconnecting probably won't help + * if it does happen, but by reconnecting we get a chance to + * connect to a new master if a new one is configured. + */ + krb5_warnx(context, "iprop entries from master were truncated"); + return EINVAL; + } + if (vers > server_context->log_context.version) { + break; + } + off = krb5_storage_seek(sp, 0, SEEK_CUR); + if (krb5_storage_seek(sp, len + 8, SEEK_CUR) != off + len + 8) { + krb5_warnx(context, "iprop entries from master were truncated"); + return EINVAL; + } + if (verbose) { + krb5_warnx(context, "diff contains old log record version " + "%u %lld %u length %u", + vers, (long long)timestamp, op, len); + } + } while(vers <= server_context->log_context.version); /* - * Read up rest of the entires into the memory... + * Read the remaining entries into memory... */ - left = krb5_storage_seek (sp, -16, SEEK_CUR); - right = krb5_storage_seek (sp, 0, SEEK_END); - buf = malloc (right - left); - if (buf == NULL && (right - left) != 0) - krb5_errx (context, 1, "malloc: no memory"); + /* SEEK_CUR is a header into the first entry we care about */ + left = krb5_storage_seek(sp, -16, SEEK_CUR); + right = krb5_storage_seek(sp, 0, SEEK_END); + if (right - left < 24 + len) { + krb5_warnx(context, "iprop entries from master were truncated"); + return EINVAL; + } /* * ...and then write them out to the on-disk log. */ - krb5_storage_seek (sp, left, SEEK_SET); - krb5_storage_read (sp, buf, right - left); - sret = write (server_context->log_context.log_fd, buf, right-left); - if (sret != right - left) - krb5_err(context, 1, errno, "Failed to write log to disk"); - ret = fsync (server_context->log_context.log_fd); + + ret = append_to_log_file(context, server_context, sp, left, right - left); if (ret) - krb5_err(context, 1, errno, "Failed to sync log to disk"); - free (buf); + return ret; /* - * Go back to the startpoint and start to commit the entires to - * the database. + * Replay the new entries. */ - krb5_storage_seek (sp, left, SEEK_SET); - - for(;;) { - int32_t len, len2, timestamp, tmp; - off_t cur, cur2; - enum kadm_ops op; - - if(krb5_ret_int32 (sp, &vers) != 0) - break; - ret = krb5_ret_int32 (sp, ×tamp); - if (ret) krb5_errx(context, 1, "entry %ld: too short", (long)vers); - ret = krb5_ret_int32 (sp, &tmp); - if (ret) krb5_errx(context, 1, "entry %ld: too short", (long)vers); - op = tmp; - ret = krb5_ret_int32 (sp, &len); - if (ret) krb5_errx(context, 1, "entry %ld: too short", (long)vers); - if (len < 0) - krb5_errx(context, 1, "log is corrupted, " - "negative length of entry version %ld: %ld", - (long)vers, (long)len); - cur = krb5_storage_seek(sp, 0, SEEK_CUR); - - krb5_warnx (context, "replaying entry %d", (int)vers); - - ret = kadm5_log_replay (server_context, - op, vers, len, sp); - if (ret) { - const char *s = krb5_get_error_message(server_context->context, ret); - krb5_warnx (context, - "kadm5_log_replay: %ld. Lost entry entry, " - "Database out of sync ?: %s (%d)", - (long)vers, s ? s : "unknown error", ret); - krb5_free_error_message(context, s); - } - - { - /* - * Make sure the krb5_log_replay does the right thing wrt - * reading out data from the sp. - */ - cur2 = krb5_storage_seek(sp, 0, SEEK_CUR); - if (cur + len != cur2) - krb5_errx(context, 1, - "kadm5_log_reply version: %ld didn't read the whole entry", - (long)vers); - } - - if (krb5_ret_int32 (sp, &len2) != 0) - krb5_errx(context, 1, "entry %ld: postamble too short", (long)vers); - if(krb5_ret_int32 (sp, &vers2) != 0) - krb5_errx(context, 1, "entry %ld: postamble too short", (long)vers); + if (verbose) + krb5_warnx(context, "replaying entries from master"); + ret = kadm5_log_recover(server_context, kadm_recover_replay); + if (ret) { + krb5_warn(context, ret, "replay failed"); + return ret; + } - if (len != len2) - krb5_errx(context, 1, "entry %ld: len != len2", (long)vers); - if (vers != vers2) - krb5_errx(context, 1, "entry %ld: vers != vers2", (long)vers); + ret = kadm5_log_get_version(server_context, &vers); + if (ret) { + krb5_warn(context, ret, + "could not get log version after applying diffs!"); + return ret; } + if (verbose) + krb5_warnx(context, "slave at version %u", vers); - /* - * Update version - */ + if (vers != server_context->log_context.version) { + krb5_warnx(context, "slave's log_context version (%u) is " + "inconsistent with log's version (%u)", + server_context->log_context.version, vers); + } - server_context->log_context.version = vers; + return 0; } -static void -receive (krb5_context context, - krb5_storage *sp, - kadm5_server_context *server_context) +static int +receive(krb5_context context, + krb5_storage *sp, + kadm5_server_context *server_context) { - int ret; + krb5_error_code ret, ret2; ret = server_context->db->hdb_open(context, server_context->db, O_RDWR | O_CREAT, 0600); if (ret) - krb5_err (context, 1, ret, "db->open"); + krb5_err(context, IPROPD_RESTART_SLOW, ret, "db->open"); - receive_loop (context, sp, server_context); + ret2 = receive_loop(context, sp, server_context); + if (ret2) + krb5_warn(context, ret2, "receive from ipropd-master had errors"); - ret = server_context->db->hdb_close (context, server_context->db); + ret = server_context->db->hdb_close(context, server_context->db); if (ret) - krb5_err (context, 1, ret, "db->close"); + krb5_err(context, IPROPD_RESTART_SLOW, ret, "db->close"); + + return ret2; } static void -send_im_here (krb5_context context, int fd, - krb5_auth_context auth_context) +send_im_here(krb5_context context, int fd, + krb5_auth_context auth_context) { krb5_storage *sp; krb5_data data; - int ret; + krb5_error_code ret; - ret = krb5_data_alloc (&data, 4); + ret = krb5_data_alloc(&data, 4); if (ret) - krb5_err (context, 1, ret, "send_im_here"); + krb5_err(context, IPROPD_RESTART, ret, "send_im_here"); sp = krb5_storage_from_data (&data); if (sp == NULL) - krb5_errx (context, 1, "krb5_storage_from_data"); - krb5_store_int32(sp, I_AM_HERE); + krb5_errx(context, IPROPD_RESTART, "krb5_storage_from_data"); + ret = krb5_store_uint32(sp, I_AM_HERE); krb5_storage_free(sp); - ret = krb5_write_priv_message(context, auth_context, &fd, &data); - krb5_data_free(&data); + if (ret == 0) { + ret = krb5_write_priv_message(context, auth_context, &fd, &data); + krb5_data_free(&data); + + if (ret) + krb5_err(context, IPROPD_RESTART, ret, "krb5_write_priv_message"); + + if (verbose) + krb5_warnx(context, "pinged master"); + } + + return; +} +static void +reinit_log(krb5_context context, + kadm5_server_context *server_context, + uint32_t vno) +{ + krb5_error_code ret; + + if (verbose) + krb5_warnx(context, "truncating log on slave"); + + ret = kadm5_log_reinit(server_context, vno); if (ret) - krb5_err (context, 1, ret, "krb5_write_priv_message"); + krb5_err(context, IPROPD_RESTART_SLOW, ret, "kadm5_log_reinit"); } + static krb5_error_code -receive_everything (krb5_context context, int fd, - kadm5_server_context *server_context, - krb5_auth_context auth_context) +receive_everything(krb5_context context, int fd, + kadm5_server_context *server_context, + krb5_auth_context auth_context) { int ret; krb5_data data; - int32_t vno = 0; - int32_t opcode; + uint32_t vno = 0; + uint32_t opcode; krb5_storage *sp; char *dbname; @@ -355,36 +493,39 @@ receive_everything (krb5_context context, int fd, krb5_warnx(context, "receive complete database"); - asprintf(&dbname, "%s-NEW", server_context->db->hdb_name); + ret = asprintf(&dbname, "%s-NEW", server_context->db->hdb_name); + if (ret == -1) + krb5_err(context, IPROPD_RESTART, ENOMEM, "asprintf"); ret = hdb_create(context, &mydb, dbname); if(ret) - krb5_err(context,1, ret, "hdb_create"); + krb5_err(context, IPROPD_RESTART, ret, "hdb_create"); free(dbname); - ret = hdb_set_master_keyfile (context, - mydb, server_context->config.stash_file); + ret = hdb_set_master_keyfile(context, + mydb, server_context->config.stash_file); if(ret) - krb5_err(context,1, ret, "hdb_set_master_keyfile"); + krb5_err(context, IPROPD_RESTART, ret, "hdb_set_master_keyfile"); /* I really want to use O_EXCL here, but given that I can't easily clean up on error, I won't */ ret = mydb->hdb_open(context, mydb, O_RDWR | O_CREAT | O_TRUNC, 0600); if (ret) - krb5_err (context, 1, ret, "db->open"); + krb5_err(context, IPROPD_RESTART, ret, "db->open"); sp = NULL; + krb5_data_zero(&data); do { ret = krb5_read_priv_message(context, auth_context, &fd, &data); if (ret) { - krb5_warn (context, ret, "krb5_read_priv_message"); + krb5_warn(context, ret, "krb5_read_priv_message"); goto cleanup; } - sp = krb5_storage_from_data (&data); + sp = krb5_storage_from_data(&data); if (sp == NULL) - krb5_errx (context, 1, "krb5_storage_from_data"); - krb5_ret_int32 (sp, &opcode); + krb5_errx(context, IPROPD_RESTART, "krb5_storage_from_data"); + krb5_ret_uint32(sp, &opcode); if (opcode == ONE_PRINC) { krb5_data fake_data; hdb_entry_ex entry; @@ -396,69 +537,116 @@ receive_everything (krb5_context context, int fd, memset(&entry, 0, sizeof(entry)); - ret = hdb_value2entry (context, &fake_data, &entry.entry); + ret = hdb_value2entry(context, &fake_data, &entry.entry); if (ret) - krb5_err (context, 1, ret, "hdb_value2entry"); + krb5_err(context, IPROPD_RESTART, ret, "hdb_value2entry"); ret = mydb->hdb_store(server_context->context, mydb, 0, &entry); if (ret) - krb5_err (context, 1, ret, "hdb_store"); + krb5_err(context, IPROPD_RESTART_SLOW, ret, "hdb_store"); - hdb_free_entry (context, &entry); - krb5_data_free (&data); + hdb_free_entry(context, &entry); + krb5_data_free(&data); } else if (opcode == NOW_YOU_HAVE) ; else - krb5_errx (context, 1, "strange opcode %d", opcode); + krb5_errx(context, 1, "strange opcode %d", opcode); } while (opcode == ONE_PRINC); if (opcode != NOW_YOU_HAVE) - krb5_errx (context, 1, "receive_everything: strange %d", opcode); + krb5_errx(context, IPROPD_RESTART_SLOW, + "receive_everything: strange %d", opcode); - krb5_ret_int32 (sp, &vno); + krb5_ret_uint32(sp, &vno); krb5_storage_free(sp); - ret = kadm5_log_reinit (server_context); - if (ret) - krb5_err(context, 1, ret, "kadm5_log_reinit"); + reinit_log(context, server_context, vno); - ret = kadm5_log_set_version (server_context, vno - 1); + ret = mydb->hdb_close(context, mydb); if (ret) - krb5_err (context, 1, ret, "kadm5_log_set_version"); + krb5_err(context, IPROPD_RESTART_SLOW, ret, "db->close"); - ret = kadm5_log_nop (server_context); + ret = mydb->hdb_rename(context, mydb, server_context->db->hdb_name); if (ret) - krb5_err (context, 1, ret, "kadm5_log_nop"); + krb5_err(context, IPROPD_RESTART_SLOW, ret, "db->rename"); - ret = mydb->hdb_rename (context, mydb, server_context->db->hdb_name); - if (ret) - krb5_err (context, 1, ret, "db->rename"); + + return 0; cleanup: - krb5_data_free (&data); + krb5_data_free(&data); - ret = mydb->hdb_close (context, mydb); if (ret) - krb5_err (context, 1, ret, "db->close"); + krb5_err(context, IPROPD_RESTART_SLOW, ret, "db->close"); - ret = mydb->hdb_destroy (context, mydb); + ret = mydb->hdb_destroy(context, mydb); if (ret) - krb5_err (context, 1, ret, "db->destroy"); + krb5_err(context, IPROPD_RESTART, ret, "db->destroy"); krb5_warnx(context, "receive complete database, version %ld", (long)vno); return ret; } +static void +slave_status(krb5_context context, + const char *file, + const char *status, ...) + __attribute__ ((__format__ (__printf__, 3, 4))); + + +static void +slave_status(krb5_context context, + const char *file, + const char *fmt, ...) +{ + char *status; + char *fmt2; + va_list args; + int len; + + if (asprintf(&fmt2, "%s\n", fmt) == -1 || fmt2 == NULL) { + (void) unlink(file); + return; + } + va_start(args, fmt); + len = vasprintf(&status, fmt2, args); + free(fmt2); + va_end(args); + if (len < 0 || status == NULL) { + (void) unlink(file); + return; + } + krb5_warnx(context, "slave status change: %s", status); + + rk_dumpdata(file, status, len); + free(status); +} + +static void +is_up_to_date(krb5_context context, const char *file, + kadm5_server_context *server_context) +{ + krb5_error_code ret; + char buf[80]; + ret = krb5_format_time(context, time(NULL), buf, sizeof(buf), 1); + if (ret) { + unlink(file); + return; + } + slave_status(context, file, "up-to-date with version: %lu at %s", + (unsigned long)server_context->log_context.version, buf); +} + +static char *status_file; static char *config_file; static char *realm; static int version_flag; static int help_flag; static char *keytab_str; static char *port_str; -#ifdef SUPPORT_DETACH -static int detach_from_console = 0; -#endif +static int detach_from_console; +static int daemon_child = -1; static struct getargs args[] = { { "config-file", 'c', arg_string, &config_file, NULL, NULL }, @@ -467,14 +655,17 @@ static struct getargs args[] = { "keytab to get authentication from", "kspec" }, { "time-lost", 0, arg_string, &server_time_lost, "time before server is considered lost", "time" }, + { "status-file", 0, arg_string, &status_file, + "file to write out status into", "file" }, { "port", 0, arg_string, &port_str, "port ipropd-slave will connect to", "port"}, -#ifdef SUPPORT_DETACH { "detach", 0, arg_flag, &detach_from_console, "detach from console", NULL }, -#endif + { "daemon-child", 0 , arg_integer, &daemon_child, + "private argument, do not use", NULL }, { "hostname", 0, arg_string, rk_UNCONST(&slave_str), "hostname of slave (if not same as hostname)", "hostname" }, + { "verbose", 0, arg_flag, &verbose, NULL, NULL }, { "version", 0, arg_flag, &version_flag, NULL, NULL }, { "help", 0, arg_flag, &help_flag, NULL, NULL } }; @@ -491,7 +682,7 @@ usage(int status) int main(int argc, char **argv) { - krb5_error_code ret; + krb5_error_code ret, ret2; krb5_context context; krb5_auth_context auth_context; void *kadm_handle; @@ -507,21 +698,27 @@ main(int argc, char **argv) time_t reconnect_max; time_t reconnect; time_t before = 0; + int restarter_fd = -1; const char *master; setprogname(argv[0]); - if(getarg(args, num_args, argc, argv, &optidx)) + if (getarg(args, num_args, argc, argv, &optidx)) usage(1); - if(help_flag) + if (help_flag) usage(0); - if(version_flag) { + + if (version_flag) { print_version(NULL); exit(0); } + if (detach_from_console && daemon_child == -1) + roken_detach_prep(argc, argv, "--daemon-child"); + rk_pidfile(NULL); + ret = krb5_init_context(&context); if (ret) errx (1, "krb5_init_context failed: %d", ret); @@ -551,15 +748,17 @@ main(int argc, char **argv) master = argv[0]; -#ifdef SUPPORT_DETACH - if (detach_from_console) - daemon(0, 0); -#endif - pidfile (NULL); - krb5_openlog (context, "ipropd-slave", &log_facility); + if (status_file == NULL) { + if (asprintf(&status_file, "%s/ipropd-slave-status", hdb_db_dir(context)) < 0 || status_file == NULL) + krb5_errx(context, 1, "can't allocate status file buffer"); + } + + krb5_openlog(context, "ipropd-slave", &log_facility); krb5_set_warn_dest(context, log_facility); - ret = krb5_kt_register(context, &hdb_kt_ops); + slave_status(context, status_file, "bootstrapping"); + + ret = krb5_kt_register(context, &hdb_get_kt_ops); if(ret) krb5_err(context, 1, ret, "krb5_kt_register"); @@ -567,6 +766,8 @@ main(int argc, char **argv) if (time_before_lost < 0) krb5_errx (context, 1, "couldn't parse time: %s", server_time_lost); + slave_status(context, status_file, "getting credentials from keytab/database"); + memset(&conf, 0, sizeof(conf)); if(realm) { conf.mask |= KADM5_CONFIG_REALM; @@ -583,6 +784,8 @@ main(int argc, char **argv) server_context = (kadm5_server_context *)kadm_handle; + slave_status(context, status_file, "creating log file"); + ret = kadm5_log_init (server_context); if (ret) krb5_err (context, 1, ret, "kadm5_log_init"); @@ -605,10 +808,26 @@ main(int argc, char **argv) 10, &backoff); reconnect = reconnect_min; + slave_status(context, status_file, "ipropd-slave started"); + + roken_detach_finish(NULL, daemon_child); + restarter_fd = restarter(context, NULL); + while (!exit_flag) { + struct timeval to; time_t now, elapsed; + fd_set readset; int connected = FALSE; +#ifndef NO_LIMIT_FD_SETSIZE + if (restarter_fd >= FD_SETSIZE) + krb5_errx(context, IPROPD_RESTART, "fd too large"); +#endif + + FD_ZERO(&readset); + if (restarter_fd > -1) + FD_SET(restarter_fd, &readset); + now = time(NULL); elapsed = now - before; @@ -616,10 +835,17 @@ main(int argc, char **argv) time_t left = reconnect - elapsed; krb5_warnx(context, "sleeping %d seconds before " "retrying to connect", (int)left); - sleep(left); + to.tv_sec = left; + to.tv_usec = 0; + if (select(restarter_fd + 1, &readset, NULL, NULL, &to) == 1) { + exit_flag = SIGTERM; + continue; + } } before = now; + slave_status(context, status_file, "connecting to master: %s\n", master); + master_fd = connect_to_master (context, master, port_str); if (master_fd < 0) goto retry; @@ -632,6 +858,8 @@ main(int argc, char **argv) krb5_cc_destroy(context, ccache); get_creds(context, keytab_str, &ccache, master); } + if (verbose) + krb5_warnx(context, "authenticating to master"); ret = krb5_sendauth (context, &auth_context, &master_fd, IPROP_VERSION, NULL, server, AP_OPTS_MUTUAL_REQUIRED, NULL, NULL, @@ -644,32 +872,41 @@ main(int argc, char **argv) krb5_warnx(context, "ipropd-slave started at version: %ld", (long)server_context->log_context.version); - ret = ihave (context, auth_context, master_fd, - server_context->log_context.version); + ret = ihave(context, auth_context, master_fd, + server_context->log_context.version); if (ret) goto retry; connected = TRUE; + if (verbose) + krb5_warnx(context, "connected to master"); + + slave_status(context, status_file, "connected to master, waiting instructions"); + while (connected && !exit_flag) { krb5_data out; krb5_storage *sp; - int32_t tmp; - fd_set readset; - struct timeval to; + uint32_t tmp; + int max_fd; #ifndef NO_LIMIT_FD_SETSIZE if (master_fd >= FD_SETSIZE) - krb5_errx (context, 1, "fd too large"); + krb5_errx(context, IPROPD_RESTART, "fd too large"); + if (restarter_fd >= FD_SETSIZE) + krb5_errx(context, IPROPD_RESTART, "fd too large"); + max_fd = max(restarter_fd, master_fd); #endif FD_ZERO(&readset); FD_SET(master_fd, &readset); + if (restarter_fd != -1) + FD_SET(restarter_fd, &readset); to.tv_sec = time_before_lost; to.tv_usec = 0; - ret = select (master_fd + 1, + ret = select (max_fd + 1, &readset, NULL, NULL, &to); if (ret < 0) { if (errno == EINTR) @@ -677,35 +914,101 @@ main(int argc, char **argv) else krb5_err (context, 1, errno, "select"); } - if (ret == 0) - krb5_errx (context, 1, "server didn't send a message " - "in %d seconds", time_before_lost); + if (ret == 0) { + krb5_warnx(context, "server didn't send a message " + "in %d seconds", time_before_lost); + connected = FALSE; + continue; + } + + if (restarter_fd > -1 && FD_ISSET(restarter_fd, &readset)) { + if (verbose) + krb5_warnx(context, "slave restarter exited"); + exit_flag = SIGTERM; + } + + if (!FD_ISSET(master_fd, &readset)) + continue; + + if (verbose) + krb5_warnx(context, "message from master"); ret = krb5_read_priv_message(context, auth_context, &master_fd, &out); if (ret) { - krb5_warn (context, ret, "krb5_read_priv_message"); + krb5_warn(context, ret, "krb5_read_priv_message"); connected = FALSE; continue; } sp = krb5_storage_from_mem (out.data, out.length); - krb5_ret_int32 (sp, &tmp); + if (sp == NULL) + krb5_err(context, IPROPD_RESTART, errno, "krb5_storage_from_mem"); + ret = krb5_ret_uint32(sp, &tmp); + if (ret == HEIM_ERR_EOF) { + krb5_warn(context, ret, "master sent zero-length message"); + connected = FALSE; + continue; + } + if (ret != 0) { + krb5_warn(context, ret, "couldn't read master's message"); + connected = FALSE; + continue; + } + + ret = kadm5_log_init(server_context); + if (ret) { + krb5_err(context, IPROPD_RESTART, ret, "kadm5_log_init while " + "handling a message from the master"); + } switch (tmp) { case FOR_YOU : - receive (context, sp, server_context); - ret = ihave (context, auth_context, master_fd, - server_context->log_context.version); - if (ret) + if (verbose) + krb5_warnx(context, "master sent us diffs"); + ret2 = receive(context, sp, server_context); + if (ret2) + krb5_warn(context, ret2, + "receive from ipropd-master had errors"); + ret = ihave(context, auth_context, master_fd, + server_context->log_context.version); + if (ret || ret2) connected = FALSE; + + /* + * If it returns an error, receive() may nonetheless + * have committed some entries successfully, so we must + * update the slave_status even if there were errors. + */ + is_up_to_date(context, status_file, server_context); break; case TELL_YOU_EVERYTHING : - ret = receive_everything (context, master_fd, server_context, - auth_context); - if (ret) + if (verbose) + krb5_warnx(context, "master sent us a full dump"); + ret = receive_everything(context, master_fd, server_context, + auth_context); + if (ret == 0) { + ret = ihave(context, auth_context, master_fd, + server_context->log_context.version); + } + if (ret) connected = FALSE; + else + is_up_to_date(context, status_file, server_context); break; case ARE_YOU_THERE : - send_im_here (context, master_fd, auth_context); + if (verbose) + krb5_warnx(context, "master sent us a ping"); + is_up_to_date(context, status_file, server_context); + ret = ihave(context, auth_context, master_fd, + server_context->log_context.version); + if (ret) + connected = FALSE; + + send_im_here(context, master_fd, auth_context); + break; + case YOU_HAVE_LAST_VERSION: + if (verbose) + krb5_warnx(context, "master tells us we are up to date"); + is_up_to_date(context, status_file, server_context); break; case NOW_YOU_HAVE : case I_HAVE : @@ -719,9 +1022,12 @@ main(int argc, char **argv) krb5_data_free (&out); } + + slave_status(context, status_file, "disconnected from master"); retry: if (connected == FALSE) krb5_warnx (context, "disconnected for server"); + if (exit_flag) krb5_warnx (context, "got an exit signal"); @@ -729,8 +1035,15 @@ main(int argc, char **argv) close(master_fd); reconnect += backoff; - if (reconnect > reconnect_max) + if (reconnect > reconnect_max) { + slave_status(context, status_file, "disconnected from master for a long time"); reconnect = reconnect_max; + } + } + + if (status_file) { + /* XXX It'd be better to leave it saying we're not here */ + unlink(status_file); } if (0); |