diff options
author | Don Brady <don.brady@delphix.com> | 2019-06-22 23:51:46 +0000 |
---|---|---|
committer | Brian Behlendorf <behlendorf1@llnl.gov> | 2019-06-22 23:51:46 +0000 |
commit | 186898bbb580a830c02d994e961d717f7cf5dcca (patch) | |
tree | 3af5af5af4d7bed1bafb671c86f3876f01e0dc57 /module/zfs/zcp.c | |
parent | cb9e5b7e84654a8c7dba0f9a0d1227f3c8fa1012 (diff) | |
download | src-186898bbb580a830c02d994e961d717f7cf5dcca.tar.gz src-186898bbb580a830c02d994e961d717f7cf5dcca.zip |
OpenZFS 9425 - channel programs can be interrupted
Problem Statement
=================
ZFS Channel program scripts currently require a timeout, so that hung or
long-running scripts return a timeout error instead of causing ZFS to get
wedged. This limit can currently be set up to 100 million Lua instructions.
Even with a limit in place, it would be desirable to have a sys admin
(support engineer) be able to cancel a script that is taking a long time.
Proposed Solution
=================
Make it possible to abort a channel program by sending an interrupt signal.In
the underlying txg_wait_sync function, switch the cv_wait to a cv_wait_sig to
catch the signal. Once a signal is encountered, the dsl_sync_task function can
install a Lua hook that will get called before the Lua interpreter executes a
new line of code. The dsl_sync_task can resume with a standard txg_wait_sync
call and wait for the txg to complete. Meanwhile, the hook will abort the
script and indicate that the channel program was canceled. The kernel returns
a EINTR to indicate that the channel program run was canceled.
Porting notes: Added missing return value from cv_wait_sig()
Authored by: Don Brady <don.brady@delphix.com>
Reviewed by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed by: Serapheim Dimitropoulos <serapheim.dimitro@delphix.com>
Reviewed by: Matt Ahrens <matt@delphix.com>
Reviewed by: Sara Hartse <sara.hartse@delphix.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Approved by: Robert Mustacchi <rm@joyent.com>
Ported-by: Don Brady <don.brady@delphix.com>
Signed-off-by: Don Brady <don.brady@delphix.com>
OpenZFS-issue: https://www.illumos.org/issues/9425
OpenZFS-commit: https://github.com/illumos/illumos-gate/commit/d0cb1fb926
Closes #8904
Diffstat (limited to 'module/zfs/zcp.c')
-rw-r--r-- | module/zfs/zcp.c | 185 |
1 files changed, 99 insertions, 86 deletions
diff --git a/module/zfs/zcp.c b/module/zfs/zcp.c index 4894df11d5fb..1aeea131449e 100644 --- a/module/zfs/zcp.c +++ b/module/zfs/zcp.c @@ -118,21 +118,6 @@ static int zcp_nvpair_value_to_lua(lua_State *, nvpair_t *, char *, int); static int zcp_lua_to_nvlist_impl(lua_State *, int, nvlist_t *, const char *, int); -typedef struct zcp_alloc_arg { - boolean_t aa_must_succeed; - int64_t aa_alloc_remaining; - int64_t aa_alloc_limit; -} zcp_alloc_arg_t; - -typedef struct zcp_eval_arg { - lua_State *ea_state; - zcp_alloc_arg_t *ea_allocargs; - cred_t *ea_cred; - nvlist_t *ea_outnvl; - int ea_result; - uint64_t ea_instrlimit; -} zcp_eval_arg_t; - /* * The outer-most error callback handler for use with lua_pcall(). On * error Lua will call this callback with a single argument that @@ -452,7 +437,7 @@ zcp_lua_to_nvlist_helper(lua_State *state) static void zcp_convert_return_values(lua_State *state, nvlist_t *nvl, - const char *key, zcp_eval_arg_t *evalargs) + const char *key, int *result) { int err; VERIFY3U(1, ==, lua_gettop(state)); @@ -464,7 +449,7 @@ zcp_convert_return_values(lua_State *state, nvlist_t *nvl, err = lua_pcall(state, 3, 0, 0); /* zcp_lua_to_nvlist_helper */ if (err != 0) { zcp_lua_to_nvlist(state, 1, nvl, ZCP_RET_ERROR); - evalargs->ea_result = SET_ERROR(ECHRNG); + *result = SET_ERROR(ECHRNG); } } @@ -791,19 +776,32 @@ zcp_lua_alloc(void *ud, void *ptr, size_t osize, size_t nsize) static void zcp_lua_counthook(lua_State *state, lua_Debug *ar) { - /* - * If we're called, check how many instructions the channel program has - * executed so far, and compare against the limit. - */ lua_getfield(state, LUA_REGISTRYINDEX, ZCP_RUN_INFO_KEY); zcp_run_info_t *ri = lua_touserdata(state, -1); + /* + * Check if we were canceled while waiting for the + * txg to sync or from our open context thread + */ + if (ri->zri_canceled || + (!ri->zri_sync && issig(JUSTLOOKING) && issig(FORREAL))) { + ri->zri_canceled = B_TRUE; + (void) lua_pushstring(state, "Channel program was canceled."); + (void) lua_error(state); + /* Unreachable */ + } + + /* + * Check how many instructions the channel program has + * executed so far, and compare against the limit. + */ ri->zri_curinstrs += zfs_lua_check_instrlimit_interval; if (ri->zri_maxinstrs != 0 && ri->zri_curinstrs > ri->zri_maxinstrs) { ri->zri_timed_out = B_TRUE; (void) lua_pushstring(state, "Channel program timed out."); (void) lua_error(state); + /* Unreachable */ } } @@ -816,31 +814,25 @@ zcp_panic_cb(lua_State *state) } static void -zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) +zcp_eval_impl(dmu_tx_t *tx, zcp_run_info_t *ri) { int err; - zcp_run_info_t ri; - lua_State *state = evalargs->ea_state; + lua_State *state = ri->zri_state; VERIFY3U(3, ==, lua_gettop(state)); + /* finish initializing our runtime state */ + ri->zri_pool = dmu_tx_pool(tx); + ri->zri_tx = tx; + list_create(&ri->zri_cleanup_handlers, sizeof (zcp_cleanup_handler_t), + offsetof(zcp_cleanup_handler_t, zch_node)); + /* * Store the zcp_run_info_t struct for this run in the Lua registry. * Registry entries are not directly accessible by the Lua scripts but * can be accessed by our callbacks. */ - ri.zri_space_used = 0; - ri.zri_pool = dmu_tx_pool(tx); - ri.zri_cred = evalargs->ea_cred; - ri.zri_tx = tx; - ri.zri_timed_out = B_FALSE; - ri.zri_sync = sync; - list_create(&ri.zri_cleanup_handlers, sizeof (zcp_cleanup_handler_t), - offsetof(zcp_cleanup_handler_t, zch_node)); - ri.zri_curinstrs = 0; - ri.zri_maxinstrs = evalargs->ea_instrlimit; - - lua_pushlightuserdata(state, &ri); + lua_pushlightuserdata(state, ri); lua_setfield(state, LUA_REGISTRYINDEX, ZCP_RUN_INFO_KEY); VERIFY3U(3, ==, lua_gettop(state)); @@ -857,7 +849,7 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) * off control to the channel program. Channel programs that use too * much memory should die with ENOSPC. */ - evalargs->ea_allocargs->aa_must_succeed = B_FALSE; + ri->zri_allocargs->aa_must_succeed = B_FALSE; /* * Call the Lua function that open-context passed us. This pops the @@ -869,14 +861,14 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) /* * Let Lua use KM_SLEEP while we interpret the return values. */ - evalargs->ea_allocargs->aa_must_succeed = B_TRUE; + ri->zri_allocargs->aa_must_succeed = B_TRUE; /* * Remove the error handler callback from the stack. At this point, * there shouldn't be any cleanup handler registered in the handler * list (zri_cleanup_handlers), regardless of whether it ran or not. */ - list_destroy(&ri.zri_cleanup_handlers); + list_destroy(&ri->zri_cleanup_handlers); lua_remove(state, 1); switch (err) { @@ -896,16 +888,16 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) int return_count = lua_gettop(state); if (return_count == 1) { - evalargs->ea_result = 0; - zcp_convert_return_values(state, evalargs->ea_outnvl, - ZCP_RET_RETURN, evalargs); + ri->zri_result = 0; + zcp_convert_return_values(state, ri->zri_outnvl, + ZCP_RET_RETURN, &ri->zri_result); } else if (return_count > 1) { - evalargs->ea_result = SET_ERROR(ECHRNG); + ri->zri_result = SET_ERROR(ECHRNG); lua_settop(state, 0); (void) lua_pushfstring(state, "Multiple return " "values not supported"); - zcp_convert_return_values(state, evalargs->ea_outnvl, - ZCP_RET_ERROR, evalargs); + zcp_convert_return_values(state, ri->zri_outnvl, + ZCP_RET_ERROR, &ri->zri_result); } break; } @@ -919,19 +911,20 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) * stack. */ VERIFY3U(1, ==, lua_gettop(state)); - if (ri.zri_timed_out) { - evalargs->ea_result = SET_ERROR(ETIME); + if (ri->zri_timed_out) { + ri->zri_result = SET_ERROR(ETIME); + } else if (ri->zri_canceled) { + ri->zri_result = SET_ERROR(EINTR); } else { - evalargs->ea_result = SET_ERROR(ECHRNG); + ri->zri_result = SET_ERROR(ECHRNG); } - zcp_convert_return_values(state, evalargs->ea_outnvl, - ZCP_RET_ERROR, evalargs); + zcp_convert_return_values(state, ri->zri_outnvl, + ZCP_RET_ERROR, &ri->zri_result); - if (evalargs->ea_result == ETIME && - evalargs->ea_outnvl != NULL) { - (void) nvlist_add_uint64(evalargs->ea_outnvl, - ZCP_ARG_INSTRLIMIT, ri.zri_curinstrs); + if (ri->zri_result == ETIME && ri->zri_outnvl != NULL) { + (void) nvlist_add_uint64(ri->zri_outnvl, + ZCP_ARG_INSTRLIMIT, ri->zri_curinstrs); } break; } @@ -943,14 +936,16 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) * return the error message. */ VERIFY3U(1, ==, lua_gettop(state)); - if (ri.zri_timed_out) { - evalargs->ea_result = SET_ERROR(ETIME); + if (ri->zri_timed_out) { + ri->zri_result = SET_ERROR(ETIME); + } else if (ri->zri_canceled) { + ri->zri_result = SET_ERROR(EINTR); } else { - evalargs->ea_result = SET_ERROR(ECHRNG); + ri->zri_result = SET_ERROR(ECHRNG); } - zcp_convert_return_values(state, evalargs->ea_outnvl, - ZCP_RET_ERROR, evalargs); + zcp_convert_return_values(state, ri->zri_outnvl, + ZCP_RET_ERROR, &ri->zri_result); break; } case LUA_ERRMEM: @@ -958,7 +953,7 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) * Lua ran out of memory while running the channel program. * There's not much we can do. */ - evalargs->ea_result = SET_ERROR(ENOSPC); + ri->zri_result = SET_ERROR(ENOSPC); break; default: VERIFY0(err); @@ -966,21 +961,35 @@ zcp_eval_impl(dmu_tx_t *tx, boolean_t sync, zcp_eval_arg_t *evalargs) } static void -zcp_pool_error(zcp_eval_arg_t *evalargs, const char *poolname) +zcp_pool_error(zcp_run_info_t *ri, const char *poolname) { - evalargs->ea_result = SET_ERROR(ECHRNG); - lua_settop(evalargs->ea_state, 0); - (void) lua_pushfstring(evalargs->ea_state, "Could not open pool: %s", + ri->zri_result = SET_ERROR(ECHRNG); + lua_settop(ri->zri_state, 0); + (void) lua_pushfstring(ri->zri_state, "Could not open pool: %s", poolname); - zcp_convert_return_values(evalargs->ea_state, evalargs->ea_outnvl, - ZCP_RET_ERROR, evalargs); + zcp_convert_return_values(ri->zri_state, ri->zri_outnvl, + ZCP_RET_ERROR, &ri->zri_result); + +} + +/* + * This callback is called when txg_wait_synced_sig encountered a signal. + * The txg_wait_synced_sig will continue to wait for the txg to complete + * after calling this callback. + */ +/* ARGSUSED */ +static void +zcp_eval_sig(void *arg, dmu_tx_t *tx) +{ + zcp_run_info_t *ri = arg; + ri->zri_canceled = B_TRUE; } static void zcp_eval_sync(void *arg, dmu_tx_t *tx) { - zcp_eval_arg_t *evalargs = arg; + zcp_run_info_t *ri = arg; /* * Open context should have setup the stack to contain: @@ -988,15 +997,14 @@ zcp_eval_sync(void *arg, dmu_tx_t *tx) * 2: Script to run (converted to a Lua function) * 3: nvlist input to function (converted to Lua table or nil) */ - VERIFY3U(3, ==, lua_gettop(evalargs->ea_state)); + VERIFY3U(3, ==, lua_gettop(ri->zri_state)); - zcp_eval_impl(tx, B_TRUE, evalargs); + zcp_eval_impl(tx, ri); } static void -zcp_eval_open(zcp_eval_arg_t *evalargs, const char *poolname) +zcp_eval_open(zcp_run_info_t *ri, const char *poolname) { - int error; dsl_pool_t *dp; dmu_tx_t *tx; @@ -1004,11 +1012,11 @@ zcp_eval_open(zcp_eval_arg_t *evalargs, const char *poolname) /* * See comment from the same assertion in zcp_eval_sync(). */ - VERIFY3U(3, ==, lua_gettop(evalargs->ea_state)); + VERIFY3U(3, ==, lua_gettop(ri->zri_state)); error = dsl_pool_hold(poolname, FTAG, &dp); if (error != 0) { - zcp_pool_error(evalargs, poolname); + zcp_pool_error(ri, poolname); return; } @@ -1023,7 +1031,7 @@ zcp_eval_open(zcp_eval_arg_t *evalargs, const char *poolname) */ tx = dmu_tx_create_dd(dp->dp_mos_dir); - zcp_eval_impl(tx, B_FALSE, evalargs); + zcp_eval_impl(tx, ri); dmu_tx_abort(tx); @@ -1036,7 +1044,7 @@ zcp_eval(const char *poolname, const char *program, boolean_t sync, { int err; lua_State *state; - zcp_eval_arg_t evalargs; + zcp_run_info_t runinfo; if (instrlimit > zfs_lua_max_instrlimit) return (SET_ERROR(EINVAL)); @@ -1136,24 +1144,29 @@ zcp_eval(const char *poolname, const char *program, boolean_t sync, } VERIFY3U(3, ==, lua_gettop(state)); - evalargs.ea_state = state; - evalargs.ea_allocargs = &allocargs; - evalargs.ea_instrlimit = instrlimit; - evalargs.ea_cred = CRED(); - evalargs.ea_outnvl = outnvl; - evalargs.ea_result = 0; + runinfo.zri_state = state; + runinfo.zri_allocargs = &allocargs; + runinfo.zri_outnvl = outnvl; + runinfo.zri_result = 0; + runinfo.zri_cred = CRED(); + runinfo.zri_timed_out = B_FALSE; + runinfo.zri_canceled = B_FALSE; + runinfo.zri_sync = sync; + runinfo.zri_space_used = 0; + runinfo.zri_curinstrs = 0; + runinfo.zri_maxinstrs = instrlimit; if (sync) { - err = dsl_sync_task(poolname, NULL, - zcp_eval_sync, &evalargs, 0, ZFS_SPACE_CHECK_ZCP_EVAL); + err = dsl_sync_task_sig(poolname, NULL, zcp_eval_sync, + zcp_eval_sig, &runinfo, 0, ZFS_SPACE_CHECK_ZCP_EVAL); if (err != 0) - zcp_pool_error(&evalargs, poolname); + zcp_pool_error(&runinfo, poolname); } else { - zcp_eval_open(&evalargs, poolname); + zcp_eval_open(&runinfo, poolname); } lua_close(state); - return (evalargs.ea_result); + return (runinfo.zri_result); } /* |