/*****************************************************************************\
* spank.h - Stackable Plug-in Architecture for Node job Kontrol
*****************************************************************************
* Copyright (C) 2002-2007 The Regents of the University of California.
* Copyright (C) 2008-2010 Lawrence Livermore National Security.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* CODE-OCEC-09-009. All rights reserved.
*
* This file is part of Slurm, a resource management program.
* For details, see <https://slurm.schedmd.com/>.
* Please also read the included file: DISCLAIMER.
*
* Slurm is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* In addition, as a special exception, the copyright holders give permission
* to link the code of portions of this program with the OpenSSL library under
* certain conditions as described in each individual source file, and
* distribute linked combinations including the two. You must obey the GNU
* General Public License in all respects for all of the code used other than
* OpenSSL. If you modify file(s) with this exception, you may extend this
* exception to your version of the file(s), but you are not obligated to do
* so. If you do not wish to do so, delete this exception statement from your
* version. If you delete this exception statement from all source files in
* the program, then also delete it here.
*
* Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with Slurm; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
\*****************************************************************************/
#ifndef SPANK_H
#define SPANK_H
#include <slurm/slurm_errno.h>
#include <slurm/slurm_version.h>
/*
 * SPANK handle: the plug-in's context for the running Slurm job.
 *
 * struct spank_handle is never defined in this header, so plugins only
 * ever hold and pass around this pointer.
 */
typedef struct spank_handle *spank_t;
/*
 * Function type shared by every SPANK plugin operation.
 *
 *  spank  Handle for the current plug-in context.
 *  ac     Argument count.
 *  argv   Argument vector.
 */
typedef int spank_f(spank_t spank, int ac, char *argv[]);
/*
 * SPANK plugin operations. A SPANK plugin should define at least one of
 * the functions declared below.
 *
 * Within slurmd the plug-in callbacks are invoked at the following points:
 *
 *   slurmd
 *        `-> init()
 *        |
 *        `-> job_prolog()
 *        |
 *        |     `-> slurmstepd
 *        |           `-> init ()
 *        |           -> process spank options
 *        |           -> init_post_opt ()
 *        |           + drop privileges (initgroups(), seteuid(), chdir())
 *        |           `-> user_init ()
 *        |           + for each task
 *        |           |       + fork ()
 *        |           |       |
 *        |           |       + reclaim privileges
 *        |           |       `-> task_init_privileged ()
 *        |           |       |
 *        |           |       + become_user ()
 *        |           |       `-> task_init ()
 *        |           |       |
 *        |           |       + execve ()
 *        |           |
 *        |           + reclaim privileges
 *        |           + for each task
 *        |           |       `-> task_post_fork ()
 *        |           |
 *        |           + for each task
 *        |           |       + wait ()
 *        |           |       `-> task_exit ()
 *        |           `-> exit ()
 *        |
 *        `---> job_epilog()
 *        |
 *        `-> slurmd_exit()
 *
 * Callbacks used outside of slurmstepd:
 *
 *   srun:           init(), init_post_opt(), local_user_init(), exit()
 *   sbatch/salloc:  init(), init_post_opt(), exit()
 *   slurmd proper:  init(), slurmd_exit(), job_prolog(), job_epilog()
 */

/* Used by srun, sbatch/salloc and slurmstepd (init also by slurmd). */
extern spank_f slurm_spank_init;
extern spank_f slurm_spank_init_post_opt;
extern spank_f slurm_spank_exit;

/* Used by srun only. */
extern spank_f slurm_spank_local_user_init;

/* Used by slurmstepd only; see the call flow above for ordering. */
extern spank_f slurm_spank_user_init;
extern spank_f slurm_spank_task_init_privileged;
extern spank_f slurm_spank_task_init;
extern spank_f slurm_spank_task_post_fork;
extern spank_f slurm_spank_task_exit;

/* Used by slurmd proper only. */
extern spank_f slurm_spank_job_prolog;
extern spank_f slurm_spank_job_epilog;
extern spank_f slurm_spank_slurmd_exit;
/*
 * Items that can be fetched from a spank handle with spank_get_item ().
 * The comment next to each item lists the variable arguments the call
 * expects for it.
 *
 * S_JOB_NCPUS, for instance, takes (uint16_t *), so it is fetched with:
 *
 *   uint16_t ncpus;
 *   spank_err_t rc = spank_get_item (spank, S_JOB_NCPUS, &ncpus);
 *
 * S_JOB_PID_TO_GLOBAL_ID takes (pid_t, uint32_t *), i.e. one input value
 * followed by the result pointer:
 *
 *   uint32_t global_id;
 *   spank_err_t rc;
 *   rc = spank_get_item (spank, S_JOB_PID_TO_GLOBAL_ID, pid, &global_id);
 *
 * The enumerator order fixes the numeric values, so it must not change.
 */
typedef enum spank_item {
	/* Job identity and geometry */
	S_JOB_UID,                /* User id                       (uid_t *)    */
	S_JOB_GID,                /* Primary group id              (gid_t *)    */
	S_JOB_ID,                 /* Slurm job id                  (uint32_t *) */
	S_JOB_STEPID,             /* Slurm job step id             (uint32_t *) */
	S_JOB_NNODES,             /* Total number of nodes in job  (uint32_t *) */
	S_JOB_NODEID,             /* Relative id of this node      (uint32_t *) */
	S_JOB_LOCAL_TASK_COUNT,   /* Number of local tasks         (uint32_t *) */
	S_JOB_TOTAL_TASK_COUNT,   /* Total number of tasks in job  (uint32_t *) */
	S_JOB_NCPUS,              /* Number of CPUs used by this job
	                           *                               (uint16_t *) */
	S_JOB_ARGV,               /* Command args           (int *, char ***)   */
	S_JOB_ENV,                /* Job env array                 (char ***)   */

	/* Per-task items */
	S_TASK_ID,                /* Local task id                 (int *)      */
	S_TASK_GLOBAL_ID,         /* Global task id                (uint32_t *) */
	S_TASK_EXIT_STATUS,       /* Exit status of task if exited (int *)      */
	S_TASK_PID,               /* Task pid                      (pid_t *)    */

	/* Id translation: first argument is the input, second the result */
	S_JOB_PID_TO_GLOBAL_ID,   /* global task id from pid
	                           *                       (pid_t, uint32_t *)  */
	S_JOB_PID_TO_LOCAL_ID,    /* local task id from pid
	                           *                       (pid_t, uint32_t *)  */
	S_JOB_LOCAL_TO_GLOBAL_ID, /* local id to global id
	                           *                    (uint32_t, uint32_t *)  */
	S_JOB_GLOBAL_TO_LOCAL_ID, /* global id to local id
	                           *                    (uint32_t, uint32_t *)  */

	S_JOB_SUPPLEMENTARY_GIDS, /* Array of suppl. gids  (gid_t **, int *)    */

	/* Slurm version information */
	S_SLURM_VERSION,          /* Current Slurm version         (char **)    */
	S_SLURM_VERSION_MAJOR,    /* Slurm version major release   (char **)    */
	S_SLURM_VERSION_MINOR,    /* Slurm version minor release   (char **)    */
	S_SLURM_VERSION_MICRO,    /* Slurm version micro release   (char **)    */

	/* Allocation details */
	S_STEP_CPUS_PER_TASK,     /* CPUs allocated per task (=1 if --overcommit
	                           * option is used, uint32_t *)                */
	S_JOB_ALLOC_CORES,        /* Job allocated cores in list format
	                           *                               (char **)    */
	S_JOB_ALLOC_MEM,          /* Job allocated memory in MB    (uint64_t *) */
	S_STEP_ALLOC_CORES,       /* Step alloc'd cores in list format
	                           *                               (char **)    */
	S_STEP_ALLOC_MEM,         /* Step alloc'd memory in MB     (uint64_t *) */

	S_SLURM_RESTART_COUNT,    /* Job restart count             (uint32_t *) */
	S_JOB_ARRAY_ID,           /* Slurm job array id (uint32_t *) or 0       */
	S_JOB_ARRAY_TASK_ID,      /* Slurm job array task id       (uint32_t *) */
} spank_item_t;
/*
 * Error code type returned by the SPANK interface functions.
 *
 * SPANK error codes match the Slurm internal error codes and the inherited
 * POSIX error codes, so this is simply an alias for slurm_err_t. slurm_err_t
 * is not defined in this header; it must already be declared by the time
 * this typedef is reached.
 */
typedef slurm_err_t spank_err_t;
/*
 * SPANK plugin context: identifies the program a plugin has been loaded
 * into. The enumerator order fixes the numeric values.
 */
typedef enum spank_context {
	S_CTX_ERROR,      /* Error obtaining current context     */
	S_CTX_LOCAL,      /* Local context (srun)                */
	S_CTX_REMOTE,     /* Remote context (slurmstepd)         */
	S_CTX_ALLOCATOR,  /* Allocator context (sbatch/salloc)   */
	S_CTX_SLURMD,     /* slurmd context                      */
	S_CTX_JOB_SCRIPT  /* prolog/epilog context               */
} spank_context_t;

/*
 * Feature-test macros: let plugin sources check at compile time that the
 * corresponding contexts exist in this version of the header.
 */
#define HAVE_S_CTX_SLURMD     1 /* slurmd context supported */
#define HAVE_S_CTX_JOB_SCRIPT 1 /* job script (prolog/epilog) supported */
/*
 * SPANK plugin options
 */

/*
 * Callback invoked for a SPANK option.
 *
 *  val     Integer supplied by the plugin so that it can tell its own
 *          options apart.
 *  optarg  Argument passed by the user (if applicable).
 *  remote  Whether the call is being made locally (e.g. in srun) or
 *          remotely (e.g. in slurmstepd/slurmd).
 */
typedef int (*spank_opt_cb_f)(int val, const char *optarg, int remote);

/*
 * Description of one plugin-provided option.
 */
struct spank_option {
	char *name;        /* long option provided by plugin */
	char *arginfo;     /* one word description of argument if required */
	char *usage;       /* Usage text */
	int has_arg;       /* Does option require argument? */
	int val;           /* value to return using callback */
	spank_opt_cb_f cb; /* Callback function to check option value */
};
/*
 * A plugin may export its option table under the symbol name
 * "spank_options". Options exported this way are only picked up in "local"
 * and "remote" mode; to register options in "allocator" mode
 * (sbatch/salloc) use spank_option_register, which is the preferred method.
 */
extern struct spank_option spank_options[];

/*
 * Terminating entry: every SPANK plugin option table must end with it.
 */
#define SPANK_OPTIONS_TABLE_END { NULL, NULL, NULL, 0, 0, NULL }

/*
 * Maximum allowed length of a SPANK option name.
 */
#define SPANK_OPTION_MAXLEN 75
/* SPANK interface prototypes
 *
 * When this header is compiled as C++, the declarations that follow are
 * wrapped in extern "C" so that they are given C linkage.
 */
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Translate a spank_err_t error code into its string representation.
 *
 *  err  Error code to describe.
 *
 * Returns the string representation of err.
 */
const char *spank_strerror(spank_err_t err);
/*
 * Tell whether the given spank plugin symbol is supported by this version
 * of the SPANK interface.
 *
 *  symbol  Name of the plugin symbol to look up.
 *
 * Returns:
 *  = 1   The symbol is supported
 *  = 0   The symbol is not supported
 *  = -1  Invalid argument
 */
int spank_symbol_supported(const char *symbol);
/*
 * Report whether the plugin is loaded in "remote" context.
 *
 *  spank  Handle for the current plug-in context.
 *
 * Returns:
 *  = 1  remote context, i.e. the plugin is loaded in slurmstepd.
 *  = 0  not remote context
 *  < 0  the spank handle was not valid.
 */
int spank_remote(spank_t spank);
/*
 * Report the context in which the calling plugin is loaded.
 *
 * Returns the spank_context_t value for the calling plugin, or S_CTX_ERROR
 * if the current context cannot be determined.
 */
spank_context_t spank_context(void);
/*
 * Dynamically register an option provided by the plugin.
 *
 * Only valid when called from slurm_spank_init(). The call must be
 * guaranteed to happen in every context in which the option is used
 * (local, remote, allocator).
 *
 * In allocator context this function is the only way to register options.
 *
 * It may be called several times in order to register many options.
 *
 *  spank  Handle for the current plug-in context.
 *  opt    Option to register.
 *
 * Returns ESPANK_SUCCESS on successful registration of the option
 *  or ESPANK_BAD_ARG if not called from slurm_spank_init().
 */
spank_err_t spank_option_register(spank_t spank, struct spank_option *opt);
/*
 * Check whether spank plugin option [opt] has been activated. When the
 * option takes an argument, that argument (if found) is returned in
 * *optarg.
 *
 * Callable from the following plugin functions:
 *   slurm_spank_job_prolog, slurm_spank_local_user_init,
 *   slurm_spank_user_init, slurm_spank_task_init_privileged,
 *   slurm_spank_task_init, slurm_spank_task_exit, slurm_spank_job_epilog.
 *
 *  spank   Handle for the current plug-in context.
 *  opt     Option to look up.
 *  optarg  Receives the option argument when opt->has_arg != 0.
 *
 * Returns
 *  ESPANK_SUCCESS    if the option was used by the user. In this case
 *                    *optarg will contain the option argument if
 *                    opt->has_arg != 0.
 *  ESPANK_ERROR      if the option wasn't used.
 *  ESPANK_BAD_ARG    if an invalid argument was passed to the function,
 *                    such as NULL opt, NULL opt->name, or NULL optarg when
 *                    opt->has_arg != 0.
 *  ESPANK_NOT_AVAIL  if called from improper context.
 */
spank_err_t spank_option_getopt(spank_t spank, struct spank_option *opt,
				char **optarg);
/*
 * Fetch the value of the requested job or task item and store it through
 * the pointer argument(s) that follow `item'. The argument types expected
 * for each item are listed in the spank_item_t comments.
 *
 * For the S_JOB_ARGV, S_JOB_ENV, and S_SLURM_VERSION* items the result
 * handed back to the caller should not be freed or modified.
 *
 * Returns
 *  ESPANK_SUCCESS     on success.
 *  ESPANK_NOTASK      if an S_TASK* item is requested from outside a task
 *                     context.
 *  ESPANK_BAD_ARG     if invalid args are passed to spank_get_item or
 *                     spank_get_item is called from an invalid context.
 *  ESPANK_NOT_REMOTE  if not called from slurmstepd context or
 *                     slurm_spank_local_user_init.
 */
spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...);
/*
 * Copy environment variable "var" from the job's environment into buffer
 * "buf" of size "len".
 *
 * Returns ESPANK_SUCCESS on success, o/w spank_err_t on failure:
 *  ESPANK_BAD_ARG      = spank handle invalid or len < 0.
 *  ESPANK_ENV_NOEXIST  = environment variable doesn't exist in job's env.
 *  ESPANK_NOSPACE      = buffer too small, truncation occurred.
 *  ESPANK_NOT_REMOTE   = not called in remote context (i.e. from slurmd).
 */
spank_err_t spank_getenv(spank_t spank, const char *var, char *buf, int len);
/*
 * Set environment variable "var" to "val" in the environment of the
 * current job or task in the spank handle. An existing value for var is
 * overwritten only if overwrite != 0.
 *
 * Returns ESPANK_SUCCESS on success, o/w spank_err_t on failure:
 *  ESPANK_ENV_EXISTS  = var exists in job env and overwrite == 0.
 *  ESPANK_BAD_ARG     = spank handle invalid or var/val are NULL.
 *  ESPANK_NOT_REMOTE  = not called from slurmstepd.
 */
spank_err_t spank_setenv(spank_t spank, const char *var, const char *val,
			 int overwrite);
/*
 * Remove environment variable "var" from the environment of the current
 * job or task in the spank handle.
 *
 * Returns ESPANK_SUCCESS on success, o/w spank_err_t on failure:
 *  ESPANK_BAD_ARG     = spank handle invalid or var is NULL.
 *  ESPANK_NOT_REMOTE  = not called from slurmstepd.
 */
spank_err_t spank_unsetenv(spank_t spank, const char *var);
/*
 * Set environment variable "name" to "value" in the "job control"
 * environment, an extra set of environment variables that is included in
 * the environment of the Slurm prolog and epilog programs. Variables set
 * through this function are prepended with SPANK_ to differentiate them
 * from other env vars, and to avoid security issues.
 *
 * Returns ESPANK_SUCCESS on success, o/w spank_err_t on failure:
 *  ESPANK_ENV_EXISTS  = name exists in control env and overwrite == 0.
 *  ESPANK_NOT_LOCAL   = not called in local context
 */
spank_err_t spank_job_control_setenv(spank_t sp, const char *name,
				     const char *value, int overwrite);
/*
 * Copy environment variable "name" from the job control environment into
 * buffer "buf" of size "len".
 *
 * Returns ESPANK_SUCCESS on success, o/w spank_err_t on failure:
 *  ESPANK_BAD_ARG      = invalid spank handle or len <= 0
 *  ESPANK_ENV_NOEXIST  = environment var does not exist in control env
 *  ESPANK_NOSPACE      = buffer too small, truncation occurred.
 *  ESPANK_NOT_LOCAL    = not called in local context
 */
spank_err_t spank_job_control_getenv(spank_t sp, const char *name,
				     char *buf, int len);
/*
 * Remove environment variable "name" from the job control environment.
 *
 * Returns ESPANK_SUCCESS on success, o/w spank_err_t on failure:
 *  ESPANK_BAD_ARG    = invalid spank handle or name is NULL
 *  ESPANK_NOT_LOCAL  = not called in local context
 */
spank_err_t spank_job_control_unsetenv(spank_t sp, const char *name);
/*
 * Slurm logging functions exported to plugins.
 *
 * Each takes a printf-style format string followed by its arguments; the
 * format attribute lets the compiler check the arguments against the
 * format string.
 */
extern void slurm_info(const char *format, ...)
	__attribute__((format(printf, 1, 2)));

extern void slurm_error(const char *format, ...)
	__attribute__((format(printf, 1, 2)));

extern void slurm_verbose(const char *format, ...)
	__attribute__((format(printf, 1, 2)));

extern void slurm_debug(const char *format, ...)
	__attribute__((format(printf, 1, 2)));

extern void slurm_debug2(const char *format, ...)
	__attribute__((format(printf, 1, 2)));

extern void slurm_debug3(const char *format, ...)
	__attribute__((format(printf, 1, 2)));
/*
 * Log a message at the same level as error(), but without the "error: "
 * prefix. Useful for reporting back to srun commands from SPANK plugins,
 * since info() output only goes to the logs.
 *
 *  format  printf-style format string, followed by its arguments.
 */
extern void slurm_spank_log(const char *format, ...)
	__attribute__((format(printf, 1, 2)));
#ifdef __cplusplus
}
#endif
/*
 * Every spank plugin must invoke this macro once; it defines the symbols
 * the Slurm plugin loader looks for.
 *
 *  plug_name  Plugin name; stringified as written (no quotes needed).
 *  plug_ver   Value stored in spank_plugin_version.
 *
 * plugin_version is taken from SLURM_VERSION_NUMBER, which must be defined
 * wherever the macro is expanded. The expansion already ends in a
 * semicolon.
 */
#define SPANK_PLUGIN(plug_name, plug_ver) \
	const char plugin_name[] = #plug_name; \
	const char plugin_type[] = "spank"; \
	const unsigned int plugin_version = SLURM_VERSION_NUMBER; \
	const unsigned int spank_plugin_version = plug_ver;
#endif /* !SPANK_H */