Classes | Defines | Functions | Variables

service_template.c File Reference

#include <stdio.h>
#include <time.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <sys/param.h>
#include "utility.h"
#include "problem.h"
#include "comm_protocol.h"
#include "comm_basics.h"
#include "comm_data.h"
#include "comm_encode.h"
#include "gs_pm_model.h"
Include dependency graph for service_template.c:

Go to the source code of this file.

Classes

struct  gs_service_info_t

Defines

#define REQUEST_ID_LEN   64
#define REQUEST_ID_TEMPLATE   "gsrequest_%s_%d_XXXXXXXXXXXX"

Functions

int gs_read_server_from_file (char *, gs_server_t *)
int gs_service_read_coeff (gs_service_info_t *, gs_server_t *)
int gs_problem_service (gs_problem_t *)
int gs_service_blocking_request (gs_service_info_t *)
int gs_service_nonblocking_request (gs_service_info_t *)
int gs_service_batch_request (gs_service_info_t *)
void gs_dummy_signal_handler (int)
void gs_service_sigterm_handler (int)
double gs_read_service_et (char *)
double gs_pm_problem_service (gs_service_info_t *)
double gs_agent_get_server_score (gs_problem_t *, gs_server_t *)
int service_template (int argc, char *argv[])
void gs_batch_service_sigterm_handler (int sig)
int gs_exec_batch_service (gs_service_info_t *s)
int gs_wait_for_batch_job_completion (gs_service_info_t *s)
int gs_get_category_names (gs_pm_model_t *model, gs_problem_t *prob, char ***arr)
int gs_get_param_exprs (gs_pm_model_t *model, char *comp_model, char ***arr)
int gs_gen_expr (int i, int numrows, char **cat_names, char **param_expr, double **cat_mat, double **coef_mat, gs_pm_model_t *model, FILE *cf)
int gs_generate_pm_expr (gs_pm_model_t *model, char *comp_model, gs_problem_t *prob, FILE *cf)
int gs_update_perf_model (gs_service_info_t *s, char *model_fname, char *coef_fname, double elapsed_time)

Variables

pid_t gs_service_pid = 0

Detailed Description

This file contains a generic service template for the end service.

Definition in file service_template.c.


Define Documentation

#define REQUEST_ID_LEN   64

Definition at line 35 of file service_template.c.

#define REQUEST_ID_TEMPLATE   "gsrequest_%s_%d_XXXXXXXXXXXX"

Definition at line 36 of file service_template.c.


Function Documentation

double gs_agent_get_server_score ( gs_problem_t *  problem,
gs_server_t *  server 
)

Gets the server computation time estimate for the given problem.

Definition at line 178 of file agent_scheduler_eval.c.

{
  /* Ask the performance model first; only when it reports a negative
   * score is the complexity-based score used instead.
   */
  double estimate = gs_agent_perf_model_score(problem, server);

  if(!(estimate < 0))
    return estimate;

  return gs_agent_complexity_score(problem, server);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void gs_batch_service_sigterm_handler ( int  sig  ) 

Signal handler for the SIGTERM delivered when the batch service is cancelled. Here we want to kill the real service process, which will depend on what kind of batch system we're using on the back-end.

Parameters:
sig -- the signal caught

Definition at line 417 of file service_template.c.

{
  /* No child pid has been recorded yet, so there is nothing to signal. */
  if(gs_service_pid <= 0)
    return;

  /* Relay the signal we caught to the recorded service process. */
  if(kill(gs_service_pid, sig) < 0)
    ERRPRINTF("Failed to kill batch service process [pid = %d]\n", gs_service_pid);
}

Here is the caller graph for this function:

void gs_dummy_signal_handler ( int  sig  ) 

Signal handler for the SIGCHLD delivered when the service terminates. Originally I tried just ignoring the signal, but then on certain systems the subsequent call to waitpid() failed.

Parameters:
sig -- the signal caught

Definition at line 383 of file service_template.c.

{
  /* Deliberately does nothing: the handler only exists so that the
   * signal is caught rather than ignored.
   */
  (void)sig;
}

Here is the caller graph for this function:

int gs_exec_batch_service ( gs_service_info_t *  s  ) 

Exec a batch request.

Returns:
0 on success, -1 on failure.

Definition at line 808 of file service_template.c.

{
  /* Submits the batch service through the problem's gs_submit script.
   * The script's standard output is redirected into the file
   * "gs_batch_id" in the current directory.
   *
   * s -- service info struct (gridsolve_root, problem_name, cwd and
   *      request_id are read)
   *
   * Returns 0 on success, -1 on failure.  All strings built here are
   * released before returning.
   */
  int status, rc = -1;
  char *cmd = NULL, *orig_exe = NULL, *new_exe = NULL;

  /* make sure no earlier gs_batch_id file is lying around */
  unlink("gs_batch_id");

#ifdef __CYGWIN__
  cmd = dstring_sprintf("%s/service/%s/gs_submit %s/service/%s/%s_batch_service > gs_batch_id", 
            s->gridsolve_root, s->problem_name, s->gridsolve_root, s->problem_name, s->problem_name);
#else
  new_exe = dstring_sprintf("%s/%s/%s_batch_service", s->cwd, 
    s->request_id, s->problem_name);
  orig_exe = dstring_sprintf("%s/service/%s/%s_batch_service", 
    s->gridsolve_root, s->problem_name, s->problem_name);

  /* never hand a NULL path to symlink() or to the %s conversions below */
  if(!new_exe || !orig_exe) {
    ERRPRINTF("failed to create executable path strings\n");
    goto cleanup;
  }

  /* the request directory gets a link to the installed executable and
   * the link, not the original, is what is submitted
   */
  if(symlink(orig_exe, new_exe) < 0) {
    ERRPRINTF("failed to create symlink (%s -> %s)\n",
      new_exe, orig_exe);
    goto cleanup;
  }
  cmd = dstring_sprintf("%s/service/%s/gs_submit %s > gs_batch_id", 
    s->gridsolve_root, s->problem_name, new_exe);
#endif

  if(!cmd) {
    ERRPRINTF("failed to create command string\n");
    goto cleanup;
  }

  DBGPRINTF("cmd: %s\n", cmd);
  status = system(cmd);

  if((status < 0) || (WEXITSTATUS(status) != 0)) {
    ERRPRINTF("command failed: '%s'\n", cmd);
    goto cleanup;
  }

  rc = 0;

cleanup:
  free(cmd);
  free(orig_exe);
  free(new_exe);

  return rc;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int gs_gen_expr ( int  i,
int  numrows,
char **  cat_names,
char **  param_expr,
double **  cat_mat,
double **  coef_mat,
gs_pm_model_t *  model,
FILE *  cf 
)

Definition at line 1122 of file service_template.c.

{
  /* Writes a chain of nested conditional expressions to cf, one link
   * per row from i up to (but not including) numrows:
   *
   *   (<category tests>)?(<weighted parameter sum>):( ... )
   *
   * The innermost alternative is the literal -1.  Always returns 0.
   */
  int row, col, nesting = 0;

  for(row = i; row != numrows; row++) {
    /* condition: every category must equal this row's value */
    fputs("(", cf);
    for(col = 0; col < model->nb_categories; col++) {
      if(col > 0)
        fputs(" && ", cf);

      fprintf(cf, "(%s == %g)", cat_names[col], cat_mat[row][col]);
    }
    fputs(")?(", cf);

    /* value: sum of coefficient * parameter expression */
    for(col = 0; col < model->nb_params; col++) {
      if(col > 0)
        fputs(" + ", cf);

      fprintf(cf, " (%g * (%s)) ", coef_mat[row][col], param_expr[col]);
    }

    /* open the "else" branch that holds the next row */
    fputs("):(", cf);
    nesting++;
  }

  fputs("-1", cf);

  /* close one "else" branch per row emitted above */
  while(nesting-- > 0)
    fputs(")", cf);

  return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int gs_generate_pm_expr ( gs_pm_model_t *  model,
char *  comp_model,
gs_problem_t *  prob,
FILE *  cf 
)

Definition at line 1160 of file service_template.c.

{
  /* Writes the performance-model expression for all rows of the model
   * to cf, followed by a newline.  Nothing is written when the model
   * has no rows.
   *
   * model      -- the performance model
   * comp_model -- ';'-separated parameter expressions
   * prob       -- problem whose enum arguments name the categories
   * cf         -- open stream the expression is written to
   *
   * Returns 0 on success, -1 if the category names or the parameter
   * expressions could not be obtained.
   */
  char **cat_names = NULL, **param_expr = NULL;
  double **cat_mat, **coef_mat;
  int numrows;
  
  /* NOTE(review): ownership of cat_mat/coef_mat is not visible here;
   * confirm whether gs_pm_all_models expects the caller to free them.
   */
  numrows = gs_pm_all_models(model, &cat_mat, &coef_mat);

  if(gs_get_category_names(model, prob, &cat_names) < 0) {
    ERRPRINTF("Error getting category names\n");
    return -1;
  }

  if(gs_get_param_exprs(model, comp_model, &param_expr) < 0) {
    ERRPRINTF("Error getting parameter expressions\n");
    free(cat_names);
    return -1;
  }

  if(numrows > 0) {
    gs_gen_expr(0, numrows, cat_names, param_expr, cat_mat, coef_mat, model, cf);
    fprintf(cf, "\n");
  }

  /* cat_names only borrows the argument names, so just the array goes */
  free(cat_names);

  /* every entry of param_expr points into one duplicated copy of
   * comp_model whose start is entry 0; release that copy as well as
   * the array itself
   */
  free(param_expr[0]);
  free(param_expr);

  return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int gs_get_category_names ( gs_pm_model_t *  model,
gs_problem_t *  prob,
char ***  arr 
)

Definition at line 1063 of file service_template.c.

{
  /* Builds an array holding the name of every enum argument of prob,
   * in argument-list order.
   *
   * model -- performance model; nb_categories gives the array length
   * prob  -- problem whose argument list is scanned
   * arr   -- on success receives the array; the caller frees the array
   *          but not the names, which still belong to the arguments
   *
   * Returns 0 on success, -1 on allocation failure or when the number
   * of enum arguments differs from model->nb_categories (the caller
   * reads exactly nb_categories entries, so a mismatch would mean an
   * overrun or uninitialized pointers).
   */
  gs_argument_t *argptr;
  char **cat_names;
  int i, ncat;

  ncat = model->nb_categories;

  if(ncat < 0)
    return -1;

  /* malloc(0) may legitimately return NULL, so never ask for 0 bytes */
  cat_names = (char **)malloc((size_t)(ncat > 0 ? ncat : 1) * sizeof(char *));

  if(!cat_names)
    return -1;

  i = 0;
  for(argptr=prob->arglist; argptr != NULL; argptr=argptr->next) {
    if(!argptr->arg_enum)
      continue;

    if(i >= ncat) {
      ERRPRINTF("Problem has more enum arguments than the model has categories\n");
      free(cat_names);
      return -1;
    }

    cat_names[i] = argptr->name;
    i++;
  }

  if(i != ncat) {
    ERRPRINTF("Problem has fewer enum arguments than the model has categories\n");
    free(cat_names);
    return -1;
  }

  *arr = cat_names;

  return 0;
}

Here is the caller graph for this function:

int gs_get_param_exprs ( gs_pm_model_t *  model,
char *  comp_model,
char ***  arr 
)

Definition at line 1088 of file service_template.c.

{
  /* Splits comp_model at every ';' and returns pointers to the pieces.
   *
   * model      -- performance model; nb_params gives the array length
   * comp_model -- ';'-separated expressions (not modified; a private
   *               copy is split instead)
   * arr        -- on success receives the array.  All entries point
   *               into one duplicated string whose start is entry 0,
   *               so releasing the result means freeing (*arr)[0] and
   *               then *arr.
   *
   * Returns 0 on success, -1 on allocation failure or when the number
   * of pieces differs from model->nb_params (the caller reads exactly
   * nb_params entries, so a mismatch would mean an overrun or
   * uninitialized pointers).
   */
  char *cm_copy, *cp, **pexp;
  int i;

  /* at least one piece is always produced, so zero params can't match */
  if(!comp_model || (model->nb_params <= 0))
    return -1;
  
  cm_copy = strdup(comp_model);
  pexp = (char **)malloc((size_t)model->nb_params * sizeof(char *));
        
  if(!cm_copy || !pexp) {
    free(cm_copy);
    free(pexp);
    return -1;
  }
        
  cp = cm_copy;
  i = 0;

  while(cp) {
    if(i >= model->nb_params) {
      ERRPRINTF("Complexity model has more expressions than the model has parameters\n");
      free(cm_copy);
      free(pexp);
      return -1;
    }

    pexp[i] = cp;
    i++;
    cp = strchr(cp, ';');

    /* terminate this piece and step past the separator */
    if(cp) {
      *cp = 0;
      cp++;
    }
  }

  if(i != model->nb_params) {
    ERRPRINTF("Complexity model has fewer expressions than the model has parameters\n");
    free(cm_copy);
    free(pexp);
    return -1;
  }

  *arr = pexp;

  return 0;
}

Here is the caller graph for this function:

double gs_pm_problem_service ( gs_service_info_t *  s  ) 

Runs the service in timed mode and updates the performance model if it exists.

Parameters:
s - pointer to service info struct

Definition at line 1350 of file service_template.c.

{
  /* Times one call of gs_problem_service() with usertime() and returns
   * the elapsed time.  Returns -1.0 if a model filename could not be
   * built in the conditional section below.
   */
  double t_begin, t_elapsed;

  t_begin = usertime();
  gs_problem_service(s->problem);
  t_elapsed = usertime() - t_begin;

#ifdef GS_PM_DISABLE
  /* NOTE(review): this update is compiled in only when GS_PM_DISABLE
   * IS defined, which reads like the opposite of what the macro name
   * suggests -- confirm whether #ifndef was intended.
   *
   * An infodir of "-" and a zero elapsed time both skip the update
   * (no point adding entries where the elapsed time is zero).
   */
  if((strcmp(s->infodir, "-") != 0) && (t_elapsed > 0.0)) {
    char *mdl_path, *coe_path;

    mdl_path = dstring_sprintf("%s/%s.mdl", s->infodir, s->problem->name);
    if(!mdl_path)
      return -1.0;

    coe_path = dstring_sprintf("%s/%s.coe", s->infodir, s->problem->name);
    if(!coe_path) {
      free(mdl_path);
      return -1.0;
    }

    gs_update_perf_model(s, mdl_path, coe_path, t_elapsed);

    free(mdl_path);
    free(coe_path);
  }
#endif

  return t_elapsed;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int gs_problem_service ( gs_problem_t *   ) 

Here is the caller graph for this function:

int gs_read_server_from_file ( char *  ,
gs_server_t *   
)
double gs_read_service_et ( char *  file  ) 

Reads the elapsed time written on the second line of the specified file. Normally this will be the "done" timestamp file.

Parameters:
file -- the filename
Returns:
the service elapsed time

Definition at line 782 of file service_template.c.

{
  /* Returns the number found on the second line of the given file, or
   * 0.0 when the file cannot be opened or has no second line.
   */
  double service_et = 0.0;
  FILE *dfile;
  char buf[128];
  int skipped_first = 0;

  dfile = fopen(file, "r");
  if(!dfile)
    return service_et;

  /* skip the timestamp line.  keep reading until its newline has been
   * consumed so that an over-long first line cannot have its tail
   * mistaken for the second line.
   */
  while(fgets(buf, sizeof buf, dfile)) {
    if(strchr(buf, '\n')) {
      skipped_first = 1;
      break;
    }
  }

  if(skipped_first && fgets(buf, sizeof buf, dfile))
    service_et = atof(buf);

  fclose(dfile);

  return service_et;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int gs_service_batch_request ( gs_service_info_t *  s  ) 

Services batch requests (PBS, LSF, etc.).

Parameters:
s - pointer to service info struct
Returns:
0 on success, -1 on failure.

Definition at line 915 of file service_template.c.

{
  /* Sequence:
   *   1. install the SIGCHLD and SIGTERM handlers
   *   2. write the encoded problem to GS_BATCH_XML and save the input
   *      arguments to the file "input"
   *   3. fork; the child runs gs_exec_batch_service() and exits, the
   *      parent waits for it and checks its exit status
   *   4. wait until the batch job itself has completed
   *   5. for blocking requests, restore the output arguments from the
   *      file "data" and send them over s->sock
   *   6. decrement the job count and notify the agent
   *
   * On failure s->err is set and -1 is returned; 0 is returned on
   * success.
   */
  char *problemstr = NULL;
  double service_et;
  FILE *xmlfile;
  pid_t pid;

  gs_service_pid = 0;

  if((gs_signal(SIGCHLD, gs_dummy_signal_handler) == SIG_ERR) ||
     (gs_signal(SIGTERM, gs_batch_service_sigterm_handler) == SIG_ERR)) {
    ERRPRINTF("Error: could not set signal handlers\n");
    s->err = GS_SVC_ERR_SIGNALS;
    return -1;
  }

  /* first save the problem struct to a file */
  xmlfile = fopen(GS_BATCH_XML, "w");
  
  if(!xmlfile) {
    ERRPRINTF("Could not create xml file.\n");
    s->err = GS_SVC_ERR_CREATE_XML;
    return -1;
  }
  
  if(gs_encode_problem(&problemstr, s->problem) < 0) {
    ERRPRINTF("Could not encode problem.\n");
    fclose(xmlfile);  /* don't leak the stream on this error path */
    s->err = GS_SVC_ERR_PROBLEM_ENC;
    return -1;
  }
   
  /* NOTE(review): problemstr is never released here; confirm whether
   * gs_encode_problem hands ownership of the string to the caller.
   */
  fprintf(xmlfile, "%s\n", problemstr);

  fclose(xmlfile);
    
  /* then save the args */

  if(gs_save_input_args_to_file("input", s->problem, s->my_dsig, GS_CALL_FROM_C, 
       s->problem->major) < 0) {
    ERRPRINTF("Error saving input args.\n");
    s->err = GS_SVC_ERR_CREAT_DATA_FILE;
    return -1;
  }

  /* fork a child process to execute the batch service */
  pid = fork();

  if(pid == -1) {
    ERRPRINTF("Failed to fork\n");
    s->err = GS_SVC_ERR_FORK;
    return -1;
  }

  if(pid == 0) {
    /* this is the child.  its exit status is how the parent learns
     * whether the submission worked.
     */
    if(gs_exec_batch_service(s) < 0)
      _exit(s->err);

    _exit(0);
  }
  else {
    int cstat_loc, status;
    pid_t child;

    /* recorded so the SIGTERM handler can forward the signal */
    gs_service_pid = pid;

    /* this is the parent.  */

    child = waitpid(pid, &cstat_loc, 0);

    if(child < 0) {
      ERRPRINTF("Error waiting for batch service process %d.\n", (int)pid);
      s->err = GS_SVC_ERR_WAITPID;
      return -1;
    }

    if(WIFEXITED(cstat_loc) == 0) {
      ERRPRINTF("batch service process %d did not terminate.\n", (int)pid);
      s->err = GS_SVC_ERR_ABNORMAL_EXIT;
      return -1;
    }

    status = WEXITSTATUS(cstat_loc);

    if(status != 0) {
      ERRPRINTF("batch service process %d terminated abnormally (status %d).\n",
        (int)pid, (char)status);
      /* the child exits with an error code; anything that does not
       * read back as a positive char is reported as unspecified
       */
      s->err = (char)status > 0 ? (char)status : GS_SVC_ERR_UNSPECIFIED;
      return -1;
    }

    if(gs_wait_for_batch_job_completion(s) < 0) {
      ERRPRINTF("Failed to wait for job completion.\n");
      s->err = GS_SVC_ERR_WAITPID;
      return -1;
    }

    if(s->blocking) {
      int fd;

      if((fd = open("data", O_RDONLY)) == -1) {
        ERRPRINTF("failed to open output data\n");
        s->err = GS_SVC_ERR_OPEN_DATA_FILE;
        return -1;
      }

      if(gs_restore_output_args_from_file(fd, s->problem, s->my_dsig) < 0) {
        ERRPRINTF("failed to restore output data from disk\n");
        close(fd);
        s->err = GS_SVC_ERR_RESTORE_ARGS;
        return -1;
      }

      close(fd);

      if(gs_send_tag(s->sock, GS_PROT_OK) < 0) {
        ERRPRINTF("Error sending tag.\n");
        s->err = GS_SVC_ERR_IO;
        return -1;
      }

      if(gs_send_output_args(s->sock, s->problem, s->my_dsig) < 0) {
        ERRPRINTF("Error sending output args.\n");
        s->err = GS_SVC_ERR_IO;
        return -1;
      }

      if(gs_create_timestamp_file(".", "retrieved", 0.0))
        ERRPRINTF("Warning: failed to create 'retrieved' file.\n");
    }

    service_et = gs_read_service_et("done");

    /* the remaining steps are best effort: failures are only logged */
    if(gs_decrement_job_count(s->srv_job_count) < 0)
      ERRPRINTF("Warning: failed to decrement job count.\n");

    if(gs_notify_agent_problem_complete(s->agent, s->agentport, s->problem_name,
         s->srv_cid, s->cli_username, s->cli_hostname, s->cli_cid, s->request_id,
         s->agent_taskid, service_et) < 0)
      ERRPRINTF("Warning: failed sending problem solve notification.\n");
  }

  return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int gs_service_blocking_request ( gs_service_info_t *  s  ) 

Services blocking requests.

Parameters:
s - pointer to service info struct
Returns:
0 on success, -1 on failure.

Definition at line 516 of file service_template.c.

{
  /* Runs the service in this process, changes back into the request
   * directory, then sends GS_PROT_OK followed by the output arguments
   * over s->sock.  Afterwards the agent is notified and the
   * 'retrieved' timestamp file is created (both best effort).
   *
   * Returns 0 on success, -1 on failure.  If the request directory
   * cannot be re-entered and the process is verifiably somewhere else,
   * the process exits instead of returning.
   */
  double service_et;

  service_et = gs_pm_problem_service(s);

  if(gs_decrement_job_count(s->srv_job_count) < 0)
    ERRPRINTF("Warning: failed to decrement job count.\n");

  /* since the service might have changed the working directory
   * try to chdir back to the request subdirectory
   */
  if((chdir(s->cwd) < 0) || (chdir(s->request_id) < 0)) {
    char *origcwd, *newcwd;
    int still_in_request_dir;

    ERRPRINTF("Could not cd back to request directory '%s/%s'.\n",
      s->cwd, s->request_id);
    gs_send_tag(s->sock, GS_SVC_ERR_CHDIR);

    /* check whether working directory has changed.  if not,
     * return so that the caller can attempt to write the cancelled
     * file.  otherwise, just exit without writing since we'd be
     * writing it in the wrong location.
     */
    origcwd = dstring_sprintf("%s/%s", s->cwd, s->request_id);
    newcwd = getcwd(NULL, MAXPATHLEN);

    /* either string may be NULL; in that case we cannot prove we are
     * still in the request directory, so take the exit path
     */
    still_in_request_dir = origcwd && newcwd && (strcmp(newcwd, origcwd) == 0);

    free(origcwd);
    free(newcwd);

    if(still_in_request_dir)
      return -1;

    exit(-1);
  }

  if(gs_send_tag(s->sock, GS_PROT_OK) < 0) {
    ERRPRINTF("Error sending tag.\n");
    return -1;
  }



#ifdef GS_SMART_GRIDSOLVE
  pid_t pid;
    if(s->problem->has_smart_arg_comm==1){
      if(gs_smart_send_output_args_to_client(s->sock ,s->problem, s->my_dsig)<0){
        ERRPRINTF("SMART : Error sending smart sending arguments\n");
        return -1;
      }
      pid=fork();
      if(pid==-1){
        ERRPRINTF("SMART: Out of memory could not fork\n");
        return -1;
      }

      if(pid==0){ 
       /* this is the child.  it must never return into the caller's
        * code path, otherwise both processes would go on to finish
        * the same request.
        */
       gs_server_t * src_server = (gs_server_t *)calloc(1,sizeof(gs_server_t));
       if(src_server && (gs_service_read_coeff(s, src_server) < 0)) {
         free(src_server);
         src_server = NULL;
       }

        if(gs_smart_send_output_args_remotely(s->sock ,src_server, s->problem, s->my_dsig)<0){
          ERRPRINTF("SMART : Error sending smart sending arguments\n");
          _exit(GS_SVC_ERR_IO);
        }
        _exit(0);
      }
    }
    else{
      if(gs_send_output_args(s->sock, s->problem, s->my_dsig) < 0) {
        ERRPRINTF("Error sending output args.\n");
        return -1;
      }
    }
#else
  if(gs_send_output_args(s->sock, s->problem, s->my_dsig) < 0) {
    ERRPRINTF("Error sending output args.\n");
    return -1;
  }
#endif



  if(gs_notify_agent_problem_complete(s->agent, s->agentport, s->problem_name,
       s->srv_cid, s->cli_username, s->cli_hostname, s->cli_cid, s->request_id,
       s->agent_taskid, service_et) < 0)
    ERRPRINTF("Warning: failed sending problem solve notification.\n");

  if(gs_create_timestamp_file(".", "retrieved", 0.0))
    ERRPRINTF("Warning: failed to create 'retrieved' file.\n");

  return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int gs_service_nonblocking_request ( gs_service_info_t *  s  ) 

Services non-blocking requests.

Parameters:
s - pointer to service info struct
Returns:
0 on success, -1 on failure.

Definition at line 618 of file service_template.c.

{
  /* Overview: the service is run in a forked child.  The child writes
   * its results into the request directory ("problem.xml", "data" and
   * the "done" timestamp file) and reports the outcome through its
   * exit status.  The parent waits for the child, turns a non-zero
   * exit status into s->err, then decrements the job count and
   * notifies the agent.
   *
   * Returns 0 on success, -1 on failure (with s->err set).
   */
  char *problemstr = NULL;
  FILE *xmlfile;
  double service_et;
  pid_t pid;
  int fd;

  gs_service_pid = 0;

  /* make sure SIGCHLD is caught so that it is not delivered to 
   * the mfork library.  I tried ignoring it (SIG_IGN) but then
   * waitpid() failed on some systems.
   */

  if((gs_signal(SIGCHLD, gs_dummy_signal_handler) == SIG_ERR) ||
     (gs_signal(SIGTERM, gs_service_sigterm_handler) == SIG_ERR)) {
    ERRPRINTF("Error: could not set signal handlers\n");
    s->err = GS_SVC_ERR_SIGNALS;
    return -1;
  }

  /* fork a child process to execute the service */

  pid = fork();

  if(pid == -1) {
    ERRPRINTF("Failed to fork\n");
    s->err = GS_SVC_ERR_FORK;
    return -1;
  }

  if(pid == 0) {
    /* this is the child.  execute the service and save the results. */

    /* make stdout and stderr unbuffered in the child */
    setbuf(stdout, NULL);
    setbuf(stderr, NULL);

    service_et = gs_pm_problem_service(s);

    /* go back to the request directory before writing any result file */
    if((chdir(s->cwd) < 0) || (chdir(s->request_id) < 0)) {
      ERRPRINTF("Could not cd back to request directory '%s/%s'.\n", 
        s->cwd, s->request_id);
      _exit(GS_SVC_ERR_CHDIR);
    }

    /* save the encoded problem description */
    xmlfile = fopen("problem.xml", "w");

    if(!xmlfile) {
      ERRPRINTF("Could not create xml file.\n");
      _exit(GS_SVC_ERR_CREATE_XML);
    }

    if(gs_encode_problem(&problemstr, s->problem) < 0) {
      ERRPRINTF("Could not encode problem.\n");
      _exit(GS_SVC_ERR_PROBLEM_ENC);
    }

    fprintf(xmlfile, "%s\n", problemstr);

    fclose(xmlfile);

    /* save the output arguments.
     * NOTE(review): the file is opened without O_TRUNC, so an existing
     * longer "data" file would keep its tail -- confirm that the
     * request directory never already contains one.
     */
    fd = open("data", O_WRONLY | O_CREAT, 0600);

    if(fd < 0) {
      ERRPRINTF("Could not create data file.\n");
      _exit(GS_SVC_ERR_CREAT_DATA_FILE);
    }


#ifdef GS_SMART_GRIDSOLVE
  /* with smart argument communication the server description is read
   * first (src_server is NULL if that fails) and passed to the smart
   * save routine; otherwise the plain save routine is used.
   */
  if(s->problem->has_smart_arg_comm==1){
   gs_server_t * src_server = (gs_server_t *)calloc(1,sizeof(gs_server_t));
   if(gs_service_read_coeff(s, src_server) < 0) {
     free(src_server);
     src_server = NULL;
    }
    if(gs_smart_save_output_args_to_file(s->sock, src_server, fd, s->problem, s->my_dsig) < 0) {
      ERRPRINTF("Error sending output args.\n");
      _exit(GS_SVC_ERR_IO);
    }
    
  }
  else{
    if(gs_save_output_args_to_file(fd, s->problem, s->my_dsig) < 0) {
      ERRPRINTF("Error sending output args.\n");
      _exit(GS_SVC_ERR_IO);
    }
  }

#else
    if(gs_save_output_args_to_file(fd, s->problem, s->my_dsig) < 0) {
      ERRPRINTF("Error sending output args.\n");
      _exit(GS_SVC_ERR_IO);
    }

#endif

    close(fd);

    /* the "done" file carries the elapsed time; the parent reads it
     * back with gs_read_service_et() below
     */
    if(gs_create_timestamp_file(".", "done", service_et) < 0) {
      ERRPRINTF("Could not create completion file.\n");
      _exit(GS_SVC_ERR_COMPLETION_FILE);
    }

    _exit(0);
  }
  else {
    pid_t child;
    int cstat_loc, status;

    /* recorded so the SIGTERM handler can forward the signal */
    gs_service_pid = pid;

    /* this is the parent.  wait for the child (service) to complete
     * and check its status to determine if it was successful or not.
     */

    child = waitpid(pid, &cstat_loc, 0);

    if(child < 0) {
      ERRPRINTF("Error waiting for service process %d.\n", (int)pid);
      s->err = GS_SVC_ERR_WAITPID;
      return -1;
    }

    if(WIFEXITED(cstat_loc) == 0) {
      ERRPRINTF("service process %d did not terminate.\n", (int)pid);
      s->err = GS_SVC_ERR_ABNORMAL_EXIT;
      return -1;
    }

    status = WEXITSTATUS(cstat_loc);

    if(status != 0) {
      ERRPRINTF("service process %d terminated abnormally (status %d).\n",
        (int)pid, (char)status);
      /* the child exits with an error code; anything that does not
       * read back as a positive char is reported as unspecified
       */
      s->err = (char)status > 0 ? (char)status : GS_SVC_ERR_UNSPECIFIED;
      return -1;
    }

    service_et = gs_read_service_et("done");

    /* the remaining steps are best effort: failures are only logged */
    if(gs_decrement_job_count(s->srv_job_count) < 0)
      ERRPRINTF("Warning: failed to decrement job count.\n");

    if(gs_notify_agent_problem_complete(s->agent, s->agentport, s->problem_name,
         s->srv_cid, s->cli_username, s->cli_hostname, s->cli_cid, s->request_id,
         s->agent_taskid, service_et) < 0)
      ERRPRINTF("Warning: failed sending problem solve notification.\n");
  }

  return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int gs_service_read_coeff ( gs_service_info_t *  s,
gs_server_t *  server 
)

Initializes server struct and reads coefficient string (if present) for the service being invoked.

Parameters:
s -- service info struct
server -- server struct (filled out upon return)
Returns:
0 on success, -1 on failure.

Definition at line 438 of file service_template.c.

{
  /* Fills in the server struct from the server XML file, stores the
   * current workload, and sets server->perf_expr to the contents of
   * the service's coefficient file (with one trailing newline
   * removed).  When the model file cannot be opened or locked, or the
   * coefficient file cannot be read, perf_expr is set to a copy of
   * GS_NO_MODEL_UPDATE instead and the call still succeeds.
   *
   * Returns 0 on success; -1 (with s->err set) when a filename could
   * not be built or the server XML file could not be read.
   */
  char *server_xml = NULL, *service_coeff = NULL, *service_model = NULL;
  int mfd, rc = -1;

  server_xml = dstring_sprintf(GS_SERVER_XML_TEMPLATE, s->infodir);

  if(!server_xml) {
    s->err = GS_SVC_ERR_MALLOC;
    goto cleanup;
  }

  if(gs_read_server_from_file(server_xml, server) < 0) {
    s->err = GS_SVC_ERR_MISSING_SV_XML;
    goto cleanup;
  }

  server->workload = gs_get_workload();

  service_model = dstring_sprintf("%s/%s.mdl", s->infodir, s->problem_name);

  if(!service_model) {
    s->err = GS_SVC_ERR_MALLOC;
    goto cleanup;
  }

  service_coeff = dstring_sprintf("%s/%s.coe", s->infodir, s->problem_name);

  if(!service_coeff) {
    s->err = GS_SVC_ERR_MALLOC;
    goto cleanup;
  }

  /* nothing below is treated as an error: a missing or unreadable
   * model only means there is no expression to report
   */
  rc = 0;

  mfd = open(service_model, O_RDONLY, 0600);

  if(mfd < 0) {
    server->perf_expr = strdup(GS_NO_MODEL_UPDATE);
    goto cleanup;
  }

  /* note we're obtaining a lock on the model file, not the
   * coefficient file.  the model file is the one that will
   * be locked by the service processes when both files are
   * updated.
   */

  if(gs_lock_fd(mfd, F_RDLCK) < 0) {
    close(mfd);
    server->perf_expr = strdup(GS_NO_MODEL_UPDATE);
    goto cleanup;
  }

  if(gs_get_contents_of_file(service_coeff, &(server->perf_expr)) < 0) {
    ERRPRINTF("Warning: failed to read coefficient file '%s'\n", service_coeff);
    server->perf_expr = strdup(GS_NO_MODEL_UPDATE);
  }
  else if(server->perf_expr) {
    /* strip one trailing newline.  the length check matters: for an
     * empty string, indexing with strlen()-1 would read out of bounds.
     */
    size_t len = strlen(server->perf_expr);

    if((len > 0) && (server->perf_expr[len-1] == '\n'))
      server->perf_expr[len-1] = 0;
  }

  gs_unlock_fd(mfd);
  close(mfd);

cleanup:
  free(server_xml);
  free(service_model);
  free(service_coeff);

  return rc;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void gs_service_sigterm_handler ( int  sig  ) 

Signal handler for the SIGTERM delivered when the service is cancelled. Here we want to kill the real service process.

Parameters:
sig -- the signal caught

Definition at line 397 of file service_template.c.

{
  /* No child pid has been recorded yet, so there is nothing to signal. */
  if(gs_service_pid <= 0)
    return;

  /* Relay the signal we caught to the recorded service process. */
  if(kill(gs_service_pid, sig) < 0)
    ERRPRINTF("Failed to kill service process [pid = %d]\n", gs_service_pid);
}

Here is the caller graph for this function:

int gs_update_perf_model ( gs_service_info_t *  s,
char *  model_fname,
char *  coef_fname,
double  elapsed_time 
)

Updates the performance model with the execution time from the completed run.

Parameters:
s - pointer to service info struct
model_fname - filename of the performance model for this service
coef_fname - filename of the coefficient file that is rewritten from the updated model
elapsed_time - the elapsed time for the just completed run
Returns:
0 on success, -1 on failure.

Definition at line 1202 of file service_template.c.

{
  /* Adds the timing of the run that just finished to the service's
   * performance model and rewrites both model files.
   *
   * s            -- service info struct (s->problem is read)
   * model_fname  -- model file; created if it does not exist yet and
   *                 kept write-locked for the whole update
   * coef_fname   -- coefficient file, rewritten from the updated model
   * elapsed_time -- elapsed time of the completed run
   *
   * Returns 0 on success (including when the problem has no
   * COMPLEXITY_MODEL entry, in which case nothing is done), -1 on
   * failure.  Every exit taken after the model file has been opened
   * goes through the cleanup label so the model, the string copy, the
   * symbol table, the lock and the descriptor are always released.
   */
  int i, new_model, num_expr, fd, rc = -1;
  char *comp_model, *cm_copy = NULL, *tok;
  const char *cp;
  gs_arg_enum_t *arg_enum = NULL;
  gs_argument_t *argptr;
  gs_pm_model_t *model = NULL;
  struct stat stbuf;
  icl_hash_t *symtab = NULL;
  FILE *coef_file;
  double j;

  comp_model = gs_problem_getinfo(s->problem, "COMPLEXITY_MODEL", NULL);

  if(!comp_model)
    return 0;

  /* must be checked before the open below, which creates the file */
  new_model = stat(model_fname, &stbuf) < 0;

  if((fd = gs_open_locked_file(model_fname, F_WRLCK, O_RDWR | O_CREAT)) < 0) {
    ERRPRINTF("Warning: failed to open perf model file '%s'.\n", model_fname);
    return -1;
  }

  if(new_model) {
    int num_categories = 0;

    /* model does not exist yet, so create one now */

    /* one expression per ';'-separated piece */
    num_expr = 1;
    for(cp=comp_model; *cp; cp++)
      if(*cp == ';')
        num_expr++;

    /* one category per enum argument */
    for(argptr=s->problem->arglist; argptr != NULL; argptr=argptr->next)
      if(argptr->arg_enum)
        num_categories++;

    model = gs_pm_init_model(num_categories, num_expr, GS_PM_MAX_RUNS);
  }
  else {
    /* model already exists, so load from disk */

    model = gs_pm_load(fd);
  }

  if(!model) {
    ERRPRINTF("Failed to intialize model\n");
    goto cleanup;
  }

  if(gs_construct_scalar_hashtable(&symtab, s->problem, GS_IN) < 0) {
    ERRPRINTF("Failed to construct hash table for scalars\n");
    symtab = NULL;  /* nothing to destroy after a failed construction */
    goto cleanup;
  }

  /* dup since strtok will clobber original */
  cm_copy = strdup(comp_model);

  if(!cm_copy) {
    ERRPRINTF("strdup failed\n");
    goto cleanup;
  }

  /* evaluate each expression into the matching model parameter.  a
   * model loaded from disk may have fewer parameters than there are
   * expressions, so never write past nb_params.
   */
  for(i=0, tok=strtok(cm_copy, ";"); tok; tok=strtok(NULL, ";"), i++) {
    if(i >= model->nb_params) {
      ERRPRINTF("Warning: complexity model has more expressions than the model has parameters\n");
      break;
    }

    if(gs_expr_d(tok, &(model->params[i]), symtab) < 0)
      ERRPRINTF("Warning: failed to evaluate model expression '%s'\n", tok);
  }

  /* for every enum argument, store the position of the first matching
   * enum value as that argument's category
   */
  i = 0;
  for(argptr=s->problem->arglist; argptr != NULL; argptr=argptr->next) {

    j = 0.0;

    if(argptr->arg_enum) {
      int found_enum_match = 0;

      if(i >= model->nb_categories) {
        ERRPRINTF("Problem has more enum arguments than the model has categories\n");
        goto cleanup;
      }

      for(arg_enum=argptr->arg_enum; arg_enum != NULL; arg_enum=arg_enum->next) {
        /* "other" matches anything; char arguments are compared on
         * their first character, everything else numerically
         */
        if((strcmp(arg_enum->val, "other") == 0) ||
           ((argptr->datatype == GS_CHAR) && !strncmp(argptr->data, arg_enum->val, 1)) ||
           ((argptr->datatype != GS_CHAR) && (argptr->expr_val == atof(arg_enum->val))))
        {
          found_enum_match = 1;
          model->categories[i] = j;
          break;
        }

        j += 1.0;
      }

      if(!found_enum_match) {
        ERRPRINTF("No match in model for arg %s\n", argptr->name);
        goto cleanup;
      }

      i++;
    }
  }

  gs_pm_store_timing(elapsed_time, model);

  /* rewind so the model is written from the start of the file */
  lseek(fd, 0, SEEK_SET);

  /* write model to disk.. */
  if(gs_pm_save(model, fd) < 0) {
    ERRPRINTF("Failed to save model to disk\n");
    goto cleanup;
  }

  /* best effort: if the coefficient file can't be opened it is simply
   * not regenerated
   */
  coef_file = fopen(coef_fname, "w");

  if(coef_file) {
    gs_generate_pm_expr(model, comp_model, s->problem, coef_file);
    fclose(coef_file);
  }

  rc = 0;

cleanup:
  free(cm_copy);

  if(model)
    gs_pm_free_model(model);

  if(symtab)
    icl_hash_destroy(symtab, NULL, NULL);

  gs_unlock_fd(fd);
  close(fd);

  return rc;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int gs_wait_for_batch_job_completion ( gs_service_info_t *  s  ) 

Waits for a previous batch job to complete.

Definition at line 854 of file service_template.c.

{
  /* Reads the job id from the first line of "gs_batch_id" in the
   * request directory and runs the problem's gs_probe script with that
   * id every 5 seconds until the script exits with a non-zero status.
   *
   * Returns 0 once the probe reports a non-zero exit status, -1 if the
   * request directory, the id file or the probe command could not be
   * used.
   */
  char buf[256], *cmd;
  int status, rc = 0;
  FILE *f;

  /* just in case the submit script changed the current directory,
   * change it back to the request dir.
   */
  if((chdir(s->cwd) < 0) || (chdir(s->request_id) < 0)) {
    ERRPRINTF("can't cd back to %s/%s\n", s->cwd, s->request_id);
    return -1;
  }

  if((f = fopen("gs_batch_id", "r")) == NULL) {
    ERRPRINTF("failed to open file gs_batch_id\n");
    return -1;
  }

  if(!fgets(buf, sizeof buf, f)) {
    ERRPRINTF("failed to read ID from file gs_batch_id\n");
    fclose(f);
    return -1;
  }

  fclose(f);

  /* cut the line at its newline, if there is one.  blindly dropping
   * the last character would damage an id that was written without a
   * trailing newline.
   */
  buf[strcspn(buf, "\n")] = '\0';

  cmd = dstring_sprintf("%s/service/%s/gs_probe %s", s->gridsolve_root, s->problem_name, buf);

  if(!cmd) {
    ERRPRINTF("malloc");
    return -1;
  }
 
  for(;;) {
    status = system(cmd);

    if(status < 0) {
      ERRPRINTF("command failed: '%s'\n", cmd);
      rc = -1;
      break;
    }

    /* a non-zero exit status from the probe ends the wait */
    if(WEXITSTATUS(status) != 0)
      break;

    sleep(5);
  }

  free(cmd);

  return rc;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int service_template ( int  argc,
char *  argv[] 
)

This is a generic "service template" which forms the basis for the executable service that is forked/execed by the server.

When processing/compiling an IDL file, the compiler will link to this service_template. A main routine will then call this service_template, passing all of its arguments.

For a specific problem P, the server process will call the appropriate service program P_service with the appropriate command line arguments (as described below). The P_service program will call service_template passing all its arguments. This service_template routine will talk to the client to get the input arguments, call the actual service wrapper, and send the output arguments back to the client.

This routine expects the following arguments to be sent from the server.

argv[0] -- name of service executable.
argv[1] -- name of the problem to be solved.
argv[2] -- tag: either GS_PROT_PROBLEM_SOLVE_ASSIGNED or GS_PROT_PROBLEM_SOLVE, depending on whether this is an assigned server request or not. This is currently just the same tag as the client sends to the server.
argv[3] -- client data signature (as sent by client to server).
argv[4] -- sock: the socket descriptor already opened between the client and server.
argv[5] -- gridsolve_root: path to root of GridSolve
argv[6] -- gridsolve_arch: architecture string
argv[7] -- blocking: either "0" (non-blocking) or "1" (blocking).
argv[8] -- hostname of agent
argv[9] -- agent port
argv[10] -- server component ID (in printable string form)
argv[11] -- client user name
argv[12] -- client host name
argv[13] -- client component ID (in printable string form)
argv[14] -- server info dir (where timing info will be stored)
argv[15] -- task id assigned by the agent (-1 for assigned server requests)
argv[16] -- agent's estimated execution time for this job

Definition at line 137 of file service_template.c.

{
  /*
   * Generic driver for a single service request.
   *
   * Parses the 17 command-line arguments handed over by the server,
   * loads the problem description from
   * "<gridsolve_root>/service/<problem>/<problem>.xml", creates and
   * enters a per-request working directory, performs the initial
   * handshake with the client on the inherited socket, receives the
   * input arguments, notifies the agent, and finally dispatches to the
   * batch, blocking or non-blocking request handler.
   *
   * This function does not return: it calls exit(0) after a successful
   * request and exit(-1) on every failure.  Failures detected before the
   * GS_PROT_OK handshake send a GS_SVC_ERR_* tag to the client; later
   * failures go through service_abnormal_exit, which leaves "error" /
   * "cancelled" marker files in the request directory.
   */
  gs_service_info_t sinfo;
  gs_server_t *server;
  char *service_xml;
  double est_time;
  char *cwd;
  int id_len;

  /* Start from a zeroed descriptor.  The abnormal-exit path reads
   * sinfo.bmode, which is otherwise not assigned until after the input
   * arguments have been received, so an early 'goto' would test an
   * indeterminate value.
   */
  memset(&sinfo, 0, sizeof(sinfo));

  sinfo.err = GS_SVC_ERR_UNSPECIFIED;

  if(argc != 17) {
    fprintf(stderr, "Bad usage.  Anyway, don't use this\n");
    fprintf(stderr, "from the command line.\n");
    exit(-1);
  }

  sinfo.problem_name = strdup(argv[1]);
  sinfo.tag = atoi(argv[2]);
  sinfo.client_dsig = atoi(argv[3]);
  sinfo.sock = atoi(argv[4]);
  sinfo.gridsolve_root = argv[5];
  sinfo.gridsolve_arch = argv[6];
  sinfo.blocking = atoi(argv[7]);
  sinfo.agent = strdup(argv[8]);
  sinfo.agentport = atoi(argv[9]);
  sinfo.srv_cid = strdup(argv[10]);
  sinfo.cli_username = strdup(argv[11]);
  sinfo.cli_hostname = strdup(argv[12]);
  sinfo.cli_cid = strdup(argv[13]);
  sinfo.infodir = strdup(argv[14]);
  sinfo.agent_taskid = atoi(argv[15]);
  sinfo.agent_est_time = atof(argv[16]);

  /* every duplicated string is dereferenced later on; treat a failed
   * strdup() like any other allocation failure in this routine.
   */
  if(!sinfo.problem_name || !sinfo.agent || !sinfo.srv_cid ||
     !sinfo.cli_username || !sinfo.cli_hostname || !sinfo.cli_cid ||
     !sinfo.infodir) {
    gs_send_tag(sinfo.sock, GS_SVC_ERR_MALLOC);
    exit(-1);
  }

  server = (gs_server_t *) malloc(sizeof(gs_server_t));

  if(!server) {
    gs_send_tag(sinfo.sock, GS_SVC_ERR_MALLOC);
    exit(-1);
  }

  /* without coefficients there is no server record to score against;
   * a NULL 'server' selects the fixed fallback estimate further down.
   */
  if(gs_service_read_coeff(&sinfo, server) < 0) {
    free(server);
    server = NULL;
  }

  service_xml = dstring_sprintf("%s/service/%s/%s.xml", sinfo.gridsolve_root,
                                sinfo.problem_name, sinfo.problem_name);

  if(!service_xml) {
    gs_send_tag(sinfo.sock, GS_SVC_ERR_MALLOC);
    exit(-1);
  }

  sinfo.problem = (gs_problem_t *) malloc(sizeof(gs_problem_t));

  if(!sinfo.problem) {
    gs_send_tag(sinfo.sock, GS_SVC_ERR_MALLOC);
    exit(-1);
  }

  /* job count file is keyed by the parent (server) process id */
  snprintf(sinfo.srv_job_count, FN_LEN, "%s/%s.%d", sinfo.infodir,
    GS_SERVER_JOB_COUNT_FILE_PREFIX, getppid());

  /* Look for the service corresponding to the requested problem */
  if(gs_read_problem_from_file(service_xml, sinfo.problem) < 0) {
    ERRPRINTF("Error loading service: '%s'.\n", service_xml);
    gs_send_tag(sinfo.sock, GS_SVC_ERR_MISSING_XML);
    exit(-1);
  }

  sinfo.my_dsig = pvmgetdsig();

  cwd = CALLOC(MAXPATHLEN, sizeof(char));

  if(cwd == NULL) {
    gs_send_tag(sinfo.sock, GS_SVC_ERR_MALLOC);
    exit(-1);
  }

  /* a failed getcwd() leaves sinfo.cwd NULL so that the client is told
   * about it below instead of the process exiting silently.
   */
  sinfo.cwd = (getcwd(cwd, MAXPATHLEN) != NULL) ? strdup(cwd) : NULL;
  FREE(cwd);

  if(!sinfo.cwd) {
    ERRPRINTF("Can't get current working directory.\n");
    gs_send_tag(sinfo.sock, GS_SVC_ERR_GETCWD);
    exit(-1);
  }

  /* The server component ID comes from the command line, so bound the
   * write and refuse an ID that would not fit rather than overflowing
   * the buffer or truncating the template's XXXXXXXXXXXX suffix.
   * NOTE(review): relies on request_id being a char array member of
   * gs_service_info_t -- confirm against the struct definition.
   */
  id_len = snprintf(sinfo.request_id, sizeof(sinfo.request_id),
                    REQUEST_ID_TEMPLATE, sinfo.srv_cid, (int) getpid());

  if((id_len < 0) || ((size_t) id_len >= sizeof(sinfo.request_id))) {
    ERRPRINTF("Request id does not fit in %d bytes.\n",
              (int) sizeof(sinfo.request_id));
    gs_send_tag(sinfo.sock, GS_SVC_ERR_REQID);
    exit(-1);
  }

  if(gs_create_request_id(sinfo.request_id) < 0) {
    ERRPRINTF("Error creating request id.\n");
    gs_send_tag(sinfo.sock, GS_SVC_ERR_REQID);
    exit(-1);
  }

  if(mkdir(sinfo.request_id, 0700) < 0) {
    ERRPRINTF("Could not create directory '%s' ", sinfo.request_id);
    ERRPRINTF("to store output (cwd = '%s')\n", sinfo.cwd);
    gs_send_tag(sinfo.sock, GS_SVC_ERR_MKDIR);
    exit(-1);
  }

  if(chdir(sinfo.request_id) < 0) {
    ERRPRINTF("Could not cd to request directory '%s'.\n", sinfo.request_id);
    gs_send_tag(sinfo.sock, GS_SVC_ERR_CHDIR);
    exit(-1);
  }

  /* From here on the process sits inside the request directory, so
   * failures go through service_abnormal_exit to leave marker files.
   */
  if(gs_increment_job_count(sinfo.srv_job_count) < 0)
    ERRPRINTF("Warning: failed to increment job count.\n");

  if(gs_send_tag(sinfo.sock, GS_PROT_OK) < 0) {
    ERRPRINTF("Error sending GS_PROT_OK.\n");
    goto service_abnormal_exit;
  }

  if(gs_send_string(sinfo.sock, sinfo.request_id) < 0) {
    ERRPRINTF("Error sending request id.\n");
    goto service_abnormal_exit;
  }

  /* now, if this is an assigned server request, send the problem
     description back to the client. */

  if(sinfo.tag == GS_PROT_PROBLEM_SOLVE_ASSIGNED) {
    char *problemstring = NULL;
    char dsig_string[256];

    snprintf(dsig_string, sizeof(dsig_string), "%d", sinfo.my_dsig);

    if(gs_send_string(sinfo.sock, dsig_string) < 0) {
      ERRPRINTF("Error sending server data signature.\n");
      goto service_abnormal_exit;
    }

    if(gs_encode_problem(&problemstring, sinfo.problem) < 0) {
      ERRPRINTF("Error encoding problem description.\n");
      goto service_abnormal_exit;
    }

    if(gs_send_string(sinfo.sock, problemstring) < 0) {
      ERRPRINTF("Error sending problem description.\n");
      goto service_abnormal_exit;
    }
  }

#ifdef GS_SMART_GRIDSOLVE
  /* the client first says whether this call uses smart argument
   * communication; that decides which receive path is taken.
   */
  if(gs_recv_int(sinfo.sock, &sinfo.problem->has_smart_arg_comm) < 0) {
    ERRPRINTF("Error receiving smart arg comm flag.\n");
    goto service_abnormal_exit;
  }

  if(sinfo.problem->has_smart_arg_comm==1){
    if(gs_smart_recv_map_info(sinfo.sock, sinfo.problem)<0){
      ERRPRINTF("Error receiving remote comm info.\n");
      goto service_abnormal_exit;
    }

    if(gs_smart_recv_input_args(sinfo.sock, server, sinfo.problem, sinfo.client_dsig, sinfo.my_dsig)<0){
      ERRPRINTF("SMART: Error receiving smart input args.\n");
      goto service_abnormal_exit;
    }
  }
  else{
    if(gs_recv_input_args(sinfo.sock, sinfo.problem, sinfo.client_dsig, sinfo.my_dsig) < 0) {
      ERRPRINTF("Error receiving input args.\n");
      goto service_abnormal_exit;
    }
  }
#else
  if(gs_recv_input_args(sinfo.sock, sinfo.problem, sinfo.client_dsig, sinfo.my_dsig) < 0) {
    ERRPRINTF("Error receiving input args.\n");
    goto service_abnormal_exit;
  }
#endif

  /* fixed fallback estimate when no server record could be loaded */
  if(server)
    est_time = gs_agent_get_server_score(sinfo.problem, server);
  else
    est_time = 2000.0;

  /* notification is best effort: a failure is only logged */
  if(gs_notify_agent_problem_solve(sinfo.agent, sinfo.agentport,
       sinfo.problem_name, est_time, sinfo.srv_cid, sinfo.cli_username,
       sinfo.cli_hostname, sinfo.cli_cid, sinfo.request_id,
       sinfo.agent_taskid, sinfo.agent_est_time) < 0)
    ERRPRINTF("Warning: failed sending problem solve notification.\n");

  sinfo.bmode = gs_problem_getinfo(sinfo.problem, "BATCH_SUBMIT", NULL);

  /* batch mode takes precedence over the blocking flag */
  if(sinfo.bmode) {
    if(gs_service_batch_request(&sinfo) < 0) {
      gs_send_tag(sinfo.sock, sinfo.err);
      goto service_abnormal_exit;
    }
  }
  else if(sinfo.blocking) {
    if(gs_service_blocking_request(&sinfo) < 0)
      goto service_abnormal_exit;
  }
  else {
    if(gs_service_nonblocking_request(&sinfo) < 0)
      goto service_abnormal_exit;
  }

  gs_close_socket(sinfo.sock);
  exit(0);

service_abnormal_exit:
  /* for non-blocking requests, create a file whose name contains
   * the error code so we'll remember why the service failed when
   * the client connects back to wait for it to complete.
   */
  if(!sinfo.blocking || sinfo.bmode) {
    if(gs_decrement_job_count(sinfo.srv_job_count) < 0)
      ERRPRINTF("Warning: failed to decrement job count.\n");

    if(gs_create_error_file(".", sinfo.err) < 0)
      ERRPRINTF("Could not create 'error' file.\n");
  }

  /* if something goes wrong, write a "cancelled" file to the
   * request subdirectory so that it'll get cleaned up.
   */
  if(gs_create_timestamp_file(".", "cancelled", 0.0) < 0)
    ERRPRINTF("Could not create 'cancelled' file.\n");
  ERRPRINTF("Service terminating abnormally\n");
  exit(-1);
}

Here is the call graph for this function:


Variable Documentation

pid_t gs_service_pid = 0

Definition at line 90 of file service_template.c.