Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

rktimes.c

Go to the documentation of this file.
00001 /*
00002  * rktimes.c
00003  *
00004  * Copyright (c) 2003, 2004 The University of Utah and the Flux Group.
00005  * All rights reserved.
00006  *
00007  * This file is licensed under the terms of the GNU Public License.  
00008  * See the file "license.terms" for restrictions on redistribution 
00009  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
00010  */
00011 
00012 /**
00013  * @file rktimes.c
00014  *
00015  * Main file for a sampling time(1)-like command.  Unlike time(1) though, it
00016  * will not wait until the end of a run to report the CPU usage, it
00017  * continuously reports the usage in a form fit for consumption by gnuplot.
00018  * In addition, it can act as a server for a GKrellm client so the data can be
00019  * viewed instantaneously in a nice GUI.
00020  */
00021 
00022 #include "config.h"
00023 
00024 #include <errno.h>
00025 #include <stdio.h>
00026 #include <string.h>
00027 #include <stdlib.h>
00028 #include <signal.h>
00029 #include <unistd.h>
00030 #include <fcntl.h>
00031 
00032 #include <sys/time.h>
00033 #include <sys/types.h>
00034 #include <sys/times.h>
00035 #include <sys/wait.h>
00036 #include <sys/socket.h>
00037 #include <sys/resource.h>
00038 
00039 #include <netinet/in.h>
00040 
00041 #include <assert_pp.h>
00042 #include <time_util.h>
00043 
00044 #include "gkemu.h"
00045 #include "childProcess.h"
00046 
00047 #include <rk/rk.h>
00048 #include <rk/rk_error.h>
00049 
00050 #include "rk_util.h"
00051 
/**
 * Number of times per second to record CPU usage samples.
 */
#define SAMPLES_PER_SECOND 1

/**
 * Maximum number of gkrellm clients that can be connected at once.
 */
#define MAX_CLIENTS 8

/**
 * Size, in bytes, of the buffer used to format gkrellm output.
 */
#define CLIENT_BUFFER_MAX 4096

#if !defined(__STRING)
/**
 * Turn the literal macro argument into a string without expanding it.  Some
 * C libraries supply this through <sys/cdefs.h>; this fallback keeps the file
 * building on the ones that do not.
 *
 * @param x The token sequence to stringify.
 */
#define __STRING(x) #x
#endif

#if !defined(__XSTRING)
/**
 * Convert a macro argument to a string after expanding it.
 *
 * @param x The macro to expand to a string.
 */
#define __XSTRING(x) __STRING(x)
#endif

/**
 * A version of perror(3) that prints out the file and line number.
 *
 * @param x The prefix string for the error string.  It must be a string
 * literal since it is concatenated with the location at compile time.
 */
#define my_perror(x) perror(x ", file " __FILE__ ", line " __XSTRING(__LINE__))
00082 
/*
 * Bit positions for the rktimes_data.rkt_Flags field.  Each RKTB_ value is
 * the shift count used to build the matching RKTF_ mask below.
 */
enum {
    RKTB_DONE = 0,
    RKTB_CREATED_RESOURCE_SET,
};

/*
 * Bit masks for the rktimes_data.rkt_Flags field.
 *
 * RKTF_DONE - Stop monitoring the resource set/child process.
 * RKTF_CREATED_RESOURCE_SET - rktimes created the resource set for the child.
 */
enum {
    RKTF_DONE                 = 1L << RKTB_DONE,
    RKTF_CREATED_RESOURCE_SET = 1L << RKTB_CREATED_RESOURCE_SET,
};
00098 
/*
 * Bit positions for the entries of rktimes_data.rkt_ClientFlags.
 */
enum {
    RKTB_CLIENT_INITIALIZED = 0,
};

/*
 * Bit masks for the entries of rktimes_data.rkt_ClientFlags.
 *
 * RKTF_CLIENT_INITIALIZED - Indicates that the client has been sent an
 * "initial update".
 */
enum {
    RKTF_CLIENT_INITIALIZED = 1L << RKTB_CLIENT_INITIALIZED,
};
00112 
00113 /*
00114  * Global data for the tool.
00115  *
00116  * rkt_Flags - Holds the RKTF_ flags.
00117  * rkt_UtilityName - The name of the utility that is being monitored.
00118  * rkt_StartTime - The starting time for the child process.
00119  * rkt_OutputBase - The base name for output files.
00120  * rkt_Name - The requested name of the resource set.
00121  * rkt_ChildPeriod - The child's CPU reservation period or zero if a reserve
00122  * should not be created.
00123  * rkt_ChildCompute - The child's CPU reservation compute time.
00124  * rkt_ResourceSet - The child's resource set.
00125  * rkt_ChildPID - The pid_t of the child process.  This will be set to -1 when
00126  *   the child dies.
00127  * rkt_ServerSocket - The server socket listening for gkrellm clients.
00128  * rkt_ClientBuffer - Work buffer for sending data to gkrellm clients.
00129  * rkt_ClientSockets - A list of socket descriptors that correspond to gkrellm
00130  *   clients listening for updates.  Elements set to -1 are empty.
00131  */
00132 static struct {
00133     unsigned long rkt_Flags;
00134     const char *rkt_UtilityName;
00135     struct timeval rkt_StartTime;
00136     const char *rkt_OutputBase;
00137     const char *rkt_Name;
00138     unsigned int rkt_ChildPeriod;
00139     unsigned int rkt_ChildCompute;
00140     rk_resource_set_t rkt_ResourceSet;
00141     pid_t rkt_ChildPID;
00142     
00143     int rkt_ServerSocket;
00144     char rkt_ClientBuffer[CLIENT_BUFFER_MAX];
00145     int rkt_ClientSockets[MAX_CLIENTS];
00146     unsigned long rkt_ClientFlags[MAX_CLIENTS];
00147 } rktimes_data;
00148 
00149 /**
00150  * Write a byte array to a client's socket.  If they happen to fall behind or
00151  * some other error is encountered, the socket is closed immediately and erased
00152  * from the rktimes_data.rkt_ClientSockets array.
00153  *
00154  * @param index The client index in rktimes_data.rkt_ClientSockets.
00155  * @param buffer The buffer to write to the socket.
00156  * @param len The length of the buffer.
00157  */
00158 static void rktClientWrite(unsigned int index, const char *buffer, size_t len)
00159 {
00160     int fd;
00161 
00162     require(rktimes_data.rkt_ServerSocket != -1);
00163     require(index < MAX_CLIENTS);
00164     require(buffer != NULL);
00165 
00166     if( (fd = rktimes_data.rkt_ClientSockets[index]) >= 0 )
00167     {
00168         int rc;
00169 
00170         rc = write(fd, buffer, len);
00171         if( rc != len )
00172         {
00173             require((rc != -1) || (errno == EPIPE) || (errno == EWOULDBLOCK));
00174 
00175             /* No mercy! */
00176             close(fd);
00177             rktimes_data.rkt_ClientSockets[index] = -1;
00178             rktimes_data.rkt_ClientFlags[index] = 0;
00179         }
00180     }
00181 }
00182 
00183 /**
00184  * A signal handler that passes the received signal on to the child process.
00185  *
00186  * @param sig The actual signal number received.
00187  */
00188 static void sigpass(int sig)
00189 {
00190     require((sig == SIGINT) || (sig == SIGTERM));
00191     require(rktimes_data.rkt_ChildPID != 0);
00192 
00193     if( kill(rktimes_data.rkt_ChildPID, sig) == -1 )
00194     {
00195         my_perror("kill");
00196     }
00197 }
00198 
00199 /** Maximum number of processes to query in the resource set. */
00200 #define MAX_PROC_LIST 128
00201 
00202 /**
00203  * The SIGALRM signal handler.  This function will be called SAMPLES_PER_SECOND
00204  * times a second to read the CPU usage of the resource set's processes.  The
00205  * data is then output to a file and echoed to any gkrellm clients.
00206  *
00207  * @param sig The actual signal number received.
00208  *
00209  * @sa gkemu.h
00210  * @sa childProcess.h
00211  */
00212 static void sigalrm(int sig)
00213 {
00214     static unsigned long long total_compute, total_nice, total_deadline;
00215     static unsigned long long total_usage;
00216     static pid_t procs[MAX_PROC_LIST];
00217 
00218     unsigned long long inc_usage = 0, total_run, total_idle;
00219     struct cpu_reserve_attr cra;
00220     struct timeval curr, run;
00221     int lpc, len, count;
00222     rk_reserve_t cr;
00223 
00224     require(sig == SIGALRM);
00225 
00226     /* Get the current time and compute the process' total running time. */
00227     gettimeofday(&curr, NULL);
00228     timersub(&curr, &rktimes_data.rkt_StartTime, &run);
00229     total_run = (run.tv_sec * 1000000) + run.tv_usec;
00230 
00231     /* Get the list of processes in the resource set. */
00232     if( (count = rk_resource_set_get_proclist(rktimes_data.rkt_ResourceSet,
00233                                               procs,
00234                                               MAX_PROC_LIST)) == -1 )
00235     {
00236         my_perror("rk_resource_set_get_proclist");
00237         
00238         rktimes_data.rkt_Flags |= RKTF_DONE;
00239     }
00240 
00241     /* Walk through each process in the resource set and update its usage. */
00242     for( lpc = 0; lpc < count; lpc++ )
00243     {
00244         struct cpChildProcess *cp;
00245         
00246         if( (cp = cpFindChildProcess(procs[lpc])) == NULL )
00247         {
00248             /* The child is new, create an object for it. */
00249             if( (cp = cpCreateChildProcess(procs[lpc])) != NULL )
00250             {
00251                 cpOpenOutput(cp,
00252                              rktimes_data.rkt_OutputBase,
00253                              &rktimes_data.rkt_StartTime);
00254             }
00255         }
00256         if( cp != NULL )
00257         {
00258             inc_usage += cpSampleUsage(cp, &run);
00259         }
00260     }
00261 
00262     /* Add the usage since the last time this handler was called. */
00263     total_usage += inc_usage;
00264 
00265     /* Get the current CPU reservation status. */
00266     if( (cr = rk_resource_set_get_cpu_rsv(rktimes_data.rkt_ResourceSet)) !=
00267         NULL_RESERVE)
00268     {
00269         if( rk_cpu_reserve_get_attr(cr, &cra) != RK_ERROR )
00270         {
00271             unsigned long long rsv_period, rsv_compute, rsv_nice, rsv_deadline;
00272             double multiplier;
00273 
00274             /* Turn the raw reservation data into a form fit for gkrellm. */
00275             rsv_period = (cra.period.tv_sec * 1000000) +
00276                 (cra.period.tv_nsec / 1000);
00277             multiplier = (1000000.0 / rsv_period) /
00278                 (double)SAMPLES_PER_SECOND;
00279             rsv_compute = ((cra.compute_time.tv_sec * 1000000) +
00280                            (cra.compute_time.tv_nsec / 1000)) * multiplier;
00281             if( inc_usage > rsv_compute )
00282             {
00283                 rsv_nice = 0;
00284             }
00285             else
00286             {
00287                 rsv_nice = rsv_compute - inc_usage;
00288                 rsv_compute = inc_usage;
00289             }
00290             rsv_deadline = ((cra.deadline.tv_sec * 1000000) +
00291                             (cra.deadline.tv_nsec / 1000)) * multiplier;
00292 
00293             total_deadline += rsv_deadline - (rsv_compute + rsv_nice);
00294             total_compute += rsv_compute;
00295             total_nice += rsv_nice;
00296         }
00297         else
00298         {
00299             my_perror("rk_cpu_reserve_get_attr");
00300         }
00301     }
00302     
00303     total_idle = total_run - total_usage;
00304 
00305     /* Generate the new update text. */
00306     len = gkFormatUpdate(rktimes_data.rkt_ClientBuffer, CLIENT_BUFFER_MAX,
00307                          GKA_CPUUser, total_usage,
00308                          GKA_CPUIdle, total_idle,
00309                          GKA_CPUReserveUser, total_compute,
00310                          GKA_CPUReserveNice, total_nice,
00311                          GKA_CPUReserveIdle, total_deadline,
00312                          GKA_Processes, count,
00313                          GKA_ProcessesRunning, 0,
00314                          GKA_UpTime, &run,
00315                          GKA_TAG_DONE);
00316 
00317     /* Broadcast the update to the clients. */
00318     for( lpc = 0; lpc < MAX_CLIENTS; lpc++ )
00319     {
00320         if( rktimes_data.rkt_ClientSockets[lpc] != -1 )
00321         {
00322             if( !(rktimes_data.rkt_ClientFlags[lpc] &
00323                   RKTF_CLIENT_INITIALIZED) )
00324             {
00325                 rktClientWrite(lpc,
00326                                gkInitialUpdateOpen,
00327                                strlen(gkInitialUpdateOpen));
00328             }
00329             rktClientWrite(lpc, rktimes_data.rkt_ClientBuffer, len);
00330             if( !(rktimes_data.rkt_ClientFlags[lpc] &
00331                   RKTF_CLIENT_INITIALIZED) )
00332             {
00333                 rktClientWrite(lpc,
00334                                gkInitialUpdateClose,
00335                                strlen(gkInitialUpdateClose));
00336                 rktimes_data.rkt_ClientFlags[lpc] |= RKTF_CLIENT_INITIALIZED;
00337             }
00338         }
00339     }
00340 }
00341 
00342 /**
00343  * Handle a SIGCHLD signal.  This function will be called when the child exits.
00344  *
00345  * @param sig The actual signal number received.
00346  */
00347 static void sigchld(int sig)
00348 {
00349     require(sig == SIGCHLD);
00350     require(rktimes_data.rkt_ChildPID != 0);
00351     
00352     rktimes_data.rkt_Flags |= RKTF_DONE;
00353 }
00354 
00355 /**
00356  * Handle a SIGINT/SIGTERM signal when no utility is being monitored.
00357  *
00358  * @param sig The actual signal number received.
00359  */
00360 static void sigexit(int sig)
00361 {
00362     require((sig == SIGINT) || (sig == SIGTERM));
00363     
00364     rktimes_data.rkt_Flags |= RKTF_DONE;
00365 }
00366 
00367 /**
00368  * Handle a SIGIO signal.  This function will be called when the server socket
00369  * has a new connection waiting to be accept(2)'d.
00370  *
00371  * @param sig The actual signal number received.
00372  */
00373 static void sigio(int sig)
00374 {
00375     struct sockaddr saddr;
00376     socklen_t saddrlen;
00377     int fd;
00378 
00379     require(sig == SIGIO);
00380     require(rktimes_data.rkt_ServerSocket != -1);
00381 
00382     saddrlen = sizeof(saddr);
00383     /* Try to accept all of the clients waiting on the socket. */
00384     while( (fd = accept(rktimes_data.rkt_ServerSocket,
00385                         &saddr,
00386                         &saddrlen)) >= 0 )
00387     {
00388         int fl, lpc;
00389 
00390         /*
00391          * Set the client socket to be non-blocking, so we do not want to get
00392          * stuck waiting for someone.
00393          */
00394         fl = fcntl(fd, F_GETFL, 0);
00395         fcntl(fd, F_SETFL, fl | O_NONBLOCK);
00396         /* Search for an empty element in the list of client sockets. */
00397         for( lpc = 0;
00398              (lpc < MAX_CLIENTS) &&
00399                  (rktimes_data.rkt_ClientSockets[lpc] != -1);
00400              lpc++ );
00401         if( lpc < MAX_CLIENTS )
00402         {
00403             int len;
00404 
00405             rktimes_data.rkt_ClientSockets[lpc] = fd;
00406 
00407             /* Send the preamble. */
00408             len = gkFormatPreamble(
00409                 rktimes_data.rkt_ClientBuffer,
00410                 CLIENT_BUFFER_MAX,
00411                 GKA_HostName, rktimes_data.rkt_Name,
00412                 GKA_SystemName, rktimes_data.rkt_UtilityName,
00413                 GKA_TAG_DONE);
00414             rktClientWrite(lpc, rktimes_data.rkt_ClientBuffer, len);
00415         }
00416         else
00417         {
00418             /* Too many clients... */
00419             close(fd);
00420         }
00421     }
00422 }
00423 
00424 /**
00425  * Create a resource set and CPU reserve for ourself.  Otherwise, we might not
00426  * get CPU time to do the recording.
00427  *
00428  * @param name The name for the resource set.
00429  * @return A resource set for this process.
00430  */
00431 static rk_resource_set_t rktCreateSelfResourceSet(char *name)
00432 {
00433     rk_resource_set_t rs, retval = NULL_RESOURCE_SET;
00434 
00435     require(name != NULL);
00436     require(strlen(name) > 0);
00437     require(strlen(name) < RK_NAME_LEN);
00438     
00439     if( (rs = rk_resource_set_create(name)) != NULL_RESOURCE_SET )
00440     {
00441         struct cpu_reserve_attr cra;
00442         rk_reserve_t cr;
00443         
00444         memset(&cra, 0, sizeof(cra));
00445         /** @todo Come up with some better numbers for the reserve. */
00446         cra.compute_time.tv_sec = 0;
00447         cra.compute_time.tv_nsec = 5000000;
00448         cra.period.tv_sec = 0;
00449         cra.period.tv_nsec = 1000000000;
00450         cra.deadline = cra.period;
00451         cra.blocking_time = cra.start_time = (struct timespec){ 0, 0 };
00452         cra.reserve_type.sch_mode = RSV_SOFT;
00453         cra.reserve_type.enf_mode = RSV_SOFT;
00454         cra.reserve_type.rep_mode = RSV_SOFT;
00455         cra.processor = RK_ANY_CPU;
00456         if( rk_cpu_reserve_create(rs, &cr, &cra) != RK_ERROR )
00457         {
00458             /*
00459              * Note that we do not attach the process here since any children
00460              * we fork(2) off would inherit the resource set.
00461              */
00462             retval = rs;
00463         }
00464         else
00465         {
00466             my_perror("rk_cpu_reserve_create");
00467             rk_resource_set_destroy(rs);
00468         }
00469     }
00470     else
00471     {
00472         my_perror("rk_resource_set_create");
00473     }
00474     return( retval );
00475 }
00476 
00477 /**
00478  * Get a resource set for the child process.  If a resource set already exists
00479  * with the requested name, that will be used.  Otherwise, a new one will be
00480  * created.  If no name is requested, a new set will be created with the
00481  * default name.
00482  *
00483  * @param rs An existing resource set that should be used.
00484  * @param requested_name The requested resource set name or NULL if the default
00485  * should be used.
00486  * @param default_name The default resource set name to use if requested_name
00487  * is NULL.
00488  * @param child_period The child's period in microseconds or zero if no CPU
00489  * reserve should be created.
00490  * @param child_compute The child's compute time in microseconds.
00491  * @return The resource set to use for the child process or NULL_RESOURCE_SET
00492  * if a set could not be created.
00493  */
00494 static rk_resource_set_t rktGetChildResourceSet(rk_resource_set_t rs,
00495                                                 const char *requested_name,
00496                                                 const char *default_name,
00497                                                 unsigned int child_period,
00498                                                 unsigned int child_compute)
00499 {
00500     rk_resource_set_t retval = rs;
00501     
00502     require((requested_name == NULL) ||
00503             ((strlen(requested_name) > 0) &&
00504              (strlen(requested_name) <= RK_NAME_LEN)));
00505     require(default_name != NULL);
00506     require(strlen(default_name) > 0);
00507     require(strlen(default_name) <= RK_NAME_LEN);
00508     require(((child_period == 0) && (child_compute == 0)) ||
00509             (child_compute < child_period));
00510     
00511     /* If no resource set was passed in, try to create one. */
00512     if( retval == NULL_RESOURCE_SET )
00513     {
00514         const char *actual_name;
00515         
00516         if( requested_name != NULL )
00517         {
00518             actual_name = requested_name;
00519         }
00520         else
00521         {
00522             actual_name = default_name;
00523         }
00524         if( (retval = rk_resource_set_create((char *)actual_name)) !=
00525             NULL_RESOURCE_SET )
00526         {
00527             rktimes_data.rkt_Flags |= RKTF_CREATED_RESOURCE_SET;
00528         }
00529         else
00530         {
00531             my_perror("rk_resource_set_create");
00532         }
00533     }
00534     /* Attach the CPU reserve or try to repair a suspicious resource set. */
00535     if( retval != NULL_RESOURCE_SET )
00536     {
00537         rk_reserve_t cr = NULL_RESERVE;
00538         
00539         if( (cr = rk_resource_set_get_cpu_rsv(retval)) != NULL_RESERVE )
00540         {
00541             /*
00542              * Hmm, they already have a reserve, make sure there are some
00543              * processes attached.
00544              */
00545             if( rk_resource_set_get_num_procs(retval) > 0 )
00546             {
00547                 /* Okie, seems fine. */
00548             }
00549             else
00550             {
00551                 /* Suspicious, delete the reserve and make room for our own. */
00552                 if( rk_cpu_reserve_delete(retval) == 0 )
00553                 {
00554                     cr = NULL_RESERVE;
00555                     /* Assume control of the resource set. */
00556                     rktimes_data.rkt_Flags |= RKTF_CREATED_RESOURCE_SET;
00557                 }
00558                 else
00559                 {
00560                     my_perror("rk_cpu_reserve_delete");
00561                 }
00562             }
00563         }
00564         
00565         if( (cr == NULL_RESERVE) && (child_period > 0) )
00566         {
00567             struct cpu_reserve_attr cra;
00568             
00569             cra.compute_time.tv_sec = 0;
00570             cra.compute_time.tv_nsec = child_compute * 1000;
00571             cra.period.tv_sec = 0;
00572             cra.period.tv_nsec = child_period * 1000;
00573             cra.deadline = cra.period;
00574             cra.blocking_time = cra.start_time = (struct timespec){ 0, 0 };
00575             cra.reserve_type.sch_mode = RSV_SOFT;
00576             cra.reserve_type.enf_mode = RSV_SOFT;
00577             cra.reserve_type.rep_mode = RSV_SOFT;
00578             cra.processor = RK_ANY_CPU;
00579             if( rk_cpu_reserve_create(retval, &cr, &cra) < 0 )
00580             {
00581                 my_perror("rk_cpu_reserve_create");
00582             }
00583         }
00584     }
00585     return( retval );
00586 }
00587 
00588 /**
00589  * Print out the usage statement to a file handle.
00590  *
00591  * @param file The destination file handle for the usage output.
00592  * @param prog_name The program name.
00593  */
00594 static void rktUsage(FILE *file, char *prog_name)
00595 {
00596     require(file != NULL);
00597     require(prog_name != NULL);
00598     require(strlen(prog_name) > 0);
00599     
00600     fprintf(file,
00601             "Usage: %s [options] -- <command> [argument ...]\n",
00602             prog_name);
00603     fprintf(file,
00604             "\n"
00605             "Create a resource set and periodically record the CPU\n"
00606             "usage of the attached process and its children.\n"
00607             "\n"
00608             "Options:\n"
00609             "\t-h\t\tThis help message.\n"
00610             "\t-V\t\tShow the version number.\n"
00611             
00612             "\t-o <file>\tBase name for the output files. (Default: rktimes)\n"
00613             
00614             "\t-n <name>\tName of the RK resource set.  If a resource set\n"
00615             "\t\t\twith that name already exists, that will be used.\n"
00616             "\t\t\tOtherwise, a new one with that name will be created.\n"
00617             "\t\t\t(Default: rktimes)\n"
00618             
00619             "\t-p <port>\tServer port for GKrellmd emulation.\n"
00620 
00621             "\t-P <time>\tThe period for the CPU reservation.\n"
00622             "\t\t\tThe -C option must also be given.\n"
00623 
00624             "\t-C <time>\tThe compute time for the CPU reservation.\n"
00625             "\t\t\tThe -P option must also be given.\n"
00626 
00627             "\n"
00628             
00629             "Package: " PACKAGE_STRING "\n"
00630             "Contact: " PACKAGE_BUGREPORT "\n");
00631 }
00632 
00633 /**
00634  * Process the command line options.
00635  *
00636  * @param argc_inout Pointer to main's argc variable.  On return, the variable
00637  * will contain the number of arguments remaining after option processing.
00638  * @param argv_inout Pointer to main's argv variable.  On return, the variable
00639  * will contain the remaining argument values.
00640  * @return Zero if the options were processed correctly, > 0 if there was an
00641  * error and the usage should be printed, or < 0 if the program should exit
00642  * immediately.
00643  */
00644 static int rktProcessOptions(int *argc_inout, char **argv_inout[])
00645 {
00646     unsigned long long us_time;
00647     int ch, retval = 0;
00648     char *prog_name;
00649     char **argv;
00650     int argc;
00651 
00652     require(argc_inout != NULL);
00653     require(argv_inout != NULL);
00654 
00655     argc = *argc_inout;
00656     argv = *argv_inout;
00657     prog_name = argv[0];
00658     while( ((ch = getopt(argc, argv, "hVo:n:p:P:C:")) != -1) && (retval == 0) )
00659     {
00660         switch( ch )
00661         {
00662         case 'o':
00663             /* Output file base name. */
00664             if( strlen(optarg) == 0 )
00665             {
00666                 fprintf(stderr,
00667                         "%s: -o value is empty\n",
00668                         prog_name);
00669             }
00670             else
00671             {
00672                 rktimes_data.rkt_OutputBase = optarg;
00673             }
00674             break;
00675         case 'n':
00676             /* Resource set name. */
00677             if( strlen(optarg) == 0 )
00678             {
00679                 fprintf(stderr,
00680                         "%s: -n value is empty\n",
00681                         prog_name);
00682             }
00683             else if( strlen(optarg) > RK_NAME_LEN )
00684             {
00685                 fprintf(stderr,
00686                         "%s: -n value too long (only %d characters allowed)\n",
00687                         prog_name,
00688                         RK_NAME_LEN);
00689                 retval = 1;
00690             }
00691             else
00692             {
00693                 rktimes_data.rkt_Name = optarg;
00694             }
00695             break;
00696         case 'P':
00697             /* Period for the CPU reservation. */
00698             if( string_to_microsec(&us_time, optarg) )
00699             {
00700                 rktimes_data.rkt_ChildPeriod = us_time;
00701             }
00702             else
00703             {
00704                 fprintf(stderr,
00705                         "%s: -P option requires a time value\n",
00706                         prog_name);
00707                 retval = 1;
00708             }
00709             break;
00710         case 'C':
00711             /* Compute time for the CPU reservation. */
00712             if( string_to_microsec(&us_time, optarg) )
00713             {
00714                 rktimes_data.rkt_ChildCompute = us_time;
00715             }
00716             else
00717             {
00718                 fprintf(stderr,
00719                         "%s: -C option requires a time value\n",
00720                         prog_name);
00721                 retval = 1;
00722             }
00723             break;
00724         case 'p':
00725             /* Server port number for GKrellm emulation. */
00726             {
00727                 int port;
00728                 
00729                 if( (sscanf(optarg, "%d", &port) == 1) &&
00730                     (port > 0) && (port < 65536) )
00731                 {
00732                     int fd;
00733                     
00734                     if( (fd = socket(PF_INET, SOCK_STREAM, 0)) == -1 )
00735                     {
00736                         my_perror("socket");
00737                         retval = -1;
00738                     }
00739                     else
00740                     {
00741                         struct sockaddr_in sin;
00742                         int on = 1;
00743                         
00744                         (void)setsockopt(fd,
00745                                          SOL_SOCKET,
00746                                          SO_REUSEADDR,
00747                                          (char *)&on,
00748                                          sizeof(on));
00749 #if defined(BSD44)
00750                         sin.sin_len = sizeof(sin);
00751 #endif
00752                         sin.sin_family = AF_INET;
00753                         sin.sin_port = htons(port);
00754                         sin.sin_addr.s_addr = INADDR_ANY;
00755                         if( bind(fd,
00756                                  (struct sockaddr *)&sin,
00757                                  sizeof(sin)) == -1 )
00758                         {
00759                             my_perror("bind");
00760                             retval = -1;
00761                         }
00762                         else if( listen(fd, 5) == -1 )
00763                         {
00764                             my_perror("listen");
00765                             retval = -1;
00766                         }
00767                         else
00768                         {
00769                             int fl;
00770                             
00771                             fl = fcntl(fd, F_GETFL, 0);
00772                             fl |= O_NONBLOCK |
00773 #if defined(O_ASYNC)
00774                                 O_ASYNC
00775 #elif defined(FASYNC)
00776                                 FASYNC
00777 #endif
00778                                 ;
00779                             fcntl(fd, F_SETFL, fl);
00780                             fcntl(fd, F_SETOWN, getpid());
00781                             /*
00782                              * Note: SIGIO is ignored by default, so we do not
00783                              * have to worry about handling it yet.
00784                              */
00785                         }
00786                     }
00787                     if( retval == 0 )
00788                     {
00789                         rktimes_data.rkt_ServerSocket = fd;
00790                     }
00791                     else
00792                     {
00793                         close(fd);
00794                     }
00795                 }
00796                 else
00797                 {
00798                     fprintf(stderr,
00799                             "%s: Invalid -p value: %s\n",
00800                             prog_name,
00801                             optarg);
00802                     retval = 1;
00803                 }
00804             }
00805             break;
00806         case 'V':
00807             fprintf(stderr, "%s\n", PACKAGE_VERSION);
00808             retval = -1;
00809             break;
00810         case 'h':
00811         case '?':
00812         default:
00813             retval = 1;
00814             break;
00815         }
00816     }
00817     /* Make sure any compute and period times are sane. */
00818     if( (rktimes_data.rkt_ChildCompute == 0) &&
00819         (rktimes_data.rkt_ChildPeriod == 0) )
00820     {
00821     }
00822     else if( rktimes_data.rkt_ChildCompute == 0 )
00823     {
00824         fprintf(stderr,
00825                 "%s: Compute time _must_ be specified with period\n",
00826                 prog_name);
00827         retval = 1;
00828     }
00829     else if( rktimes_data.rkt_ChildPeriod == 0 )
00830     {
00831         fprintf(stderr,
00832                 "%s: Period time _must_ be specified with compute time\n",
00833                 prog_name);
00834         retval = 1;
00835     }
00836     else if( rktimes_data.rkt_ChildCompute >= rktimes_data.rkt_ChildPeriod )
00837     {
00838         fprintf(stderr,
00839                 "%s: Compute time _must_ be less than the period\n",
00840                 prog_name);
00841         retval = 1;
00842     }
00843     *argc_inout -= optind;
00844     *argv_inout += optind;
00845     /* Optionally skip the '--' that is used to terminate the option list. */
00846     if( (*argc_inout > 0) && (strcmp(*argv_inout[0], "--") == 0) )
00847     {
00848         *argc_inout -= 1;
00849         *argv_inout += 1;
00850     }
00851     if( (retval == 0) && (*argc_inout == 0) )
00852     {
00853         /* No arguments were given, make sure there is a name and */
00854         if( rktimes_data.rkt_Name == NULL )
00855         {
00856             fprintf(stderr, "%s: Missing utility to monitor\n", prog_name);
00857             retval = 1;
00858         }
00859         /* ... it is a valid resource set. */
00860         else if( (rktimes_data.rkt_ResourceSet =
00861                   rk_resource_set_get_by_name(rktimes_data.rkt_Name)) ==
00862                  NULL_RESOURCE_SET )
00863         {
00864             fprintf(stderr,
00865                     "%s: No such resource set: %s\n",
00866                     prog_name,
00867                     rktimes_data.rkt_Name);
00868             retval = 1;
00869         }
00870     }
00871     return( retval );
00872 }
00873 
00874 /**
00875  * The parent portion of the fork(2) between rktimes and the monitored utility.
00876  * This function will attach itself to a resource set to ensure that it has
00877  * some CPU time to work and then wait for gkrellm connections or the child's
00878  * death.
00879  *
00880  * @param rs The resource set for the rktimes process.
00881  * @return The return code for main().
00882  *
00883  * @sa rktChildPart
00884  */
00885 static int rktParentPart(rk_resource_set_t rs)
00886 {
00887     int retval = EXIT_FAILURE;
00888     struct itimerval itv;
00889     struct sigaction sa;
00890     sigset_t sigmask;
00891     
00892     require(rs != NULL_RESOURCE_SET);
00893 
00894     if( rktimes_data.rkt_ChildPID != 0 )
00895     {
00896         char scratch[PATH_MAX];
00897         FILE *pid_file;
00898         
00899         snprintf(scratch,
00900                  sizeof(scratch),
00901                  "%s-child.pid",
00902                  rktimes_data.rkt_OutputBase);
00903         if( (pid_file = fopen(scratch, "w")) != NULL )
00904         {
00905             fprintf(pid_file, "%d", rktimes_data.rkt_ChildPID);
00906             fclose(pid_file);
00907             pid_file = NULL;
00908         }
00909     }
00910 
00911     /* Attach ourselves here to avoid inheritance problem. */
00912     if( rk_resource_set_attach_process(rs, getpid()) < 0 )
00913     {
00914         my_perror("rk_resource_set_attach_process");
00915     }
00916 
00917     /*
00918      * We use signals as a primitive event system.  So any that we use, and are
00919      * not passed on to the child, need to be blocked before setting up the
00920      * handlers.  Once everything has been setup, we will use sigsuspend(2) to
00921      * atomically unblock and wait for the signals to arrive.
00922      */
00923     sigemptyset(&sigmask);
00924     sigaddset(&sigmask, SIGALRM);
00925     sigaddset(&sigmask, SIGCHLD);
00926     sigaddset(&sigmask, SIGIO);
00927     sigaddset(&sigmask, SIGINT);
00928     sigaddset(&sigmask, SIGTERM);
00929     if( sigprocmask(SIG_BLOCK, &sigmask, NULL) < 0 )
00930     {
00931         my_perror("sigprocmask");
00932         
00933         ensure(0);
00934     }
00935 
00936     /* Setup the signal handlers. */
00937     sa.sa_mask = sigmask;
00938     sa.sa_flags = 0;
00939 #if defined(SA_RESTART)
00940     sa.sa_flags |= SA_RESTART;
00941 #endif
00942 
00943     signal(SIGPIPE, SIG_IGN);
00944 
00945     sa.sa_handler = sigalrm;
00946     sigaction(SIGALRM, &sa, NULL);
00947 
00948     if( rktimes_data.rkt_ServerSocket != -1 )
00949     {
00950         sa.sa_handler = sigio;
00951         sigaction(SIGIO, &sa, NULL);
00952     }
00953     
00954     if( rktimes_data.rkt_ChildPID == 0 )
00955     {
00956         /*
00957          * No utility is being monitored, just a resource set.  Therefore we
00958          * need to exit when these signals are received since there is no
00959          * child to pass them to.
00960          */
00961         sa.sa_handler = sigexit;
00962         sigaction(SIGINT, &sa, NULL);
00963         sigaction(SIGTERM, &sa, NULL);
00964     }
00965     else
00966     {
00967         /* Catch SIGCHLD so we know when the child exited. */
00968         sa.sa_handler = sigchld;
00969         sigaction(SIGCHLD, &sa, NULL);
00970 
00971         /*
00972          * Pass SIGINT/SIGTERM onto the child so they can handle them as they
00973          * choose.  If they exit, we will get the SIGCHLD and exit shortly
00974          * thereafter.
00975          */
00976         sa.sa_handler = sigpass;
00977         sigaction(SIGINT, &sa, NULL);
00978         sigaction(SIGTERM, &sa, NULL);
00979     }
00980     
00981     itv.it_interval.tv_sec = 0;
00982     itv.it_interval.tv_usec = 1000000 / SAMPLES_PER_SECOND;
00983     itv.it_value.tv_sec = 0;
00984     itv.it_value.tv_usec = 1000000 / SAMPLES_PER_SECOND;
00985     /** @todo Use rk periodic stuff here. */
00986     if( setitimer(ITIMER_REAL, &itv, NULL) == 0 )
00987     {
00988         sigset_t empty_sigmask;
00989         int status;
00990 
00991         sigemptyset(&empty_sigmask);
00992         
00993         /* Keep handling signals until the child has died. */
00994         while( !(rktimes_data.rkt_Flags & RKTF_DONE) )
00995         {
00996             sigsuspend(&empty_sigmask);
00997         }
00998 
00999         /* Cleanup the child's status. */
01000         if( rktimes_data.rkt_ChildPID == 0 )
01001         {
01002             /*
01003              * No child was being monitored, ergo, there is nothing to wait for
01004              */
01005         }
01006         else if( wait(&status) >= 0 )
01007         {
01008             if( WIFEXITED(status) )
01009             {
01010                 retval = WEXITSTATUS(status);
01011             }
01012             else if( WIFSIGNALED(status) )
01013             {
01014                 retval = EXIT_SUCCESS;
01015             }
01016             else
01017             {
01018                 retval = EXIT_FAILURE;
01019             }
01020         }
01021         else
01022         {
01023             my_perror("wait");
01024             retval = EXIT_FAILURE;
01025         }
01026 
01027         /* Clear the timer. */
01028         memset(&itv, 0, sizeof(itv));
01029         if( setitimer(ITIMER_REAL, &itv, NULL) < 0 )
01030         {
01031             my_perror("setitimer");
01032 
01033             ensure(0);
01034         }
01035     }
01036     else
01037     {
01038         my_perror("setitimer");
01039         retval = EXIT_FAILURE;
01040     }
01041 
01042     /* We will be dying soon, ignore any signals and */
01043     signal(SIGALRM, SIG_IGN);
01044     signal(SIGCHLD, SIG_IGN);
01045     signal(SIGINT, SIG_IGN);
01046     signal(SIGTERM, SIG_IGN);
01047     signal(SIGIO, SIG_IGN);
01048 
01049     /* ... restore the old signal mask. */
01050     if( sigprocmask(SIG_UNBLOCK, &sigmask, NULL) < 0 )
01051     {
01052         my_perror("sigprocmask");
01053 
01054         ensure(0);
01055     }
01056     
01057     return( retval );
01058 }
01059 
01060 /**
01061  * The child portion of the fork(2) between rktimes and the monitored utility.
01062  * This function will attach the child to the given resource set and execvp(3)
01063  * the utility to monitor with the given arguments.
01064  *
01065  * @param rs The child's resource set.
01066  * @param argv The utility to startup and its arguments.
01067  * @return A failure exit code, otherwise this function will not return because
01068  * of the execvp(3).
01069  *
01070  * @sa rktParentPart
01071  */
01072 static int rktChildPart(rk_resource_set_t rs, char *argv[])
01073 {
01074     int rc, retval = EXIT_SUCCESS;
01075     sigset_t sigmask;
01076     int lpc;
01077     
01078     require(rs != NULL_RESOURCE_SET);
01079     require(argv != NULL);
01080     
01081     /*
01082      * Make sure any children created by the monitored utility are also
01083      * attached to the resource set.
01084      */
01085     rk_inherit_mode(1);
01086     if( rk_resource_set_attach_process(rs, getpid()) == 0 )
01087     {
01088     }
01089     else
01090     {
01091         my_perror("rk_resource_set_attach_process");
01092         retval = EXIT_FAILURE;
01093     }
01094 
01095     /*
01096      * Cleanup the file descriptors.
01097      *
01098      * XXX Should we do this?  Or, should rktimes be completely invisible and
01099      * pass everything through.
01100      */
01101 
01102     /* First, close our server socket, */
01103     if( rktimes_data.rkt_ServerSocket != -1 )
01104     {
01105         close(rktimes_data.rkt_ServerSocket);
01106         rktimes_data.rkt_ServerSocket = -1;
01107     }
01108 
01109     /* ... then make sure stdio is sane, and */
01110     do {
01111         if( (rc = open("/dev/null", O_RDWR)) == -1 )
01112         {
01113             fprintf(stderr, "Error: Cannot open '/dev/null'?");
01114             retval = EXIT_FAILURE;
01115         }
01116         else if( rc > 2 )
01117         {
01118             close(rc); /* not needed. */
01119         }
01120         else
01121         {
01122             /* stdin/stderr/stdout filled. */
01123         }
01124     } while( (rc >= 0) && (rc <= 2) );
01125 
01126     /* ... make sure we do not leak any descriptors. */
01127     for( lpc = 3; lpc < FD_SETSIZE; lpc++ )
01128     {
01129         if( close(lpc) != -1 )
01130         {
01131             fprintf(stderr,
01132                     "Warning: Descriptor %d was leaked from rktimes.\n",
01133                     lpc);
01134         }
01135     }
01136     
01137     /* Reset the signal mask for the child. */
01138     sigfillset(&sigmask);
01139     if( sigprocmask(SIG_UNBLOCK, &sigmask, NULL) < 0 )
01140     {
01141         my_perror("sigprocmask");
01142         
01143         ensure(0);
01144     }
01145 
01146     if( retval == EXIT_SUCCESS )
01147     {
01148         execvp(argv[0], argv);
01149 
01150         /* FALLTHROUGH, normal operation will not reach this point. */
01151         perror(argv[0]);
01152         switch( errno )
01153         {
01154         case ENOENT:
01155         case EPERM:
01156             retval = 127;
01157             break;
01158         default:
01159             retval = EXIT_FAILURE;
01160             break;
01161         }
01162     }
01163     return( retval );
01164 }
01165 
01166 int main(int argc, char *argv[])
01167 {
01168     int lpc, retval = EXIT_FAILURE;
01169 
01170     /* Default values. */
01171     rktimes_data.rkt_OutputBase = "rktimes";
01172     rktimes_data.rkt_ServerSocket = -1;
01173     for( lpc = 0; lpc < MAX_CLIENTS; lpc++ )
01174     {
01175         rktimes_data.rkt_ClientSockets[lpc] = -1;
01176     }
01177 
01178     /* Initialize the internal bits first. */
01179     if( cpInitChildProcessData() )
01180     {
01181         char *prog_name = argv[0];
01182         int rc;
01183 
01184         rc = rktProcessOptions(&argc, &argv);
01185         if( (rc == 0) && ((argc > 0) || (rktimes_data.rkt_Name != NULL)) )
01186         {
01187             char self_rs_name[RK_NAME_LEN + 1];
01188             rk_resource_set_t rs;
01189 
01190             if( argc > 0 )
01191             {
01192                 rktimes_data.rkt_UtilityName = argv[0];
01193             }
01194             else
01195             {
01196                 rktimes_data.rkt_UtilityName = "(none)";
01197             }
01198             snprintf(self_rs_name, RK_NAME_LEN + 1, "rktimes.%d", getpid());
01199 
01200             /*
01201              * Block signals so we do not die without cleaning up the resource
01202              * set(s).
01203              */
01204             {
01205                 sigset_t sigmask;
01206                 
01207                 sigaddset(&sigmask, SIGINT);
01208                 sigaddset(&sigmask, SIGTERM);
01209                 if( sigprocmask(SIG_BLOCK, &sigmask, NULL) < 0 )
01210                 {
01211                     my_perror("sigprocmask");
01212                     
01213                     ensure(0);
01214                 }
01215             }
01216             
01217             rs = rktCreateSelfResourceSet(self_rs_name);
01218             rktimes_data.rkt_ResourceSet =
01219                 rktGetChildResourceSet(rktimes_data.rkt_ResourceSet,
01220                                        rktimes_data.rkt_Name,
01221                                        "rktimes",
01222                                        rktimes_data.rkt_ChildPeriod,
01223                                        rktimes_data.rkt_ChildCompute);
01224             if( (rs != NULL_RESOURCE_SET) &&
01225                 (rktimes_data.rkt_ResourceSet != NULL_RESOURCE_SET) )
01226             {
01227                 gettimeofday(&rktimes_data.rkt_StartTime, NULL);
01228                 if( argc == 0 )
01229                 {
01230                     retval = rktParentPart(rs);
01231                 }
01232                 else if( (rktimes_data.rkt_ChildPID = fork()) > 0 )
01233                 {
01234                     retval = rktParentPart(rs);
01235                 }
01236                 else if( rktimes_data.rkt_ChildPID == 0 )
01237                 {
01238                     return( rktChildPart(rktimes_data.rkt_ResourceSet, argv) );
01239                 }
01240                 else
01241                 {
01242                     my_perror("fork");
01243                     retval = EXIT_FAILURE;
01244                 }
01245             }
01246             if( rktimes_data.rkt_Flags & RKTF_CREATED_RESOURCE_SET )
01247             {
01248                 rk_resource_set_destroy(rktimes_data.rkt_ResourceSet);
01249                 rktimes_data.rkt_ResourceSet = NULL_RESOURCE_SET;
01250             }
01251             rk_resource_set_destroy(rs);
01252         }
01253         else if( rc >= 0 )
01254         {
01255             rktUsage(stderr, prog_name);
01256         }
01257         if( rktimes_data.rkt_ServerSocket != -1 )
01258         {
01259             close(rktimes_data.rkt_ServerSocket);
01260             rktimes_data.rkt_ServerSocket = -1;
01261         }
01262         cpKillChildProcessData();
01263     }
01264     
01265     return( retval );
01266 }

Generated on Tue Jun 22 14:50:10 2004 for CPU Broker by doxygen 1.3.6