Blob Blame History Raw
From 1833f9dae371a48e3f52891262ad2d5fd75fc205 Mon Sep 17 00:00:00 2001
From: =?utf-8?q?Dan=20Hor=C3=A1k?= <dan@danny.cz>
Date: Fri, 5 Jun 2009 14:12:52 +0200
Subject: [PATCH] s390-tools-1.8.1-ziomon-fixes

---
 ziomon/stats.h       |    2 +-
 ziomon/ziomon        |   84 ++++++++++++++++++++++++++++++++++++++++++++-----
 ziomon/ziomon_util.c |    2 +-
 3 files changed, 77 insertions(+), 11 deletions(-)

diff --git a/ziomon/stats.h b/ziomon/stats.h
index a28d436..0920b27 100644
--- a/ziomon/stats.h
+++ b/ziomon/stats.h
@@ -142,7 +142,7 @@ static inline void histlog2_print(const char *s, const __u32 a[],
 	int i;
 
 	printf("%s:\n", s);
-	for (i = 0; i < h->num; i++) {
+	for (i = 0; i < h->num - 1; i++) {
 		printf("   %10ld:%6d",
 			(unsigned long)(histlog2_upper_limit(i, h)), a[i]);
 		if (!((i + 1) % 4))
diff --git a/ziomon/ziomon b/ziomon/ziomon
index aa1cf78..fe4d8ec 100755
--- a/ziomon/ziomon
+++ b/ziomon/ziomon
@@ -32,7 +32,7 @@ WRP_DEVICES=();
 WRP_LUNS=();
 WRP_LOGFILE="";
 # limit of actual data in percent that need space on disk
-WRP_SIZE_THRESHOLD="25";
+WRP_SIZE_THRESHOLD="10";
 WRP_FORCE=0;
 
 function debug() {
@@ -234,6 +234,7 @@ function start_trace() {
    local hosts_param;
    local luns_param;
    local i;
+   local len;
 
    if [ $WRP_DEBUG -ne 0 ]; then
       verbose="-V";
@@ -276,7 +277,7 @@ function start_trace() {
    blkiomon_command="blkiomon --interval=$WRP_INTERVAL -Q  $WRP_MSG_Q_PATH -q $WRP_MSG_Q_ID -m $WRP_MSG_Q_BLKIOMON_ID $verbose_blk -d -";
    zfcpdd_command="ziomon_zfcpdd -Q  $WRP_MSG_Q_PATH -q $WRP_MSG_Q_ID -m $WRP_MSG_Q_ZIOMON_ZFCPDD_ID -i $WRP_INTERVAL";
    debug "starting blktrace: $blktrace_command | $blkiomon_command | $zfcpdd_command";
-   $blktrace_command | $blkiomon_command | $zfcpdd_command > $WRP_MSG_Q_PATH/blktrace.log &
+   $blktrace_command 2>$WRP_MSG_Q_PATH/blktrace.err | $blkiomon_command | $zfcpdd_command > $WRP_MSG_Q_PATH/blktrace.log &
    i=0;
    # might take a moment to start all processes in the pipe if system under load
    while [ $i -lt 60 ]; do
@@ -303,7 +304,17 @@ function start_trace() {
    echo "done";
    echo -n "Collecting data...";
 
-   sleep $WRP_DURATION;
+   # pay extra attention to blktrace
+   for (( i=0; i<$WRP_DURATION; ++i )); do
+      len=`cat $WRP_MSG_Q_PATH/blktrace.err | wc -l`;
+      if [ $len -ne 0 ]; then
+         cat $WRP_MSG_Q_PATH/blktrace.err;
+         echo "Error: blktrace has errors, aborting";
+         return;
+      fi
+      sleep 1;
+   done
+
    echo "done";
 }
 
@@ -358,6 +369,58 @@ function emergency_shutdown() {
 }
 
 
+function check_cpuplugd() {
+   # Warn if the cpuplugd pid file exists (taken to mean cpuplugd is running).
+   # If so, the whole per-cpu mechanism of blktrace gets corrupted, which
+   # results in the infamous 'bad trace magic' message. Warning only, no abort.
+   if [ -e /var/run/cpuplugd.pid ]; then
+      echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!";
+      echo "ziomon: Warning: cpuplugd is running which can corrupt the traces.";
+      echo "        It is recommended to stop cpuplugd for the duration of the";
+      echo "        trace using 'service cpuplugd stop'.";
+      echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!";
+   fi
+}
+
+
+# Exit 1 unless free vmalloc space covers 2MB (2048 KB) per device and CPU
+function check_vmalloc_space() {
+   local vm_total vm_used vm_free;
+   local cpu_count;
+   local needed;
+   local margin;
+
+   # 4th field of the "processors" line (s390 cpuinfo layout assumed - TODO confirm)
+   cpu_count=$(awk '/processors/ {print $4}' /proc/cpuinfo);
+   # 2nd field of the matching /proc/meminfo lines
+   vm_total=$(awk '/VmallocTotal/ {print $2}' /proc/meminfo);
+   vm_used=$(awk '/VmallocUsed/ {print $2}' /proc/meminfo);
+
+   (( vm_free=$vm_total-$vm_used ));
+   (( needed=$cpu_count*${#WRP_DEVICES[@]}*2048 ));
+   (( margin=$vm_free-$needed ));
+   debug "Required Vmalloc space: $needed KBytes";
+   if [ $margin -lt 0 ]; then
+      echo "$WRP_TOOLNAME: Not enough free Vmalloc space:";
+      echo "        Required: $needed KBytes";
+      echo "        Free: $vm_free KBytes";
+      exit 1;
+   fi
+
+   return 0;
+}
+
+
+function check_blkiomon() {
+   # Accept only blkiomon 0.2: compare the 3rd field of 'blkiomon -V' output
+   local ver=`blkiomon -V | awk '{print $3}'`;
+   if [ "$ver" != "0.2" ]; then
+      echo "$WRP_TOOLNAME: Unsupported blkiomon version $ver detected, aborting";
+      exit 1;
+   fi
+}
+
+
 function setup() {
    while [ -e $WRP_MSG_Q_PATH ]; do
       WRP_MSG_Q_PATH="$WRP_MSG_Q_PATH$RANDOM";
@@ -476,7 +539,7 @@ function determine_host_adapters() {
    local num_s_devs;
    local s_dev_ratio;
 
-   echo -n "check devices...";
+   echo -n "Check devices...";
 
    # Estimate fraction of /dev/s* devices - if >50%, start with check for regular devices
    num_s_devs=`echo ${WRP_DEVICES[@]} | sed "s/ /\n/g" | grep /dev/s | wc -l`;
@@ -599,7 +662,6 @@ function check_size_requirements() {
    local estimated_size;
    local free_space;
    local logpath=`dirname $WRP_LOGFILE`;
-   local num_uniq_devs;
 
    set `ziomon_mgr -e`;
    util_base_sz=$1;
@@ -611,12 +673,10 @@ function check_size_requirements() {
 
    # NOTE: Since blktrace and ziomon_zfcpdd write messages only when there is
    # traffic, the estimate is an upper boundary only
-   num_uniq_devs=`echo ${WRP_LUNS[@]} | sed 's/ /\n/g' | cut -d : -f 4 | sort | uniq | wc -l`;
-   debug "number of unique devices: $num_uniq_devs";
    debug "disk space requirements:";
    (( size_per_record = $util_base_sz + ${#WRP_HOST_ADAPTERS[@]} * $util_variable_sz + $ioerr_base_sz
-                        + $num_uniq_devs * ( $ioerr_variable_sz + $blkiotrace_sz + $zfcpiotrace_sz )
-                        + ( 2 + $num_uniq_devs) * 8 ));
+                        + ${#WRP_DEVICES[@]} * ( $ioerr_variable_sz + $blkiotrace_sz + $zfcpiotrace_sz )
+                        + ( 2 + ${#WRP_DEVICES[@]}) * 8 ));
    debug "    size per interval: $size_per_record Bytes";
    (( total_num_records = $WRP_DURATION / $WRP_INTERVAL ));
    debug "    total number of intervals: $total_num_records";
@@ -653,10 +713,16 @@ setup;
 
 parse_params $@;
 
+check_cpuplugd;
+
+check_blkiomon;
+
 check_for_existing_output;
 
 determine_host_adapters;
 
+check_vmalloc_space;
+
 check_size_requirements;
 
 [ $? -eq 0 ] && start_trace;
diff --git a/ziomon/ziomon_util.c b/ziomon/ziomon_util.c
index e3e0762..043d3d1 100644
--- a/ziomon/ziomon_util.c
+++ b/ziomon/ziomon_util.c
@@ -597,7 +597,7 @@ static int poll_ioerr_cnt(int init, struct ioerr_data *data,
 	for (i=0; i<opts->num_luns; ++i) {
 		/* read ioerr_cnt attribute */
 		if (read_attribute(opts->luns[i], line, NULL)) {
-			fprintf(stderr, "%s: Warning: Could read %s\n",
+			fprintf(stderr, "%s: Warning: Could not read %s\n",
 				toolname, opts->luns[i]);
 			grc++;
 			continue;
-- 
1.6.0.6