Blob Blame History Raw
diff --git a/src/condor_procd/proc_family.cpp b/src/condor_procd/proc_family.cpp
index d35ffcc..29d9471 100644
--- a/src/condor_procd/proc_family.cpp
+++ b/src/condor_procd/proc_family.cpp
@@ -54,7 +54,9 @@ ProcFamily::ProcFamily(ProcFamilyMonitor* monitor,
 	m_member_list(NULL)
 #if defined(HAVE_EXT_LIBCGROUP)
 	, m_cgroup_string(""),
-	m_cm(CgroupManager::getInstance())
+	m_cm(CgroupManager::getInstance()),
+	m_initial_user_cpu(0),
+	m_initial_sys_cpu(0)
 #endif
 {
 #if !defined(WIN32)
@@ -188,6 +190,7 @@ after_migrate:
 		cgroup_free(&orig_cgroup);
 	}
 
+
 after_restore:
 	if (orig_cgroup_string != NULL) {
 		free(orig_cgroup_string);
@@ -231,6 +234,27 @@ ProcFamily::set_cgroup(const std::string &cgroup_string)
 		member = member->m_next;
 	}
 
+	// Record the amount of pre-existing CPU usage here.
+	m_initial_user_cpu = 0;
+	m_initial_sys_cpu = 0;
+	get_cpu_usage_cgroup(m_initial_user_cpu, m_initial_sys_cpu);
+
+	// Reset block IO controller
+	if (m_cm.isMounted(CgroupManager::BLOCK_CONTROLLER)) {
+		struct cgroup *tmp_cgroup = cgroup_new_cgroup(m_cgroup_string.c_str());
+		struct cgroup_controller *blkio_controller = cgroup_add_controller(tmp_cgroup, BLOCK_CONTROLLER_STR);
+		ASSERT (blkio_controller != NULL); // Block IO controller should already exist.
+		cgroup_add_value_uint64(blkio_controller, "blkio.reset_stats", 0);
+		int err;
+		if ((err = cgroup_modify_cgroup(tmp_cgroup))) {
+			// Not allowed to reset stats?
+			dprintf(D_ALWAYS,
+				"Unable to reset cgroup %s block IO statistics. "
+				"Some block IO accounting will be inaccurate (ProcFamily %u): %u %s\n",
+				m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
+		}
+	}
+
 	return 0;
 }
 
@@ -486,6 +510,40 @@ ProcFamily::aggregate_usage_cgroup_blockio(ProcFamilyUsage* usage)
 	return 0;
 }
 
+int ProcFamily::get_cpu_usage_cgroup(long &user_time, long &sys_time) {
+
+	if (!m_cm.isMounted(CgroupManager::CPUACCT_CONTROLLER)) {
+		return 1;
+	}
+
+	void * handle = NULL;
+	u_int64_t tmp = 0;
+	struct cgroup_stat stats;
+	int err = cgroup_read_stats_begin(CPUACCT_CONTROLLER_STR, m_cgroup_string.c_str(), &handle, &stats);
+	while (err != ECGEOF) {
+		if (err > 0) {
+			dprintf(D_PROCFAMILY,
+				"Unable to read cgroup %s cpuacct stats (ProcFamily %u): %s.\n",
+				m_cgroup_string.c_str(), m_root_pid, cgroup_strerror(err));
+			break;
+		}
+		if (_check_stat_uint64(stats, "user", &tmp)) {
+			user_time = tmp/clock_tick-m_initial_user_cpu;
+		} else if (_check_stat_uint64(stats, "system", &tmp)) {
+			sys_time = tmp/clock_tick-m_initial_sys_cpu;
+		}
+			err = cgroup_read_stats_next(&handle, &stats);
+	}
+	if (handle != NULL) {
+		cgroup_read_stats_end(&handle);
+	}
+	if (err != ECGEOF) {
+		dprintf(D_ALWAYS, "Internal cgroup error when retrieving CPU statistics: %s\n", cgroup_strerror(err));
+		return 1;
+	}
+	return 0;
+}
+
 int
 ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
 {
@@ -496,16 +554,13 @@ ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
 
 	int err;
 	struct cgroup_stat stats;
-	void **handle;
+	void *handle = NULL;
 	u_int64_t tmp = 0, image = 0;
 	bool found_rss = false;
 
 	// Update memory
-	handle = (void **)malloc(sizeof(void*));
-	ASSERT (handle != NULL);
-	*handle = NULL;
 
-	err = cgroup_read_stats_begin(MEMORY_CONTROLLER_STR, m_cgroup_string.c_str(), handle, &stats);
+	err = cgroup_read_stats_begin(MEMORY_CONTROLLER_STR, m_cgroup_string.c_str(), &handle, &stats);
 	while (err != ECGEOF) {
 		if (err > 0) {
 			dprintf(D_PROCFAMILY,
@@ -522,10 +577,10 @@ ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
 		} else if (_check_stat_uint64(stats, "total_swap", &tmp)) {
 			image += tmp;
 		}
-		err = cgroup_read_stats_next(handle, &stats);
+		err = cgroup_read_stats_next(&handle, &stats);
 	}
-	if (*handle != NULL) {
-		cgroup_read_stats_end(handle);
+	if (handle != NULL) {
+		cgroup_read_stats_end(&handle);
 	}
 	if (found_rss) {
 		usage->total_image_size = image/1024;
@@ -540,29 +595,12 @@ ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
 		m_max_image_size = image/1024;
 	}
 	// Try updating the max size using cgroups
-	update_max_image_size_cgroup();
+	// XXX: This is taken out for now - kernel calculates max INCLUDING
+	// the filesystem cache.  Not what you want.
+	//update_max_image_size_cgroup();
 
 	// Update CPU
-	*handle = NULL;
-	err = cgroup_read_stats_begin(CPUACCT_CONTROLLER_STR, m_cgroup_string.c_str(), handle, &stats);
-	while (err != ECGEOF) {
-		if (err > 0) {
-			dprintf(D_PROCFAMILY,
-				"Unable to read cgroup %s cpuacct stats (ProcFamily %u): %s.\n",
-				m_cgroup_string.c_str(), m_root_pid, cgroup_strerror(err));
-			break;
-		}
-		if (_check_stat_uint64(stats, "user", &tmp)) {
-			usage->user_cpu_time = tmp/clock_tick;
-		} else if (_check_stat_uint64(stats, "system", &tmp)) {
-			usage->sys_cpu_time = tmp/clock_tick;
-		}
-		err = cgroup_read_stats_next(handle, &stats);
-	}
-	if (*handle != NULL) {
-		cgroup_read_stats_end(handle);
-	}
-	free(handle);
+	get_cpu_usage_cgroup(usage->user_cpu_time, usage->sys_cpu_time);
 
 	aggregate_usage_cgroup_blockio(usage);

--- a/src/condor_procd/proc_family.h
+++ b/src/condor_procd/proc_family.h
@@ -181,6 +181,11 @@ private:
 	std::string m_cgroup_string;
 	CgroupManager &m_cm;
 	static long clock_tick;
+	// Sometimes Condor doesn't successfully clear out the cgroup from the
+	// previous run.  Hence, we subtract off any CPU usage found at the
+	// start of the job.
+	long m_initial_user_cpu;
+	long m_initial_sys_cpu;
 	static bool have_warned_about_memsw;
 
 	int count_tasks_cgroup();
@@ -190,6 +195,7 @@ private:
 	int spree_cgroup(int);
 	int migrate_to_cgroup(pid_t);
 	void update_max_image_size_cgroup();
+	int get_cpu_usage_cgroup(long &user_cpu, long &sys_cpu);
 #endif
 };