diff --git a/src/condor_procd/proc_family.cpp b/src/condor_procd/proc_family.cpp
index d35ffcc..29d9471 100644
--- a/src/condor_procd/proc_family.cpp
+++ b/src/condor_procd/proc_family.cpp
@@ -54,7 +54,9 @@ ProcFamily::ProcFamily(ProcFamilyMonitor* monitor,
m_member_list(NULL)
#if defined(HAVE_EXT_LIBCGROUP)
, m_cgroup_string(""),
- m_cm(CgroupManager::getInstance())
+ m_cm(CgroupManager::getInstance()),
+ m_initial_user_cpu(0),
+ m_initial_sys_cpu(0)
#endif
{
#if !defined(WIN32)
@@ -188,6 +190,7 @@ after_migrate:
cgroup_free(&orig_cgroup);
}
+
after_restore:
if (orig_cgroup_string != NULL) {
free(orig_cgroup_string);
@@ -231,6 +234,27 @@ ProcFamily::set_cgroup(const std::string &cgroup_string)
member = member->m_next;
}
+ // Record the amount of pre-existing CPU usage here.
+ m_initial_user_cpu = 0;
+ m_initial_sys_cpu = 0;
+ get_cpu_usage_cgroup(m_initial_user_cpu, m_initial_sys_cpu);
+
+ // Reset block IO controller
+ if (m_cm.isMounted(CgroupManager::BLOCK_CONTROLLER)) {
+ struct cgroup *tmp_cgroup = cgroup_new_cgroup(m_cgroup_string.c_str());
+ struct cgroup_controller *blkio_controller = cgroup_add_controller(tmp_cgroup, BLOCK_CONTROLLER_STR);
+ ASSERT (blkio_controller != NULL); // Block IO controller should already exist.
+ cgroup_add_value_uint64(blkio_controller, "blkio.reset_stats", 0);
+ int err;
+ if ((err = cgroup_modify_cgroup(tmp_cgroup))) {
+ // Not allowed to reset stats?
+ dprintf(D_ALWAYS,
+ "Unable to reset cgroup %s block IO statistics. "
+ "Some block IO accounting will be inaccurate (ProcFamily %u): %u %s\n",
+ m_cgroup_string.c_str(), m_root_pid, err, cgroup_strerror(err));
+ }
+ }
+
return 0;
}
@@ -486,6 +510,40 @@ ProcFamily::aggregate_usage_cgroup_blockio(ProcFamilyUsage* usage)
return 0;
}
+// Reads the cpuacct "user"/"system" stats of m_cgroup_string into user_time/sys_time (each divided by
+// clock_tick, minus the m_initial_* baseline). Returns 0 on success, 1 if unmounted or on a read error.
+int ProcFamily::get_cpu_usage_cgroup(long &user_time, long &sys_time) {
+	if (!m_cm.isMounted(CgroupManager::CPUACCT_CONTROLLER)) {
+		return 1;
+	}
+	// Iterate over every stat entry; only "user" and "system" are consumed, the rest are ignored.
+	void *cursor = NULL;
+	struct cgroup_stat entry;
+	u_int64_t raw = 0;
+	int rc = cgroup_read_stats_begin(CPUACCT_CONTROLLER_STR, m_cgroup_string.c_str(), &cursor, &entry);
+	for (; rc != ECGEOF; rc = cgroup_read_stats_next(&cursor, &entry)) {
+		if (rc > 0) {
+			dprintf(D_PROCFAMILY,
+				"Unable to read cgroup %s cpuacct stats (ProcFamily %u): %s.\n",
+				m_cgroup_string.c_str(), m_root_pid, cgroup_strerror(rc));
+			break;
+		}
+		if (_check_stat_uint64(entry, "user", &raw)) {
+			user_time = raw/clock_tick-m_initial_user_cpu;
+		} else if (_check_stat_uint64(entry, "system", &raw)) {
+			sys_time = raw/clock_tick-m_initial_sys_cpu;
+		}
+	}
+	if (cursor != NULL) {
+		cgroup_read_stats_end(&cursor);
+	}
+	if (rc == ECGEOF) {
+		return 0;
+	}
+	dprintf(D_ALWAYS, "Internal cgroup error when retrieving CPU statistics: %s\n", cgroup_strerror(rc));
+	return 1;
+}
+
int
ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
{
@@ -496,16 +554,13 @@ ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
int err;
struct cgroup_stat stats;
- void **handle;
+ void *handle = NULL;
u_int64_t tmp = 0, image = 0;
bool found_rss = false;
// Update memory
- handle = (void **)malloc(sizeof(void*));
- ASSERT (handle != NULL);
- *handle = NULL;
- err = cgroup_read_stats_begin(MEMORY_CONTROLLER_STR, m_cgroup_string.c_str(), handle, &stats);
+ err = cgroup_read_stats_begin(MEMORY_CONTROLLER_STR, m_cgroup_string.c_str(), &handle, &stats);
while (err != ECGEOF) {
if (err > 0) {
dprintf(D_PROCFAMILY,
@@ -522,10 +577,10 @@ ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
} else if (_check_stat_uint64(stats, "total_swap", &tmp)) {
image += tmp;
}
- err = cgroup_read_stats_next(handle, &stats);
+ err = cgroup_read_stats_next(&handle, &stats);
}
- if (*handle != NULL) {
- cgroup_read_stats_end(handle);
+ if (handle != NULL) {
+ cgroup_read_stats_end(&handle);
}
if (found_rss) {
usage->total_image_size = image/1024;
@@ -540,29 +595,12 @@ ProcFamily::aggregate_usage_cgroup(ProcFamilyUsage* usage)
m_max_image_size = image/1024;
}
// Try updating the max size using cgroups
- update_max_image_size_cgroup();
+	// XXX: Disabled for now - the kernel's max-usage figure INCLUDES the
+	// filesystem cache, which is not what we want for the max image size.
+ //update_max_image_size_cgroup();
// Update CPU
- *handle = NULL;
- err = cgroup_read_stats_begin(CPUACCT_CONTROLLER_STR, m_cgroup_string.c_str(), handle, &stats);
- while (err != ECGEOF) {
- if (err > 0) {
- dprintf(D_PROCFAMILY,
- "Unable to read cgroup %s cpuacct stats (ProcFamily %u): %s.\n",
- m_cgroup_string.c_str(), m_root_pid, cgroup_strerror(err));
- break;
- }
- if (_check_stat_uint64(stats, "user", &tmp)) {
- usage->user_cpu_time = tmp/clock_tick;
- } else if (_check_stat_uint64(stats, "system", &tmp)) {
- usage->sys_cpu_time = tmp/clock_tick;
- }
- err = cgroup_read_stats_next(handle, &stats);
- }
- if (*handle != NULL) {
- cgroup_read_stats_end(handle);
- }
- free(handle);
+ get_cpu_usage_cgroup(usage->user_cpu_time, usage->sys_cpu_time);
aggregate_usage_cgroup_blockio(usage);
--- a/src/condor_procd/proc_family.h
+++ b/src/condor_procd/proc_family.h
@@ -181,6 +181,11 @@ private:
std::string m_cgroup_string;
CgroupManager &m_cm;
static long clock_tick;
+ // Sometimes Condor doesn't successfully clear out the cgroup from the
+ // previous run. Hence, we subtract off any CPU usage found at the
+ // start of the job.
+ long m_initial_user_cpu;
+ long m_initial_sys_cpu;
static bool have_warned_about_memsw;
int count_tasks_cgroup();
@@ -190,6 +195,7 @@ private:
int spree_cgroup(int);
int migrate_to_cgroup(pid_t);
void update_max_image_size_cgroup();
+ int get_cpu_usage_cgroup(long &user_cpu, long &sys_cpu);
#endif
};