From b49b116bb07cc234c6da4d93d6f66b26a8eb1554 Mon Sep 17 00:00:00 2001
From: Chuck Ebbert
Date: Sep 25 2010 12:29:41 +0000
Subject: Add sched-35-increment-cache_nice_tries-only-on-periodic-lb.patch

---

diff --git a/kernel.spec b/kernel.spec
index e8e6cc6..e61da1f 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -749,6 +749,7 @@ Patch12575: sched-15-update-rq-clock-for-nohz-balanced-cpus.patch
 Patch12580: sched-20-fix-rq-clock-synchronization-when-migrating-tasks.patch
 Patch12585: sched-25-move-sched_avg_update-to-update_cpu_load.patch
 Patch12590: sched-30-sched-fix-nohz-balance-kick.patch
+Patch12595: sched-35-increment-cache_nice_tries-only-on-periodic-lb.patch
 
 Patch13600: btusb-macbookpro-6-2.patch
 Patch13601: btusb-macbookpro-7-1.patch
@@ -1392,6 +1393,7 @@ ApplyPatch sched-15-update-rq-clock-for-nohz-balanced-cpus.patch
 ApplyPatch sched-20-fix-rq-clock-synchronization-when-migrating-tasks.patch
 ApplyPatch sched-25-move-sched_avg_update-to-update_cpu_load.patch
 ApplyPatch sched-30-sched-fix-nohz-balance-kick.patch
+ApplyPatch sched-35-increment-cache_nice_tries-only-on-periodic-lb.patch
 
 ApplyPatch btusb-macbookpro-7-1.patch
 ApplyPatch btusb-macbookpro-6-2.patch
@@ -1998,6 +2000,8 @@ fi
   sched-00-fix-user-time-incorrectly-accounted-as-system-time-on-32-bit.patch
 - Revert: "drm/nv50: initialize ramht_refs list for faked 0 channel"
   (our DRM update removes ramht_refs entirely.)
+- Add sched-35-increment-cache_nice_tries-only-on-periodic-lb.patch, another
+  fix for excessive scheduler load balancing.
 
 * Thu Sep 23 2010 Kyle McMartin 2.6.35.5-32
 - Serialize mandocs/htmldocs build, since otherwise it will constantly
diff --git a/sched-35-increment-cache_nice_tries-only-on-periodic-lb.patch b/sched-35-increment-cache_nice_tries-only-on-periodic-lb.patch
new file mode 100644
index 0000000..5277dc7
--- /dev/null
+++ b/sched-35-increment-cache_nice_tries-only-on-periodic-lb.patch
@@ -0,0 +1,93 @@
+From: Venkatesh Pallipadi
+Date: Sat, 11 Sep 2010 01:19:17 +0000 (-0700)
+Subject: sched: Increment cache_nice_tries only on periodic lb
+X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fmingo%2Flinux-2.6-x86.git;a=commitdiff_plain;h=58b26c4c025778c09c7a1438ff185080e11b7d0a
+
+sched: Increment cache_nice_tries only on periodic lb
+
+The scheduler uses cache_nice_tries as the threshold for doing
+cache_hot and active load balance when normal load balance fails.
+Currently, the failure count compared against it is bumped on any
+failed load balance attempt. That ends up being not so nice to
+workloads that enter/exit idle often: they do more frequent
+new_idle balance, which soon results in cache-hot tasks being pulled.
+
+Making cache_nice_tries ignore failed new_idle balances seems to
+make better sense. With that, only the failed load balances in
+periodic load balancing get accounted, and the rate at which
+balance failures accumulate will not depend on idle entry/exit
+(short-running sleep-wakeup kinds of tasks). This reduces
+movement of cache-hot tasks.
+
+schedstat diff (after-before) excerpt from a workload that has a
+frequent and short wakeup-idle pattern (:2 in the cpu column below
+refers to the NEWIDLE index). This snapshot was across ~400 seconds.
+
+Without this change:
+domainstats: domain0
+ cpu  cnt     bln     fld    imb        gain   hgain  nobusyq  nobusyg
+ 0:2  306487  219575  73167  110069413  44583  19070  1172     218403
+ 1:2  292139  194853  81421  120893383  50745  21902  1259     193594
+ 2:2  283166  174607  91359  129699642  54931  23688  1287     173320
+ 3:2  273998  161788  93991  132757146  57122  24351  1366     160422
+ 4:2  289851  215692  62190  83398383   36377  13680  851      214841
+ 5:2  316312  222146  77605  117582154  49948  20281  988      221158
+ 6:2  297172  195596  83623  122133390  52801  21301  929      194667
+ 7:2  283391  178078  86378  126622761  55122  22239  928      177150
+ 8:2  297655  210359  72995  110246694  45798  19777  1125     209234
+ 9:2  297357  202011  79363  119753474  50953  22088  1089     200922
+10:2  278797  178703  83180  122514385  52969  22726  1128     177575
+11:2  272661  167669  86978  127342327  55857  24342  1195     166474
+12:2  293039  204031  73211  110282059  47285  19651  948      203083
+13:2  289502  196762  76803  114712942  49339  20547  1016     195746
+14:2  264446  169609  78292  115715605  50459  21017  982      168627
+15:2  260968  163660  80142  116811793  51483  21281  1064     162596
+
+With this change:
+domainstats: domain0
+ cpu  cnt     bln     fld    imb        gain   hgain  nobusyq  nobusyg
+ 0:2  272347  187380  77455  105420270  24975  1      953      186427
+ 1:2  267276  172360  86234  116242264  28087  6      1028     171332
+ 2:2  259769  156777  93281  123243134  30555  1      1043     155734
+ 3:2  250870  143129  97627  127370868  32026  6      1188     141941
+ 4:2  248422  177116  64096  78261112   22202  2      757      176359
+ 5:2  275595  180683  84950  116075022  29400  6      778      179905
+ 6:2  262418  162609  88944  119256898  31056  4      817      161792
+ 7:2  252204  147946  92646  122388300  32879  4      824      147122
+ 8:2  262335  172239  81631  110477214  26599  4      864      171375
+ 9:2  261563  164775  88016  117203621  28331  3      849      163926
+10:2  243389  140949  93379  121353071  29585  2      909      140040
+11:2  242795  134651  98310  124768957  30895  2      1016     133635
+12:2  255234  166622  79843  104696912  26483  4      746      165876
+13:2  244944  151595  83855  109808099  27787  3      801      150794
+14:2  241301  140982  89935  116954383  30403  6      845      140137
+15:2  232271  128564  92821  119185207  31207  4      1416     127148
+
+Signed-off-by: Venkatesh Pallipadi
+Signed-off-by: Peter Zijlstra
+LKML-Reference: <1284167957-3675-1-git-send-email-venki@google.com>
+Signed-off-by: Ingo Molnar
+---
+
+[ 2.6.35.x backport ]
+
+diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
+index a171138..aa16cf1 100644
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -3031,7 +3031,14 @@ redo:
+ 
+ 	if (!ld_moved) {
+ 		schedstat_inc(sd, lb_failed[idle]);
+-		sd->nr_balance_failed++;
++		/*
++		 * Increment the failure counter only on periodic balance.
++		 * We do not want newidle balance, which can be very
++		 * frequent, pollute the failure counter causing
++		 * excessive cache_hot migrations and active balances.
++		 */
++		if (idle != CPU_NEWLY_IDLE)
++			sd->nr_balance_failed++;
+ 
+ 		if (need_active_balance(sd, sd_idle, idle)) {
+ 			raw_spin_lock_irqsave(&busiest->lock, flags);
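
A note on the mechanism, for readers without the surrounding source at hand: in
2.6.35's kernel/sched_fair.c, can_migrate_task() lets a task that task_hot()
flags as cache-hot be pulled only once sd->nr_balance_failed exceeds
sd->cache_nice_tries, so the counter the hunk guards is what eventually unlocks
cache-hot migration and active balancing. The sketch below is a minimal
user-space model of that interaction, not kernel code: struct sched_domain is
reduced to the two relevant fields, and may_migrate_cache_hot() and
note_balance_failure() are invented helper names that only mirror the gate and
the patched accounting.

/*
 * Hypothetical user-space model, not kernel code. Only the names
 * sched_domain, nr_balance_failed, cache_nice_tries and CPU_NEWLY_IDLE
 * are taken from kernel/sched_fair.c; the helper functions and the
 * numbers are invented for illustration.
 */
#include <stdio.h>

enum cpu_idle_type { CPU_IDLE, CPU_NOT_IDLE, CPU_NEWLY_IDLE };

struct sched_domain {
	unsigned int nr_balance_failed;	/* consecutive failed balances */
	unsigned int cache_nice_tries;	/* failures tolerated before a
					 * cache-hot task may be pulled */
};

/* Roughly the gate can_migrate_task() applies to cache-hot tasks. */
static int may_migrate_cache_hot(const struct sched_domain *sd)
{
	return sd->nr_balance_failed > sd->cache_nice_tries;
}

/*
 * The failure accounting in load_balance() with this patch applied:
 * failed newidle balances no longer age the counter.
 */
static void note_balance_failure(struct sched_domain *sd,
				 enum cpu_idle_type idle)
{
	if (idle != CPU_NEWLY_IDLE)
		sd->nr_balance_failed++;
}

int main(void)
{
	struct sched_domain sd = { .nr_balance_failed = 0,
				   .cache_nice_tries = 1 };
	int i;

	/*
	 * A burst of failed newidle balances, as a workload that
	 * enters/exits idle often produces, no longer unlocks
	 * migration of cache-hot tasks...
	 */
	for (i = 0; i < 100; i++)
		note_balance_failure(&sd, CPU_NEWLY_IDLE);
	printf("after 100 newidle failures: may migrate cache-hot? %d\n",
	       may_migrate_cache_hot(&sd));

	/* ...only repeated failures of the periodic balance do. */
	note_balance_failure(&sd, CPU_NOT_IDLE);
	note_balance_failure(&sd, CPU_NOT_IDLE);
	printf("after 2 periodic failures:  may migrate cache-hot? %d\n",
	       may_migrate_cache_hot(&sd));
	return 0;
}

Compiled standalone, the sketch prints 0 and then 1: a hundred failed newidle
balances leave cache-hot tasks where they are, while two failed periodic
balances open the gate. That is the behavior visible in the schedstat excerpt
above, where hgain (cache-hot tasks gained) on the NEWIDLE index drops from
roughly 20,000 per CPU to single digits once the counter ignores newidle
failures.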