a52f674
From 7fed327890e10c38d89df2c7913f906d76e515e7 Mon Sep 17 00:00:00 2001
b12ff75
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
b12ff75
Date: Wed, 13 Nov 2013 00:42:22 -0500
b12ff75
Subject: [PATCH] journald: do not free space when disk space runs low
b12ff75
b12ff75
Before, journald would remove journal files until both MaxUse= and
b12ff75
KeepFree= settings would be satisfied. The first one depends (if set
b12ff75
automatically) on the size of the file system and is constant.  But
b12ff75
the second one depends on current use of the file system, and a spike
b12ff75
in disk usage would cause journald to delete journal files, trying to
b12ff75
reach usage which would leave 15% of the disk free. This behaviour is
b12ff75
surprising for the user who doesn't expect his logs to be purged when
b12ff75
disk usage goes above 85%, which on a large disk could be some
b12ff75
gigabytes from being full. In addition attempting to keep 15% free
b12ff75
provides an attack vector where filling the disk sufficiently disposes
b12ff75
of almost all logs.
b12ff75
b12ff75
Instead, obey KeepFree= only as a limit on adding additional files.
b12ff75
When replacing old files with new, ignore KeepFree=. This means that
b12ff75
if journal disk usage reached some high point that at some later point
b12ff75
starts to violate the KeepFree= constraint, journald will not add files
b12ff75
to go above this point, but it will stay (slightly) below it. When
b12ff75
journald is restarted, it forgets the previous maximum usage value,
b12ff75
and sets the limit based on the current usage, so if the disk remains
b12ff75
full, journald might use one journal-file-size less on each
b12ff75
restart, if restarts happen just after rotation. This seems like a
b12ff75
reasonable compromise between implementation complexity and robustness.
a52f674
a52f674
(cherry picked from commit 348ced909724a1331b85d57aede80a102a00e428)
b12ff75
---
b12ff75
 man/journald.conf.xml                   | 39 +++++++++++++++++++++++----------
b12ff75
 src/journal/journal-file.h              |  1 +
b12ff75
 src/journal/journal-vacuum.c            |  6 ++---
b12ff75
 src/journal/journal-vacuum.h            |  2 +-
b12ff75
 src/journal/journald-server.c           | 22 ++++++++++++++-----
b12ff75
 src/journal/test-journal-interleaving.c |  4 ++--
b12ff75
 src/journal/test-journal.c              |  4 ++--
b12ff75
 src/shared/macro.h                      |  7 ++++++
b12ff75
 8 files changed, 58 insertions(+), 27 deletions(-)
b12ff75
b12ff75
diff --git a/man/journald.conf.xml b/man/journald.conf.xml
b12ff75
index b362c5d..e0796e1 100644
b12ff75
--- a/man/journald.conf.xml
b12ff75
+++ b/man/journald.conf.xml
b12ff75
@@ -250,20 +250,35 @@
b12ff75
                                 <para><varname>SystemMaxUse=</varname>
b12ff75
                                 and <varname>RuntimeMaxUse=</varname>
b12ff75
                                 control how much disk space the
b12ff75
-                                journal may use up at
b12ff75
-                                maximum. Defaults to 10% of the size
b12ff75
-                                of the respective file
b12ff75
-                                system. <varname>SystemKeepFree=</varname>
b12ff75
-                                and
b12ff75
+                                journal may use up at maximum.
b12ff75
+                                <varname>SystemKeepFree=</varname> and
b12ff75
                                 <varname>RuntimeKeepFree=</varname>
b12ff75
                                 control how much disk space
b12ff75
-                                systemd-journald shall always leave
b12ff75
-                                free for other uses. Defaults to 15%
b12ff75
-                                of the size of the respective file
b12ff75
-                                system. systemd-journald will respect
b12ff75
-                                both limits, i.e. use the smaller of
b12ff75
-                                the two values.
b12ff75
-                                <varname>SystemMaxFileSize=</varname>
b12ff75
+                                systemd-journald shall leave free for
b12ff75
+                                other uses.
b12ff75
+                                <command>systemd-journald</command>
b12ff75
+                                will respect both limits and use the
b12ff75
+                                smaller of the two values.</para>
b12ff75
+
b12ff75
+                                <para>The first pair defaults to 10%
b12ff75
+                                and the second to 15% of the size of
b12ff75
+                                the respective file system. If the
b12ff75
+                                file system is nearly full and either
b12ff75
+                                <varname>SystemKeepFree=</varname> or
b12ff75
+                                <varname>RuntimeKeepFree=</varname> is
b12ff75
+                                violated when systemd-journald is
b12ff75
+                                started, the value will be raised to
b12ff75
+                                the percentage that is actually free. This
b12ff75
+                                means that if before there was enough
b12ff75
+                                free space and journal files were
b12ff75
+                                created, and subsequently something
b12ff75
+                                else causes the file system to fill
b12ff75
+                                up, journald will stop using more
b12ff75
+                                space, but it will not remove
b12ff75
+                                existing files to reduce its footprint
b12ff75
+                                either.</para>
b12ff75
+
b12ff75
+                                <para><varname>SystemMaxFileSize=</varname>
b12ff75
                                 and
b12ff75
                                 <varname>RuntimeMaxFileSize=</varname>
b12ff75
                                 control how large individual journal
b12ff75
diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h
b12ff75
index 0bd23f7..2e06b57 100644
b12ff75
--- a/src/journal/journal-file.h
b12ff75
+++ b/src/journal/journal-file.h
b12ff75
@@ -37,6 +37,7 @@
b12ff75
 
b12ff75
 typedef struct JournalMetrics {
b12ff75
         uint64_t max_use;
b12ff75
+        uint64_t use;
b12ff75
         uint64_t max_size;
b12ff75
         uint64_t min_size;
b12ff75
         uint64_t keep_free;
b12ff75
diff --git a/src/journal/journal-vacuum.c b/src/journal/journal-vacuum.c
b12ff75
index d4a1c6c..8b07f65 100644
b12ff75
--- a/src/journal/journal-vacuum.c
b12ff75
+++ b/src/journal/journal-vacuum.c
b12ff75
@@ -150,7 +150,6 @@ static int journal_file_empty(int dir_fd, const char *name) {
b12ff75
 int journal_directory_vacuum(
b12ff75
                 const char *directory,
b12ff75
                 uint64_t max_use,
b12ff75
-                uint64_t min_free,
b12ff75
                 usec_t max_retention_usec,
b12ff75
                 usec_t *oldest_usec) {
b12ff75
 
b12ff75
@@ -164,7 +163,7 @@ int journal_directory_vacuum(
b12ff75
 
b12ff75
         assert(directory);
b12ff75
 
b12ff75
-        if (max_use <= 0 && min_free <= 0 && max_retention_usec <= 0)
b12ff75
+        if (max_use <= 0 && max_retention_usec <= 0)
b12ff75
                 return 0;
b12ff75
 
b12ff75
         if (max_retention_usec > 0) {
b12ff75
@@ -310,8 +309,7 @@ int journal_directory_vacuum(
b12ff75
                 }
b12ff75
 
b12ff75
                 if ((max_retention_usec <= 0 || list[i].realtime >= retention_limit) &&
b12ff75
-                    (max_use <= 0 || sum <= max_use) &&
b12ff75
-                    (min_free <= 0 || (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free))
b12ff75
+                    (max_use <= 0 || sum <= max_use))
b12ff75
                         break;
b12ff75
 
b12ff75
                 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
b12ff75
diff --git a/src/journal/journal-vacuum.h b/src/journal/journal-vacuum.h
b12ff75
index f5e3e52..bc30c3a 100644
b12ff75
--- a/src/journal/journal-vacuum.h
b12ff75
+++ b/src/journal/journal-vacuum.h
b12ff75
@@ -23,4 +23,4 @@
b12ff75
 
b12ff75
 #include <inttypes.h>
b12ff75
 
b12ff75
-int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free, usec_t max_retention_usec, usec_t *oldest_usec);
b12ff75
+int journal_directory_vacuum(const char *directory, uint64_t max_use, usec_t max_retention_usec, usec_t *oldest_usec);
b12ff75
diff --git a/src/journal/journald-server.c b/src/journal/journald-server.c
b12ff75
index 1fcb3d5..cd2cfe9 100644
b12ff75
--- a/src/journal/journald-server.c
b12ff75
+++ b/src/journal/journald-server.c
b12ff75
@@ -158,9 +158,18 @@ static uint64_t available_space(Server *s, bool verbose) {
b12ff75
         }
b12ff75
 
b12ff75
         ss_avail = ss.f_bsize * ss.f_bavail;
b12ff75
-        avail = ss_avail > m->keep_free ? ss_avail - m->keep_free : 0;
b12ff75
 
b12ff75
-        s->cached_available_space = MIN(m->max_use, avail) > sum ? MIN(m->max_use, avail) - sum : 0;
b12ff75
+        /* If we reached a high mark, we will always allow this much
b12ff75
+         * again, unless usage goes above max_use. This watermark
b12ff75
+         * value is cached so that we don't give up space on pressure,
b12ff75
+         * but hover below the maximum usage. */
b12ff75
+
b12ff75
+        if (m->use < sum)
b12ff75
+                m->use = sum;
b12ff75
+
b12ff75
+        avail = LESS_BY(ss_avail, m->keep_free);
b12ff75
+
b12ff75
+        s->cached_available_space = LESS_BY(MIN(m->max_use, avail), sum);
b12ff75
         s->cached_available_space_timestamp = ts;
b12ff75
 
b12ff75
         if (verbose) {
b12ff75
@@ -168,13 +177,14 @@ static uint64_t available_space(Server *s, bool verbose) {
b12ff75
                         fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX];
b12ff75
 
b12ff75
                 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
b12ff75
-                                      "%s journal is using %s (max %s, leaving %s of free %s, current limit %s).",
b12ff75
+                                      "%s journal is using %s (max allowed %s, "
b12ff75
+                                      "trying to leave %s free of %s available → current limit %s).",
b12ff75
                                       s->system_journal ? "Permanent" : "Runtime",
b12ff75
                                       format_bytes(fb1, sizeof(fb1), sum),
b12ff75
                                       format_bytes(fb2, sizeof(fb2), m->max_use),
b12ff75
                                       format_bytes(fb3, sizeof(fb3), m->keep_free),
b12ff75
                                       format_bytes(fb4, sizeof(fb4), ss_avail),
b12ff75
-                                      format_bytes(fb5, sizeof(fb5), MIN(m->max_use, avail)));
b12ff75
+                                      format_bytes(fb5, sizeof(fb5), s->cached_available_space + sum));
b12ff75
         }
b12ff75
 
b12ff75
         return s->cached_available_space;
b12ff75
@@ -378,7 +388,7 @@ void server_vacuum(Server *s) {
b12ff75
         if (s->system_journal) {
b12ff75
                 char *p = strappenda("/var/log/journal/", ids);
b12ff75
 
b12ff75
-                r = journal_directory_vacuum(p, s->system_metrics.max_use, s->system_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
b12ff75
+                r = journal_directory_vacuum(p, s->system_metrics.max_use, s->max_retention_usec, &s->oldest_file_usec);
b12ff75
                 if (r < 0 && r != -ENOENT)
b12ff75
                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
b12ff75
         }
b12ff75
@@ -386,7 +396,7 @@ void server_vacuum(Server *s) {
b12ff75
         if (s->runtime_journal) {
b12ff75
                 char *p = strappenda("/run/log/journal/", ids);
b12ff75
 
b12ff75
-                r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->runtime_metrics.keep_free, s->max_retention_usec, &s->oldest_file_usec);
b12ff75
+                r = journal_directory_vacuum(p, s->runtime_metrics.max_use, s->max_retention_usec, &s->oldest_file_usec);
b12ff75
                 if (r < 0 && r != -ENOENT)
b12ff75
                         log_error("Failed to vacuum %s: %s", p, strerror(-r));
b12ff75
         }
b12ff75
diff --git a/src/journal/test-journal-interleaving.c b/src/journal/test-journal-interleaving.c
b12ff75
index 1a058ea..974fa3b 100644
b12ff75
--- a/src/journal/test-journal-interleaving.c
b12ff75
+++ b/src/journal/test-journal-interleaving.c
b12ff75
@@ -194,7 +194,7 @@ static void test_skip(void (*setup)(void))
b12ff75
         if (arg_keep)
b12ff75
                 log_info("Not removing %s", t);
b12ff75
         else {
b12ff75
-                journal_directory_vacuum(".", 3000000, 0, 0, NULL);
b12ff75
+                journal_directory_vacuum(".", 3000000, 0, NULL);
b12ff75
 
b12ff75
                 assert_se(rm_rf_dangerous(t, false, true, false) >= 0);
b12ff75
         }
b12ff75
@@ -279,7 +279,7 @@ static void test_sequence_numbers(void) {
b12ff75
         if (arg_keep)
b12ff75
                 log_info("Not removing %s", t);
b12ff75
         else {
b12ff75
-                journal_directory_vacuum(".", 3000000, 0, 0, NULL);
b12ff75
+                journal_directory_vacuum(".", 3000000, 0, NULL);
b12ff75
 
b12ff75
                 assert_se(rm_rf_dangerous(t, false, true, false) >= 0);
b12ff75
         }
b12ff75
diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c
b12ff75
index 190c426..3b8778d 100644
b12ff75
--- a/src/journal/test-journal.c
b12ff75
+++ b/src/journal/test-journal.c
b12ff75
@@ -126,7 +126,7 @@ static void test_non_empty(void) {
b12ff75
         if (arg_keep)
b12ff75
                 log_info("Not removing %s", t);
b12ff75
         else {
b12ff75
-                journal_directory_vacuum(".", 3000000, 0, 0, NULL);
b12ff75
+                journal_directory_vacuum(".", 3000000, 0, NULL);
b12ff75
 
b12ff75
                 assert_se(rm_rf_dangerous(t, false, true, false) >= 0);
b12ff75
         }
b12ff75
@@ -165,7 +165,7 @@ static void test_empty(void) {
b12ff75
         if (arg_keep)
b12ff75
                 log_info("Not removing %s", t);
b12ff75
         else {
b12ff75
-                journal_directory_vacuum(".", 3000000, 0, 0, NULL);
b12ff75
+                journal_directory_vacuum(".", 3000000, 0, NULL);
b12ff75
 
b12ff75
                 assert_se(rm_rf_dangerous(t, false, true, false) >= 0);
b12ff75
         }
b12ff75
diff --git a/src/shared/macro.h b/src/shared/macro.h
b12ff75
index d4f92b6..bc5b3c1 100644
b12ff75
--- a/src/shared/macro.h
b12ff75
+++ b/src/shared/macro.h
b12ff75
@@ -114,6 +114,13 @@ static inline size_t ALIGN_TO(size_t l, size_t ali) {
b12ff75
                         _a < _b ? _a : _b;      \
b12ff75
                 })
b12ff75
 
b12ff75
+#define LESS_BY(A,B)                            \
b12ff75
+        __extension__ ({                        \
b12ff75
+                        typeof(A) _A = (A);     \
b12ff75
+                        typeof(B) _B = (B);     \
b12ff75
+                        _A > _B ? _A - _B : 0;  \
b12ff75
+                })
b12ff75
+
b12ff75
 #ifndef CLAMP
b12ff75
 #define CLAMP(x, low, high)                                             \
b12ff75
         __extension__ ({                                                \