Blob Blame History Raw
From 0d8ab270ea422c8a1bc771d54d4d3e23c7ffe53b Mon Sep 17 00:00:00 2001
From: Mike McLean <mikem@redhat.com>
Date: Jun 10 2021 17:44:55 +0000
Subject: [PATCH 1/3] strict_module_state_transitions config option


Fixes: https://pagure.io/fm-orchestrator/issue/1678

---

diff --git a/module_build_service/common/config.py b/module_build_service/common/config.py
index 91bbb33..22d55de 100644
--- a/module_build_service/common/config.py
+++ b/module_build_service/common/config.py
@@ -747,6 +747,11 @@ class Config(object):
             ],
             "desc": "The list Python paths for the Celery application to import.",
         },
+        "strict_module_state_transitions": {
+            "type": bool,
+            "default": True,
+            "desc": "Whether to strictly enforce module state transitions",
+        },
     }
 
     def __init__(self, conf_section_obj):
diff --git a/module_build_service/scheduler/handlers/modules.py b/module_build_service/scheduler/handlers/modules.py
index 3e8d21f..3b858dd 100644
--- a/module_build_service/scheduler/handlers/modules.py
+++ b/module_build_service/scheduler/handlers/modules.py
@@ -61,8 +61,17 @@ def failed(msg_id, module_build_id, module_build_state):
             "Note that retrieved module state %r doesn't match message module state %r",
             build.state, module_build_state,
         )
-        # This is ok.. it's a race condition we can ignore.
-        pass
+
+    if conf.strict_module_state_transitions:
+        valid_states = (
+            models.BUILD_STATES["init"],
+            models.BUILD_STATES["wait"],
+            models.BUILD_STATES["build"],
+            models.BUILD_STATES["failed"],
+        )
+        if build.state not in valid_states:
+            log.error("Module failed handler called while module in state %r", build.state)
+            return
 
     if build.koji_tag:
         builder = GenericBuilder.create_from_module(db_session, build, conf)
@@ -123,8 +132,15 @@ def done(msg_id, module_build_id, module_build_state):
             "Note that retrieved module state %r doesn't match message module state %r",
             build.state, module_build_state,
         )
-        # This is ok.. it's a race condition we can ignore.
-        pass
+
+    if conf.strict_module_state_transitions:
+        valid_states = (
+            models.BUILD_STATES["build"],
+            models.BUILD_STATES["done"],
+        )
+        if build.state not in valid_states:
+            log.error("Module done handler called while module in state %r", build.state)
+            return
 
     # Scratch builds stay in 'done' state
     if not build.scratch:
@@ -349,8 +365,15 @@ def wait(msg_id, module_build_id, module_build_state):
             "Note that retrieved module state %r doesn't match message module state %r",
             build.state, module_build_state,
         )
-        # This is ok.. it's a race condition we can ignore.
-        pass
+
+    if conf.strict_module_state_transitions:
+        valid_states = (
+            models.BUILD_STATES["init"],
+            models.BUILD_STATES["wait"],
+        )
+        if build.state not in valid_states:
+            log.error("Module wait handler called while module in state %r", build.state)
+            return
 
     try:
         build_deps = get_module_build_dependencies(build)

From 57359dfd66c9f811265f53a549568329b9c2d58b Mon Sep 17 00:00:00 2001
From: Mike McLean <mikem@redhat.com>
Date: Jun 10 2021 17:47:28 +0000
Subject: [PATCH 2/3] fix unit tests


---

diff --git a/tests/__init__.py b/tests/__init__.py
index ca188fa..c995d6b 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -409,7 +409,7 @@ def _populate_data(data_size=10, contexts=False, scratch=False):
     db_session.commit()
 
 
-def scheduler_init_data(tangerine_state=None, scratch=False):
+def scheduler_init_data(tangerine_state=None, scratch=False, module_state="build"):
     """ Creates a testmodule in the building state with all the components in the same batch
     """
     clean_database()
@@ -421,7 +421,7 @@ def scheduler_init_data(tangerine_state=None, scratch=False):
         name="testmodule",
         stream="master",
         version='20170109091357',
-        state=BUILD_STATES["build"],
+        state=BUILD_STATES[module_state],
         scratch=scratch,
         build_context="ac4de1c346dcf09ce77d38cd4e75094ec1c08eb0",
         runtime_context="ac4de1c346dcf09ce77d38cd4e75094ec1c08eb0",
diff --git a/tests/test_build/test_build.py b/tests/test_build/test_build.py
index 809e3e0..104c49d 100644
--- a/tests/test_build/test_build.py
+++ b/tests/test_build/test_build.py
@@ -1152,16 +1152,24 @@ class TestBuild(BaseTestBuild):
 
         from module_build_service.scheduler.db_session import db_session
 
+        # module should be in wait state for this test
+        module_build = models.ModuleBuild.get_by_id(db_session, 3)
+        module_build.state = models.BUILD_STATES["wait"]
+        db_session.commit()
+
         # Create a dedicated database session for scheduler to avoid hang
         self.run_scheduler(
             msgs=[{
                 "msg_id": "local module build",
                 "event": events.MBS_MODULE_STATE_CHANGE,
-                "module_build_id": 3,
-                "module_build_state": 1
+                "module_build_id": module_build.id,
+                "module_build_state": module_build.state,
             }]
         )
 
+        # commit so that our assertions see the updates
+        db_session.commit()
+
         reused_component_ids = {
             "module-build-macros": None,
             "tangerine": 3,
@@ -1239,6 +1247,11 @@ class TestBuild(BaseTestBuild):
 
         FakeModuleBuilder.on_buildroot_add_artifacts_cb = on_buildroot_add_artifacts_cb
 
+        # module should be in wait state for this test
+        module_build = models.ModuleBuild.get_by_id(db_session, 3)
+        module_build.state = models.BUILD_STATES["wait"]
+        db_session.commit()
+
         self.run_scheduler(
             msgs=[{
                 "msg_id": "local module build",
@@ -1248,6 +1261,9 @@ class TestBuild(BaseTestBuild):
             }]
         )
 
+        # commit so that our assertions see the updates
+        db_session.commit()
+
         # All components should be built and module itself should be in "done"
         # or "ready" state.
         for build in models.ModuleBuild.get_by_id(db_session, 3).component_builds:
diff --git a/tests/test_scheduler/test_module_wait.py b/tests/test_scheduler/test_module_wait.py
index 4a31d71..7257c7b 100644
--- a/tests/test_scheduler/test_module_wait.py
+++ b/tests/test_scheduler/test_module_wait.py
@@ -21,7 +21,7 @@ base_dir = os.path.dirname(os.path.dirname(__file__))
 
 class TestModuleWait:
     def setup_method(self, test_method):
-        scheduler_init_data()
+        scheduler_init_data(module_state="wait")
 
         self.config = conf
         self.session = mock.Mock()

From 2283ca1760432148394693654c24f6c687c5b857 Mon Sep 17 00:00:00 2001
From: Mike McLean <mikem@redhat.com>
Date: Jun 14 2021 20:29:15 +0000
Subject: [PATCH 3/3] additional unit tests for strict_module_state_transitions


---

diff --git a/tests/test_scheduler/test_module_states.py b/tests/test_scheduler/test_module_states.py
new file mode 100644
index 0000000..f9b6d0e
--- /dev/null
+++ b/tests/test_scheduler/test_module_states.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: MIT
+from __future__ import absolute_import
+import os
+
+import mock
+from mock import patch
+import pytest
+
+from module_build_service.common import build_logs, conf, models
+import module_build_service.resolver
+from module_build_service.scheduler.db_session import db_session
+import module_build_service.scheduler.handlers.modules
+from tests import scheduler_init_data, clean_database
+
+base_dir = os.path.dirname(os.path.dirname(__file__))
+
+
+class TestModuleStateChecks:
+    def setup_method(self, test_method):
+        clean_database()
+        self.config = conf
+        self.session = mock.Mock()
+        conf.strict_module_state_transitions = True
+
+    def teardown_method(self, test_method):
+        try:
+            path = build_logs.path(db_session, 1)
+            os.remove(path)
+        except Exception:
+            pass
+
+    @pytest.mark.parametrize(
+        "bad_state",
+        ["build", "done", "failed", "ready", "garbage"],
+    )
+    @patch("module_build_service.builder.GenericBuilder.create_from_module")
+    def test_wait_state_validation(self, create_builder, bad_state):
+        scheduler_init_data(module_state=bad_state)
+        build = models.ModuleBuild.get_by_id(db_session, 2)
+        # make sure we have the right build
+        assert build.state == models.BUILD_STATES[bad_state]
+        assert build.version == "20170109091357"
+        with patch("module_build_service.resolver.GenericResolver.create"):
+            module_build_service.scheduler.handlers.modules.wait(
+                msg_id="msg-id-1",
+                module_build_id=build.id,
+                module_build_state=models.BUILD_STATES["wait"])
+
+        # the handler should exit early for these bad states
+        create_builder.assert_not_called()
+
+        # build state should not be changed
+        build = models.ModuleBuild.get_by_id(db_session, build.id)
+        assert build.state == models.BUILD_STATES[bad_state]
+
+    @pytest.mark.parametrize(
+        "bad_state",
+        ["done", "ready", "garbage"],
+    )
+    @patch("module_build_service.builder.GenericBuilder.create_from_module")
+    def test_failed_state_validation(self, create_builder, bad_state):
+        scheduler_init_data(module_state=bad_state)
+        build = models.ModuleBuild.get_by_id(db_session, 2)
+        # make sure we have the right build
+        assert build.state == models.BUILD_STATES[bad_state]
+        assert build.version == "20170109091357"
+        with patch("module_build_service.resolver.GenericResolver.create"):
+            module_build_service.scheduler.handlers.modules.failed(
+                msg_id="msg-id-1",
+                module_build_id=build.id,
+                module_build_state=models.BUILD_STATES["wait"])
+
+        # the handler should exit early for these bad states
+        create_builder.assert_not_called()
+
+        # build state should not be changed
+        build = models.ModuleBuild.get_by_id(db_session, build.id)
+        assert build.state == models.BUILD_STATES[bad_state]
+
+    @pytest.mark.parametrize(
+        "bad_state",
+        ["init", "wait", "failed", "ready", "garbage"],
+    )
+    @patch("module_build_service.builder.GenericBuilder.clear_cache")
+    def test_done_state_validation(self, clear_cache, bad_state):
+        scheduler_init_data(module_state=bad_state)
+        build = models.ModuleBuild.get_by_id(db_session, 2)
+        # make sure we have the right build
+        assert build.state == models.BUILD_STATES[bad_state]
+        assert build.version == "20170109091357"
+        with patch("module_build_service.resolver.GenericResolver.create"):
+            module_build_service.scheduler.handlers.modules.done(
+                msg_id="msg-id-1",
+                module_build_id=build.id,
+                module_build_state=models.BUILD_STATES["done"])
+
+        # the handler should exit early for these bad states
+        clear_cache.assert_not_called()
+
+        # build state should not be changed
+        build = models.ModuleBuild.get_by_id(db_session, build.id)
+        assert build.state == models.BUILD_STATES[bad_state]