From c9080edcd82da26d0f15a09e9517c7f221cd3259 Mon Sep 17 00:00:00 2001 From: Aneesh Agrawal Date: Thu, 22 Dec 2016 14:35:22 -0500 Subject: [PATCH 1/3] Disable Python bytecode caching for Buildbot master For some reason (likely due to our "graceful" restarts of Buildbot), when Buildbot is restarted after its configuration is updated, it does not read the updated `.py` files but instead uses the existing compiled bytecode (`.pyc`) files, which reflect an older version and are not up to date. These bytecode files are also not regenerated, meaning the new configuration does not (fully) take effect. To work around this, disable bytecode caching for the Buildbot master entirely to avoid using out-of-date bytecode. saltfs-migration: Delete all `.pyc` files (recursively) in the Buildbot master directory (/home/servo/buildbot/master). --- buildbot/master/files/buildbot-master.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildbot/master/files/buildbot-master.conf b/buildbot/master/files/buildbot-master.conf index 414407c8..ead2c0c9 100644 --- a/buildbot/master/files/buildbot-master.conf +++ b/buildbot/master/files/buildbot-master.conf @@ -7,4 +7,4 @@ start on (local-filesystems and net-device-up IFACE!=lo) stop on runlevel [016] env HOME={{ common.servo_home }} - +env PYTHONDONTWRITEBYTECODE=1 From f087f3c16b342fc7dd5556bb547fee9a30b646aa Mon Sep 17 00:00:00 2001 From: Aneesh Agrawal Date: Fri, 23 Dec 2016 03:04:23 -0500 Subject: [PATCH 2/3] Upgrade Buildbot master DB on upgrade When upgrading the Buildbot master version, or starting from a fresh deploy (no existing database), the Buildbot database must be upgraded in order for Buildbot to start normally. (The DB is usually created during `buildbot create-master`, but we want to avoid checking in the database.) Add a `cmd.run` state that only runs if the Buildbot version is changed, using an `onchanges` requisite. The upgrade script requires that Buildbot is not running, presuambly to avoid conflicting updates, so Buildbot must be stopped before running the upgrade. We want to perform a clean stop, but the built-in `buildbot stop` command is nonblocking and will return immediately, without waiting for the existing Buildbot instance to finish. Buildbot has blocking/waiting for stop functionality built-in but not exposed, so add a small helper script to stop Buildbot and block until it is finished shutting down, and invoke it before the upgrade. An alternative would be trying to use Upstart or Salt directly to stop Buildbot, as a clean shutdown boils down to sending a SIGUSR1 to the Buildbot process (only if one is running), in the Unix tradition. However, this would be hard to integrate with; in particular, we need to wait for the existing Buildbot process to finish running; the easiest way to integrate this into a Salt state (without writing a custom Salt state) is to start a process to do the waiting, hence the stop script. Note that the upgrade-master command also adds various other cruft to the master directory; the Buildbot internal upgradeDatabase API is not called because it is layered in Twisted Reactor/inline callback goop, and it is simpler to just call the CLI command. Also update the Buildbot master states to be more strict about using requisites for better ordering control, and re-order/space out states for a better reading flow. --- buildbot/master/files/stop-buildbot.py | 40 +++++++++++++ buildbot/master/init.sls | 80 +++++++++++++++++--------- 2 files changed, 94 insertions(+), 26 deletions(-) create mode 100644 buildbot/master/files/stop-buildbot.py diff --git a/buildbot/master/files/stop-buildbot.py b/buildbot/master/files/stop-buildbot.py new file mode 100644 index 00000000..2feb5669 --- /dev/null +++ b/buildbot/master/files/stop-buildbot.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +""" +Utility which stops a Buildbot daemon gracefully +and blocks until the daemon is stopped. +This is built-in to Buildbot but not exposed by default. +""" + +from __future__ import absolute_import, print_function + +import os +import sys + +from buildbot.scripts import base, stop + + +USAGE = "usage: {} [ -h | --help | ]" + + +def main(argv): + usage = USAGE.format(argv[0]) + + if len(argv) != 2: + print(usage, file=sys.stderr) + return 1 + + if argv[1] == "-h" or argv[1] == "--help": + print(usage) + return 0 + + config = { + 'quiet': False, + 'clean': True, + 'basedir': os.path.abspath(argv[1]), + } + return stop.stop(config, wait=True) + + +if __name__ == '__main__': + sys.exit(main(sys.argv)) diff --git a/buildbot/master/init.sls b/buildbot/master/init.sls index 97a6b6e4..fce71a1e 100644 --- a/buildbot/master/init.sls +++ b/buildbot/master/init.sls @@ -1,26 +1,8 @@ {% from 'common/map.jinja' import common %} -buildbot-master: - pip.installed: - - pkgs: - - buildbot == 0.8.12 - - service_identity == 14.0.0 - - txgithub == 15.0.0 - - boto == 2.38.0 - - pyyaml == 3.11 - - require: - - pkg: pip - service.running: - - enable: True - # Buildbot must be restarted manually! See 'Buildbot administration' on the - # wiki and https://github.com/servo/saltfs/issues/304. - - require: - - pip: buildbot-master - - file: {{ common.servo_home }}/buildbot/master - - file: /etc/init/buildbot-master.conf - -{{ common.servo_home }}/buildbot/master: +buildbot-config: file.recurse: + - name: {{ common.servo_home }}/buildbot/master - source: salt://{{ tpldir }}/files/config - user: servo - group: servo @@ -30,8 +12,10 @@ buildbot-master: - context: common: {{ common }} buildbot_credentials: {{ pillar['buildbot']['credentials'] }} + - require: + - user: servo -ownership-{{ common.servo_home }}/buildbot/master: +buildbot-config-ownership: file.directory: - name: {{ common.servo_home }}/buildbot/master - user: servo @@ -39,9 +23,43 @@ ownership-{{ common.servo_home }}/buildbot/master: - recurse: - user - group + - require: + - user: servo + - file: buildbot-config + +/usr/local/bin/stop-buildbot.py: + file.managed: + - source: salt://{{ tpldir }}/files/stop-buildbot.py + - user: root + - group: root + - mode: 755 -/etc/init/buildbot-master.conf: +buildbot-master: + pip.installed: + - pkgs: + - buildbot == 0.8.12 + - service_identity == 14.0.0 + - txgithub == 15.0.0 + - boto == 2.38.0 + - pyyaml == 3.11 + - require: + - pkg: pip + cmd.run: # Need to create/upgrade DB file on new Buildbot version + - name: | + '/usr/local/bin/stop-buildbot.py' \ + '{{ common.servo_home }}/buildbot/master' \ + && buildbot upgrade-master '{{ common.servo_home }}/buildbot/master' + - runas: servo + - env: + - PYTHONDONTWRITEBYTECODE: "1" + - require: + - user: servo + - file: buildbot-config-ownership + - file: /usr/local/bin/stop-buildbot.py + - onchanges: + - pip: buildbot-master file.managed: + - name: /etc/init/buildbot-master.conf - source: salt://{{ tpldir }}/files/buildbot-master.conf - user: root - group: root @@ -49,6 +67,16 @@ ownership-{{ common.servo_home }}/buildbot/master: - template: jinja - context: common: {{ common }} + service.running: + - enable: True + # Buildbot must be restarted manually! See 'Buildbot administration' on the + # wiki and https://github.com/servo/saltfs/issues/304. + - require: + - pip: buildbot-master + - file: buildbot-config-ownership + - cmd: buildbot-master + - file: buildbot-master + /usr/local/bin/github_buildbot.py: file.managed: @@ -57,8 +85,9 @@ ownership-{{ common.servo_home }}/buildbot/master: - group: root - mode: 755 -/etc/init/buildbot-github-listener.conf: +buildbot-github-listener: file.managed: + - name: /etc/init/buildbot-github-listener.conf - source: salt://{{ tpldir }}/files/buildbot-github-listener.conf - user: root - group: root @@ -66,13 +95,12 @@ ownership-{{ common.servo_home }}/buildbot/master: - template: jinja - context: common: {{ common }} - -buildbot-github-listener: service.running: - enable: True - watch: - file: /usr/local/bin/github_buildbot.py - - file: /etc/init/buildbot-github-listener.conf + - file: buildbot-github-listener + remove-old-build-logs: cron.present: From 0b69a00f8f0d5fe6e0d69af7812e32fdc2d2c0a9 Mon Sep 17 00:00:00 2001 From: Aneesh Agrawal Date: Thu, 6 Oct 2016 13:08:58 -0400 Subject: [PATCH 3/3] Automatically queue Buildbot master restarts Buildbot comes with built-in functionality for clean restarts, which entail starting a new Buildbot process that does the following: - Cleanly shut down the existing buildmaster (existing instance) by waiting for pending builds to be finshed, ending the existing process. - Start a new buildmaster in the new process, taking over. Note that the new process becomes the new daemon, and thus needs to linger/be kept alive and managed. Upstart's built-in restart functionality hard-kills the existing process, which is undesirable; it's also hard to make Upstart wait for pending builds before stopping the existing process, as only Buildbot knows about any pending builds. Additionally, Buildbot has a limited reload functionality, but there are many pitfalls, gotchas, and inconsistencies, and it is not recommended for customized installations like ours. (e.g. re-loading imports doesn't work.) Note that when a Buildbot clean restart is requested, there are multiple processes running simultaneously. Model this in Upstart by using an "instance" job, which makes it easy to queue restarts and monitor them, without having to leave processes running in e.g. tmux or screen. Add an additional Upstart task to spawn a single instance of the Buildbot master service at boot-up, or during a Salt deploy if no instances are alive. More instances of the buildbot-master job can be started manually, or automatically by Salt, to queue up restarts. (The original process will exit gracefully after some time.) All instances will stop gracefully on shutdown. Note that the switch to an instance job means each separate instance must be differentiated by a `reason` variable, which is not used for any other purpose; this is automatically set to the current date/time, suffixed with some metadata about the reason for the instance. This has a side effect of creating separate Upstart logs (in `/var/log/upstart` for each instance), but the choice of reason keeps the logs sorted by date. Finally, automate this by having Salt queue a new restart job instance if anything about the Buildbot master configuration (or package) changes. --- .../files/buildbot-master-autostart.conf | 19 ++++++++++++ buildbot/master/files/buildbot-master.conf | 15 ++++++++-- buildbot/master/init.sls | 29 +++++++++++++++++-- 3 files changed, 58 insertions(+), 5 deletions(-) create mode 100644 buildbot/master/files/buildbot-master-autostart.conf diff --git a/buildbot/master/files/buildbot-master-autostart.conf b/buildbot/master/files/buildbot-master-autostart.conf new file mode 100644 index 00000000..f46c3ee9 --- /dev/null +++ b/buildbot/master/files/buildbot-master-autostart.conf @@ -0,0 +1,19 @@ +# Because the buildbot-master service is an instance job, +# we must provide a reason to start an instance of it. +# This Upstart task will automatically start one instance at bootup. +# Additionally, if no instances are alive during a Salt deploy, +# then this task will start a fresh instance. +# Upstart will not create new instances otherwise; +# additional instances can be created on-demand. +# Each instance will automatically stop itself at shutdown via its `stop on`. + +start on (local-filesystems and net-device-up IFACE!=lo) + +task + +script + # Avoid restarting Buildbot if it's already running + if ! initctl list | grep "buildbot-master (" >/dev/null; then + start buildbot-master reason="$(date --utc --iso-8601=seconds)-autostart" + fi +end script diff --git a/buildbot/master/files/buildbot-master.conf b/buildbot/master/files/buildbot-master.conf index ead2c0c9..81bf5199 100644 --- a/buildbot/master/files/buildbot-master.conf +++ b/buildbot/master/files/buildbot-master.conf @@ -1,9 +1,20 @@ -exec /usr/local/bin/buildbot start --nodaemon {{ common.servo_home }}/buildbot/master +# Use multiple instances to model multiple process: +# - (1) currently running Buildbot master +# - (n) queued restarts +# Each 'restart' process will wait for the existing master to finish pending +# builds, then take over as its own long-lived process. +# Runnign them via Upstart provides process supervision and monitoring, +# obviating the need to run restarts in tmux or screen (or disown them). +# +# Note that we don't actually use the $reason variable, +# except to provide distinct instance IDs for Upstart. +instance $reason + +exec /usr/local/bin/buildbot restart --clean --nodaemon $HOME/buildbot/master setuid servo setgid servo -start on (local-filesystems and net-device-up IFACE!=lo) stop on runlevel [016] env HOME={{ common.servo_home }} diff --git a/buildbot/master/init.sls b/buildbot/master/init.sls index fce71a1e..70f0927a 100644 --- a/buildbot/master/init.sls +++ b/buildbot/master/init.sls @@ -45,8 +45,10 @@ buildbot-master: - require: - pkg: pip cmd.run: # Need to create/upgrade DB file on new Buildbot version + # Explicit call to `/usr/bin/python` is to work around Travis mega-PATH, + # the stop-buildbot.py script has a proper shebang - name: | - '/usr/local/bin/stop-buildbot.py' \ + /usr/bin/python /usr/local/bin/stop-buildbot.py \ '{{ common.servo_home }}/buildbot/master' \ && buildbot upgrade-master '{{ common.servo_home }}/buildbot/master' - runas: servo @@ -67,15 +69,36 @@ buildbot-master: - template: jinja - context: common: {{ common }} + +# Automatically queue a clean restart of Buildbot if anything changes +queue-buildbot-master-restart: + cmd.run: + - name: 'initctl start buildbot-master reason="$(date --utc --iso-8601=seconds)-salt-restart"' + - runas: root + - onchanges: + - pip: buildbot-master + - file: buildbot-config + - file: buildbot-config-ownership + - file: buildbot-master + +# Start a fresh Buildbot instance if one isn't running (including at bootup) +buildbot-master-autostart: + file.managed: + - name: /etc/init/buildbot-master-autostart.conf + - source: salt://{{ tpldir }}/files/buildbot-master-autostart.conf + - user: root + - group: root + - mode: 644 + - template: jinja service.running: - enable: True - # Buildbot must be restarted manually! See 'Buildbot administration' on the - # wiki and https://github.com/servo/saltfs/issues/304. - require: - pip: buildbot-master - file: buildbot-config-ownership - cmd: buildbot-master - file: buildbot-master + - cmd: queue-buildbot-master-restart + - file: buildbot-master-autostart /usr/local/bin/github_buildbot.py: