diff --git a/drbd.ocf b/drbd.ocf deleted file mode 100644 index cf6b966..0000000 --- a/drbd.ocf +++ /dev/null @@ -1,1133 +0,0 @@ -#!/bin/bash -# -# -# OCF Resource Agent compliant drbd resource script. -# -# Copyright (c) 2009 LINBIT HA-Solutions GmbH, -# Copyright (c) 2009 Florian Haas, Lars Ellenberg -# Based on the Heartbeat drbd OCF Resource Agent by Lars Marowsky-Bree -# (though it turned out to be an almost complete rewrite) -# -# All Rights Reserved. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of version 2 of the GNU General Public License as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it would be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# -# Further, this software is distributed without any warranty that it is -# free of the rightful claim of any third person regarding infringement -# or the like. Any license provided herein, whether implied or -# otherwise, applies only to this software file. Patent licenses, if -# any, provided herein do not apply to combinations of this program with -# other software, or any other product whatsoever. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write the Free Software Foundation, -# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. -# -# - -# OCF instance parameters -# OCF_RESKEY_drbd_resource -# OCF_RESKEY_drbdconf -# OCF_RESKEY_stop_outdates_secondary -# OCF_RESKEY_adjust_master_score -# -# meta stuff this agent looks at: -# OCF_RESKEY_CRM_meta_clone_max -# OCF_RESKEY_CRM_meta_clone_node_max -# OCF_RESKEY_CRM_meta_master_max -# OCF_RESKEY_CRM_meta_master_node_max -# -# OCF_RESKEY_CRM_meta_interval -# -# OCF_RESKEY_CRM_meta_notify -# OCF_RESKEY_CRM_meta_notify_active_uname -# OCF_RESKEY_CRM_meta_notify_demote_uname -# OCF_RESKEY_CRM_meta_notify_master_uname -# OCF_RESKEY_CRM_meta_notify_operation -# OCF_RESKEY_CRM_meta_notify_promote_uname -# OCF_RESKEY_CRM_meta_notify_slave_uname -# OCF_RESKEY_CRM_meta_notify_start_uname -# OCF_RESKEY_CRM_meta_notify_stop_uname -# OCF_RESKEY_CRM_meta_notify_type -# - -####################################################################### -# Initialization: - -# Resource-agents have moved their ocf-shellfuncs file around. -# There are supposed to be symlinks or wrapper files in the old location, -# pointing to the new one, but people seem to get it wrong all the time. -# Try several locations. - -if test -n "${OCF_FUNCTIONS_DIR}" ; then - if test -e "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs" ; then - . "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs" - elif test -e "${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs" ; then - . "${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs" - fi -else - if test -e "${OCF_ROOT}/lib/heartbeat/ocf-shellfuncs" ; then - . "${OCF_ROOT}/lib/heartbeat/ocf-shellfuncs" - elif test -e "${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"; then - . "${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs" - fi -fi - -# Defaults -OCF_RESKEY_drbdconf_default="/etc/drbd.conf" - -# The passed in OCF_CRM_meta_notify_* environment -# is not reliably with pacemaker up to at least -# 1.0.10 and 1.1.4. It should be fixed later. -# Until that is fixed, the "self-outdating feature" would base its actions on -# wrong information, and possibly not outdate when it should, or, even worse, -# outdate the last remaining valid copy. -# Disable. -OCF_RESKEY_stop_outdates_secondary_default="false" - - OCF_RESKEY_adjust_master_score_default="5 10 1000 10000" -# ignored | Consistent | Unknown -' | | | -# ignored | NOT UpToDate | UpToDate ---' | | -# Secondary | UpToDate | unknown --------' | -# ignored | UpToDate | known --------------+ -# Primary | UpToDate | ignored --------------' - -: ${OCF_RESKEY_drbdconf:=${OCF_RESKEY_drbdconf_default}} -: ${OCF_RESKEY_stop_outdates_secondary:=${OCF_RESKEY_stop_outdates_secondary_default}} -: ${OCF_RESKEY_adjust_master_score:=${OCF_RESKEY_adjust_master_score_default}} - -# Defaults according to "Configuration 1.0 Explained", -# "Multi-state resource configuration options" -: ${OCF_RESKEY_CRM_meta_clone_node_max=1} -: ${OCF_RESKEY_CRM_meta_master_max=1} -: ${OCF_RESKEY_CRM_meta_master_node_max=1} -####################################################################### -# for debugging this RA -DEBUG_LOG_DIR=/tmp/drbd.ocf.ra.debug -DEBUG_LOG=$DEBUG_LOG_DIR/log -USE_DEBUG_LOG=false -ls_stat_is_dir_0700_root() { - set -- $(command ls -ldn "$1" 2>/dev/null); - case "$1/$3" in - drwx?-??-?/0|\ - drwx?-??-?./0) true ;; - *) false ;; - esac -} -# try to avoid symlink vuln. -if ls_stat_is_dir_0700_root $DEBUG_LOG_DIR && - [[ -w "$DEBUG_LOG" && ! -L "$DEBUG_LOG" ]] -then - USE_DEBUG_LOG=true - exec 9>>"$DEBUG_LOG" - date >&9 - echo "$*" >&9 - env | grep OCF_ | sort >&9 -else - exec 9>/dev/null -fi -# end of debugging aid -####################################################################### - -meta_data() { - cat < - - -1.3 - - -This resource agent manages a DRBD resource as a master/slave resource. -DRBD is a shared-nothing replicated storage device. -Note that you should configure resource level fencing in DRBD, -this cannot be done from this resource agent. -See the DRBD User's Guide for more information. -http://www.drbd.org/docs/applications/ - - -Manages a DRBD device as a Master/Slave resource - - - - -The name of the drbd resource from the drbd.conf file. - -drbd resource name - - - - - -Full path to the drbd.conf file. - -Path to drbd.conf - - - - - -Space separated list of four master score adjustments for different scenarios: - - only access to 'consistent' data - - only remote access to 'uptodate' data - - currently Secondary, local access to 'uptodate' data, but remote is unknown - - local access to 'uptodate' data, and currently Primary or remote is known - -Numeric values are expected to be non-decreasing. - -Default are the previously hardcoded values. - -Set the first value to 0 (and configure proper fencing methods) -to prevent pacemaker from trying to promote while it is unclear -whether the data is really the most recent copy. -(DRBD knows it is "consistent", but is unsure about "uptodate"ness). - -Advanced use: Adjust the other values to better fit into complex -dependency score calculations. - -master score adjustments - - - - - -Recommended setting: leave at default (disabled). - -Note that this feature depends on the passed in information in -OCF_RESKEY_CRM_meta_notify_master_uname to be correct, which unfortunately is -not reliable for pacemaker versions up to at least 1.0.10 / 1.1.4. - -If a Secondary is stopped (unconfigured), it may be marked as outdated in the -drbd meta data, if we know there is still a Primary running in the cluster. -Note that this does not affect fencing policies set in drbd config, -but is an additional safety feature of this resource agent only. -You can enable this behaviour by setting the parameter to true. - -If this feature seems to not do what you expect, make sure you have defined -fencing policies in the drbd configuration as well. - -outdate a secondary on stop - - - - - - - - - - - - - - - - -END -} - -do_cmd() { - # Run a command, return its exit code, capture any output, and log - # everything if appropriate. - local cmd="$*" cmd_out ret - ocf_log debug "$DRBD_RESOURCE: Calling $cmd" - cmd_out=$( "$@" ) - ret=$? - - if [ $ret != 0 ]; then - ocf_log err "$DRBD_RESOURCE: Called $cmd" - ocf_log err "$DRBD_RESOURCE: Exit code $ret" - ocf_log err "$DRBD_RESOURCE: Command output: $cmd_out" - else - ocf_log debug "$DRBD_RESOURCE: Exit code $ret" - ocf_log debug "$DRBD_RESOURCE: Command output: $cmd_out" - fi - - echo "$cmd_out" - - return $ret -} - -do_drbdadm() { - local ret - # Run drbdadm with appropriate command line options, and capture - # its output. - # $DRBDADM is defined during drbd_validate as "drbdadm" plus - # appropriate command line options - do_cmd $DRBDADM "$@" - ret=$? - - # having the version mismatch warning once per RA invokation - # should be enough. - export DRBD_DONT_WARN_ON_VERSION_MISMATCH= - - return $ret -} - -set_master_score() { - # Use quiet mode (-Q) to quench logging. Actual score updates - # will get logged by attrd anyway - if [[ $1 -le 0 ]]; then - remove_master_score - else - do_cmd ${HA_SBIN_DIR}/crm_master -Q -l reboot -v $1 - fi -} - -remove_master_score() { - do_cmd ${HA_SBIN_DIR}/crm_master -l reboot -D -} - -_sh_status_process() { - # _volume not present should not happen, - # but may help make this agent work even if it talks to drbd 8.3. - : ${_volume:=0} - # not-yet-created volumes are reported as -1 - (( _volume >= 0 )) || _volume=$[1 << 16] - DRBD_ROLE_LOCAL[$_volume]=${_role:-Unconfigured} - DRBD_ROLE_REMOTE[$_volume]=${_peer:-Unknown} - DRBD_CSTATE[$_volume]=$_cstate - DRBD_DSTATE_LOCAL[$_volume]=${_disk:-Unconfigured} - DRBD_DSTATE_REMOTE[$_volume]=${_pdsk:-DUnknown} -} -drbd_set_status_variables() { - # drbdsetup sh-status prints these values to stdout, - # and then prints _sh_status_process. - # - # if we eval that, we do not need several drbdadm/drbdsetup commands - # to figure out the various aspects of the state. - local _minor _res_name _known _cstate _role _peer _disk _pdsk - local _volume - local _flags_susp _flags_aftr_isp _flags_peer_isp _flags_user_isp - local _resynced_percent - - DRBD_ROLE_LOCAL=() - DRBD_ROLE_REMOTE=() - DRBD_CSTATE=() - DRBD_DSTATE_LOCAL=() - DRBD_DSTATE_REMOTE=() - - if $DRBD_HAS_MULTI_VOLUME ; then - eval "$($DRBDSETUP sh-status "$DRBD_RESOURCE")" - else - # without "MULTI_VOLUME", the DRBD_DEVICES array - # should contain exactly one value - eval "$($DRBDSETUP "$DRBD_DEVICES" sh-status)" - fi - - # if there was no output at all, or a weird output - # make sure the status arrays won't be empty. - [[ ${#DRBD_ROLE_LOCAL[@]} != 0 ]] || DRBD_ROLE_LOCAL=(Unconfigured) - [[ ${#DRBD_ROLE_REMOTE[@]} != 0 ]] || DRBD_ROLE_REMOTE=(Unknown) - [[ ${#DRBD_CSTATE[@]} != 0 ]] || DRBD_CSTATE=(Unconfigured) - [[ ${#DRBD_DSTATE_LOCAL[@]} != 0 ]] || DRBD_DSTATE_LOCAL=(Unconfigured) - [[ ${#DRBD_DSTATE_REMOTE[@]} != 0 ]] || DRBD_DSTATE_REMOTE=(DUnknown) - - - : == DEBUG == DRBD_ROLE_LOCAL == ${DRBD_ROLE_LOCAL[@]} == - : == DEBUG == DRBD_ROLE_REMOTE == ${DRBD_ROLE_REMOTE[@]} == - : == DEBUG == DRBD_CSTATE == ${DRBD_CSTATE[@]} == - : == DEBUG == DRBD_DSTATE_LOCAL == ${DRBD_DSTATE_LOCAL[@]} == - : == DEBUG == DRBD_DSTATE_REMOTE == ${DRBD_DSTATE_REMOTE[@]} == -} - -# This is not the only fencing mechanism. -# But in addition to the drbd "fence-peer" handler, which should be configured, -# and is expected to place some appropriate constraints, this is used to -# actually store the Outdated information in DRBD on-disk meta data. -# -# called after stop, and from post notification events. -maybe_outdate_self() -{ - # if you claim your right to go online with stale data, - # there you are. - ocf_is_true $OCF_RESKEY_stop_outdates_secondary || return 1 - - local host stop_uname - # We ignore $OCF_RESKEY_CRM_meta_notify_promote_uname here - # because: if demote and promote for a _stacked_ resource - # (or a "floating" one, where DRBD sits on top of some SAN) - # happen in the same transition, demote will see the promote - # hostname here, and voluntarily outdate itself. Which would - # result in promote failure, as it is using the same meta - # data, which would then be outdated. - # If that is not sufficient for you, you probably need to - # configure fencing policies in the drbd configuration. - host=$(printf "%s\n" $OCF_RESKEY_CRM_meta_notify_master_uname | - grep -vix -m1 -e "$HOSTNAME" ) - if [[ -z $host ]] ; then - # no current master host found, do not outdate myself - return 1 - fi - for stop_uname in $OCF_RESKEY_CRM_meta_notify_stop_uname; do - [[ $host == "$stop_uname" ]] || continue - # post notification for stop on that host. - # hrmpf. crm passed in stale master_uname :( - # ignore - return 1 - done - - # e.g. post/promote of some other peer. - # Should not happen, fencing constraints should take care of that. - # But in case it does, scream out loud. - case "${DRBD_ROLE_LOCAL[*]}" in - *Primary*) - # I am Primary. - # The other one is Primary (according to OCF_RESKEY_CRM_meta_notify_master_uname). - # But we cannot talk to each other :( (otherwise this function was not called) - # One of us has to die. - # Which one, however, is not ours to decide. - - ocf_log crit "resource internal SPLIT BRAIN: both $HOSTNAME and $host are Primary for $DRBD_RESOURCE, but the replication link is down!" - return 1 - esac - - # OK, I am not Primary, but there is an other node Primary - # Outdate myself - ocf_log notice "outdating $DRBD_RESOURCE: according to OCF_RESKEY_CRM_meta_notify_master_uname, '$host' is still master" - do_drbdadm outdate $DRBD_RESOURCE - - # on some pacemaker versions, -INFINITY may cause resource instance stop/start. - # But in this case that is ok, it may even clear the replication link - # problem. - set_master_score -INFINITY - - return 0 -} - -drbd_update_master_score() { - set -- $OCF_RESKEY_adjust_master_score - local only_consistent=$1 only_remote=$2 local_ok=$3 as_good_as_it_gets=$4 - # NOTE - # there may be constraint scores from rules on role=Master, - # that in some ways can add to the node attribute based master score we - # specify below. If you think you want to add personal preferences, - # in case the scores given by this RA do not suffice, this is the - # value space you can work with: - # -INFINITY: Do not promote. Really. Won't work anyways. - # Too bad, at least with current (Oktober 2009) Pacemaker, - # negative master scores cause instance stop; restart cycle :( - # missing, zero: Do not promote. - # I think my data is not good enough. - # Though, of course, you may try, and it might even work. - # 5: please, do not promote, unless this is your only option. - # 10: promotion is probably a bad idea, our local data is no good, - # you'd probably run into severe performance problems, and risk - # application crashes or blocking IO in case you lose the - # replication connection. - # 1000: Ok to be promoted, we have good data locally (though we don't - # know about the peer, so possibly it has even better data?). - # You sould use the crm-fence-peer.sh handler or similar - # mechanism to avoid data divergence. - # 10000: Please promote me/keep me Primary. - # I'm confident that my data is as good as it gets. - # - # For multi volume, we need to compare who is "better" a bit more sophisticated. - # The ${XXX[*]//UpToDate}, without being in double quotes, results in a single space, - # if all are UpToDate. - : == DEBUG == ${DRBD_ROLE_LOCAL[*]}/${DRBD_DSTATE_LOCAL[*]//UpToDate/ }/${DRBD_DSTATE_REMOTE[*]//UpToDate/ }/ == - case ${DRBD_ROLE_LOCAL[*]}/${DRBD_DSTATE_LOCAL[*]//UpToDate/ }/${DRBD_DSTATE_REMOTE[*]//UpToDate/ }/ in - *Primary*/\ /*/) - # I am Primary, all local disks are UpToDate - set_master_score $as_good_as_it_gets - ;; - */\ /*DUnknown*/) - # all local disks are UpToDate, - # but I'm not Primary, - # and I'm not sure about the peer's disk state(s). - # We may need to outdate ourselves? - # But if we outdate in a MONITOR, and are disconnected - # secondary because of a hard primary crash, before CRM noticed - # that there is no more master, we'd make us utterly useless! - # Trust that the primary will also notice the disconnect, - # and will place an appropriate fencing constraint via - # its fence-peer handler callback. - set_master_score $local_ok - ;; - */\ /*/) - # We know something about our peer, which means that either the - # replication link is established, or it was not even - # consistent last time we talked to each other. - # Also all our local disks are UpToDate, which means even if we are - # currently synchronizing, we do so as SyncSource. - set_master_score $as_good_as_it_gets - ;; - - */*/\ /) - # At least one of our local disks is not up to date. - # But our peer is ALL OK. - # We can expect to have access to useful - # data, but must expect degraded performance. - set_master_score $only_remote - ;; - - */*Attaching*/*/|\ - */*Negotiating*/*/) - # some transitional state. - # just don't do anything - : ;; - - Unconfigured*|\ - */*Diskless*/*/|\ - */*Failed*/*/|\ - */*Inconsistent*/*/|\ - */*Outdated*/*/) - # ALWAYS put the cluster in MAINTENANCE MODE - # if you add a volume to a live replication group, - # because the new volume will typically come up as Inconsistent - # the first time, which would cause a monitor to revoke the - # master score! - # - # At least some of our local disks are not really useable. - # Our peer is not all good either (or some previous case block - # would have matched). We have no access to useful data. - # DRBD would refuse to be promoted, anyways. - # - # set_master_score -INFINITY - # Too bad, at least with current (Oktober 2009) Pacemaker, - # negative master scores cause instance stop; restart cycle :( - # Hope that this will suffice. - remove_master_score - ;; - *) - # All local disks seem to be Consistent. - # They _may_ be up to date, or not. - # We hope that fencing mechanisms have put constraints in - # place, so we won't be promoted with stale data. - # But in case this was a cluster crash, - # at least allow _someone_ to be promoted. - set_master_score $only_consistent - ;; - esac - - : "$OCF_SUCCESS = OCF_SUCCESS" - return $OCF_SUCCESS -} - -is_drbd_enabled() { - test -f /proc/drbd -} - -####################################################################### - -drbd_usage() { - echo "\ -usage: $0 {start|stop|monitor|validate-all|promote|demote|notify|meta-data} - -Expects to have a fully populated OCF RA-compliant environment set." -} - -drbd_status() { - local rc - local dev - rc=$OCF_NOT_RUNNING - - # NOT local! but "return values" - # since 8.4 supports multi volumes per resource, - # these are shell arrays. - # - # Initialize to "Unconfigured", in case this returns early. - # They will be re-initialized and properly populated in drbd_set_status_variables. - DRBD_ROLE_LOCAL=(Unconfigured) - DRBD_ROLE_REMOTE=(Unknown) - DRBD_CSTATE=(Unconfigured) - DRBD_DSTATE_LOCAL=(Unconfigured) - DRBD_DSTATE_REMOTE=(DUnknown) - - is_drbd_enabled || return $rc - - # Not running, if no block devices exist. - # - # FIXME what if some do, and some do not exist? - # Adding/removing volumes to/from existing resources should only be - # done with maintenance-mode enabled. - # If someone does manually kill/remove only some of the volumes, - # we tolerate that here. - for dev in ${DRBD_DEVICES[@]} ""; do - test -b $dev && break - done - [[ $dev ]] || return $rc - - # ok, module is loaded, block device nodes exist. - # lets see the status - drbd_set_status_variables - case "${DRBD_ROLE_LOCAL[*]}" in - *Primary*) - rc=$OCF_RUNNING_MASTER - ;; - *Secondary*) - rc=$OCF_SUCCESS - ;; - *Unconfigured*) - rc=$OCF_NOT_RUNNING - ;; - *) - ocf_log err "Unexpected role(s) >>${DRBD_ROLE_LOCAL[*]}<<" - rc=$OCF_ERR_GENERIC - esac - - return $rc -} - -# I'm sorry, but there is no $OCF_DEGRADED_MASTER or similar yet. -drbd_monitor() { - local status - drbd_status - status=$? - - if [[ $status = $OCF_NOT_RUNNING ]] && ocf_is_probe ; then - # see also linux-ha mailing list archives, - # From: Andrew Beekhof - # Subject: Re: pacemaker+drbd promotion delay - # Date: 2012-04-13 01:47:37 GMT - # e.g.: http://thread.gmane.org/gmane.linux.highavailability.user/37089/focus=37163 - # --- - : "do nothing" ; - else - drbd_update_master_score - fi - - case $status in - (0) : "OCF_SUCCESS" ;; - (1) : "OCF_ERR_GENERIC" ;; - (2) : "OCF_ERR_ARGS" ;; - (3) : "OCF_ERR_UNIMPLEMENTED" ;; - (4) : "OCF_ERR_PERM" ;; - (5) : "OCF_ERR_INSTALLED" ;; - (6) : "OCF_ERR_CONFIGURED" ;; - (7) : "OCF_NOT_RUNNING" ;; - (8) : "OCF_RUNNING_MASTER" ;; - (9) : "OCF_FAILED_MASTER" ;; - (*) : " WTF? $status " ;; - esac - - return $status -} - -figure_out_drbd_peer_uname() -{ - # depending on whether or not the peer is currently - # configured, slave, master, or about to be started, - # it may be mentioned in various variables (or not at all) - local x - # intentionally not cared for stop_uname - x=$(printf "%s\n" \ - $OCF_RESKEY_CRM_meta_notify_start_uname \ - $OCF_RESKEY_CRM_meta_notify_promote_uname \ - $OCF_RESKEY_CRM_meta_notify_master_uname \ - $OCF_RESKEY_CRM_meta_notify_slave_uname \ - $OCF_RESKEY_CRM_meta_notify_demote_uname | - grep -vix -m1 -e "$HOSTNAME" ) - DRBD_TO_PEER=${x:+ --peer $x} -} - -my_udevsettle() -{ - for dev in ${DRBD_DEVICES[@]}; do - while ! test -b $dev; do - sleep 1; - done - done - return 0 -} -create_device_udev_settle() { - local dev - if $DRBD_HAS_MULTI_VOLUME; then - if do_drbdadm new-resource $DRBD_RESOURCE && - do_drbdadm new-minor $DRBD_RESOURCE; then - my_udevsettle - else - return 1 - fi - elif do_drbdadm syncer $DRBD_RESOURCE ; then - my_udevsettle - else - return 1 - fi -} - -drbd_start() { - local rc - local status - local first_try=true - - rc=$OCF_ERR_GENERIC - - if ! is_drbd_enabled; then - do_cmd modprobe -s drbd `$DRBDADM sh-mod-parms` || { - ocf_log err "Cannot load the drbd module."; - : "$OCF_ERR_INSTALLED = OCF_ERR_INSTALLED" - return $OCF_ERR_INSTALLED - } - ocf_log debug "$DRBD_RESOURCE start: Module loaded." - fi - - # Keep trying to bring up the resource; - # wait for the CRM to time us out if this fails - while :; do - drbd_status - status=$? - case "$status" in - $OCF_SUCCESS) - # Just in case we have to adjust something, this is a - # good place to do it. Actually, we don't expect to be - # called to "start" an already "running" resource, so - # this is probably dead code. - # Also, ignore the exit code of adjust, as we are - # "running" already, anyways, right? - figure_out_drbd_peer_uname - do_drbdadm $DRBD_TO_PEER adjust $DRBD_RESOURCE - rc=$OCF_SUCCESS - break - ;; - $OCF_NOT_RUNNING) - # Check for offline resize. If using internal meta data, - # we may need to move it first to its expected location. - $first_try && do_drbdadm check-resize $DRBD_RESOURCE - figure_out_drbd_peer_uname - if ! create_device_udev_settle; then - # We cannot even create the objects - exit $OCF_ERR_GENERIC - fi - if ! do_drbdadm $DRBD_TO_PEER attach $DRBD_RESOURCE ; then - # If we cannot up it, even on the second try, - # it is unlikely to get better. Don't wait for - # this operation to timeout, but short circuit - # exit with generic error. - $first_try || exit $OCF_ERR_GENERIC - sleep 1 - fi - ;; - $OCF_RUNNING_MASTER) - ocf_log warn "$DRBD_RESOURCE already Primary, demoting." - do_drbdadm secondary $DRBD_RESOURCE - esac - $first_try || sleep 1 - first_try=false - done - # in case someone does not configure monitor, - # we must at least call it once after start. - drbd_update_master_score - - return $rc -} - -drbd_promote() { - local rc - local status - local first_try=true - - rc=$OCF_ERR_GENERIC - - # Keep trying to promote the resource; - # wait for the CRM to time us out if this fails - while :; do - drbd_status - status=$? - case "$status" in - $OCF_SUCCESS) - do_drbdadm primary $DRBD_RESOURCE - if [[ $? = 17 ]]; then - # All available disks are inconsistent, - # or I am consistent, but failed to fence the peer. - # Cannot become primary. - # No need to retry indefinitely. - ocf_log crit "Refusing to be promoted to Primary without UpToDate data" - break - fi - ;; - $OCF_NOT_RUNNING) - ocf_log error "Trying to promote a resource that was not started" - break - ;; - $OCF_RUNNING_MASTER) - rc=$OCF_SUCCESS - break - esac - $first_try || sleep 1 - first_try=false - done - - # avoid too tight pacemaker driven "recovery" loop, - # if promotion keeps failing for some reason - if [[ $rc != 0 ]] && (( $SECONDS < 15 )) ; then - delay=$(( 15 - SECONDS )) - ocf_log warn "promotion failed; sleep $delay # to prevent tight recovery loop" - sleep $delay - fi - return $rc -} - -drbd_demote() { - local rc - local status - local first_try=true - - rc=$OCF_ERR_GENERIC - - # Keep trying to demote the resource; - # wait for the CRM to time us out if this fails - while :; do - drbd_status - status=$? - case "$status" in - $OCF_SUCCESS) - rc=$OCF_SUCCESS - break - ;; - $OCF_NOT_RUNNING) - ocf_log error "Trying to promote a resource that was not started" - break - ;; - $OCF_RUNNING_MASTER) - do_drbdadm secondary $DRBD_RESOURCE - esac - $first_try || sleep 1 - first_try=false - done - - return $rc -} - -drbd_stop() { - local rc=$OCF_ERR_GENERIC - local first_try=true - - # Keep trying to bring down the resource; - # wait for the CRM to time us out if this fails - while :; do - drbd_status - status=$? - case "$status" in - $OCF_SUCCESS) - do_drbdadm down $DRBD_RESOURCE - ;; - $OCF_NOT_RUNNING) - # Just in case, down it anyways, in case it has been - # deconfigured but not yet removed. - # Relevant for >= 8.4. - do_drbdadm down $DRBD_RESOURCE - # But ignore any return codes, - # we are not running, so stop is successfull. - rc=$OCF_SUCCESS - break - ;; - $OCF_RUNNING_MASTER) - ocf_log warn "$DRBD_RESOURCE still Primary, demoting." - do_drbdadm secondary $DRBD_RESOURCE - esac - $first_try || sleep 1 - first_try=false - done - - # if there is some Master (Primary) still around, - # outdate myself in drbd on-disk meta data. - maybe_outdate_self - - # do not let old master scores laying around. - # they may confuse crm if this node was set to standby. - remove_master_score - - return $rc -} - - -drbd_notify() { - local n_type=$OCF_RESKEY_CRM_meta_notify_type - local n_op=$OCF_RESKEY_CRM_meta_notify_operation - - # active_* and *_resource not really interessting - # : "== DEBUG == active = $OCF_RESKEY_CRM_meta_notify_active_uname" - : "== DEBUG == slave = $OCF_RESKEY_CRM_meta_notify_slave_uname" - : "== DEBUG == master = $OCF_RESKEY_CRM_meta_notify_master_uname" - : "== DEBUG == start = $OCF_RESKEY_CRM_meta_notify_start_uname" - : "== DEBUG == promote = $OCF_RESKEY_CRM_meta_notify_promote_uname" - : "== DEBUG == stop = $OCF_RESKEY_CRM_meta_notify_stop_uname" - : "== DEBUG == demote = $OCF_RESKEY_CRM_meta_notify_demote_uname" - - case $n_type/$n_op in - */start) - # We do not get a /pre/ start notification for ourself. - # but we get a /pre/ start notification for the other side, unless both - # are started from the same transition graph. If there are only two - # peers (the "classic" two-node DRBD), this adjust is usually a no-op. - # - # In case of more than one _possible_ peer, we may still be StandAlone, - # or configured for a meanwhile failed peer, and should now adjust our - # network settings during pre-notification of start of the other node. - # - # We usually get /post/ notification for ourself and the peer. - # In both cases adjust should be a no-op. - drbd_set_status_variables - figure_out_drbd_peer_uname - do_drbdadm $DRBD_TO_PEER -v adjust $DRBD_RESOURCE - ;; - post/*) - # After something has been done is a good time to - # recheck our status: - drbd_set_status_variables - drbd_update_master_score - - : == DEBUG == ${DRBD_DSTATE_REMOTE[*]} == - case ${DRBD_DSTATE_REMOTE[*]} in - *DUnknown*) - # Still not communicating. - # Maybe someone else is primary (too)? - maybe_outdate_self - esac - esac - - : "$OCF_SUCCESS = OCF_SUCCESS" - return $OCF_SUCCESS -} - -# "macro" to be able to give useful error messages -# on clone resource configuration error. -meta_expect() -{ - local what=$1 whatvar=OCF_RESKEY_CRM_meta_${1//-/_} op=$2 expect=$3 - local val=${!whatvar} - if [[ -n $val ]]; then - # [, not [[, or it won't work ;) - [ $val $op $expect ] && return - fi - ocf_log err "meta parameter misconfigured, expected $what $op $expect, but found ${val:-unset}." - exit $OCF_ERR_CONFIGURED -} - -ls_stat_is_block_maj_147() { - set -- $(command ls -L -l "$1" 2>/dev/null) - [[ $1 = b* ]] && [[ $5 == 147,* ]] -} - -check_crm_feature_set() -{ - set -- ${OCF_RESKEY_crm_feature_set//[!0-9]/ } - local a=${1:-0} b=${2:-0} c=${3:-0} - - (( a > 3 )) || - (( a == 3 && b > 0 )) || - (( a == 3 && b == 0 && c > 0 )) || - ocf_log warn "You may be disappointed: This RA is intended for pacemaker 1.0 or better!" -} - -drbd_validate_all () { - DRBDADM="drbdadm" - DRBDSETUP="drbdsetup" - DRBD_HAS_MULTI_VOLUME=false - - # these will _exit_ if they don't find the binaries - check_binary $DRBDADM - check_binary $DRBDSETUP - # XXX I really take cibadmin, sed, grep, etc. for granted. - - local VERSION DRBD_KERNEL_VERSION_CODE=0 - if VERSION="$($DRBDADM --version 2>/dev/null)"; then - eval $VERSION - fi - if (( $DRBD_KERNEL_VERSION_CODE == 0x0 )) ; then - # Maybe the DRBD module was not loaded (yet). - # I don't want to load the module here, - # maybe this is just a probe or stop. - # It will be loaded on "start", though. - # Instead, look at modinfo output. - # Newer drbdadm does this implicitly, but may reexec older - # drbdadm versions for compatibility reasons. - DRBD_KERNEL_VERSION_CODE=$(printf "0x%02x%02x%02x" $( - modinfo -F version drbd | - sed -ne 's/^\([0-9]\+\)\.\([0-9]\+\)\.\([0-9]\+\).*$/\1 \2 \3/p')) - fi - if (( $DRBD_KERNEL_VERSION_CODE >= 0x080400 )); then - DRBD_HAS_MULTI_VOLUME=true - fi - check_crm_feature_set - - # Check clone and M/S options. - meta_expect clone-max -le 2 - meta_expect clone-node-max = 1 - meta_expect master-node-max = 1 - meta_expect master-max -le 2 - - # Rather than returning $OCF_ERR_CONFIGURED, we sometimes return - # $OCF_ERR_INSTALLED here: the local config may be broken, but some - # other node may have a valid config. - - # check drbdconf plausibility - case "$OCF_RESKEY_drbdconf" in - "") - # this is actually ok. drbdadm has its own builtin defaults. - # but as long as we assign an explicit default above, - # this cannot happen anyways. - : ;; - *[!-%+./0-9:=@A-Z_a-z]*) - # no, I do not trust the configurable cib parameters. - ocf_log err "drbdconf name must only contain [-%+./0-9:=@A-Z_a-z]" - : "$OCF_ERR_CONFIGURED = OCF_ERR_CONFIGURED" - return $OCF_ERR_CONFIGURED - ;; - *) - # Check if we can read the configuration file. - if [ ! -r "${OCF_RESKEY_drbdconf}" ]; then - ocf_log err "Configuration file ${OCF_RESKEY_drbdconf} does not exist or is not readable!" - : "$OCF_ERR_INSTALLED = OCF_ERR_INSTALLED" - return $OCF_ERR_INSTALLED - fi - DRBDADM="$DRBDADM -c $OCF_RESKEY_drbdconf" - esac - - # check drbd_resource plausibility - case "$OCF_RESKEY_drbd_resource" in - "") - ocf_log err "No resource name specified!" - : "$OCF_ERR_CONFIGURED = OCF_ERR_CONFIGURED" - return $OCF_ERR_CONFIGURED - ;; - *[!-%+./0-9:=@A-Z_a-z]*) - # no, I do not trust the configurable cib parameters. - ocf_log err "Resource name must only contain [-%+./0-9:=@A-Z_a-z]" - : "$OCF_ERR_CONFIGURED = OCF_ERR_CONFIGURED" - return $OCF_ERR_CONFIGURED - esac - # exporting this is useful for "drbdsetup show". - # and it makes it all a little bit more readable. - export DRBD_RESOURCE=$OCF_RESKEY_drbd_resource - - # The resource should appear in the config file, - # otherwise something's fishy - # NOTE - # since 8.4 has multi volume support, - # DRBD_DEVICES will be a shell array! - # FIXME we should double check that we explicitly restrict the set of - # valid characters in device names... - if DRBD_DEVICES=($($DRBDADM --stacked sh-dev $DRBD_RESOURCE 2>/dev/null)); then - # apparently a "stacked" resource. Remember for future DRBDADM calls. - DRBDADM="$DRBDADM -S" - elif DRBD_DEVICES=($($DRBDADM sh-dev $DRBD_RESOURCE 2>/dev/null)); then - : # nothing to do. - else - if [[ $__OCF_ACTION = "monitor" && $OCF_RESKEY_CRM_meta_interval = 0 ]]; then - # ok, this was a probe. That may happen on any node, - # to enforce configuration. - : "$OCF_NOT_RUNNING = OCF_NOT_RUNNING" - return $OCF_NOT_RUNNING - else - # hm. probably misconfigured constraint somewhere. - # sorry. don't retry anywhere. - ocf_log err "DRBD resource ${DRBD_RESOURCE} not found in configuration file ${OCF_RESKEY_drbdconf}." - remove_master_score - : "$OCF_ERR_INSTALLED = OCF_ERR_INSTALLED" - return $OCF_ERR_INSTALLED - fi - fi - - # check for master-max and allow-two-primaries on start|promote only, - # so it could be stopped still, if someone re-configured while running. - case $__OCF_ACTION:$OCF_RESKEY_CRM_meta_master_max in - start:2|promote:2) - if ! $DRBDADM -d -v dump $DRBD_RESOURCE 2>/dev/null | - grep -q -Ee '^[[:space:]]*allow-two-primaries([[:space:]]+yes)?;$' - then - ocf_log err "master-max = 2, but DRBD resource $DRBD_RESOURCE does not allow-two-primaries." - : "$OCF_ERR_CONFIGURED = OCF_ERR_CONFIGURED" - return $OCF_ERR_CONFIGURED - fi - esac - - # detect whether notify is configured or not. - # for probes, the meta_notify* namespace is not exported. - case $__OCF_ACTION in - monitor|validate-all) - :;; - *) - # Test if the environment variables for either the notify - # enabled, or one of its effects, are set. - # If both are unset, we complain. - if ! ocf_is_true ${OCF_RESKEY_CRM_meta_notify} && - [[ ${OCF_RESKEY_CRM_meta_notify_start_uname- NOT SET } = " NOT SET " ]]; then - ocf_log err "you really should enable notify when using this RA" - : "$OCF_ERR_CONFIGURED = OCF_ERR_CONFIGURED" - return $OCF_ERR_CONFIGURED - fi - esac - - local i j n=0 fallback=false - for i in $OCF_RESKEY_adjust_master_score; do - [[ $i = *[!0-9]* ]] && fallback=true && ocf_log err "BAD adjust_master_score value $i ; falling back to default" - [[ $j && $i -lt $j ]] && fallback=true && ocf_log err "BAD adjust_master_score value $j > $i ; falling back to default" - j=$i - n=$(( n+1 )) - done - [[ $n != 4 ]] && fallback=true && ocf_log err "Not enough adjust_master_score values ($n != 4); falling back to default" - $fallback && OCF_RESKEY_adjust_master_score=$OCF_RESKEY_adjust_master_score_default - - # we use it in various places, - # just make sure it contains what we expect. - HOSTNAME=`uname -n` - - : "$OCF_SUCCESS = OCF_SUCCESS" - return $OCF_SUCCESS -} - -####################################################################### - -if [ $# != 1 ]; then - drbd_usage - exit $OCF_ERR_ARGS -fi - -# if $__OCF_ACTION = monitor, but meta_interval not set, -# this is a "probe". we could change behaviour. -: ${OCF_RESKEY_CRM_meta_interval=0} - -case $__OCF_ACTION in -meta-data) - meta_data - exit $OCF_SUCCESS - ;; -usage) - drbd_usage - exit $OCF_SUCCESS -esac - -if $USE_DEBUG_LOG ; then - exec 2>&9 - set -x -fi - -# Everything except usage and meta-data must pass the validate test -drbd_validate_all || exit - -case $__OCF_ACTION in -start) - drbd_start - ;; -stop) - drbd_stop - ;; -notify) - drbd_notify - ;; -promote) - drbd_promote - ;; -demote) - drbd_demote - ;; -status) - drbd_status - ;; -monitor) - drbd_monitor - ;; -validate-all) - ;; -*) - drbd_usage - exit $OCF_ERR_UNIMPLEMENTED -esac -# exit code is the exit code (return code) of the last command (shell function) diff --git a/drbd.spec b/drbd.spec index f83e25c..5cd12e8 100644 --- a/drbd.spec +++ b/drbd.spec @@ -4,7 +4,6 @@ Version: 9.15.1 Release: 1%{?dist} Source0: http://www.linbit.com/downloads/%{name}/utils/%{name}-utils-%{version}.tar.gz Source1: drbd.service -Source2: drbd.ocf Patch0: drbd-utils-9.12.2-disable_xsltproc_network_read.patch Patch1: drbd-utils-9.15.0-glibc2.32_clock_gettime.patch Patch2: drbd-utils-9.15.0-make_configure-workaround.patch @@ -74,9 +73,6 @@ rm -f $RPM_BUILD_ROOT/drbd.service # Remove old heartbeat files that aren't needed any longer in Fedora rm -rf $RPM_BUILD_ROOT/etc/ha.d -# RHBZ 1253056: Fix pacemaker integration problem -install -m 755 %{SOURCE2} $RPM_BUILD_ROOT/usr/lib/ocf/resource.d/linbit/drbd - %package utils Summary: Management utilities for DRBD