#!/bin/sh
#
# Description: Manages a PostgreSQL Server as an OCF High-Availability
#              resource
#
# Authors:     Serge Dubrouski (sergeyfd@gmail.com) -- original RA
#              Florian Haas (florian@linbit.com) -- makeover
#              NIPPON TELEGRAPH AND TELEPHONE CORPORATION -- support pg-rex 9.0
#
# Copyright:   2006-2011 Serge Dubrouski
#              and other Linux-HA contributors
# License:     GNU General Public License (GPL)
#
###############################################################################
# OCF parameters:
# OCF_RESKEY_pgctl - Path to pg_ctl. Default /usr/bin/pg_ctl
# OCF_RESKEY_start_opt - Startup options, options passed to postgres with -o
# OCF_RESKEY_ctl_opt - Additional options for pg_ctl (-w, -W etc...)
# OCF_RESKEY_psql - Path to psql. Default is /usr/bin/psql
# OCF_RESKEY_pgdata - PGDATA directory. Default is /var/lib/pgsql/data
# OCF_RESKEY_pgdba - userID that manages DB. Default is postgres
# OCF_RESKEY_pghost - Host/IP Address where PostgreSQL is listening
# OCF_RESKEY_pgport - Port where PostgreSQL is listening
# OCF_RESKEY_monitor_user - PostgreSQL user that pg-rex RA will use for monitor operations.
# OCF_RESKEY_monitor_password - Password for monitor user.
# OCF_RESKEY_monitor_sql - SQL script that will be used for monitor operations.
# OCF_RESKEY_config - Path to the PostgreSQL configuration file for the instance.
# OCF_RESKEY_pgdb - database to monitor. Default is template1
# OCF_RESKEY_logfile - Path to PostgreSQL log file. Default is /dev/null
# OCF_RESKEY_socketdir - Unix socket directory for PostgreSQL.
# OCF_RESKEY_tmpdir - Path to a directory where flag files are put.
# OCF_RESKEY_trigger_file - Path to a trigger file whose presence ends recovery in the standby server.
# OCF_RESKEY_start_check_interval - Interval where start checking of PostgreSQL is carried out.
# OCF_RESKEY_start_check_multiple - Increased multiple in the interval where start checking of PostgreSQL is carried out.
# OCF_RESKEY_start_check_interval_limit - Upper limit of the interval where start checking of PostgreSQL is carried out.
# OCF_RESKEY_rep_mode - Replication mode(auto/sync/async). default is auto
###############################################################################
# Initialization:

: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/resource.d/heartbeat}
. ${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs

#
# Get PostgreSQL Configuration parameter
#
# Arguments: $1 - name of the parameter to look up.
# Outputs:   the value found in the configuration file, written to stdout
#            without a trailing newline. A trailing "# comment" is removed,
#            and single quotes around a value without whitespace are
#            stripped.
# The file read is $OCF_RESKEY_config when set, otherwise
# $OCF_RESKEY_pgdata/postgresql.conf. When that file does not exist a message
# is logged (warn during a probe, err otherwise) and nothing is printed.
#
get_pgsql_param() {
    local config_file
    local param_name
    local perl_code
    local loglevel=err

    if ocf_is_probe; then
        loglevel=warn
    fi

    param_name=$1

    # Check that config file exists
    if [ -n "$OCF_RESKEY_config" ]; then
        config_file=$OCF_RESKEY_config
    else
        config_file=$OCF_RESKEY_pgdata/postgresql.conf
    fi

    if [ ! -f "$config_file" ]; then
        ocf_log $loglevel "Cannot find configuration file $config_file"
        return
    fi

    # $param_name is expanded by the shell; every \$ is left for perl.
    perl_code="if (/^\s*$param_name[\s=]+\s*(.*)$/) {
       \$dir=\$1;
       \$dir =~ s/\s*\#.*//;
       \$dir =~ s/^'(\S*)'/\$1/;
       print \$dir;}"

    # Quoted so that a configuration path containing blanks still works.
    perl -ne "$perl_code" < "$config_file"
}

# Defaults
OCF_RESKEY_pgctl_default=/usr/bin/pg_ctl
OCF_RESKEY_psql_default=/usr/bin/psql
OCF_RESKEY_pgdata_default=/var/lib/pgsql/data
OCF_RESKEY_pgdba_default=postgres
OCF_RESKEY_pghost_default=""
OCF_RESKEY_pgport_default=5432
OCF_RESKEY_config_default=""
OCF_RESKEY_start_opt_default=""
OCF_RESKEY_pgdb_default=template1
OCF_RESKEY_logfile_default=/dev/null
OCF_RESKEY_monitor_user_default=""
OCF_RESKEY_monitor_password_default=""
OCF_RESKEY_monitor_sql_default="select pg_is_in_recovery();"
OCF_RESKEY_tmpdir_default="/var/lib/pg-rex"
# NOTE(review): this default is built from the DEFAULT tmpdir and port, not
# from the configured OCF_RESKEY_tmpdir / OCF_RESKEY_pgport. Left unchanged
# because the path has to match recovery.conf - confirm this is intended.
OCF_RESKEY_trigger_file_default="${OCF_RESKEY_tmpdir_default}/PGSQL.${OCF_RESKEY_pgport_default}.trigger"
OCF_RESKEY_start_check_interval_default=1
OCF_RESKEY_start_check_multiple_default=2
OCF_RESKEY_start_check_interval_limit_default=30
OCF_RESKEY_rep_mode_default=auto

# Apply a default only where the parameter is unset ("=" rather than ":="),
# so an explicitly empty value is kept.
: ${OCF_RESKEY_pgctl=${OCF_RESKEY_pgctl_default}}
: ${OCF_RESKEY_psql=${OCF_RESKEY_psql_default}}
: ${OCF_RESKEY_pgdata=${OCF_RESKEY_pgdata_default}}
: ${OCF_RESKEY_pgdba=${OCF_RESKEY_pgdba_default}}
: ${OCF_RESKEY_pghost=${OCF_RESKEY_pghost_default}}
: ${OCF_RESKEY_pgport=${OCF_RESKEY_pgport_default}}
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_start_opt=${OCF_RESKEY_start_opt_default}}
: ${OCF_RESKEY_pgdb=${OCF_RESKEY_pgdb_default}}
: ${OCF_RESKEY_logfile=${OCF_RESKEY_logfile_default}}
: ${OCF_RESKEY_monitor_user=${OCF_RESKEY_monitor_user_default}}
: ${OCF_RESKEY_monitor_password=${OCF_RESKEY_monitor_password_default}}
: ${OCF_RESKEY_monitor_sql=${OCF_RESKEY_monitor_sql_default}}
: ${OCF_RESKEY_tmpdir=${OCF_RESKEY_tmpdir_default}}
: ${OCF_RESKEY_trigger_file=${OCF_RESKEY_trigger_file_default}}
: ${OCF_RESKEY_start_check_interval=${OCF_RESKEY_start_check_interval_default}}
: ${OCF_RESKEY_start_check_multiple=${OCF_RESKEY_start_check_multiple_default}}
: ${OCF_RESKEY_start_check_interval_limit=${OCF_RESKEY_start_check_interval_limit_default}}
: ${OCF_RESKEY_rep_mode=${OCF_RESKEY_rep_mode_default}}

# NOTE(review): the text under review is DAMAGED from here to the closing
# brace after "rep_mode EOF". Everything that stood between a "<" and the
# following ">" is missing, which removed the here-document of usage, the
# header of pg_rex_meta_data (still called by the dispatcher) and all XML
# markup of the meta-data. The surviving words are kept verbatim below and
# are NOT valid shell; restore this part from version control.
usage() {
    cat < 1.0
Resource script for PostgreSQL. It manages a PostgreSQL as an HA resource.
Manages a PostgreSQL database instance
Path to pg_ctl command.
pgctl
Start options (-o start_opt in pg_ctl). "-i -p 5432" for example.
start_opt
Additional pg_ctl options (-w, -W etc..).
ctl_opt
Path to psql command.
psql
Path to PostgreSQL data directory.
pgdata
User that owns PostgreSQL.
pgdba
Hostname/IP address where PostgreSQL is listening
pghost
Port where PostgreSQL is listening
pgport
PostgreSQL user that pg-rex RA will user for monitor operations. If it's not set pgdba user will be used.
monitor_user
Password for monitor user.
monitor_password
SQL script that will be used for monitor operations.
monitor_sql
Path to the PostgreSQL configuration file for the instance
Configuration file
Database that will be used for monitoring.
pgdb
Path to PostgreSQL server log output file.
logfile
Unix socket directory for PostgeSQL
socketdir
Path to a directory where flag files are put.
tmpdir
Trigger file whose presence ends recovery in the standby.
You must set the same value as trigger_file set with recovery.conf of PostgreSQL.
trigger file
Interval where start checking of PostgreSQL is carried out.
start check interval
Increased multiple in the interval where start checking of PostgreSQL is carried out.
start_check_multiple
Upper limit of the interval where start checking of PostgreSQL is carried out.
start check interval limit
Replication mode(auto/sync/async).
rep_mode
EOF
}

#
# Run the given command in the Resource owner environment...
#
# Arguments: up to two leading options, in either order:
#              -q        passed on to ocf_run
#              warn|err  log level handed to ocf_run as -warn / -err
#                        (default -err)
#            everything after them is the command line.
# The command is executed through ocf_run as
#   su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; <command>"
# so it is re-parsed by the shell that su starts. Callers rely on that, which
# is why "$*" (one string) is used here rather than "$@".
# Returns:   the status of ocf_run.
#
runasowner() {
    local quietrun=""
    local loglevel="-err"
    local var

    # Two passes: at most one -q and one log level are consumed.
    for var in 1 2
    do
        case "$1" in
            "-q")
                quietrun="-q"
                shift 1;;
            "warn"|"err")
                loglevel="-$1"
                shift 1;;
            *)
                ;;
        esac
    done

    ocf_run $quietrun $loglevel su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $*"
}

#
# Shell escape
#
# Arguments: the text to escape (all arguments are joined with blanks).
# Outputs:   the text with every single quote replaced by '\'' so that it can
#            be embedded between single quotes in a shell command line.
# printf is used instead of echo because some /bin/sh implementations let
# echo interpret backslash sequences, which would alter the text.
#
escape_string() {
    printf '%s\n' "$*" | sed -e "s/'/'\\\\''/g"
}

#
# What methods/operations do we support?
#
# NOTE(review): the text under review is DAMAGED here. Everything between
# "cat <" and "/dev/null 2>&1" is missing. That presumably covered the rest
# of pg_rex_methods, the functions pg_rex_start, pg_rex_stop, pg_rex_promote
# and pg_rex_demote (all still called by the dispatcher) and the head of
# pgsql_status (called below), whose tail seems to be what follows the
# redirection. The surviving words are kept verbatim and are NOT valid
# shell; restore this part from version control.
pg_rex_methods() {
    cat </dev/null 2>&1"
        return $?
    fi

    # No PID file
    false
}

#
# status operation
#
# Logs whether PostgreSQL is up according to pgsql_status and EXITS the
# script (it does not return): $OCF_SUCCESS when up, $OCF_NOT_RUNNING when
# down.
#
pg_rex_status() {
    if pgsql_status
    then
        ocf_log info "PostgreSQL is up"
        exit $OCF_SUCCESS
    else
        ocf_log info "PostgreSQL is down"
        exit $OCF_NOT_RUNNING
    fi
}

#
# Monitor PostgreSQL
#
# Arguments: $1 - log level handed to runsql for error messages
#                 (default: err).
# Globals:   writes output and rc. They are left global on purpose because
#            the parts of the file that are missing may read them.
# Returns:   $OCF_NOT_RUNNING     pgsql_status reports the server as down
#            $OCF_RUNNING_MASTER  the monitor SQL printed "f"
#            $OCF_SUCCESS         the monitor SQL printed "t"
#            $OCF_ERR_GENERIC     runsql failed or printed anything else
#
pgsql_monitor() {
    local loglevel

    # Set the log level of the error message
    loglevel=${1:-err}

    if ! pgsql_status
    then
        ocf_log debug "PostgreSQL is down"
        return $OCF_NOT_RUNNING
    fi

    output=$(runsql $loglevel "$OCF_RESKEY_monitor_sql")
    rc=$?
    if [ $rc -ne $OCF_SUCCESS ]; then
        return $OCF_ERR_GENERIC
    fi

    ocf_log debug "'$OCF_RESKEY_monitor_sql' output: $output"
    # With the default monitor SQL (select pg_is_in_recovery();) "f" means
    # the server is not in recovery and "t" means it is.
    case $output in
        f)  ocf_log debug "PostgreSQL is running as a primary."
            return $OCF_RUNNING_MASTER;;

        t)  ocf_log debug "PostgreSQL is running as a hot standby."
            return $OCF_SUCCESS;;

        *)  ocf_log err "Fatal error in '$OCF_RESKEY_monitor_sql': $output"
            return $OCF_ERR_GENERIC;;
    esac
}

#
# monitor operation
#
# Outside a probe, the presence of the $REPRESS_MONITOR flag file makes the
# operation report $OCF_SUCCESS without looking at the server.
# During a probe that finds PostgreSQL already running, the flag file
# $REPRESS_MONITOR and the trigger file are removed, $REPRESS_START is
# created and the master score is set with $CRM_MASTER ($PROMOTE_ME for a
# primary, $CAN_PROMOTE for a hot standby).
# Globals:   writes rc.
# Returns:   the result of pgsql_monitor, or $OCF_ERR_GENERIC when one of the
#            files cannot be removed or created.
#
pg_rex_monitor() {
    if ! ocf_is_probe; then
        if [ -f "$REPRESS_MONITOR" ]; then
            ocf_log info "Monitor operation is repressing..."
            return $OCF_SUCCESS
        fi
    fi

    pgsql_monitor
    rc=$?

    if ocf_is_probe; then
        if [ $rc -eq $OCF_RUNNING_MASTER ] || [ $rc -eq $OCF_SUCCESS ]; then
            ocf_log warn "PostgreSQL has started before the resource agent starts. PID=$(cat "$PIDFILE")"
            if ! rm -f "$REPRESS_MONITOR"; then
                ocf_log err "Can't remove $REPRESS_MONITOR."
                return $OCF_ERR_GENERIC
            fi
            if ! rm -f "$OCF_RESKEY_trigger_file"; then
                ocf_log err "Can't remove $OCF_RESKEY_trigger_file."
                return $OCF_ERR_GENERIC
            fi
            if ! touch "$REPRESS_START"; then
                ocf_log err "Can't create $REPRESS_START."
                return $OCF_ERR_GENERIC
            fi
            ocf_log info "Start of PostgreSQL was prohibited."

            # $CRM_MASTER holds a command plus its options, so it is left
            # unquoted for word splitting.
            if [ $rc -eq $OCF_RUNNING_MASTER ]; then
                ocf_log info "PostgreSQL is running by a primary, so this resource changes to the master."
                $CRM_MASTER -v $PROMOTE_ME
            else
                ocf_log info "PostgreSQL is running by a hot standby, so this resource changes to the slave."
                $CRM_MASTER -v $CAN_PROMOTE
            fi
        fi
    fi

    return $rc
}

#
# Check that a command is available
#
# Arguments: $1 - command name or path, tested with have_binary.
# Returns:   0 when found; 1 (after logging an error) otherwise.
#
check_binary2() {
    if ! have_binary "$1"; then
        ocf_log err "Setup problem: couldn't find command: $1"
        return 1
    fi
    return 0
}

#
# Validate most critical parameters
#
# Checks, in this order: pg_ctl and psql can be found; a configured
# configuration file exists; monitor_user and monitor_password are set
# together or not at all; the tmpdir directory exists; and, for
# rep_mode=async only, the owner can read and write the directory of the
# trigger file.
# Globals:   writes dir.
# Returns:   $OCF_SUCCESS, or $OCF_ERR_INSTALLED at the first failed check.
#
pg_rex_validate_all() {
    if ! check_binary2 "$OCF_RESKEY_pgctl" ||
       ! check_binary2 "$OCF_RESKEY_psql"; then
        return $OCF_ERR_INSTALLED
    fi

    if [ -n "$OCF_RESKEY_config" ] && [ ! -f "$OCF_RESKEY_config" ]; then
        ocf_log err "the configuration file $OCF_RESKEY_config doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if [ -n "$OCF_RESKEY_monitor_user" ] && [ -z "$OCF_RESKEY_monitor_password" ]
    then
        ocf_log err "monitor password can't be empty"
        return $OCF_ERR_INSTALLED
    fi

    if [ -z "$OCF_RESKEY_monitor_user" ] && [ -n "$OCF_RESKEY_monitor_password" ]
    then
        ocf_log err "monitor_user has to be set if monitor_password is set"
        return $OCF_ERR_INSTALLED
    fi

    if [ ! -d "$OCF_RESKEY_tmpdir" ]; then
        ocf_log err "the temporary directory $OCF_RESKEY_tmpdir doesn't exist"
        return $OCF_ERR_INSTALLED
    fi

    if [ "$OCF_RESKEY_rep_mode" = "async" ]; then
        dir=$(dirname "$OCF_RESKEY_trigger_file")
        if ! runasowner "test -r $dir -a -w $dir"; then
            ocf_log err "$OCF_RESKEY_pgdba can't create or remove $OCF_RESKEY_trigger_file"
            return $OCF_ERR_INSTALLED
        fi
    fi

    return $OCF_SUCCESS
}

#
# Check if we need to create a log file
#
# Arguments: $1 - path of the log file.
# When the path is not a regular file it is created and handed over to
# $OCF_RESKEY_pgdba, using the group id from field 4 of that user's passwd
# entry; failures of these two steps are not checked here, the write test
# below decides.
# Returns:   0 when $OCF_RESKEY_pgdba can write to the file, 1 otherwise.
#
check_log_file() {
    if [ ! -f "$1" ]
    then
        touch "$1" > /dev/null 2>&1
        chown $OCF_RESKEY_pgdba:$(getent passwd $OCF_RESKEY_pgdba | cut -d ":" -f 4) "$1"
    fi

    # Check if $OCF_RESKEY_pgdba can write to the log file
    if ! runasowner "test -w $1"
    then
        return 1
    fi

    return 0
}

#
# Check socket directory
#
# A missing $OCF_RESKEY_socketdir is created, given to $OCF_RESKEY_pgdba and
# its passwd group, and set to mode 2775. An existing directory is only
# tested by letting the owner create a scratch file in it, which is removed
# again afterwards.
# EXITS the script with $OCF_ERR_GENERIC when any of these steps fails.
#
check_socket_dir() {
    if [ ! -d "$OCF_RESKEY_socketdir" ]; then
        if ! mkdir "$OCF_RESKEY_socketdir"; then
            ocf_log err "Cannot create directory $OCF_RESKEY_socketdir"
            exit $OCF_ERR_GENERIC
        fi

        if ! chown $OCF_RESKEY_pgdba:$(getent passwd $OCF_RESKEY_pgdba | cut -d ":" -f 4) "$OCF_RESKEY_socketdir"
        then
            ocf_log err "Cannot change ownership for $OCF_RESKEY_socketdir"
            exit $OCF_ERR_GENERIC
        fi

        if ! chmod 2775 "$OCF_RESKEY_socketdir"; then
            ocf_log err "Cannot change permissions for $OCF_RESKEY_socketdir"
            exit $OCF_ERR_GENERIC
        fi
    else
        if ! runasowner "touch $OCF_RESKEY_socketdir/test.$$"; then
            ocf_log err "$OCF_RESKEY_pgdba cannot create files in $OCF_RESKEY_socketdir"
            exit $OCF_ERR_GENERIC
        fi
        rm "$OCF_RESKEY_socketdir/test.$$"
    fi
}

#
# Has data catch up?
#
# Runs $DATA_CATCH_UP_SQL through runsql.
# Globals:   writes output.
# Returns:   0 when the query succeeded and printed exactly "t", else 1.
#
has_data_catch_up() {
    output=$(runsql err "$DATA_CATCH_UP_SQL")
    if [ $? -eq $OCF_SUCCESS ]; then
        ocf_log debug "'$DATA_CATCH_UP_SQL' output: $output"
        # Quoted: multi-word output must compare as unequal instead of
        # making test fail with a usage error.
        if [ "x$output" = xt ]; then
            # Data has catch up
            return 0
        fi
    fi
    # Data doesn't catch up
    return 1
}

#
# is synchronous mode?
#
# First checks that "SHOW ALL;" lists a line starting with
# "$REP_MODE_PARAM|", then reads the setting with $REP_MODE_SQL.
# Globals:   writes output.
# Returns:   0 when the setting is recv, fsync or apply; 1 when the parameter
#            is not listed, the query fails, or it has any other value.
#
pg_is_sync_mode() {
    if ! runsql err "SHOW ALL;" | grep "^$REP_MODE_PARAM|" >/dev/null 2>&1; then
        ocf_log debug "$REP_MODE_PARAM parameter isn't defined."
        return 1
    fi

    output=$(runsql err "$REP_MODE_SQL")
    if [ $? -eq $OCF_SUCCESS ]; then
        ocf_log debug "'$REP_MODE_SQL' output: $output"
        case $output in
            recv|fsync|apply)
                # Synchronous mode
                return 0;;
        esac
    fi
    # Asynchronous mode
    return 1
}

#
# run sql script
#
# Arguments: $1 - log level for the "isn't running" and connection error
#                 messages (default: err)
#            $2 - SQL text, passed to psql with -Atc.
# psql is started through su as $OCF_RESKEY_pgdba from $OCF_RESKEY_pgdata.
# With monitor_user set, PGUSER and PGPASSWORD are exported; otherwise
# -U $OCF_RESKEY_pgdba is used. The -h option is pghost when set, else
# socketdir when set.
# Globals:   writes sql, output and rc; may export PGUSER and PGPASSWORD.
# Outputs:   on success the psql output (stdout and stderr combined).
# Returns:   $OCF_NOT_RUNNING  pgsql_status reports the server as down
#            $OCF_ERR_GENERIC  psql exited non-zero
#            $OCF_SUCCESS      otherwise
#
runsql() {
    local loglevel
    local psql_options

    # Set the log level of the error message
    loglevel=${1:-err}

    if ! pgsql_status
    then
        ocf_log info "PostgreSQL is down"
        return $OCF_NOT_RUNNING
    fi

    if [ -n "$OCF_RESKEY_monitor_user" ]; then
        PGUSER=$OCF_RESKEY_monitor_user; export PGUSER
        PGPASSWORD=$OCF_RESKEY_monitor_password; export PGPASSWORD
        psql_options="-p $OCF_RESKEY_pgport $OCF_RESKEY_pgdb"
    else
        psql_options="-p $OCF_RESKEY_pgport -U $OCF_RESKEY_pgdba $OCF_RESKEY_pgdb"
    fi

    if [ -n "$OCF_RESKEY_pghost" ]; then
        psql_options="$psql_options -h $OCF_RESKEY_pghost"
    else
        if [ -n "$OCF_RESKEY_socketdir" ]; then
            psql_options="$psql_options -h $OCF_RESKEY_socketdir"
        fi
    fi

    # The SQL is placed between single quotes in the su command line, so
    # embedded single quotes have to be escaped first.
    sql=$(escape_string "$2")
    output=$(su $OCF_RESKEY_pgdba -c "cd $OCF_RESKEY_pgdata; $OCF_RESKEY_psql $psql_options -Atc '$sql'" 2>&1)
    rc=$?

    if [ $rc -ne 0 ]; then
        ocf_log $loglevel "PostgreSQL $OCF_RESKEY_pgdb isn't running"
        if [ $rc -eq 1 ]; then
            ocf_log err "Fatal error (out of memory, file not found, etc.) occurred while executing the psql command."
        elif [ $rc -eq 2 ]; then
            ocf_log $loglevel "Connection error (connection to the server went bad and the session was not interactive) occurred while executing the psql command."
        elif [ $rc -eq 3 ]; then
            ocf_log err "Script error (the variable ON_ERROR_STOP was set) occurred while executing the psql command."
        fi
        return $OCF_ERR_GENERIC
    fi

    # printf rather than echo: the query result is arbitrary text.
    printf '%s\n' "$output"
    return $OCF_SUCCESS
}

#
# 'main' starts here...
#

# Exactly one argument, the operation name, is expected.
if [ $# -ne 1 ]
then
    usage
    exit $OCF_ERR_GENERIC
fi

ocf_log debug "$1 operation start."
# Paths and constants shared by the operations.
PIDFILE=${OCF_RESKEY_pgdata}/postmaster.pid
# NOTE(review): BACKUPLABEL, RECOVERY_CONF and CAN_NOT_PROMOTE are not
# referenced in the part of the file under review; presumably the start,
# stop, promote and demote operations use them.
BACKUPLABEL=${OCF_RESKEY_pgdata}/backup_label
RECOVERY_CONF=${OCF_RESKEY_pgdata}/recovery.conf
# Flag files kept in tmpdir, one set per port.
REPRESS_MONITOR=${OCF_RESKEY_tmpdir}/PGSQL.${OCF_RESKEY_pgport}.repress_monitor
REPRESS_START=${OCF_RESKEY_tmpdir}/PGSQL.${OCF_RESKEY_pgport}.repress_start
# Command (with options) used to set the master score, and the scores used.
CRM_MASTER="${HA_SBIN_DIR}/crm_master -l reboot"
CAN_NOT_PROMOTE="-INFINITY"
CAN_PROMOTE="100"
PROMOTE_ME="1000"
# Queries used to find out the replication mode and the sync state.
REP_MODE_PARAM="replication_mode"
REP_MODE_SQL="select current_setting('$REP_MODE_PARAM');"
DATA_CATCH_UP_SQL="select pg_is_in_sync();"

# These two operations are answered before any parameter validation.
case "$1" in
    methods)    pg_rex_methods
                exit $?;;

    meta-data)  pg_rex_meta_data
                exit $OCF_SUCCESS;;
esac

# $OCF_RESKEY_pgdata has to be initialized at this moment
: ${OCF_RESKEY_socketdir=$(get_pgsql_param unix_socket_directory)}

pg_rex_validate_all
rc=$?

# "=" is the string comparison defined for test by POSIX. The previous "=="
# is an extension that some /bin/sh implementations reject, in which case
# validate-all fell through to the "unimplemented" branch at the bottom.
[ "$1" = "validate-all" ] && exit $rc

# With an invalid configuration, stop still reports success and
# monitor/status report "not running"; any other operation fails with the
# validation result.
if [ $rc -ne 0 ]
then
    case "$1" in
        stop)    exit $OCF_SUCCESS;;
        monitor) exit $OCF_NOT_RUNNING;;
        status)  exit $OCF_NOT_RUNNING;;
        *)       exit $rc;;
    esac
fi

US=$(id -u -n)

# Quoted, and split into two tests instead of -a, so that an empty or
# unusual user name cannot turn the comparison into a test syntax error.
if [ "$US" != root ] && [ "$US" != "$OCF_RESKEY_pgdba" ]
then
    ocf_log err "$0 must be run as root or $OCF_RESKEY_pgdba"
    exit $OCF_ERR_GENERIC
fi

# What kind of method was invoked?
case "$1" in
    status)     pg_rex_status
                exit $?;;

    monitor)    pg_rex_monitor
                exit $?;;

    start)      pg_rex_start
                exit $?;;

    promote)    pg_rex_promote
                exit $?;;

    demote)     pg_rex_demote
                exit $?;;

    notify)     exit $OCF_SUCCESS;;

    stop)       pg_rex_stop
                exit $?;;

    *)
                exit $OCF_ERR_UNIMPLEMENTED;;
esac