#!/bin/ash
# Copyright (C) 2019 Checkmk GmbH - License: GNU General Public License v2
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.

# shellcheck shell=dash # Actually it's a BusyBox ash

#
# BEGIN COMMON AGENT CODE
#

usage() {
    cat <<HERE
Usage: ${0} [OPTION...]

The Checkmk agent to monitor *nix style systems.

Options:
  -h, --help                        show this message and exit
  -d, --debug                       emit debugging messages
  -p, --profile                     create files containing the execution times
      --force-inventory             get the output of the agent plugin 'mk_inventory'
                                    independent of the last run state.
HERE
}

inpath() {
    # replace "if type [somecmd]" idiom
    # 'command -v' tends to be more robust vs 'which' and 'type' based tests
    command -v "${1:?No command to test}" >/dev/null 2>&1
}

get_file_atime() {
    stat -c %X "${1}" 2>/dev/null ||
        stat -f %a "${1}" 2>/dev/null ||
        perl -e 'if (! -f $ARGV[0]){die "0000000"};$atime=(stat($ARGV[0]))[8];print $atime."\n";' "${1}"
}

get_file_mtime() {
    stat -c %Y "${1}" 2>/dev/null ||
        stat -f %m "${1}" 2>/dev/null ||
        perl -e 'if (! -f $ARGV[0]){die "0000000"};$mtime=(stat($ARGV[0]))[9];print $mtime."\n";' "${1}"
}

is_valid_plugin() {
    # test if a file is executable and does not have certain
    # extensions (remnants from distro upgrades).
    case "${1:?No plugin defined}" in
        *.dpkg-new | *.dpkg-old | *.dpkg-temp | *.dpkg-tmp) return 1 ;;
        *) [ -f "${1}" ] && [ -x "${1}" ] ;;
    esac
}

set_up_process_commandline_arguments() {
    while [ -n "${1}" ]; do
        case "${1}" in
            -d | --debug)
                set -xv
                DISABLE_STDERR=false
                shift
                ;;
            -p | --profile)
                LOG_SECTION_TIME=true
                # disable caching to get the whole execution time
                DISABLE_CACHING=true
                shift
                ;;
            --force-inventory)
                export MK_FORCE_INVENTORY=true
                shift
                ;;
            -h | --help)
                usage
                exit 1
                ;;
            *)
                shift
                ;;
        esac
    done
}

set_up_get_epoch() {
    # On some systems date +%s returns a literal %s
    if date +%s | grep "^[0-9].*$" >/dev/null 2>&1; then
        get_epoch() { date +%s; }
    else
        # do not check whether perl is even present.
        # in weird cases we may be fine without get_epoch.
        get_epoch() { perl -e 'print($^T."\n");'; }
    fi
}

set_up_current_shell() {
    # Note the current shell may not be the same as what is specified in the
    # shebang, e.g. when reconfigured in the xinetd/systemd/whateverd config file
    CURRENT_SHELL="$(ps -o args= -p $$ | cut -d' ' -f1)"
}

#
# END COMMON AGENT CODE
#

set_variable_defaults() {
    : "${MK_LIBDIR:=/usr/lib/check_mk_agent}"
    : "${MK_CONFDIR:=/etc/check_mk}"
    : "${MK_VARDIR:=/var/lib/check_mk_agent}"
    : "${MK_LOGDIR:=/var/log/check_mk_agent}"

    # some 'booleans'
    [ "${MK_RUN_SYNC_PARTS}" = "false" ] || MK_RUN_SYNC_PARTS=true
    [ "${MK_RUN_ASYNC_PARTS}" = "false" ] || MK_RUN_ASYNC_PARTS=true
}

preamble_1() {
    export MK_LIBDIR
    export MK_CONFDIR
    export MK_VARDIR

    # Optionally set a tempdir for all subsequent calls
    #export TMPDIR=

    # The service name gets patched for baked agents to "check-mk-agent"
    XINETD_SERVICE_NAME=check_mk

    # Provide information about the remote host. That helps when data
    # is being sent only once to each remote host.
    if [ "${REMOTE_HOST}" ]; then
        export REMOTE=${REMOTE_HOST}
    elif [ "${SSH_CLIENT}" ]; then
        export REMOTE=${SSH_CLIENT%% *}
    fi

    # Make sure locally installed binaries are found.
    # Only add binaries if they are not already in the path! If you append to PATH in a loop the process will
    # eventually reach the 128k size limit for the environment and become a zombie process. See execve manpage.
    [ "${PATH#*"/usr/local/bin"}" != "${PATH}" ] || PATH="${PATH}:/usr/local/bin"
    [ -d "/var/qmail/bin" ] && { [ "${PATH#*"/var/qmail/bin"}" != "${PATH}" ] || PATH="${PATH}:/var/qmail/bin"; }

    # All executables in PLUGINSDIR will simply be executed and their
    # output appended to the output of the agent. Plugins define their own
    # sections and must output headers with '<<<' and '>>>'
    PLUGINSDIR=${MK_LIBDIR}/plugins

    # All executables in LOCALDIR will be executed and their
    # output inserted into the section <<<local>>>. Please
    # refer to online documentation for details about local checks.
    LOCALDIR=${MK_LIBDIR}/local

    # All files in SPOOLDIR will simply be appended to the agent
    # output if they are not outdated (see below)
    SPOOLDIR=${MK_VARDIR}/spool
}

# encryption not implemented
optionally_encrypt() { cat; }

#
# BEGIN COMMON AGENT CODE
#

# SC2089: Quotes/backslashes will be treated literally. Use an array.
# shellcheck disable=SC2089
MK_DEFINE_LOG_SECTION_TIME='_log_section_time() { "$@"; }'
finalize_profiling() { :; }

set_up_profiling() {

    PROFILING_CONFIG="${MK_CONFDIR}/profiling.cfg"
    if [ -e "${PROFILING_CONFIG}" ]; then
        # Config vars:
        #   LOG_SECTION_TIME=true/false
        #   DISABLE_CACHING=true/false
        # If LOG_SECTION_TIME=true via profiling.cfg do NOT disable caching in order
        # to get the real execution time during operation.
        # shellcheck disable=SC1090
        . "${PROFILING_CONFIG}"
    fi

    PROFILING_LOGFILE_DIR="${MK_LOGDIR}/profiling/$(date +%Y%m%d_%H%M%S)"

    if ${LOG_SECTION_TIME:-false}; then
        mkdir -p "${PROFILING_LOGFILE_DIR}"
        agent_start="$(perl -MTime::HiRes=time -le 'print time()')"

        # SC2016: Expressions don't expand in single quotes, use double quotes for that.
        # SC2089: Quotes/backslashes will be treated literally. Use an array.
        # shellcheck disable=SC2016,SC2089
        MK_DEFINE_LOG_SECTION_TIME='_log_section_time() {
            section_func="$@"

            base_name=$(echo "${section_func}" | sed "s/[^A-Za-z0-9.-]/_/g")
            profiling_logfile="'"${PROFILING_LOGFILE_DIR}"'/${base_name}.log"

            start="$(perl -MTime::HiRes=time -le "print time()")"
            { time ${section_func}; } 2>> "${profiling_logfile}"
            echo "runtime $(perl -MTime::HiRes=time -le "print time() - ${start}")" >> "${profiling_logfile}"
        }'

        finalize_profiling() {
            pro_log_file="${PROFILING_LOGFILE_DIR}/profiling_check_mk_agent.log"
            agent_end="$(perl -MTime::HiRes=time -le 'print time()')"
            echo "runtime $(echo "${agent_end} - ${agent_start}" | bc)" >>"${pro_log_file}"
        }
    fi

    eval "${MK_DEFINE_LOG_SECTION_TIME}"
    # SC2090: Quotes/backslashes in this variable will not be respected.
    # shellcheck disable=SC2090
    export MK_DEFINE_LOG_SECTION_TIME
}
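# A minimal profiling.cfg sketch, assuming only the two variables read by
# set_up_profiling() above (values are illustrative, not shipped defaults):
#
#   LOG_SECTION_TIME=true
#   DISABLE_CACHING=false
#
# With LOG_SECTION_TIME=true every section/plugin invocation gets its own
# runtime log file below ${MK_LOGDIR}/profiling/<timestamp>/.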
unset_locale() {
    # eliminate localized outputs where possible
    # The locale logic here is used to make the Python encoding detection work (see CMK-2778).
    unset -v LANG LC_ALL
    if inpath locale && inpath paste; then
        # match C.UTF-8 at the beginning, but not e.g. es_EC.UTF-8!
        case "$(locale -a | paste -sd ' ' -)" in
            *' C.UTF-8'* | 'C.UTF-8'*) LC_ALL="C.UTF-8" ;;
            *' C.utf8'* | 'C.utf8'*) LC_ALL="C.utf8" ;;
        esac
    fi
    LC_ALL="${LC_ALL:-C}"
    export LC_ALL
}

#
# END COMMON AGENT CODE
#

# Prefer (relatively) new /usr/bin/timeout from coreutils against
# our shipped waitmax. waitmax is statically linked and crashes on
# some Ubuntu versions recently.
if inpath timeout; then
    waitmax() {
        timeout "$@"
    }
fi

#
# CHECK SECTIONS
#

section_mem() {
    # If you add a IS_DOCKERIZED check here please inform the kubernetes team
    # who uses this agent without modifications. They expect it to be run
    # without docker detection.
    echo '<<<mem>>>'
    grep -v -E '^Swap:|^Mem:|total:' </proc/meminfo
}

section_cpu() {
    echo '<<<cpu>>>'
    if [ "$(uname -m)" = "armv7l" ]; then
        CPU_REGEX='^processor'
    else
        CPU_REGEX='^CPU|^processor'
    fi
    echo "$(cat /proc/loadavg) $(grep -c -E ${CPU_REGEX} </proc/cpuinfo)"
}

section_checkmk() {
    echo "<<<check_mk>>>"
    echo "Version: 2.3.0p18"
    echo "AgentOS: openwrt"
    echo "Hostname: $(hostname)"
    echo "AgentDirectory: ${MK_CONFDIR}"
    echo "DataDirectory: ${MK_VARDIR}"
    echo "SpoolDirectory: ${SPOOLDIR}"
    echo "PluginsDirectory: ${PLUGINSDIR}"
    echo "LocalDirectory: ${LOCALDIR}"
    echo "OSType: linux"

    while read -r line; do
        raw_line=$(echo "$line" | tr -d \")
        case $raw_line in
            NAME=*) echo "OSName: ${raw_line##*=}" ;;
            VERSION_ID=*) echo "OSVersion: ${raw_line##*=}" ;;
        esac
    done </etc/os-release 2>/dev/null

    # If we are called via xinetd, try to find only_from configuration
    if [ -n "${REMOTE_HOST}" ]; then
        printf 'OnlyFrom: '
        sed -n '/^service[[:space:]]*'"${XINETD_SERVICE_NAME}"'/,/}/s/^[[:space:]]*only_from[[:space:]]*=[[:space:]]*\(.*\)/\1/p' /etc/xinetd.d/* | head -n1
    fi

    #
    # BEGIN COMMON AGENT CODE
    #

    if [ -n "${NO_PYTHON}" ]; then
        python_fail_msg="No suitable python installation found."
    elif [ -n "${WRONG_PYTHON_COMMAND}" ]; then
        python_fail_msg="Configured python command not found."
    fi

    cat <<HERE
FailedPythonPlugins: ${FAILED_PYTHON_PLUGINS:-None}
FailedPythonReason: ${python_fail_msg:-}
SSHClient: ${SSH_CLIENT}
HERE
}

section_cmk_agent_ctl_status() {
    cmk-agent-ctl --version 2>/dev/null >&2 || return

    printf "<<<cmk_agent_ctl_status:sep(0)>>>\n"
    cmk-agent-ctl status --json --no-query-remote
}

section_checkmk_agent_plugins() {
    printf "<<<checkmk_agent_plugins_lnx:sep(0)>>>\n"
    printf "pluginsdir %s\n" "${PLUGINSDIR}"
    printf "localdir %s\n" "${LOCALDIR}"
    for script in \
        "${PLUGINSDIR}"/* \
        "${PLUGINSDIR}"/[1-9]*/* \
        "${LOCALDIR}"/* \
        "${LOCALDIR}"/[1-9]*/*; do
        if is_valid_plugin "${script}"; then
            script_version=$(grep -e '^__version__' -e '^CMK_VERSION' "${script}" || echo 'CMK_VERSION="unversioned"')
            printf "%s:%s\n" "${script}" "${script_version}"
        fi
    done
}

section_checkmk_failed_plugin() {
    ${MK_RUN_SYNC_PARTS} || return
    echo "<<<check_mk>>>"
    echo "FailedPythonPlugins: ${1}"
}

#
# END COMMON AGENT CODE
#
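# Note on the output format used by the sections below (a short summary, not an
# exhaustive spec): every section starts with a header like '<<<name>>>'.
# An optional ':sep(N)' suffix, e.g. '<<<lnx_if:sep(58)>>>', tells the
# monitoring server to split the payload lines at the character with ASCII
# code N (58 = ':', 124 = '|', 0 = keep the whole line) instead of whitespace.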
section_df() {
    # Print out Partitions / Filesystems. (-P gives non-wrapped POSIXed output)
    # Note: BusyBox df does not support -x and -l as arguments
    # If you add a IS_DOCKERIZED check here please inform the kubernetes team
    # who uses this agent without modifications. They expect it to be run
    # without docker detection.
    if ! inpath waitmax; then
        return
    fi
    echo '<<<df>>>'
    waitmax -s 9 5 df -kPT

    # df inodes information
    if waitmax -s 9 5 df -i >/dev/null 2>&1; then
        echo '<<<df>>>'
        echo '[df_inodes_start]'
        waitmax -s 9 5 df -PTi
        echo '[df_inodes_end]'
    fi
}

section_zfsget() {
    # Filesystem usage for ZFS
    if inpath zfs; then
        echo '<<<zfsget>>>'
        zfs get -Hp name,quota,used,avail,mountpoint,type -t filesystem,volume ||
            zfs get -Hp name,quota,used,avail,mountpoint,type
        echo '[df]'
        df -PTlk -t zfs | sed 1d
    fi
}

section_mounts() {
    # Check NFS mounts by accessing them with stat -f (System
    # call statfs()). If this lasts more than 2 seconds we
    # consider it as hanging. We need waitmax.
    if inpath waitmax; then
        STAT_VERSION=$(stat --version | head -1 | cut -d" " -f4)
        STAT_BROKE="5.3.0"

        echo '<<<nfsmounts:sep(0)>>>'
        # SC2162: read without -r will mangle backslashes.
        # We suppress it here for compatibility (currently backslashes e.g. before spaces are dropped).
        # Since escaping of field separators is not relevant when reading into one variable, we probably
        # would have wanted "read -r".
        # shellcheck disable=SC2162
        sed -n '/ nfs4\? /s/[^ ]* \([^ ]*\) .*/\1/p' </proc/mounts |
            sed 's/\\040/ /g' |
            while read MP; do
                if [ "${STAT_VERSION}" != "${STAT_BROKE}" ]; then
                    waitmax -s 9 2 stat -f -c "${MP} ok %b %f %a %s" "${MP}" ||
                        echo "${MP} hanging 0 0 0 0"
                else
                    waitmax -s 9 2 stat -f -c "${MP} ok %b %f %a %s" "${MP}" &&
                        printf '\n' || echo "${MP} hanging 0 0 0 0"
                fi
            done

        echo '<<<cifsmounts:sep(0)>>>'
        # shellcheck disable=SC2162
        sed -n -e '/ cifs /s/.*\ \([^ ]*\)\ cifs\ .*/\1/p' </proc/mounts |
            sed 's/\\040/ /g' |
            while read MP; do
                if [ "${STAT_VERSION}" != "${STAT_BROKE}" ]; then
                    waitmax -s 9 2 stat -f -c "${MP} ok %b %f %a %s" "${MP}" ||
                        echo "${MP} hanging 0 0 0 0"
                else
                    waitmax -s 9 2 stat -f -c "${MP} ok %b %f %a %s" "${MP}" &&
                        printf '\n' || echo "${MP} hanging 0 0 0 0"
                fi
            done
    fi

    # Check mount options. Filesystems may switch to 'ro' in case of an error.
    echo '<<<mounts>>>'
    grep ^/dev </proc/mounts
}

section_ps() {
    echo '<<<ps>>>'
    echo "[time]"
    get_epoch
    echo "[processes]"
    ps ax -o user:32,vsz,rss,cputime,etime,pid,command --columns 10000 | sed -e 1d -e 's/ *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) */(\1,\2,\3,\4\/\5,\6) /'
}

section_uptime() {
    # If you add a IS_DOCKERIZED check here please inform the kubernetes team
    # who uses this agent without modifications. They expect it to be run
    # without docker detection.
    echo '<<<uptime>>>'
    cat /proc/uptime
}

section_lnx_if() {
    # New variant: Information about speed and state in one section
    if inpath ip; then
        echo '<<<lnx_if>>>'
        echo "[start_iplink]"
        ip link
        echo "[end_iplink]"
    fi

    echo '<<<lnx_if:sep(58)>>>'
    sed 1,2d /proc/net/dev
    if inpath ethtool; then
        sed -e 1,2d /proc/net/dev | cut -d':' -f1 | sort | while read -r eth; do
            echo "[${eth}]"
            ethtool "${eth}" | grep -E '(Speed|Duplex|Link detected|Auto-negotiation):'
            echo "Address: $(cat "/sys/class/net/${eth}/address")"
        done
    fi
}

# Current state of bonding interfaces
section_bonding_interfaces() {
    (
        cd /proc/net/bonding 2>/dev/null || return
        echo '<<<lnx_bonding:sep(58)>>>'
        head -v -n 1000 ./*
    )
}

section_ovs_bonding() {
    # Same for Open vSwitch bonding
    if inpath ovs-appctl; then
        BONDS=$(ovs-appctl bond/list)
        COL=$(echo "${BONDS}" | awk '{for(i=1;i<=NF;i++) {if($i == "bond") printf("%d", i)} exit 0}')
        echo '<<<ovs_bonding:sep(58)>>>'
        for bond in $(echo "${BONDS}" | sed -e 1d | cut "-f${COL}"); do
            echo "[${bond}]"
            ovs-appctl bond/show "${bond}"
        done
    fi
}

section_tcp_conn() {
    # Number of TCP connections in the various states
    echo '<<<tcp_conn_stats>>>'
    cat /proc/net/tcp /proc/net/tcp6 2>/dev/null | awk ' /:/ { c[$4]++; } END { for (x in c) { print x, c[x]; } }'
}

section_multipath() {
    # Linux Multipathing
    if inpath multipath; then
        echo '<<<multipath>>>'
        multipath -l
    fi
}

section_diskstat() {
    # Performance counters for disks
    # If you add a IS_DOCKERIZED check here please inform the kubernetes team
    # who uses this agent without modifications. They expect it to be run
    # without docker detection.
    echo '<<<diskstat>>>'
    get_epoch
    grep -E ' (x?[shv]d[a-z]*|cciss/c[0-9]+d[0-9]+|emcpower[a-z]+|dm-[0-9]+|VxVM.*|mmcblk.*|dasd[a-z]*) ' </proc/diskstats
}

section_kernel() {
    echo '<<<kernel>>>'
    get_epoch
    cat /proc/vmstat /proc/stat
}

section_ipmitool() {
    # Hardware sensors via IPMI (need ipmitool)
    if inpath ipmitool; then
        _run_cached_internal "ipmi" 300 300 900 600 "echo '<<<ipmi:sep(124)>>>'; waitmax 300 ipmitool sensor list | grep -v 'command failed' | grep -v -E '^[^ ]+ na ' | grep -v ' discrete '"
        # readable discrete sensor states
        _run_cached_internal "ipmi_discrete" 300 300 900 600 "echo '<<<ipmi_discrete:sep(124)>>>'; waitmax 300 ipmitool sdr elist compact"
    fi
}

section_ipmisensors() {
    # keep in sync with linux agent
    # IPMI data via ipmi-sensors (of freeipmi).
    # Even if freeipmi is installed make sure that IPMI is really supported by your hardware.
    inpath ipmi-sensors && ls /dev/ipmi* 2>/dev/null 1>&2 || return

    ${MK_RUN_SYNC_PARTS} && echo '<<<ipmi_sensors>>>'

    # Newer ipmi-sensors versions have a new output format; the legacy format can be used
    if ipmi-sensors --help | grep -q legacy-output; then
        IPMI_FORMAT="--legacy-output"
    else
        IPMI_FORMAT=""
    fi
    if ipmi-sensors --help | grep -q " \-\-groups"; then
        IPMI_GROUP_OPT="-g"
    else
        IPMI_GROUP_OPT="-t"
    fi

    # At least with ipmi-sensors 0.7.16 this group is Power_Unit instead of "Power Unit"
    _run_cached_internal "ipmi_sensors" 300 300 900 600 "echo '<<<ipmi_sensors>>>'; for class in Temperature Power_Unit Fan; do
        ipmi-sensors ${IPMI_FORMAT} --sdr-cache-directory /var/cache/ ${IPMI_GROUP_OPT} \"\${class}\" | sed -e 's/ /_/g' -e 's/:_\?/ /g' -e 's@ \([^(]*\)_(\([^)]*\))@ \2_\1@'
        # In case of a timeout immediately leave loop.
        if [ $? = 255 ]; then break; fi
    done"
}

section_md() {
    # RAID status of Linux software RAID
    echo '<<<md>>>'
    cat /proc/mdstat
}

section_dmraid() {
    # RAID status of Linux RAID via device mapper
    if inpath dmraid && DMSTATUS=$(dmraid -r); then
        echo '<<<dmraid>>>'

        # Output name and status
        dmraid -s | grep -e ^name -e ^status

        # Output disk names of the RAID disks
        DISKS=$(echo "${DMSTATUS}" | cut -f1 -d:)

        for disk in ${DISKS}; do
            device=$(cat "/sys/block/$(basename "${disk}")/device/model")
            status=$(echo "${DMSTATUS}" | grep "^${disk}")
            echo "${status} Model: ${device}"
        done
    fi
}

section_lsi() {
    # RAID status of LSI controllers via cfggen
    if inpath cfggen; then
        echo '<<<lsi>>>'
        cfggen 0 DISPLAY | grep -E '(Target ID|State|Volume ID|Status of volume)[[:space:]]*:' | sed -e 's/ *//g' -e 's/:/ /'
    fi
}

section_megaraid() {
    # RAID status of LSI MegaRAID controller via MegaCli. You can download that tool from:
    # http://www.lsi.com/downloads/Public/MegaRAID%20Common%20Files/8.02.16_MegaCLI.zip
    if inpath MegaCli; then
        MegaCli_bin="MegaCli"
    elif inpath MegaCli64; then
        MegaCli_bin="MegaCli64"
    elif inpath megacli; then
        MegaCli_bin="megacli"
    elif inpath storcli; then
        MegaCli_bin="storcli"
    elif inpath storcli64; then
        MegaCli_bin="storcli64"
    else
        MegaCli_bin="unknown"
    fi

    if [ "${MegaCli_bin}" != "unknown" ]; then
        echo '<<<megaraid_pdisks>>>'
        for part in $(${MegaCli_bin} -EncInfo -aALL -NoLog </dev/null |
            sed -rn 's/:/ /; s/[[:space:]]*$//; s/^ *(Enclosure|Device ID|.*Slot ID)/\1/p'); do
            [ "${part}" = "Enclosure" ] && echo ""
            echo -n "dev2enc ${part} "
        done
        echo
        ${MegaCli_bin} -PDList -aALL -NoLog </dev/null | grep -E 'Enclosure|Raw Size|Slot Number|Device Id|Firmware state|Inquiry|Predictive Failure Count'
        echo '<<<megaraid_ldisks>>>'
        ${MegaCli_bin} -LDInfo -Lall -aALL -NoLog </dev/null | grep -E 'Size|State|Number|Adapter|Virtual'
        echo '<<<megaraid_bbu>>>'
        ${MegaCli_bin} -AdpBbuCmd -GetBbuStatus -aALL -NoLog </dev/null | grep -v Exit
    fi
}

section_3ware() {
    # 3ware ATA RAID controllers
    if inpath tw_cli; then
        for C in $(tw_cli show | awk 'NR < 4 { next } { print $1 }'); do
            [ -n "${C}" ] || continue
            echo '<<<3ware_info>>>'
            tw_cli "/${C}" show all | grep -E 'Model =|Firmware|Serial'
            echo '<<<3ware_disks>>>'
            tw_cli "/${C}" show drivestatus | grep -E 'p[0-9]' | sed "s/^/${C}\//"
            echo '<<<3ware_units>>>'
            tw_cli "/${C}" show unitstatus | grep -E 'u[0-9]' | sed "s/^/${C}\//"
        done
    fi
}

section_arc_raid_status() {
    # RAID controllers from areca (Taiwan)
    # cli64 can be found at ftp://ftp.areca.com.tw/RaidCards/AP_Drivers/Linux/CLI/
    if inpath cli64; then
        _run_cached_internal "arc_raid_status" 300 300 900 600 "echo '<<<arc_raid_status>>>'; cli64 rsf info | tail -n +3 | head -n -2"
    fi
}

section_openvpn_clients() {
    # OpenVPN Clients. Currently we assume that the configuration is in
    # /etc/openvpn. We might find a safer way to find the configuration later.
    if [ -e /etc/openvpn/openvpn-status.log ]; then
        echo '<<<openvpn_clients:sep(44)>>>'
        sed -n -e '/CLIENT LIST/,/ROUTING TABLE/p' </etc/openvpn/openvpn-status.log | sed -e 1,3d -e '$d'
    fi
}

section_ntp() {
    # Time synchronization with NTP
    if inpath ntpq; then
        _run_cached_internal "ntp" 30 120 200 20 "echo '<<<ntp>>>'; waitmax 5 ntpq -np | sed -e 1,2d -e 's/^\(.\)/\1 /' -e 's/^ /%/' || true"
    fi
}

section_chrony() {
    # Time synchronization with Chrony
    if inpath chronyc; then
        # Force successful exit code. Otherwise section will be missing if daemon not running
        _run_cached_internal "chrony" 30 120 200 20 "echo '<<<chrony>>>'; waitmax 5 chronyc tracking || true"
    fi
}

section_nvidia() {
    if inpath nvidia-settings && [ -S /tmp/.X11-unix/X0 ]; then
        echo '<<<nvidia>>>'
        for var in GPUErrors GPUCoreTemp; do
            DISPLAY=:0 waitmax 2 nvidia-settings -t -q ${var} | sed "s/^/${var}: /"
        done
    fi
}

section_drbd() {
    # If you add a IS_DOCKERIZED check here please inform the kubernetes team
    # which uses this agent without modifications. They expect it to be run
    # without docker detection.
    if [ -e /proc/drbd ]; then
        echo '<<<drbd>>>'
        cat /proc/drbd
    fi
}

section_cups_queues() {
    # TODO: this seems broken. Don't we need to export cups_queues?
    # Status of CUPS printer queues
    if inpath lpstat; then
        if pgrep -f "\bcupsd" >/dev/null 2>&1; then
            # first define a function to check cups
            # shellcheck disable=SC2317 # called indirectly
            cups_queues() {
                CPRINTCONF=/etc/cups/printers.conf
                if [ -r "${CPRINTCONF}" ]; then
                    LOCAL_PRINTERS=$(grep -E "<(Default)?Printer .*>" "${CPRINTCONF}" | awk '{print $2}' | sed -e 's/>//')
                    # SC2162: read without -r will mangle backslashes.
                    # We suppress it here for compatibility (currently backslashes e.g. before spaces are dropped).
                    # Since escaping of field separators is not relevant when reading into one variable, we probably
                    # would have wanted "read -r".
                    # shellcheck disable=SC2162
                    lpstat -p | while read LINE; do
                        PRINTER=$(echo "${LINE}" | awk '{print $2}')
                        if echo "${LOCAL_PRINTERS}" | grep -q "${PRINTER}"; then
                            echo "${LINE}"
                        fi
                    done
                    echo '---'
                    # SC2162: read without -r will mangle backslashes.
                    # We suppress it here for compatibility (currently backslashes e.g. before spaces are dropped).
                    # Since escaping of field separators is not relevant when reading into one variable, we probably
                    # would have wanted "read -r".
                    # shellcheck disable=SC2162
                    lpstat -o | while read LINE; do
                        PRINTER=${LINE%%-*}
                        if echo "${LOCAL_PRINTERS}" | grep -q "${PRINTER}"; then
                            echo "${LINE}"
                        fi
                    done
                else
                    lpstat -p
                    echo '---'
                    lpstat -o | sort
                fi
            }
            _run_cached_internal "cups_queues" 300 300 900 600 "echo '<<<cups_queues>>>'; cups_queues"
        fi
    fi
}

section_heartbeat() {
    # Heartbeat monitoring
    # Different handling for heartbeat clusters with and without CRM
    # for the resource state
    if { [ -S /var/run/heartbeat/crm/cib_ro ] || [ -S /var/run/crm/cib_ro ]; } ||
        pgrep "^(crmd|pacemaker-contr)$" >/dev/null 2>&1; then
        echo '<<<heartbeat_crm>>>'
        TZ=UTC crm_mon -1 -r | grep -v ^$ | sed 's/^ //; /^\sResource Group:/,$ s/^\s//; s/^\s/_/g'
    fi
    if inpath cl_status; then
        echo '<<<heartbeat_rscstatus>>>'
        cl_status rscstatus

        echo '<<<heartbeat_nodes>>>'
        for NODE in $(cl_status listnodes); do
            if [ "${NODE}" != "$(uname -n | tr '[:upper:]' '[:lower:]')" ]; then
                STATUS=$(cl_status nodestatus "${NODE}")
                echo -n "${NODE} ${STATUS}"
                for LINK in $(cl_status listhblinks "${NODE}" 2>/dev/null); do
                    echo -n " ${LINK} $(cl_status hblinkstatus "${NODE}" "${LINK}")"
                done
                echo
            fi
        done
    fi
}

section_postfix() {
    # Postfix mailqueue monitoring
    # Determine the number of mails and their size in several postfix mail queues
    if inpath postconf; then
        postfix_queue_dir=$(postconf -h queue_directory 2>/dev/null)
        if [ -n "${postfix_queue_dir}" ]; then
            echo '<<<postfix_mailq>>>'
            for queue in deferred active; do
                count=$(find "${postfix_queue_dir}/${queue}" -type f | wc -l)
                size=$(du -s "${postfix_queue_dir}/${queue}" | awk '{print $1 }')
                if [ -z "${size}" ]; then
                    size=0
                fi
                echo "QUEUE_${queue} ${size} ${count}"
            done
        fi
    elif [ -x /usr/sbin/ssmtp ]; then
        echo '<<<postfix_mailq>>>'
        mailq 2>&1 | sed 's/^[^:]*: \(.*\)/\1/' | tail -n 6
    fi
}

section_qmail() {
    # Check status of qmail mailqueue
    if inpath qmail-qstat; then
        echo "<<<qmail_stats>>>"
        qmail-qstat
    fi
}

section_nullmailer() {
    # Nullmailer queue monitoring
    if inpath nullmailer-send && [ -d /var/spool/nullmailer/queue ]; then
        echo '<<<nullmailer_mailq>>>'
        COUNT=$(find /var/spool/nullmailer/queue -type f | wc -l)
        SIZE=$(du -s /var/spool/nullmailer/queue | awk '{print $1 }')
        echo "${SIZE} ${COUNT}"
    fi
}

section_mknotify() {
    # Check status of OMD sites and Checkmk Notification spooler
    if inpath omd; then
        # 60 is _probably_ the agents polling interval. Why would you use that??
        _run_cached_internal "omd_status" 60 60 180 120 "echo '<<<omd_status>>>'; omd status --bare --auto || true"

        echo '<<<mknotifyd:sep(0)>>>'
        get_epoch
        for statefile in /omd/sites/*/var/log/mknotifyd.state; do
            if [ -e "${statefile}" ]; then
                site=${statefile%/var/log*}
                site=${site#/omd/sites/}
                echo "[${site}]"
                grep -v '^#' <"${statefile}"
            fi
        done
    fi
}

section_omd_apache() {
    if inpath omd; then
        echo '<<<omd_apache:sep(124)>>>'
        for statsfile in /omd/sites/*/var/log/apache/stats; do
            if [ -e "${statsfile}" ]; then
                site=${statsfile%/var/log*}
                site=${site#/omd/sites/}
                echo "[${site}]"
                cat "${statsfile}"
                : >"${statsfile}"
            fi
        done
    fi
}

section_zpool() {
    # Welcome the ZFS check on Linux
    # We do not endorse running ZFS on linux if your vendor doesn't support it ;)
    # check zpool status
    if inpath zpool; then
        echo "<<<zpool_status>>>"
        zpool status -x
    fi
}

section_veritas_vcs() {
    # Veritas Cluster Server
    # Software is always installed in /opt/VRTSvcs.
    # Secure mode must be off to allow root to execute commands
    if [ -x /opt/VRTSvcs/bin/haclus ]; then
        echo "<<<veritas_vcs>>>"
        vcshost=$(hostname | cut -d. -f1)
        waitmax -s 9 2 /opt/VRTSvcs/bin/haclus -display -localclus | grep -e ClusterName -e ClusState
        waitmax -s 9 2 /opt/VRTSvcs/bin/hasys -display -attribute SysState
        waitmax -s 9 2 /opt/VRTSvcs/bin/hagrp -display -sys "${vcshost}" -attribute State -localclus
        waitmax -s 9 2 /opt/VRTSvcs/bin/hares -display -sys "${vcshost}" -attribute State -localclus
    fi
}

#
# BEGIN COMMON AGENT CODE
#

section_job() {
    # Get statistics about monitored jobs.

    _cat_files() {
        # read file names from stdin and write like `head -n -0 -v file`
        while read -r file; do
            printf "==> %s <==\n" "${file##./}"
            cat "${file}"
        done
    }

    (
        cd "${MK_VARDIR}/job" 2>/dev/null || return
        printf "<<<job>>>\n"
        for user in *; do
            (
                cd "${user}" 2>/dev/null || return # return from subshell only
                # This folder is owned (and thus writable) by the user that ran the jobs.
                # The agent (root) must not read files that are not owned by the user.
                # This prevents symlink or hardlink attacks.
                find -L . -type f -user "${user}" | _cat_files
            )
        done
    )
}

section_fileinfo() {
    # fileinfo check: put patterns for files into /etc/check_mk/fileinfo.cfg
    perl -e '
    use File::Glob "bsd_glob";
    my @patterns = ();
    foreach (bsd_glob("$ARGV[0]/fileinfo.cfg"), bsd_glob("$ARGV[0]/fileinfo.d/*")) {
        open my $handle, "<", $_ or next;
        while (<$handle>) {
            chomp;
            next if /^\s*(#|$)/;
            my $pattern = $_;
            $pattern =~ s/\$DATE:(.*?)\$/substr(`date +"$1"`, 0, -1)/eg;
            push @patterns, $pattern;
        }
        warn "error while reading $_: $!\n" if $!;
        close $handle;
    }
    exit if ! @patterns;

    my $file_stats = "";
    foreach (@patterns) {
        foreach (bsd_glob("$_")) {
            if (! -f) {
                $file_stats .= "$_|missing\n" if ! -d;
            } elsif (my @infos = stat) {
                $file_stats .= "$_|ok|$infos[7]|$infos[9]\n";
            } else {
                $file_stats .= "$_|stat failed: $!\n";
            }
        }
    }

    print "<<<fileinfo:sep(124)>>>\n", time, "\n[[[header]]]\nname|status|size|time\n[[[content]]]\n$file_stats";
    ' -- "${MK_CONFDIR}"
}

#
# END COMMON AGENT CODE
#
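# A fileinfo.cfg sketch (illustrative paths, not shipped defaults): one glob
# pattern per line; '$DATE:<format>$' is substituted via date(1) by the perl
# snippet in section_fileinfo above, e.g.:
#
#   /var/log/messages
#   /var/backups/dump-$DATE:%Y%m%d$.tgz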
section_lnx_thermal() {
    # Gather thermal information provided e.g. by acpi
    # At the moment only supporting thermal sensors
    if ls /sys/class/thermal/thermal_zone* >/dev/null 2>&1; then
        echo '<<<lnx_thermal>>>'
        for F in /sys/class/thermal/thermal_zone*; do
            echo -n "${F##*/} "
            if [ ! -e "${F}/mode" ]; then echo -n "- "; fi
            cat "${F}/mode" "${F}/type" "${F}/temp" "${F}/trip_point_"* | tr \\n " "
            echo
        done
    fi
}

section_libelle() {
    # Libelle Business Shadow
    if inpath trd; then
        echo "<<<libelle_business_shadow:sep(58)>>>"
        trd -s
    fi
}

section_varnish() {
    # HTTP Accelerator Statistics
    if inpath varnishstat; then
        echo "<<<varnish>>>"
        varnishstat -1
    fi
}

section_pvecm() {
    # Proxmox Cluster
    if inpath pvecm; then
        echo "<<<pvecm_status:sep(58)>>>"
        pvecm status
        echo "<<<pvecm_nodes>>>"
        pvecm nodes
    fi
}

# Implements Real-Time Check feature of the Checkmk agent which can send
# some section data in 1 second resolution. Useful for fast notifications and
# detailed graphing (if you configure your RRDs to this resolution).
run_real_time_checks_for_remote() {
    PIDFILE=${MK_VARDIR}/real_time_checks.pid
    echo $$ >"${PIDFILE}"

    # shellcheck source=agents/cfg_examples/real_time_checks.cfg
    . "${MK_CONFDIR}/real_time_checks.cfg"

    if [ "${PASSPHRASE}" != "" ]; then
        # new mechanism to set the passphrase has priority
        RTC_SECRET=${PASSPHRASE}
    fi

    while true; do
        # terminate when pidfile is gone or other Real-Time Check process started or configured timeout
        if [ ! -e "${PIDFILE}" ] || [ "$(cat "${PIDFILE}")" -ne "$$" ] || [ "${RTC_TIMEOUT}" -eq 0 ]; then
            exit 1
        fi

        for SECTION in ${RTC_SECTIONS}; do
            if [ "${ENCRYPTED_RT}" != "no" ]; then
                PROTOCOL=00
            else
                PROTOCOL=99
            fi
            # Be aware of maximum packet size. Maybe we need to check the size of the section
            # output and do some kind of nicer error handling.
            # 2 bytes: protocol version, 10 bytes: timestamp, rest: encrypted data
            # dd is used to concatenate the output of all commands to a single write/block => udp packet
            #
            # This never works, we should make that more transparent or remove the code.
            # For now:
            # shellcheck disable=SC2169,SC3025 # In dash, /dev/{tcp,udp} is not supported
            {
                echo -n ${PROTOCOL}
                get_epoch | tr -d '\n'
                if [ "${ENCRYPTED_RT}" != "no" ]; then
                    export RTC_SECRET=${RTC_SECRET}
                    "section_${SECTION}" | openssl enc -aes-256-cbc -md md5 -pass env:RTC_SECRET -nosalt
                else
                    "section_${SECTION}"
                fi
            } | dd bs=9999 iflag=fullblock 2>/dev/null >"/dev/udp/${REMOTE}/${RTC_PORT}"
        done

        sleep 1
        RTC_TIMEOUT=$((RTC_TIMEOUT - 1))
    done
}

run_real_time_checks() {
    # Start new liveupdate process in background on each agent execution. Starting
    # a new live update process will terminate the old one automatically after
    # max. 1 sec.
    if [ -e "${MK_CONFDIR}/real_time_checks.cfg" ]; then
        if [ -z "${REMOTE}" ]; then
            echo "ERROR: \${REMOTE} not specified. Not sending real-time data." >&2
        elif ! inpath openssl; then
            echo "ERROR: openssl command is missing. Not sending real-time data." >&2
        else
            run_real_time_checks_for_remote >/dev/null &
        fi
    fi
}
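# A real_time_checks.cfg sketch, assuming only the variables read by the two
# functions above (values are illustrative, not shipped defaults):
#
#   RTC_SECTIONS="mem cpu"   # section_* functions to stream every second
#   RTC_PORT=6559            # UDP port on the monitoring server (${REMOTE})
#   RTC_TIMEOUT=90           # seconds to keep streaming after an agent run
#   ENCRYPTED_RT=yes         # "no" disables encryption (protocol 99)
#   PASSPHRASE="..."         # shared secret, takes priority over RTC_SECRET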
#
# BEGIN COMMON AGENT CODE
#

run_cached() {
    # Compatibility wrapper for plugins that might use run_cached.
    # We should have never exposed this as quasi API.
    NAME="${1}"
    MAXAGE="${2}"
    REFRESH_INTERVAL="${3}"
    shift 3

    OUTPUT_TIMEOUT=$((MAXAGE * 3))
    CREATION_TIMEOUT=$((MAXAGE * 2))

    _run_cached_internal "${NAME}" "${REFRESH_INTERVAL}" "${MAXAGE}" "${OUTPUT_TIMEOUT}" "${CREATION_TIMEOUT}" "$@"
}

_run_cached_internal() {
    # Run a command asynchronously by use of a cache file.
    # Usage: _run_cached_internal NAME REFRESH_INTERVAL MAXAGE OUTPUT_TIMEOUT CREATION_TIMEOUT [COMMAND ...]
    # Note that while multiple COMMAND arguments are considered, they are evaluated in a string.
    # This means that extra escaping is required.
    # For example:
    # To run a cat command every two minutes, considering the created data valid for three minutes,
    # sending the created data for four minutes and allowing the command to run for 12 minutes, you'll have to call
    #
    #   _run_cached_internal "my_file_content" 120 180 240 720 "cat \"My File\""
    #
    # Mind the escaping...

    NAME="${1}"             # name of the section (also used as cache file name)
    REFRESH_INTERVAL="${2}" # threshold in seconds when the cache file needs to be regenerated
    MAXAGE="${3}"           # maximum cache lifetime in seconds
    OUTPUT_TIMEOUT="${4}"   # threshold in seconds for how long the cache file will be output (regardless of whether it is outdated)
    CREATION_TIMEOUT="${5}" # threshold in seconds for how long the process is allowed to be running before it is killed (see below for details)
    shift 5
    # $* is now the command to run

    if ${DISABLE_CACHING:-false}; then
        # We need the re-splitting to be compatible with the caching case, so:
        # shellcheck disable=SC2068
        $@
        return
    fi

    [ -d "${MK_VARDIR}/cache" ] || mkdir -p "${MK_VARDIR}/cache"
    CACHEFILE="${MK_VARDIR}/cache/${NAME}.cache"
    FAIL_REPORT_FILE="${SPOOLDIR}/${NAME}.cachefail"

    NOW="$(get_epoch)"
    MTIME="$(get_file_mtime "${CACHEFILE}" 2>/dev/null)" || MTIME=0

    if ${MK_RUN_SYNC_PARTS}; then
        if [ -s "${CACHEFILE}" ] && [ $((NOW - MTIME)) -le "${OUTPUT_TIMEOUT}" ]; then
            # Output the file (if it is not too outdated)
            CACHE_INFO="cached(${MTIME},${MAXAGE})"
            # prefix or insert cache info, unless already present.
            # WATCH OUT: AIX does not allow us to pass this as a single '-e' option!
            if [ "${NAME%%_*}" = "local" ] || [ "${NAME%%_*}" = "mrpe" ]; then
                sed -e '/^<<<.*>>>/{p;d;}' -e '/^cached([0-9]*,[0-9]*) /{p;d;}' -e "s/^/${CACHE_INFO} /" "${CACHEFILE}"
            else
                sed -e '/^<<<.*\(:cached(\).*>>>/{p;d;}' -e 's/^<<<\([^>]*\)>>>$/<<<\1:'"${CACHE_INFO}"'>>>/' "${CACHEFILE}"
            fi
        fi
    fi

    if ${MK_RUN_ASYNC_PARTS}; then
        # Kill the process if it is running too long (cache file not accessed for more than CREATION_TIMEOUT seconds).
        # If killing succeeds, remove CACHEFILE.new.PID.
        # Write info about the timed out process and the kill attempt to the SPOOLDIR.
        # It will be reported to the server in the next (synchronous) agent execution.
        # The file will be deleted as soon as the plugin/local check is functional again.
        # Do not output the file here, it will interrupt the local and mrpe sections, as well as any other
        # partially cached section.
        for cfile in "${CACHEFILE}.new."*; do
            [ -e "${cfile}" ] || break # no match
            TRYING_SINCE="$(get_file_atime "${cfile}")"
            [ -n "${TRYING_SINCE}" ] || break # race condition: file vanished
            if [ $((NOW - TRYING_SINCE)) -ge "${CREATION_TIMEOUT}" ]; then
                {
                    printf "<<<checkmk_cached_plugins:sep(124)>>>\n"
                    pid="${cfile##*.new.}"
                    printf "timeout|%s|%s|%s\n" "${NAME}" "${CREATION_TIMEOUT}" "${pid}"
                    kill -9 "${pid}" >/dev/null 2>&1 && sleep 2 # TODO: what about child processes?
                    if [ -n "$(ps -o args= -p "${pid}")" ]; then
                        printf "killfailed|%s|%s|%s\n" "${NAME}" "${CREATION_TIMEOUT}" "${pid}"
                    else
                        rm -f "${cfile}"
                    fi
                } >"${FAIL_REPORT_FILE}" 2>&1
            fi
        done

        # This does the right thing, regardless whether the pattern matches!
        _cfile_in_use() {
            for cfile in "${CACHEFILE}.new."*; do
                printf "%s\n" "${cfile}"
                break
            done
        }

        # Time to refresh cache file and new job not yet running?
        if [ $((NOW - MTIME)) -gt "${REFRESH_INTERVAL}" ] && [ ! -e "$(_cfile_in_use)" ]; then
            # Start it. If the command fails the output is thrown away
            cat <<HERE | ${CURRENT_SHELL} >/dev/null 2>&1 &
eval '${MK_DEFINE_LOG_SECTION_TIME}'
exec > "${CACHEFILE}.new.\$\$" || exit 1
$* && mv -f "${CACHEFILE}.new.\$\$" "${CACHEFILE}" && rm -f "${FAIL_REPORT_FILE}" || rm -f "${CACHEFILE}.new.\$\$"
HERE
        fi
    fi

    unset NAME MAXAGE CREATION_TIMEOUT REFRESH_INTERVAL CACHEFILE NOW MTIME CACHE_INFO TRYING_SINCE OUTPUT_TIMEOUT
}

run_local_checks() {
    cd "${LOCALDIR}" || return

    if ${MK_RUN_SYNC_PARTS}; then
        echo '<<<local:sep(0)>>>'
        for script in ./*; do
            if is_valid_plugin "${script}"; then
                _log_section_time "${script}"
            fi
        done
    fi

    # Call some local checks only every X'th second
    for script in [1-9]*/*; do
        if is_valid_plugin "${script}"; then
            interval="${script%/*}"
            _run_cached_internal "local_${script##*/}" "${interval}" "${interval}" $((interval * 3)) $((interval * 2)) "_log_section_time '${script}'"
        fi
    done
}

run_spooler() {
    (
        cd "${SPOOLDIR}" 2>/dev/null || return

        now=$(get_epoch)

        for file in *; do
            [ "${file}" != "*" ] || return

            # If prefixed with a number, then that is the maximum age in seconds.
            # If the file is older than that, it is ignored.
            maxage="${file%%[^0-9]*}"

            if [ "${maxage}" ]; then
                mtime=$(get_file_mtime "${file}")
                [ $((now - mtime)) -le "${maxage}" ] || continue
            fi

            cat "${file}"
        done
    )
}
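# Spool file naming sketch (illustrative names): run_spooler() above simply
# dumps every file in ${SPOOLDIR}, so a file should contain complete sections
# including their '<<<...>>>' headers. An optional leading number is the
# maximum age, e.g.
#
#   ${SPOOLDIR}/90_my_plugin   -> ignored once older than 90 seconds
#   ${SPOOLDIR}/my_plugin      -> no age limit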
get_plugin_interpreter() {
    # Return the interpreter (or "") for the plugin file (or fail).
    # We return the interpreter instead of wrapping the call, so we don't
    # have to export the function (which is not portable).

    # normalize input
    agent_plugin="${1#./}"

    extension="${agent_plugin##*.}"
    filename="${agent_plugin%.*}"

    # Execute all non python plugins with ./foo
    if [ "${extension}" != "py" ]; then
        return 0
    fi

    if [ "${filename#"${filename%??}"}" != "_2" ]; then
        if [ -n "${NO_PYTHON}" ] || [ -n "${WRONG_PYTHON_COMMAND}" ]; then
            section_checkmk_failed_plugin "${agent_plugin}"
            return 1
        fi

        if [ -n "${PYTHON3}" ]; then
            echo "${PYTHON3}"
            return 0
        fi

        if [ ! -e "${filename}_2.py" ]; then
            section_checkmk_failed_plugin "${agent_plugin} (Missing Python 3 installation)"
            return 1
        fi

        # no python3 found, but python2 plugin file present
        return 1
    fi

    if [ -x "${filename%??}.py" ] && [ -n "${PYTHON3}" ]; then
        return 1
    fi

    if [ -n "${PYTHON2}" ]; then
        echo "${PYTHON2}"
        return 0
    fi

    section_checkmk_failed_plugin "${agent_plugin} (missing Python 2 installation)"
    return 1
}

run_plugins() {
    cd "${PLUGINSDIR}" || return

    if ${MK_RUN_SYNC_PARTS}; then
        for script in ./*; do
            if is_valid_plugin "${script}"; then
                if plugin_interpreter=$(get_plugin_interpreter "${script}"); then
                    # SC2086: We don't want to quote, interpreter is "nothing" if empty, not "''"
                    # shellcheck disable=SC2086
                    _log_section_time ${plugin_interpreter} "${script}"
                fi
            fi
        done
    fi

    # Call some plugins only every X'th second
    for script in [1-9]*/*; do
        if is_valid_plugin "${script}"; then
            if plugin_interpreter=$(get_plugin_interpreter "${script}"); then
                interval="${script%/*}"
                # shellcheck disable=SC2086
                _run_cached_internal "plugins_${script##*/}" "${interval}" "${interval}" $((interval * 3)) $((interval * 2)) _log_section_time ${plugin_interpreter} "${script}"
            fi
        fi
    done
}

_non_comment_lines() {
    grep -Ev '^[[:space:]]*($|#)' "${1}"
}

_mrpe_get_interval() {
    echo "${1}" | grep -E '^\([^)]*\)' | sed -n 's/^.*interval=\([^:)]*\).*$/\1/p'
}

_mrpe_normalize_spaces() {
    # watch out:
    # * [:blank:] does not include \t on AIX
    # * [:space:] does include \n on Linux
    tr -s '\t' ' '
}

run_remote_plugins() {
    configfile="${1}"
    prefix="${2}"
    [ -f "${configfile}" ] || return

    _non_comment_lines "${configfile}" | _mrpe_normalize_spaces | while read -r descr rest; do
        interval="$(_mrpe_get_interval "${rest}")"
        cmdline="${rest#\(*\) }"

        if [ -n "${prefix}" ]; then
            cmdline="${prefix} '${cmdline}'"
        fi

        if [ -z "${interval}" ]; then
            ${MK_RUN_SYNC_PARTS} && run_mrpe "${descr}" "${cmdline}"
        else
            # Sourcing the agent here is not very performant, but we need 'run_mrpe', and not all shells support exporting of functions.
            _run_cached_internal "mrpe_${descr}" "${interval}" "${interval}" $((interval * 3)) $((interval * 2)) "MK_SOURCE_AGENT=yes . '${0}'; run_mrpe \"${descr}\" \"${cmdline}\""
        fi
    done
}
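# An mrpe.cfg sketch (hypothetical check, not a shipped default): one check per
# line, "<description> [(interval=<seconds>)] <command line>". With the optional
# interval prefix the check is run asynchronously via _run_cached_internal:
#
#   Foo_Health (interval=300) /usr/local/bin/check_foo --warn 80 --crit 90
#   Bar_Status /usr/local/bin/check_bar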
run_mrpe() {
    descr="${1}"
    shift

    PLUGIN="${1%% *}"
    OUTPUT="$(eval "${MK_DEFINE_LOG_SECTION_TIME}; _log_section_time $*")"
    STATUS="$?"

    printf "<<<mrpe>>>\n"
    printf "(%s) %s %s %s" "${PLUGIN##*/}" "${descr}" "${STATUS}" "${OUTPUT}" | tr \\n \\1
    printf "\n"

    unset descr PLUGIN OUTPUT STATUS
}

#
# END COMMON AGENT CODE
#

run_purely_synchronous_sections() {
    section_checkmk

    section_cmk_agent_ctl_status

    section_checkmk_agent_plugins

    section_df

    section_zfsget

    section_mounts

    section_ps

    # Memory usage
    section_mem

    # Load and number of processes
    section_cpu

    section_uptime

    section_lnx_if

    section_bonding_interfaces

    section_ovs_bonding

    section_tcp_conn

    section_multipath

    section_diskstat

    section_kernel

    section_md

    section_dmraid

    section_lsi

    section_megaraid

    section_3ware

    section_openvpn_clients

    section_nvidia

    section_drbd

    section_cups_queues

    section_heartbeat

    section_postfix

    section_qmail

    section_nullmailer

    section_mknotify

    section_omd_apache

    section_zpool

    section_veritas_vcs

    section_fileinfo

    section_job

    section_lnx_thermal

    section_libelle

    section_varnish

    section_pvecm
}

run_partially_asynchronous_sections() {
    section_ipmitool

    section_ipmisensors

    section_arc_raid_status

    section_ntp

    section_chrony

    run_remote_plugins "${MK_CONFDIR}/mrpe.cfg"
}

main_setup() {
    exec </dev/null

    set_up_process_commandline_arguments "$@"

    if "${DISABLE_STDERR:-true}"; then
        exec 2>/dev/null
    fi

    set_up_get_epoch
    set_up_current_shell

    set_variable_defaults
    set_up_profiling

    unset_locale

    preamble_1
}

main_sync_parts() {
    run_purely_synchronous_sections

    run_spooler
}

main_mixed_parts() {
    run_partially_asynchronous_sections

    run_local_checks

    run_plugins
}

main_async_parts() {
    run_real_time_checks
}

main_finalize_sync() {
    finalize_profiling
}

#
# BEGIN COMMON AGENT CODE
#

main() {

    while true; do

        main_setup "$@"

        (

            ${MK_RUN_SYNC_PARTS} && main_sync_parts

            (${MK_RUN_ASYNC_PARTS} || ${MK_RUN_SYNC_PARTS}) && main_mixed_parts

            ${MK_RUN_ASYNC_PARTS} && main_async_parts

            ${MK_RUN_SYNC_PARTS} && main_finalize_sync

        ) | { if ${MK_RUN_SYNC_PARTS}; then optionally_encrypt "${PASSPHRASE}" ""; else cat; fi; }

        [ "${MK_LOOP_INTERVAL}" -gt 0 ] 2>/dev/null || return 0

        sleep "${MK_LOOP_INTERVAL}"
    done

}

[ -z "${MK_SOURCE_AGENT}" ] && main "$@"
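# Invocation sketch (hypothetical values): the agent is normally started by
# xinetd or the agent controller, but it can also be run by hand, e.g.
#
#   MK_LOOP_INTERVAL=60 ./check_mk_agent          # loop, emitting output every 60s
#   MK_RUN_ASYNC_PARTS=false ./check_mk_agent     # synchronous sections only
#   MK_SOURCE_AGENT=yes . ./check_mk_agent        # source the functions, skip main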