#!/usr/bin/python
# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2014             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software;  you can redistribute it and/or modify it
# under the  terms of the  GNU General Public License  as published by
# the Free Software Foundation in version 2.  check_mk is  distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY;  with-
# out even the implied warranty of  MERCHANTABILITY  or  FITNESS FOR A
# PARTICULAR PURPOSE. See the  GNU General Public License for more de-
# tails. You should have  received  a copy of the  GNU  General Public
# License along with GNU Make; see the file  COPYING.  If  not,  write
# to the Free Software Foundation, Inc., 51 Franklin St,  Fifth Floor,
# Boston, MA 02110-1301 USA.

#   .--mem.linux-----------------------------------------------------------.
#   |                                      _ _                             |
#   |           _ __ ___   ___ _ __ ___   | (_)_ __  _   ___  __           |
#   |          | '_ ` _ \ / _ \ '_ ` _ \  | | | '_ \| | | \ \/ /           |
#   |          | | | | | |  __/ | | | | |_| | | | | | |_| |>  <            |
#   |          |_| |_| |_|\___|_| |_| |_(_)_|_|_| |_|\__,_/_/\_\           |
#   |                                                                      |
#   +----------------------------------------------------------------------+
#   |  Specialized memory check for Linux that takes into account          |
#   |  all of its specific information in /proc/meminfo.                   |
#   '----------------------------------------------------------------------'

# BEWARE: half of the information and blog entries about /proc/meminfo
# on the internet is imprecise or even totally wrong!

# <<<mem>>>
# MemTotal:       24707592 kB
# MemFree:          441224 kB
# Buffers:          320672 kB
# Cached:         19981008 kB
# SwapCached:         6172 kB
# Active:          8756876 kB
# Inactive:       13360444 kB
# Active(anon):    1481236 kB
# Inactive(anon):   371260 kB
# Active(file):    7275640 kB
# Inactive(file): 12989184 kB
# Unevictable:      964808 kB
# Mlocked:          964808 kB
# SwapTotal:      16777212 kB
# SwapFree:       16703328 kB
# Dirty:           4408124 kB
# Writeback:         38020 kB
# AnonPages:       2774444 kB
# Mapped:            69456 kB
# Shmem:             33772 kB
# Slab:             861028 kB
# SReclaimable:     756236 kB
# SUnreclaim:       104792 kB
# KernelStack:        4176 kB
# PageTables:        15892 kB
# NFS_Unstable:          0 kB
# Bounce:                0 kB
# WritebackTmp:          0 kB
# CommitLimit:    39014044 kB
# Committed_AS:    3539808 kB
# VmallocTotal:   34359738367 kB
# VmallocUsed:      347904 kB
# VmallocChunk:   34346795572 kB
# HardwareCorrupted:     0 kB
# AnonHugePages:         0 kB
# HugePages_Total:       0
# HugePages_Free:        0
# HugePages_Rsvd:        0
# HugePages_Surp:        0
# Hugepagesize:       2048 kB
# DirectMap4k:      268288 kB
# DirectMap2M:     8112128 kB
# DirectMap1G:    16777216 kB

# This is from an earlier kernel (CentOS 5.5). Some entries
# are missing here:
# <<<mem>>>
# MemTotal:       377176 kB
# MemFree:         60112 kB
# Buffers:         93864 kB
# Cached:         116364 kB
# SwapCached:          0 kB
# Active:         169140 kB
# Inactive:        84144 kB
# HighTotal:           0 kB
# HighFree:            0 kB
# LowTotal:       377176 kB
# LowFree:         60112 kB
# SwapTotal:     2064376 kB
# SwapFree:      2062756 kB
# Dirty:             172 kB
# Writeback:           0 kB
# AnonPages:       43080 kB
# Mapped:           8352 kB
# Slab:            45892 kB
# PageTables:       3208 kB
# NFS_Unstable:        0 kB
# Bounce:              0 kB
# CommitLimit:   2252964 kB
# Committed_AS:   125968 kB
# VmallocTotal: 34359738367 kB
# VmallocUsed:     18112 kB
# VmallocChunk: 34359719415 kB
# HugePages_Total:     0
# HugePages_Free:      0
# HugePages_Rsvd:      0
# Hugepagesize:     2048 kB

# Yet earlier kernel (SLES 9):
# <<<mem>>>
# MemTotal: 6224268 kB
# MemFree: 2913660 kB
# Buffers: 84712 kB
# Cached: 1779052 kB
# SwapCached: 0 kB
# Active: 1931528 kB
# Inactive: 1276156 kB
# HighTotal: 5373824 kB
# HighFree: 2233984 kB
# LowTotal: 850444 kB
# LowFree: 679676 kB
# SwapTotal: 1052280 kB
# SwapFree: 1052280 kB
# Dirty: 55680 kB
# Writeback: 0 kB
# Mapped: 1469268 kB
# Slab: 71724 kB
# Committed_AS: 2758332 kB
# PageTables: 7672 kB
# VmallocTotal: 112632 kB
# VmallocUsed: 9324 kB
# VmallocChunk: 103180 kB
# HugePages_Total: 0
# HugePages_Free: 0
# Hugepagesize: 2048 kB


# Default parameters of the mem.linux check. Every entry is a pair
# (level type, (warn, crit)) in the form that check_mem_levels() evaluates:
# "perc_used" / "perc_free" levels are percentages, "abs_free" levels are
# absolute amounts (presumably bytes, i.e. 50 MB / 30 MB here -- this depends
# on the unit delivered by parse_proc_meminfo_bytes(), which is defined
# outside of this file).
factory_settings["mem_linux_default_levels"] = {
    "levels_virtual":       ("perc_used", ( 80.0,  90.0)),
    "levels_total":         ("perc_used", (120.0, 150.0)),
    "levels_shm":           ("perc_used", ( 20.0,  30.0)),
    "levels_pagetables":    ("perc_used", (  8.0,  16.0)),
    "levels_committed":     ("perc_used", (100.0, 150.0)),
    "levels_commitlimit":   ("perc_free", ( 20.0,  10.0)),
    "levels_vmalloc":       ("abs_free",  (50*1024*1024, 30*1024*1024)),
}

def is_linux_meminfo(meminfo):
    """Tell whether a parsed meminfo dict has the fields the mem.linux check needs.

    Returns True only if all of "PageTables", "Writeback" and "Committed_AS"
    are present as keys, False otherwise.
    """
    for required_key in ("PageTables", "Writeback", "Committed_AS"):
        if required_key not in meminfo:
            return False
    return True

def inventory_mem_linux(info):
    """Inventory function of mem.linux.

    Returns a single item-less service with empty parameters if the agent
    section passes is_linux_meminfo(), otherwise None.
    """
    if not is_linux_meminfo(parse_proc_meminfo_bytes(info)):
        return None
    return [(None, {})]


def check_mem_levels(title, used, total, levels, of_what=None, of_value=None, show_percentage=False, show_free=False):
    """Evaluate one memory figure against optional levels.

    title           -- label that starts the info text
    used, total     -- used amount and size of the pool; both are rendered
                       with get_bytes_human_readable()
    levels          -- None or "ignore" for "no levels", otherwise a pair
                       (how, (warn, crit)) where how is one of
                         "perc_used"  - upper levels on the used percentage
                         "perc_free"  - lower levels on the free percentage
                         "abs_used"   - upper levels on the used amount
                         anything else (e.g. "abs_free")
                                      - lower levels on total - used
                         "predictive" - not supported, yields state 3
    of_what         -- if set, only one value is shown ("<title>: <value>")
                       and the text names this reference for percentages
    of_value        -- reference for percentage computation, defaults to total
    show_percentage -- append the used percentage if levels did not add one
    show_free       -- together with of_what: display total - used instead
                       of used

    Returns a pair (state, infotext).
    """
    if of_value is None:
        of_value = total # Reference for percentage levels
    state = 0
    if of_what:
        if show_free:
            value = total - used
        else:
            value = used
        infotext = "%s: %s" % (title, get_bytes_human_readable(value))
    else:
        infotext = "%s used: %s of %s" % (
             title, get_bytes_human_readable(used), get_bytes_human_readable(total))

    perc_shown = False
    if levels and levels != "ignore":
        how = levels[0]
        if how == "predictive":
            return 3, "Predictive levels for memory check not yet implemented"
            # TODO: Here we need the name of the RRD variable! But that does
            # not work if values have been summed up here. We should allow
            # predictive levels only for selected values.
            # return check_levels(used, levels[1], unit = "GB")

        warn, crit = levels[1]
        if how.startswith("perc_"):
            perc_used = 100.0 * float(used) / of_value
            perc_free = 100 - perc_used
            if how == "perc_used":
                if of_what:
                    t = " of " + of_what
                else:
                    t = ""
                levels_text = " (%.1f%%%s, " % (perc_used, t)
                if perc_used >= crit:
                    state = 2
                elif perc_used >= warn:
                    state = 1

            elif how == "perc_free":
                if of_what:
                    t = "of " + of_what
                else:
                    t = "free"
                levels_text = " (%.1f%% %s, " % (perc_free, t)
                if perc_free < crit:
                    state = 2
                elif perc_free < warn:
                    state = 1

            # The percentage and the levels are only spelled out when the
            # state is non-OK
            if state:
                perc_shown = True
                infotext += levels_text + "warn/crit at %.1f%%/%.1f%%)" % (warn, crit)

        else:
            if how == "abs_used":
                if used >= crit:
                    state = 2
                elif used >= warn:
                    state = 1
            else:
                # Every other level type is handled as lower levels on the
                # free amount
                free = total - used
                if free < crit:
                    state = 2
                elif free < warn:
                    state = 1

            if state:
                infotext += " (warn/crit at %s/%s)" % (get_bytes_human_readable(warn), get_bytes_human_readable(crit))

    if not perc_shown and show_percentage:
        infotext += " (%.1f%%)" % (100.0 * float(used) / of_value)
    return state, infotext

def check_mem_linux(_no_item, params, info):
    """Check function of mem.linux.

    Parses the agent section with parse_proc_meminfo_bytes(), derives a few
    additional figures (Caches, MemUsed, SwapUsed, TotalTotal, TotalUsed,
    Pending) and stores them in the same dict, then yields one
    (state, infotext) result per aspect via check_mem_levels(). RAM is always
    reported, Swap whenever swap space exists; most other aspects are only
    reported when their state is non-OK. Levels are taken from params; a
    missing key (e.g. "levels_ram") means "no levels" for that aspect.

    The final result is (0, "", perfdata) carrying nearly all fields of the
    dict, including the derived ones.
    """
    meminfo = parse_proc_meminfo_bytes(info)

    # SReclaimable is not available for older kernels
    if "SReclaimable" not in meminfo:
        meminfo["SReclaimable"] = 0
        meminfo["SUnreclaim"] = meminfo["Slab"]

    # Compute memory used by caches, that can be considered "free"
    meminfo["Caches"] = meminfo["Cached"] + meminfo["Buffers"] \
                      + meminfo["SwapCached"] + meminfo["SReclaimable"]

    # RAM (the percentage is only shown here if there is no swap, because
    # otherwise it appears in the "Total virtual memory" result)
    meminfo["MemUsed"] = meminfo["MemTotal"] - meminfo["MemFree"] - meminfo["Caches"]
    yield check_mem_levels("RAM", meminfo["MemUsed"], meminfo["MemTotal"],
              params.get("levels_ram"), show_percentage=not meminfo["SwapTotal"])

    # Swap - but only if available
    meminfo["SwapUsed"] = meminfo["SwapTotal"] - meminfo["SwapFree"]
    if meminfo["SwapTotal"]:
        yield check_mem_levels("Swap", meminfo["SwapUsed"], meminfo["SwapTotal"],
                  params.get("levels_swap"))

    # Total virtual memory
    meminfo["TotalTotal"] = meminfo["MemTotal"] + meminfo["SwapTotal"]
    meminfo["TotalUsed"] = meminfo["MemUsed"] + meminfo["SwapUsed"]
    r = check_mem_levels("Total virtual memory", meminfo["TotalUsed"], meminfo["TotalTotal"],
              params.get("levels_virtual"), show_percentage=True)
    if r[0] or meminfo["SwapTotal"]:
        yield r # only display if there is swap or status is non-OK

    # Total memory / in relation to RAM
    r = check_mem_levels("RAM + Swap", meminfo["TotalUsed"], meminfo["TotalTotal"],
              params.get("levels_total"), of_what = "RAM", of_value = meminfo["MemTotal"])
    if r[0]:
        yield r # only display if non-OK

    # Shared memory
    if "Shmem" in meminfo:
        r = check_mem_levels("Shared memory", meminfo["Shmem"], meminfo["MemTotal"],
                params.get("levels_shm"), of_what = "RAM")
        if r[0]:
            yield r # only display if non-OK

    # Page tables
    r = check_mem_levels("Page tables", meminfo["PageTables"], meminfo["MemTotal"],
            params.get("levels_pagetables"), of_what = "RAM")
    if r[0]:
        yield r # only display if non-OK

    # Disk Writeback: sum of everything that still waits to be written out
    meminfo["Pending"] = \
         meminfo["Dirty"] \
       + meminfo.get("Writeback", 0) \
       + meminfo.get("NFS_Unstable", 0) \
       + meminfo.get("Bounce", 0) \
       + meminfo.get("WritebackTmp", 0)

    r = check_mem_levels("Disk Writeback", meminfo["Pending"], meminfo["MemTotal"],
            params.get("levels_writeback"), of_what = "RAM")
    if r[0]:
        yield r # only display if non-OK

    # Committed memory
    r = check_mem_levels("Committed", meminfo["Committed_AS"], meminfo["TotalTotal"],
            params.get("levels_committed"), of_what = "RAM + Swap")
    if r[0]:
        yield r # only display if non-OK

    # Commit limit ("used" is the part of RAM + Swap above the limit)
    if "CommitLimit" in meminfo:
        r = check_mem_levels("Commit Limit", meminfo["TotalTotal"] - meminfo["CommitLimit"],
                meminfo["TotalTotal"], params.get("levels_commitlimit"), of_what = "RAM + Swap")
        if r[0]:
            yield r # only display if non-OK

    # VMalloc:
    # Newer kernel versions report wrong data, i.e. both VmallocUsed and
    # VmallocChunk are zero. Skip the evaluation in that case.
    if not (meminfo["VmallocUsed"] == 0 and meminfo["VmallocChunk"] == 0):
        r = check_mem_levels("Largest Free VMalloc Chunk", meminfo["VmallocTotal"] - meminfo["VmallocChunk"],
                meminfo["VmallocTotal"], params.get("levels_vmalloc"), of_what = "VMalloc Area", show_free=True)
        if r[0]:
            yield r # only display if non-OK

    # HardwareCorrupted: any non-zero amount is reported, by default as CRIT
    hwc = meminfo.get("HardwareCorrupted")
    if hwc:
        yield params.get("handle_hw_corrupted_error", 2), "Hardware defect of %s" % get_bytes_human_readable(hwc)

    # Now send performance data. We simply output *all* fields of meminfo
    # except for a few really useless values. sorted() gives the same
    # name-ordered list as items() followed by sort(), but does not depend
    # on items() returning a list.
    perfdata = []
    for name, value in sorted(meminfo.items()):
        if name.startswith("DirectMap"):
            continue
        if name.startswith("Vmalloc") and meminfo["VmallocTotal"] > 2**40: # useless on 64 Bit system
            continue
        if name.startswith("Huge"):
            if meminfo["HugePages_Total"] == 0: # omit useless data
                continue
            if name == "Hugepagesize":
                continue # not needed
            value = value * meminfo["Hugepagesize"] # convert number to actual memory size
        # Variable names like "Active(anon)" become "Active_anon" before
        # being passed to camelcase_to_underscored()
        perfdata.append((camelcase_to_underscored(name.replace("(", "_").replace(")","")), value))
    yield 0, "", perfdata


# Registration of the subcheck mem.linux: wires up the inventory and check
# function defined in this file and names mem_linux_default_levels as the
# variable holding the default parameters. 'includes' lists mem.include
# (presumably the place where parse_proc_meminfo_bytes() is defined, since
# it is not defined in this file).
check_info["mem.linux"] = {
    'inventory_function'        : inventory_mem_linux,
    'check_function'            : check_mem_linux,
    'service_description'       : 'Memory',
    'default_levels_variable'   : 'mem_linux_default_levels',
    'has_perfdata'              : True,
    'group'                     : 'memory_linux',
    "handle_real_time_checks"   : True,
    'includes'                  : [ 'mem.include' ],
}

#.
#   .--mem.used------------------------------------------------------------.
#   |                                                        _             |
#   |           _ __ ___   ___ _ __ ___   _   _ ___  ___  __| |            |
#   |          | '_ ` _ \ / _ \ '_ ` _ \ | | | / __|/ _ \/ _` |            |
#   |          | | | | | |  __/ | | | | || |_| \__ \  __/ (_| |            |
#   |          |_| |_| |_|\___|_| |_| |_(_)__,_|___/\___|\__,_|            |
#   |                                                                      |
#   +----------------------------------------------------------------------+
#   | Memory check that takes into account the swap space. This check is   |
#   | used for Unix-like operating systems.                                |
#   '----------------------------------------------------------------------'

def parse_proc_meminfo(info):
    """Turn the agent section into a dict of integers.

    Each line is expected to start with a label that carries one trailing
    character (the ":" in /proc/meminfo style output), followed by the
    numeric value. The label without its last character becomes the key, the
    second column converted with int() becomes the value. Further columns
    (such as the unit) are ignored, so values keep the unit of the input.
    """
    parsed = {}
    for line in info:
        label = line[0]
        parsed[label[:-1]] = int(line[1])
    return parsed

# The following variable is obsolete and is not used anywhere in this file.
# It is only kept so that Check_MK won't fail if an existing main.mk still
# assigns it.
mem_extended_perfdata = None

def inventory_mem_used(info):
    """Inventory function of mem.used.

    Creates one item-less service with empty parameters if the section has a
    "MemTotal" field, has no "PageTotal" field (sections with that field are
    picked up by the mem.win inventory) and is not recognized by
    is_linux_meminfo(). Returns None in all other cases.
    """
    meminfo = parse_proc_meminfo(info)
    if "MemTotal" not in meminfo:
        return None
    if "PageTotal" in meminfo:
        return None
    if is_linux_meminfo(meminfo):
        return None # handled by more modern check
    return [(None, {})]

def check_mem_used(_no_item, params, info):
    """Check function of mem.used.

    Parses the section with parse_proc_meminfo() and hands the result,
    together with the parameters, to check_memory(), whose return value is
    passed through unchanged.
    """
    return check_memory(params, parse_proc_meminfo(info))

# Registration of the subcheck mem.used. The default parameters come from
# the variable memory_default_levels, which is not defined in this file
# (presumably in mem.include, together with check_memory()).
check_info['mem.used'] = {
    "check_function"          : check_mem_used,
    "inventory_function"      : inventory_mem_used,
    "service_description"     : "Memory used",
    "has_perfdata"            : True,
    "group"                   : "memory",
    "default_levels_variable" : "memory_default_levels",
    "includes"                : [ "mem.include" ],
    "handle_real_time_checks" : True,
}

#.
#   .--mem.win-------------------------------------------------------------.
#   |                                                _                     |
#   |              _ __ ___   ___ _ __ ___ __      _(_)_ __                |
#   |             | '_ ` _ \ / _ \ '_ ` _ \\ \ /\ / / | '_ \               |
#   |             | | | | | |  __/ | | | | |\ V  V /| | | | |              |
#   |             |_| |_| |_|\___|_| |_| |_(_)_/\_/ |_|_| |_|              |
#   |                                                                      |
#   +----------------------------------------------------------------------+
#   | Windows now has a dedicated memory check that reflects the special   |
#   | nature of the page file.                                             |
#   '----------------------------------------------------------------------'

# Special memory and page file check for Windows.
# Default levels as (warn, crit) pairs. check_mem_windows() interprets float
# levels as upper levels on the used percentage and integer levels as lower
# levels on the free amount in MB.
factory_settings["memory_win_default_levels"] = {
    "memory"   : ( 80.0, 90.0 ),
    "pagefile" : ( 80.0, 90.0 ),
}


def inventory_mem_win(info):
    """Inventory function of mem.win.

    Creates one item-less service with empty parameters if the section
    contains both a "MemTotal" and a "PageTotal" field, otherwise returns
    None.
    """
    meminfo = parse_proc_meminfo(info)
    for required_key in ("MemTotal", "PageTotal"):
        if required_key not in meminfo:
            return None
    return [(None, {})]


def check_mem_windows(item, params, info):
    """Check function of mem.win.

    Yields one (state, infotext, perfdata) result for "Memory usage"
    (fields MemTotal/MemFree, parameter "memory") and one for "Commit Charge"
    (fields PageTotal/PageFree, parameter "pagefile"). A result is skipped if
    one of its two fields is missing in the section.

    item   -- unused
    params -- dict with the keys "memory" and "pagefile". A tuple is taken as
              fixed levels (warn, crit): a float level is an upper level on
              the used percentage, any other level is a lower level on the
              free amount in MB. Every non-tuple value is passed on to
              check_levels() (predictive levels). The optional key "average"
              makes the levels apply to the value returned by get_average()
              instead of the current one (the info text calls this a
              "min average", so the setting is presumably in minutes).
    info   -- agent section, parsed with parse_proc_meminfo()

    The performance value and its warn/crit/max are all in MB of *used*
    memory. Levels given as "MB free" are therefore converted to
    total - level.
    """
    meminfo = parse_proc_meminfo(info)
    MB = 1024.0 * 1024
    now = time.time()

    for title,             what,   paramname in [
        ("Memory usage",  "Mem",  "memory"),
        ("Commit Charge", "Page", "pagefile")]:

        key_total = what + "Total"
        key_free  = what + "Free"
        if not ( key_total in meminfo and key_free in meminfo ):
            continue

        total_kb = meminfo[key_total]
        free_kb  = meminfo[key_free]
        used_kb  = total_kb - free_kb
        total_mb = total_kb / 1024.0
        free_mb  = free_kb / 1024.0
        perc     = 100.0 * used_kb / total_kb

        # total_kb / MB converts kB to GB
        infotext = "%s: %.1f%% (%.1f/%.1f GB)" % \
                (title, perc, used_kb / MB, total_kb / MB)

        levels = params[paramname]
        has_fixed_levels = isinstance(levels, tuple)

        if has_fixed_levels:
            warn, crit = levels

            # In perfdata show warn/crit as absolute values, in the same unit
            # and direction as the value itself (MB used)
            if isinstance(warn, float):
                warn_mb = total_kb * warn / 100 / 1024
            else:
                warn_mb = total_mb - warn

            if isinstance(crit, float):
                crit_mb = total_kb * crit / 100 / 1024
            else:
                crit_mb = total_mb - crit

            perfdata = [(paramname, used_kb / 1024.0, warn_mb, crit_mb, 0, total_mb)]

        # Predictive levels have no level information in the performance data
        else:
            perfdata = [(paramname, used_kb / 1024.0, None, None, 0, total_mb)]

        # Do averaging, if configured, just for matching the levels
        if "average" in params:
            average_min = params["average"]
            used_kb = get_average("mem.win.%s" % paramname,
                                           now, used_kb, average_min, initialize_zero = False)
            free_mb  = total_mb - used_kb / 1024.0
            perc     = 100.0 * used_kb / total_kb
            infotext += ", %d min average: %.1f%% (%.1f GB)" % (average_min, perc, used_kb / MB)
            perfdata.append((paramname + "_avg", used_kb / 1024.0))

        # Now check the levels
        if has_fixed_levels:
            if isinstance(crit, float):
                crit_reached = perc >= crit
            else:
                crit_reached = free_mb <= crit

            if isinstance(warn, float):
                warn_reached = perc >= warn
            else:
                warn_reached = free_mb <= warn

            if crit_reached:
                state = 2
            elif warn_reached:
                state = 1
            else:
                state = 0

        # Predictive levels
        else:
            if "average" in params:
                rrd_varname = paramname + "_avg"
            else:
                rrd_varname = paramname
            state, infoadd, perfadd = check_levels(
                used_kb / 1024.0,      # Current value stored in MB in RRDs
                rrd_varname,           # Name of RRD variable
                levels,
                unit = "GB",           # Levels are specified in GB...
                scale = 1024,          # ... in WATO ValueSpec
            )
            if infoadd:
                infotext += ", " + infoadd
            perfdata += perfadd

        yield state, infotext, perfdata


# Registration of the subcheck mem.win with the functions defined in this
# file; default parameters are taken from memory_win_default_levels.
check_info["mem.win"] = {
    'check_function':          check_mem_windows,
    'inventory_function':      inventory_mem_win,
    'service_description':     'Memory and pagefile',
    'has_perfdata':            True,
    'group':                   'memory_pagefile_win',
    'default_levels_variable': 'memory_win_default_levels',
    "handle_real_time_checks" : True,
}

#.
#   .--mem.vmalloc---------------------------------------------------------.
#   |                                                   _ _                |
#   |    _ __ ___   ___ _ __ ___ __   ___ __ ___   __ _| | | ___   ___     |
#   |   | '_ ` _ \ / _ \ '_ ` _ \\ \ / / '_ ` _ \ / _` | | |/ _ \ / __|    |
#   |   | | | | | |  __/ | | | | |\ V /| | | | | | (_| | | | (_) | (__     |
#   |   |_| |_| |_|\___|_| |_| |_(_)_/ |_| |_| |_|\__,_|_|_|\___/ \___|    |
#   |                                                                      |
#   +----------------------------------------------------------------------+
#   | This very specific check checks the usage and fragmentation of the   |
#   | address space 'vmalloc' that can be problematic on 32-Bit systems.   |
#   | It is superseded by the new check mem.linux and will be removed      |
#   | soon.                                                                |
#   '----------------------------------------------------------------------'

# warn, crit, warn_chunk, crit_chunk. Integers are in MB, floats are in percent
# of the total vmalloc area. warn/crit are upper levels for the used space,
# warn_chunk/crit_chunk are lower levels for the largest free chunk.
mem_vmalloc_default_levels = ( 80.0, 90.0, 64, 32 )

def inventory_mem_vmalloc(info):
    """Inventory function of mem.vmalloc.

    Creates one item-less service using mem_vmalloc_default_levels if the
    section is not handled by mem.linux, reports vmalloc data that is not
    all zero and has a vmalloc area smaller than 4096 MB. Returns None
    otherwise.
    """
    meminfo = parse_proc_meminfo(info)
    if is_linux_meminfo(meminfo):
        return # handled by new Linux memory check

    # Newer kernel versions report wrong data, i.e. both VmallocUsed and
    # VmallocChunk are zero
    if "VmallocTotal" in meminfo and \
        not (meminfo["VmallocUsed"] == 0 and meminfo["VmallocChunk"] == 0):
        # Do not check this on 64 Bit systems. They have an almost
        # infinite vmalloc area.
        vmalloc_mb = meminfo["VmallocTotal"] / 1024.0 # kB -> MB
        if vmalloc_mb < 4096:
            return [ (None, "mem_vmalloc_default_levels") ]

def check_mem_vmalloc(item, params, info):
    """Check function of mem.vmalloc.

    item   -- unused
    params -- tuple (warn, crit, warn_chunk, crit_chunk). Float levels are
              percentages of the total vmalloc area, all other levels are
              taken as MB. warn/crit are upper levels for the used space,
              warn_chunk/crit_chunk lower levels for the largest chunk.
    info   -- agent section, parsed with parse_proc_meminfo()

    Returns (state, infotext, perfdata) where state is the worse of the two
    evaluations and all performance values are in MB.
    """
    meminfo = parse_proc_meminfo(info)
    total_mb = meminfo["VmallocTotal"] / 1024.0
    used_mb  = meminfo["VmallocUsed"] / 1024.0
    chunk_mb = meminfo["VmallocChunk"] / 1024.0
    warn, crit, warn_chunk, crit_chunk = params

    def level_in_mb(level):
        # convert levels from percentage to MB values
        if type(level) is float:
            return total_mb * level / 100
        return float(level)

    evaluations = (
        # perf variable, value, warn, crit, lower levels?, label
        ( "used",  used_mb,  warn,       crit,       False, "used" ),
        ( "chunk", chunk_mb, warn_chunk, crit_chunk, True,  "largest chunk" ),
    )

    worst_state = 0
    texts = []
    perfdata = []
    for perf_var, value_mb, warn_level, crit_level, lower_is_bad, label in evaluations:
        warn_mb = level_in_mb(warn_level)
        crit_mb = level_in_mb(crit_level)

        text = "%s %.1f MB" % (label, value_mb)
        # For lower levels the result of the ">=" comparison is inverted
        if (value_mb >= crit_mb) != lower_is_bad:
            this_state = 2
            text += " (critical at %.1f MB!!)" % crit_mb
        elif (value_mb >= warn_mb) != lower_is_bad:
            this_state = 1
            text += " (warning at %.1f MB!)" % warn_mb
        else:
            this_state = 0

        worst_state = max(worst_state, this_state)
        texts.append(text)
        perfdata.append( (perf_var, value_mb, warn_mb, crit_mb, 0, total_mb) )

    infotext = ("total %.1f MB, " % total_mb) + ", ".join(texts)
    return (worst_state, infotext, perfdata)

# Registration of the subcheck mem.vmalloc. No default levels variable is
# declared here; inventory_mem_vmalloc() hands out the name
# "mem_vmalloc_default_levels" as the parameters of the service instead.
check_info["mem.vmalloc"] = {
    'inventory_function':      inventory_mem_vmalloc,
    'check_function':          check_mem_vmalloc,
    'service_description':     'Vmalloc address space',
    'has_perfdata':            True,
    "handle_real_time_checks" : True,
}
