#! /usr/bin/env python

# Copyright (c) 2022 NVIDIA Corporation
# Reply-To: Allison Vacanti
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
# Released under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.

help_text = """%(prog)s [--log-threshold GiB] [--fail-threshold GiB] [--log-file PATH]

This script:

1. Runs `top` in batch mode, continuously extracting the memory usage of
   each process.
2. If a process uses at least `log_threshold` (or `fail_threshold`) GiB and
   exceeds any other recorded entry for the process, it is stored in
   `entries`.
3. When this script receives SIGINT, it writes `log_file`, which contains all
   recorded max-memory-per-process entries. Entries at or above
   `fail_threshold` GiB are marked FAIL, all others PASS.
"""

import argparse
import os
import re
import signal
import sys
from subprocess import Popen, PIPE, STDOUT

parser = argparse.ArgumentParser(prog='memmon.py', usage=help_text)
parser.add_argument('--log-threshold', type=float, dest='log_threshold',
                    default=0.5,
                    help='Logging threshold in GiB.')
parser.add_argument('--fail-threshold', type=float, dest='fail_threshold',
                    default=2,
                    help='Failure threshold in GiB.')
parser.add_argument('--log-file', type=str, dest='log_file',
                    default='memmon_log',
                    help='Output file for log entries.')

# Parsed command-line options; populated by main() so that importing this
# module neither parses sys.argv nor launches `top`.
args = None

# Maps a command string to the largest memory usage (GiB) seen for it.
entries = {}

# Matches "<memory> <command>" rows: group 1 is the memory figure with an
# optional unit suffix, group 2 is the rest of the line (the command).
regex = re.compile(r"^\s*([0-9.]+[kmgtp]?)\s+(.+)\s*$")

# Power of 1024 that converts a value with the given suffix into GiB.
_UNIT_EXPONENT = {"k": -2, "m": -1, "g": 0, "t": 1, "p": 2}

# Exponent applied when the value carries no suffix (treated as bytes).
# NOTE(review): verify against top(1) that an unsuffixed RES value really is
# in bytes -- top's default task-memory unit may be KiB.
_BYTES_EXPONENT = -3


def parse_mem(mem_str):
    """Convert a memory string from top into floating point GiB.

    Args:
        mem_str: A number optionally followed by one of the suffixes
            k, m, g, t or p. A value without a suffix is treated as bytes.

    Returns:
        The amount of memory in GiB as a float.

    Raises:
        ValueError: If the numeric part cannot be parsed as a float.
    """
    suffix = mem_str[-1:]
    if suffix in _UNIT_EXPONENT:
        return float(mem_str[:-1]) * 1024.0 ** _UNIT_EXPONENT[suffix]
    return float(mem_str) * 1024.0 ** _BYTES_EXPONENT


def _record_sample(line, log_threshold, fail_threshold):
    """Update `entries` from one decoded line of top output.

    Lines that do not look like "<memory> <command>" are ignored, as are
    samples below both thresholds and samples smaller than the value already
    recorded for the same command.
    """
    match = regex.match(line)
    if not match:
        return
    try:
        mem = parse_mem(match.group(1))
    except ValueError:
        # The pattern admits tokens such as "1.2.3"; skip them instead of
        # aborting the whole monitoring run.
        return
    if mem < log_threshold and mem < fail_threshold:
        return
    com = match.group(2)
    if com in entries and entries[com] > mem:
        return
    entries[com] = mem


def signal_handler(sig, frame):
    """Write every recorded entry to the log file, then exit with status 0.

    Entries are written largest first, one per line, tagged FAIL when the
    usage is at or above the failure threshold and PASS otherwise.
    """
    # Sort by mem:
    ranked = sorted(entries.items(), key=lambda x: x[1], reverse=True)

    with open(args.log_file, "w") as lf:
        for com, mem in ranked:
            status = "FAIL" if mem >= args.fail_threshold else "PASS"
            lf.write("%4s | %3.1f GiB | %s\n" % (status, mem, com))

    sys.exit(0)


def main():
    """Parse options, launch top and record samples until interrupted."""
    global args
    args, _unused = parser.parse_known_args()

    signal.signal(signal.SIGINT, signal_handler)

    # Find the toprc config file and configure top's env.
    # This config:
    # - Hides all columns except for RES and COMMAND
    # - Sorts by RES
    # - Enables long command strings (-c)
    script_dir = os.path.dirname(os.path.realpath(__file__))
    config_dir = os.path.join(script_dir, 'memmon_config')

    # Note: `env` replaces the child's entire environment, so top only sees
    # XDG_CONFIG_HOME.
    proc = Popen(["top", "-b", "-w", "512"],
                 stdin=PIPE, stdout=PIPE, stderr=STDOUT,
                 env={"XDG_CONFIG_HOME": config_dir})

    for raw_line in proc.stdout:
        # Command lines may contain bytes that are not valid UTF-8; replace
        # them rather than letting a decode error stop the monitor.
        line = raw_line.decode(errors="replace")
        _record_sample(line, args.log_threshold, args.fail_threshold)


if __name__ == "__main__":
    main()