Jupyter notebooks: memory usage per notebook


The memory on my lab's server (Ubuntu) keeps filling up because users never shut down old notebooks. I'd like to get a better idea of how much memory each notebook is taking up. I can sum the (rough) memory usage of all Jupyter notebooks run by each user, but what I want is the total memory usage of each individual notebook, so that I can shut down the particularly memory-hungry ones (or tell another user to shut his/hers down). I quickly put together the following code to get the approximate memory usage per jupyter kernel, but I don't know how to associate a kernel ID with a particular notebook.

import os
import pwd
import psutil
import pandas as pd

UID   = 1
EUID  = 2

pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]

df = []
for pid in pids:
    try:
        ret = open(os.path.join('/proc', pid, 'cmdline'), 'rb').read()
    except IOError: # proc has already terminated
        continue
    
    # jupyter notebook processes
    if len(ret) > 0 and 'share/jupyter/runtime' in ret:
        process = psutil.Process(int(pid))
        mem = process.memory_info()[0] 
           
        # user name for pid
        for ln in open('/proc/%d/status' % int(pid)):
            if ln.startswith('Uid:'):
                uid = int(ln.split()[UID])
                uname = pwd.getpwuid(uid).pw_name

        # user, pid, memory, proc_desc
        df.append([uname, pid, mem, ret])
        
df = pd.DataFrame(df)
df.columns = ['user', 'pid', 'memory', 'proc_desc']
df
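
For context, the missing link is the notebook server's REST API: each running server exposes /api/sessions, which lists every session together with its kernel ID and notebook path, and that kernel ID is the same one embedded in the kernel-<id>.json connection file visible in the process command line. A minimal sketch, assuming a single server at http://127.0.0.1:8888 with no password or token (the key holding the path differs between Jupyter versions):

import requests

# each session pairs a kernel ID with the notebook it belongs to
sessions = requests.get('http://127.0.0.1:8888/api/sessions').json()
for sess in sessions:
    kernel_ID = sess['kernel']['id']
    # newer servers expose the path at the top level, older ones nest it under 'notebook'
    path = sess.get('path') or sess['notebook']['path']
    print(kernel_ID, path)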

I can't run this because I get a "PackageNotFoundError: Packages missing in current channels: - pwd" error and the module can't be found. Is there another solution that doesn't require pwd? I'm on a Windows server with Anaconda Python 2.7 installed. - ihightower
2 Answers


I made some improvements to sharchaea's script for portability and speed.

Mainly: only check the ports that notebooks are actually running on, check different hostname options, improve the kernel-process check, and check for either ipython or jupyter.

import argparse
import re
import subprocess

import pandas as pd
import psutil
import requests
import tabulate

kernel_regex = re.compile(r".+kernel-(.+)\.json")
notebook_regex = re.compile(r"(https?://([^:/]+):?(\d+)?)/?(\?token=([a-z0-9]+))?")


def get_proc_info():
    pids = psutil.pids()

    # memory info from psutil.Process
    df_mem = []

    for pid in pids:
        try:
            proc = psutil.Process(pid)
            cmd = " ".join(proc.cmdline())
        except psutil.NoSuchProcess:
            continue

        if len(cmd) > 0 and ("jupyter" in cmd or "ipython" in cmd) and "kernel" in cmd:
            # kernel
            kernel_ID = re.sub(kernel_regex, r"\1", cmd)

            # memory
            mem = proc.memory_info()[0] / float(1e9)

            uname = proc.username()

            # user, pid, memory, kernel_ID
            df_mem.append([uname, pid, mem, kernel_ID])

    df_mem = pd.DataFrame(df_mem)
    df_mem.columns = ["user", "pid", "memory_GB", "kernel_ID"]
    return df_mem


def get_running_notebooks():
    notebooks = []

    for n in subprocess.Popen(
        ["jupyter", "notebook", "list"], stdout=subprocess.PIPE
    ).stdout.readlines()[1:]:
        match = re.match(notebook_regex, n.decode())
        if match:
            base_url, host, port, _, token = match.groups()
            notebooks.append({"base_url": base_url, "token": token})
        else:
            print("Unknown format: {}".format(n.decode()))

    return notebooks


def get_session_info(password=None):
    df_nb = []
    kernels = []

    for notebook in get_running_notebooks():
        s = requests.Session()
        if notebook["token"] is not None:
            s.get(notebook["base_url"] + "/?token=" + notebook["token"])
        else:
            # do a get to the base url to get the session cookies
            s.get(notebook["base_url"])
        if password is not None:
            # Seems jupyter auth process has changed, need to first get a cookie,
            # then add that cookie to the data being sent over with the password
            data = {"password": password}
            data.update(s.cookies)
            s.post(notebook["base_url"] + "/login", data=data)

        res = s.get(notebook["base_url"] + "/api/sessions")

        if res.status_code != 200:
            raise Exception(res.json())

        for sess in res.json():
            kernel_ID = sess["kernel"]["id"]
            if kernel_ID not in kernels:
                kernel = {
                    "kernel_ID": kernel_ID,
                    "kernel_name": sess["kernel"]["name"],
                    "kernel_state": sess["kernel"]["execution_state"],
                    "kernel_connections": sess["kernel"]["connections"],
                    # "notebook_url": notebook["base_url"] + "/notebook/" + sess["id"],
                    "notebook_path": sess["path"],
                }
                kernel.update(notebook)
                df_nb.append(kernel)
                kernels.append(kernel_ID)

    df_nb = pd.DataFrame(df_nb)
    del df_nb["token"]
    return df_nb


def parse_args():
    parser = argparse.ArgumentParser(description="Find memory usage.")
    parser.add_argument("--password", help="password (only needed if pass-protected)")

    return parser.parse_args()


def main(password=None, print_ascii=False):
    df_mem = get_proc_info()
    df_nb = get_session_info(password)

    # joining tables
    df = pd.merge(df_nb, df_mem, on=["kernel_ID"], how="inner")
    df = df.sort_values("memory_GB", ascending=False).reset_index(drop=True)
    if print_ascii:
        print(tabulate.tabulate(df, headers=(df.columns.tolist())))
    return df


if __name__ == "__main__":
    args = vars(parse_args())
    main(args["password"], print_ascii=True)

I'll probably continue to make updates to this on the gist.

Edit: The code has been updated to use token authentication for newer versions of Jupyter, to rely only on psutil so it is Windows-compatible, and to run on Python 3.
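
As a side note on the token handling: newer notebook servers also accept the token in an Authorization header, which avoids the cookie/login dance entirely. A small sketch with a placeholder URL and token (in practice both come from the jupyter notebook list output parsed above):

import requests

base_url = 'http://127.0.0.1:8888'   # placeholder server URL
token = '0123456789abcdef'           # placeholder token from jupyter notebook list

# sending "Authorization: token <token>" authenticates the request without a session cookie
headers = {'Authorization': 'token ' + token}
sessions = requests.get(base_url + '/api/sessions', headers=headers).json()
print([s['kernel']['id'] for s in sessions])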


I can't run this because of the error PackageNotFoundError: Packages missing in current channels: - pwd. Is there another solution that doesn't need pwd? I'm using Anaconda Python 2.7 on a Windows server. - ihightower
@ihightower Hmm, I suspect this won't work on Windows. It gets most of its information about processes from proc, which is a Unix mechanism for storing data about running processes, services, etc. https://dev59.com/p2025IYBdhLWcg3wyJCV might give you a starting point. - aiguofer
@ihightower I've updated the code so it no longer uses pwd/proc. It now relies only on the psutil library, so it should be portable and work on Windows (see the condensed sketch after these comments). - aiguofer
@aiguofer When you call it in the if __name__ == '__main__' block, you should pass password=args["password"] to main. Otherwise it could get interpreted as the hostname, right? - tricky
@tricky Thanks for pointing that out :) I had changed the script quite a bit and forgot to update main's signature. The hostname is now found using jupyter notebook list, so you don't have to pass it in. In the future I might add an option to run this on remote servers with Fabric. - aiguofer
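
For anyone who wants to check the Windows-friendly approach in isolation, here is a minimal psutil-only sketch of the process scan (essentially a condensed get_proc_info from the answer above; it assumes a psutil version recent enough to support the attrs argument of process_iter):

import psutil

# cross-platform scan: psutil supplies cmdline, username and memory on Linux, macOS and Windows,
# so neither /proc nor the pwd module is needed
for proc in psutil.process_iter(attrs=['pid', 'cmdline', 'username', 'memory_info']):
    cmd = ' '.join(proc.info['cmdline'] or [])
    if ('jupyter' in cmd or 'ipython' in cmd) and 'kernel' in cmd:
        mem_gb = proc.info['memory_info'].rss / float(1e9)
        print(proc.info['username'], proc.info['pid'], round(mem_gb, 3))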

I seem to have figured out a working solution to my own problem:
import os
import pwd
import psutil
import re
import string
import json
import urllib2
import pandas as pd

UID   = 1
EUID  = 2
regex = re.compile(r'.+kernel-(.+)\.json')

pids = [pid for pid in os.listdir('/proc') if pid.isdigit()]

# memory info from psutil.Process
df_mem = []
for pid in pids:
    try:
        ret = open(os.path.join('/proc', pid, 'cmdline'), 'rb').read()
    except IOError: # proc has already terminated
        continue

    # jupyter notebook processes
    if len(ret) > 0 and 'share/jupyter/runtime' in ret:
        # kernel
        kernel_ID = re.sub(regex, r'\1', ret)
        kernel_ID = filter(lambda x: x in string.printable, kernel_ID)

        # memory
        process = psutil.Process(int(pid))
        mem = process.memory_info()[0] / float(1e9)


        # user name for pid
        for ln in open('/proc/{}/status'.format(int(pid))):
            if ln.startswith('Uid:'):
                uid = int(ln.split()[UID])
                uname = pwd.getpwuid(uid).pw_name

        # user, pid, memory, kernel_ID
        df_mem.append([uname, pid, mem, kernel_ID])

df_mem = pd.DataFrame(df_mem)
df_mem.columns = ['user', 'pid', 'memory_GB', 'kernel_ID']


# notebook info from probing ports for running notebook servers
df_nb = []
for port in xrange(5000,30000):
    sessions = None
    try:
        url = 'http://127.0.0.1:{}/api/sessions'.format(port)
        sessions = json.load(urllib2.urlopen(url))
    except urllib2.URLError:
        sessions = None

    if sessions:
        for sess in sessions:
            kernel_ID = str(sess['kernel']['id'])
            notebook_path = sess['notebook']['path']
            df_nb.append([port, kernel_ID, notebook_path])

df_nb = pd.DataFrame(df_nb)
df_nb.columns = ['port', 'kernel_ID', 'notebook_path']


# joining tables
df = pd.merge(df_nb, df_mem, on=['kernel_ID'], how='inner')
df.sort_values('memory_GB', ascending=False)
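
Once the memory hogs are identified, the same REST API can also be used to shut a kernel down instead of killing its process by pid: the notebook server accepts DELETE /api/kernels/<kernel_ID>. A sketch with placeholder values (a token- or password-protected server would additionally need the authentication handling from the other answer):

import requests

port = 8888                                   # placeholder: port of the server that owns the kernel
kernel_ID = 'replace-with-kernel_ID-from-df'  # placeholder: taken from the merged table above

# DELETE /api/kernels/<id> asks the server to shut that kernel down cleanly
requests.delete('http://127.0.0.1:{}/api/kernels/{}'.format(port, kernel_ID))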
