如何在Python中正确引用Bazel数据文件?

7

假设我有以下的 BUILD 文件

# Library target for foo.py; //bar:data.json is listed as a data dependency.
py_library(
  name = "foo",
  # The attribute that lists source files is `srcs` (plural), not `src`.
  srcs = ["foo.py"],
  data = ["//bar:data.json"],
)

我应该如何在foo.py文件中引用data.json文件?我希望写出类似下面的代码,那么some_path应该替换成什么?

with open(os.path.join(some_path, "bar/data.json"), 'r') as fp:
    data = json.load(fp)

我在网上找不到关于*.runfiles的通用文档,希望能得到一些指引!

3个回答

6
下面这个函数在我所知道的所有情况下,都应该能返回任意py_binary的runfiles根路径:
import os
import re
import sys

def find_runfiles():
    """Find the runfiles tree (useful when _not_ run from a zip file).

    Two candidates are tried, in order:

    1. ``<launcher>.runfiles`` next to the launcher named by
       ``sys.argv[0]``; it is used only if that directory exists.
    2. The longest prefix of the launcher's absolute path that ends in
       ``.runfiles`` (the launcher was started from inside the tree).
       This prefix is returned without checking that it exists.

    Symlinks are not resolved; only ``os.path.abspath`` is applied.

    Returns:
        The path of the runfiles directory.

    Raises:
        RuntimeError: If neither candidate applies.
    """
    stub_filename = os.path.abspath(sys.argv[0])

    module_space = stub_filename + '.runfiles'
    if os.path.isdir(module_space):
        return module_space

    # Greedy match: keeps everything up to the last ".runfiles" in the path.
    matchobj = re.match(r"(.*\.runfiles)", stub_filename)
    if matchobj:
        return matchobj.group(1)

    raise RuntimeError('Cannot find .runfiles directory for %s' %
                       sys.argv[0])

对于您提出的问题中的示例,您可以像这样使用它:
# The data file is addressed relative to the runfiles root returned by
# find_runfiles(); "name_of_workspace" is presumably a placeholder for the
# real workspace name — replace it accordingly.
with open(os.path.join(find_runfiles(), "name_of_workspace/bar/data.json"), 'r') as fp:
    data = json.load(fp)

请注意,如果您把Python应用程序构建成压缩的可执行文件(例如使用 subpar),那么上面这个函数就不适用了;这种情况下需要一些额外的代码。下面的代码段包含 get_resource_filename() 和 get_resource_directory(),它们同时适用于常规的py_binary和.par二进制文件。
import atexit
import os
import re
import shutil
import sys
import tempfile
import zipfile


 def get_resource_filename(path):
    zip_path = get_zip_path(sys.modules.get("__main__").__file__)
    if zip_path:
        tmpdir = tempfile.mkdtemp()
        atexit.register(lambda: shutil.rmtree(tmpdir, ignore_errors=True))
        zf = BetterZipFile(zip_path)
        zf.extract(member=path, path=tmpdir)
        return os.path.join(tmpdir, path)
    elif os.path.exists(path):
        return path
    else:
        path_in_runfiles = os.path.join(find_runfiles(), path)
        if os.path.exists(path_in_runfiles):
            return path_in_runfiles
        else:
            raise ResourceNotFoundError


def get_resource_directory(path):
    """Find or extract an entire subtree and return its location.

    Lookup order:

    1. If the ``__main__`` module lives inside a zip archive, every member
       at or below ``path`` is extracted into a fresh temporary directory,
       which is removed at interpreter exit.
    2. If ``path`` exists as given, it is returned unchanged.
    3. Otherwise ``path`` is looked up below the runfiles tree.

    Args:
        path: Archive directory name when running from a zip, otherwise a
            filesystem path (direct, or relative to the runfiles root).

    Returns:
        A filesystem path to the directory.

    Raises:
        ResourceNotFoundError: If, outside a zip, the directory exists
            neither as given nor below the runfiles tree.
        RuntimeError: Propagated from ``find_runfiles()`` when no runfiles
            directory can be located.
    """
    zip_path = get_zip_path(sys.modules.get("__main__").__file__)
    if zip_path:
        tmpdir = tempfile.mkdtemp()
        atexit.register(shutil.rmtree, tmpdir, ignore_errors=True)
        # Match on a directory boundary: a bare startswith(path) would also
        # pull in siblings such as "data2/..." when "data" is requested.
        root = path.rstrip("/")
        prefix = root + "/" if root else ""
        # Close the archive once extraction is done so the file handle is
        # not held open for the rest of the process.
        with BetterZipFile(zip_path) as zf:
            members = [
                fn for fn in zf.namelist()
                if fn == root or fn.startswith(prefix)
            ]
            zf.extractall(members=members, path=tmpdir)
        return os.path.join(tmpdir, path)

    if os.path.exists(path):
        return path

    path_in_runfiles = os.path.join(find_runfiles(), path)
    if os.path.exists(path_in_runfiles):
        return path_in_runfiles

    # Carry the requested path so the failure is diagnosable.
    raise ResourceNotFoundError(path)


def get_zip_path(path):
    """If path is inside a zip file, return the zip file's path.

    Walks from ``path`` up through its parent directories and returns the
    first component that ``zipfile.is_zipfile`` accepts.

    Args:
        path: A filesystem path, possibly pointing "into" a zip archive
            (e.g. ``/x/app.par/pkg/main.py``). May be relative or empty.

    Returns:
        The path of the enclosing zip file, or ``None`` if no component of
        ``path`` is a zip file.
    """
    while path:
        if zipfile.is_zipfile(path):
            return path
        parent = os.path.dirname(path)
        # dirname() is a fixed point at a filesystem root ("/", "C:\\");
        # stop there instead of looping forever.
        if parent == path:
            return None
        path = parent
    # Reached "" (relative path exhausted) without finding an archive.
    return None


class ResourceNotFoundError(RuntimeError):
    pass

def find_runfiles():
    """Find the runfiles tree (useful when _not_ run from a zip file).

    Two candidates are tried, in order:

    1. ``<launcher>.runfiles`` next to the launcher named by
       ``sys.argv[0]``; it is used only if that directory exists.
    2. The longest prefix of the launcher's absolute path that ends in
       ``.runfiles`` (the launcher was started from inside the tree).
       This prefix is returned without checking that it exists.

    Symlinks are not resolved; only ``os.path.abspath`` is applied.

    Returns:
        The path of the runfiles directory.

    Raises:
        RuntimeError: If neither candidate applies.
    """
    stub_filename = os.path.abspath(sys.argv[0])

    module_space = stub_filename + '.runfiles'
    if os.path.isdir(module_space):
        return module_space

    # Greedy match: keeps everything up to the last ".runfiles" in the path.
    matchobj = re.match(r"(.*\.runfiles)", stub_filename)
    if matchobj:
        return matchobj.group(1)

    raise RuntimeError('Cannot find .runfiles directory for %s' %
                       sys.argv[0])


class BetterZipFile(zipfile.ZipFile):
    """Shim around ZipFile that preserves permissions on extract."""

    def extract(self, member, path=None, pwd=None):
        """Extract one member and re-apply its stored permission bits.

        Args:
            member: Member name or ``zipfile.ZipInfo``.
            path: Destination directory; defaults to the current working
                directory.
            pwd: Password for encrypted members, if any.

        Returns:
            The path of the extracted file or directory.
        """
        if not isinstance(member, zipfile.ZipInfo):
            member = self.getinfo(member)

        if path is None:
            path = os.getcwd()

        ret_val = self._extract_member(member, path, pwd)
        # The high 16 bits of external_attr carry the Unix st_mode when the
        # archive recorded one; keep only the permission bits.
        mode = (member.external_attr >> 16) & 0o7777
        # A zero mode means no permissions were recorded; chmod(…, 0) would
        # make the extracted file unreadable, so leave the default mode.
        if mode:
            os.chmod(ret_val, mode)
        return ret_val

    def extractall(self, path=None, members=None, pwd=None):
        """Extract several members, preserving permissions for each.

        Routed through ``extract()`` explicitly because the stock
        Python 3 ``ZipFile.extractall`` calls ``_extract_member`` directly
        and would skip the permission fix-up.

        Args:
            path: Destination directory; defaults to the current working
                directory.
            members: Iterable of member names or ``ZipInfo`` objects;
                defaults to every member of the archive.
            pwd: Password for encrypted members, if any.
        """
        if members is None:
            members = self.namelist()
        for member in members:
            self.extract(member, path, pwd)

使用第二个代码片段,你的示例将如下所示:
# get_resource_filename() returns a filesystem path for the resource;
# "name_of_workspace" is presumably a placeholder for the real workspace
# name — replace it accordingly.
with open(get_resource_filename("name_of_workspace/bar/data.json"), 'r') as fp:
    data = json.load(fp)

6

简短回答: os.path.dirname(__file__)

以下是完整示例:

$ ls
bar/  BUILD  foo.py  WORKSPACE

$ cat BUILD
# Executable Python target built from foo.py.
py_binary(
    name = "foo",
    srcs = ["foo.py"],
    # data.json from the //bar package, listed as a data dependency.
    data = ["//bar:data.json"],
)

$ cat foo.py
import json
import os

# Resolve the data file relative to the directory that holds this script,
# then load it as JSON and print the parsed value.
script_dir = os.path.dirname(__file__)
data_path = os.path.join(script_dir, "bar/data.json")
with open(data_path, 'r') as fp:
  payload = json.load(fp)
  print(payload)

$ cat bar/BUILD
exports_files(["data.json"])

$ bazel run :foo

编辑:当您的软件包位于子目录中时,它可能无法正常工作。您可能需要使用os.path.dirname返回上一级目录。


1
为了方便之后看到这个问题的人:可以使用 bazel 的 rules_python 包提供的 "runfiles" 库,这是规范的解决方案。
在我的环境中,大致如下:
# NOTE(review): this import path is the one from the answerer's environment;
# it may differ between rules_python versions — confirm against your setup.
from rules_python.python.runfiles import runfiles

# Resource path; the leading "workspace" component is presumably the
# workspace name — verify for your project.
resource = "workspace/bar/data.json"
r = runfiles.Create()
# Presumably resolves the resource to its on-disk location — confirm in the
# runfiles library documentation.
path = r.Rlocation(resource)

更多文档可以在这里找到:

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接