如何在使用 pyinstaller 创建可执行文件时处理 pyproj datadir/CRS 错误

4
我正在编写的程序会在多个邮政编码区域内各生成若干个点,然后计算这些点到距目标邮政编码 5 英里以内的其他邮政编码区域中每个点的距离。这是通过调用谷歌地图距离矩阵(Distance Matrix)服务并记录返回的距离数据来实现的。我写了一个比较复杂的函数,它同时做了好几件事(我认为它还需要进一步拆分)。我认为问题就出在这个函数里。
一切都按预期工作,但是当我尝试使用Pyinstaller创建可执行文件时,加载时会收到几个错误消息,当我尝试运行程序的主要函数时也有错误。这些错误似乎集中在pyproj和geopandas上。
我在其他地方看到过这个问题。我无法成功应用那些地方讨论的解决方案。提出的解决方案包括:
- 将 pyproj 降级到 1.9.6(参见下面的错误)
- 在 pyinstaller 中添加 hook 文件(目录中已经有一个 hook 文件)
- 在生成的规格(spec)文件的 hiddenimports 部分中加入 pyproj._datadir 和 pyproj.datadir
- 使用 os.environ['PROJ_LIB'],并将其设置为在“/share”中找到的 share 文件夹
我正在使用的软件包:
import pandas as pd
import tkinter as tk
import tkinter.filedialog
import os
import geopandas as gpd
from shapely.geometry import Point,LineString
import shapely.wkt
import googlemaps
from googlemaps.exceptions import ApiError
import datetime
from statistics import median
import _thread

规格文件:


# -*- mode: python -*-
# PyInstaller spec file for building main.py as a one-folder bundle
# (EXE is created with exclude_binaries=True and the files are gathered
# by COLLECT below).

block_cipher = None


# Analysis step: starts from main.py and resolves its imports.
# NOTE(review): `datas` is empty, so no non-Python data files are listed
# explicitly here. The runtime errors quoted in this post report
# "Valid PROJ data directory not found" / "Cannot find proj.db", so the
# pyproj data directory presumably needs to be added to `datas` (or
# supplied by a hook) -- confirm against the PyInstaller and pyproj docs.
a = Analysis(['main.py'],
             pathex=['C:\\Users\\Keagan\\PycharmProjects\\upwork_jobs\\pet_sitting2\\gui'],
             binaries=[],
             datas=[],
             hiddenimports=['fiona._shim','fiona.schema','pyproj._datadir','pyproj.datadir'],
             hookspath=[],
             runtime_hooks=[],
             excludes=[],
             win_no_prefer_redirects=False,
             win_private_assemblies=False,
             cipher=block_cipher,
             noarchive=False)
# Archive of the pure-Python modules found by the analysis.
pyz = PYZ(a.pure, a.zipped_data,
             cipher=block_cipher)
# Executable named 'main'; console=True keeps a console window attached.
exe = EXE(pyz,
          a.scripts,
          [],
          exclude_binaries=True,
          name='main',
          debug=False,
          bootloader_ignore_signals=False,
          strip=False,
          upx=True,
          console=True )
# Gathers the executable together with the binaries, zipfiles and datas
# from the analysis into the output collection named 'main'.
coll = COLLECT(exe,
               a.binaries,
               a.zipfiles,
               a.datas,
               strip=False,
               upx=True,
               name='main')

主要功能:

    def model_distances(self, reference_names_list, reference_zips_df,zips_and_points_gdf,api_key):
        """Query the Google Distance Matrix for point pairs in nearby zip codes.

        For every name in ``reference_names_list`` the method walks the rows
        of ``reference_zips_df`` with that name, then every point in
        ``zips_and_points_gdf`` belonging to that zip code. Around the zip
        centre point it builds a buffer, collects the other zip codes whose
        points intersect that buffer, and requests the distance/time from
        the point to every point in those zip codes that carries the same
        name. Pairs already present in the results list (in either
        direction) are skipped.

        Results are periodically written to
        ``output_files/completed_locations_<name>.xlsx`` and an existing
        file of that name is loaded as a backup on start.

        Args:
            reference_names_list: iterable of names to process.
            reference_zips_df: frame whose rows expose ``name`` and ``zip``.
            zips_and_points_gdf: frame whose rows expose ``zip_left``,
                ``name_left``, ``zip_center_point`` (WKT text) and
                ``geometry``.
            api_key: key passed to ``googlemaps.Client``.

        Returns:
            A ``pandas.DataFrame`` built from all collected result records.

        NOTE(review): the method prompts on stdin via ``input()`` and may
        call ``quit()``; it is not purely computational.
        """

        gmaps = googlemaps.Client(api_key)

        def find_key_value_connection(poi, to_location, list_of_dicts):
            """Return True if a record links poi and to_location in either direction."""

            for item in list_of_dicts:
                # `and` binds tighter than `or`, so this reads as
                # (poi -> to_location) or (to_location -> poi).
                if poi == item["poi"] and to_location == item["to_location"] or to_location == item[
                    "poi"] and poi == \
                        item["to_location"]:
                    return True

            return False

        def projection(origin_projection, to_projection, geometry_object):
            """Apply a pyproj transform between two projections to a geometry.

            NOTE(review): `partial`, `pyproj` and `transform` are not among
            the imports shown in this post; presumably functools.partial and
            shapely.ops.transform -- confirm. The traceback in this post
            ends inside this helper, at the pyproj.Proj(init=...) call.
            """
            project = partial(
                pyproj.transform,
                pyproj.Proj(init=origin_projection),
                pyproj.Proj(init=to_projection)
            )

            return transform(project, geometry_object)

        # Zip codes found inside the current point's buffer (reset per point).
        zip_code_intersect_list = []

        # Scratch record that is refilled and copied into the list below.
        completed_locations_dict = {}
        completed_locations_list = []

        # `count` triggers the periodic save; `google_credit_count` tracks
        # how many API requests were made since the last key prompt.
        count = 0
        google_credit_count = 0
        completed_locations_df = None

        # Buffers are appended here but not read again within this method.
        buffer_list = []



        for name in reference_names_list:
            print("we are on: {}".format(name))

            # Load an existing backup only once (while no frame is loaded yet).
            if os.path.isfile("output_files/completed_locations_{}.xlsx".format(name)) and completed_locations_df is None:
                print("found backup, opening it")

                completed_locations_df = pd.read_excel("output_files/completed_locations_{}.xlsx".format(name))

                for item in completed_locations_df.itertuples():
                    completed_locations_dict["poi"] = int(item.poi)
                    completed_locations_dict["to_location"] = int(item.to_location)
                    completed_locations_dict["poi_zip"] = item.poi_zip
                    completed_locations_dict["to_zip"] = item.to_zip
                    completed_locations_dict["poi_name"] = item.poi_name
                    completed_locations_dict["to_name"] = item.to_name
                    completed_locations_dict["id"] = item.id
                    completed_locations_dict["distance"] = float(item.distance)
                    completed_locations_dict["time"] = float(item.time)

                    # Copy so later writes to the scratch dict do not alter
                    # records already stored in the list.
                    completed_locations_list.append(completed_locations_dict.copy())

            elif not os.path.isfile("output_files/completed_locations_{}.xlsx".format(name)):
                print("creating a backup")
                completed_locations_df = pd.DataFrame()
                # NOTE(review): this path lacks the "output_files/" prefix
                # used by the existence check and by every later save.
                completed_locations_df.to_excel("completed_locations_{}.xlsx".format(name))

            # NOTE(review): the loop variable `zip` shadows the builtin.
            for zip in reference_zips_df.itertuples():

                if zip.name == name:
                    print("we are in zipcode: {}".format(zip.zip))
                    for poi in zips_and_points_gdf.itertuples():
                        if str(poi.zip_left) == str(zip.zip):
                            buffer = ""
                            poi_zip = ""
                            # NOTE(review): poi_zip was just reset to "", so
                            # `poi_zip == None` is never true and the second
                            # test only fails when zip_left is "". In
                            # practice the buffer is rebuilt for every point.
                            if poi_zip == None or poi.zip_left != poi_zip:
                                poi_zip = poi.zip_left
                                # 8046 is presumably metres (about 5 miles);
                                # depends on the CRS of the data -- confirm.
                                buffer = shapely.wkt.loads(poi.zip_center_point).buffer(8046)
                                buffer_list.append(buffer)

                            # First pass: collect the other zip codes that
                            # have a point inside the buffer.
                            for to_location in zips_and_points_gdf.itertuples():
                                if poi.zip_left != to_location.zip_left and to_location.geometry.intersects(
                                        buffer) and to_location.zip_left not in zip_code_intersect_list:
                                    zip_code_intersect_list.append(to_location.zip_left)

                            # Second pass: query every point in those zip
                            # codes that carries the current name.
                            for to_location in zips_and_points_gdf.itertuples():
                                if to_location.zip_left in zip_code_intersect_list and to_location.name_left == name:

                                    if find_key_value_connection(int(poi.Index), int(to_location.Index),
                                                                 completed_locations_list):

                                        print(
                                            "point at index {} was already calculated to point at index {}, google credit at: {}".format(
                                                poi.Index, to_location.Index, google_credit_count))
                                    else:
                                        google_credit_count += 1
                                        count += 1

                                        print(
                                            "calculating point at index {} to index {}, google credit at: {}".format(
                                                poi.Index, to_location.Index, google_credit_count))

                                        # Reproject both points to epsg:4326
                                        # before passing (y, x) to the API.
                                        new_poi = projection("epsg:26910", "epsg:4326", poi.geometry)
                                        new_to_location = projection("epsg:26910", "epsg:4326", to_location.geometry)


                                        result = gmaps.distance_matrix((new_poi.y, new_poi.x),
                                                                       (new_to_location.y,new_to_location.x))

                                        completed_locations_dict["poi"] = int(poi.Index)
                                        completed_locations_dict["to_location"] = int(to_location.Index)
                                        completed_locations_dict["poi_zip"] = poi.zip_left
                                        completed_locations_dict["to_zip"] = to_location.zip_left
                                        completed_locations_dict["poi_name"] = zip.name
                                        completed_locations_dict["to_name"] = to_location.name_left
                                        completed_locations_dict["id"] = str(poi.zip_left) + str(
                                            poi.Index) + "-" + str(to_location.zip_left) + str(to_location.Index)

                                        # Presumably seconds -> minutes and
                                        # metres -> miles; confirm against
                                        # the Distance Matrix response docs.
                                        # A missing key stores the string
                                        # "nan" rather than a float.
                                        try:
                                            completed_locations_dict["time"] = \
                                            result["rows"][0]["elements"][0]["duration"]["value"] / 60
                                            completed_locations_dict["distance"] = \
                                            result["rows"][0]["elements"][0]["distance"]["value"] / 1609.3
                                        except KeyError:
                                            completed_locations_dict["time"] = "nan"
                                            completed_locations_dict["distance"] = "nan"

                                        completed_locations_list.append(completed_locations_dict.copy())



                                        # Periodic save: despite the message,
                                        # the whole list is rewritten to the
                                        # file rather than appended.
                                        if count > 500:
                                            print("backup exists appending new df to backup")


                                            completed_locations_df = pd.DataFrame(completed_locations_list)
                                            completed_locations_df.to_excel("output_files/completed_locations_{}.xlsx".format(name))

                                            count = 0

                                        # After 10000 requests, ask whether
                                        # to continue, switch keys or quit.
                                        if google_credit_count >= 10000:
                                            continue_program = input(
                                                "desired google credit has hit $50, continue or change keys?(continue/change/quit): ")
                                            while continue_program != "continue":
                                                if continue_program == "quit":
                                                    # with open("backup_save.json", "w") as backup_file:
                                                    #     json.dump(completed_locations_list.copy(), backup_file)
                                                    completed_locations_df = pd.DataFrame(completed_locations_list)
                                                    completed_locations_df.to_excel("output_files/completed_locations_{}.xlsx".format(name))

                                                    print("saving to excel")
                                                    quit()
                                                new_key = input("please insert a new key: ")
                                                gmaps = googlemaps.Client(key=new_key)
                                                # NOTE(review): the geocode
                                                # probe is commented out, so
                                                # nothing in this try body
                                                # calls the API and the new
                                                # key is never validated. The
                                                # key read in the except
                                                # branch is not used to
                                                # rebuild the client either.
                                                try:
                                                    # res = gmaps.geocode("Austin, Texas")
                                                    continue_program = input("valid key, continue? (continue/quit): ")
                                                except (ValueError, ApiError):
                                                    new_key = input("invalid key, try again: ")

                                            google_credit_count = 0

                        # Start the next point with an empty neighbour list.
                        zip_code_intersect_list = []

        completed_locations_df = pd.DataFrame(completed_locations_list)


        return completed_locations_df

尝试将pyproj降级到1.9.6版本时:
UnsatisfiableError: The following specifications were found to be incompatible with each other:

当您首次启动该工具时:

Warning:
The MATPLOTLIBDATA environment variable was deprecated in Matplotlib 3.1 and will be removed in 3.3.
  exec(bytecode, module.__dict__)
Traceback (most recent call last):
  File "site-packages\pyproj\datadir.py", line 101, in get_data_dir
pyproj.exceptions.DataDirError: Valid PROJ data directory not found.Either set the path using the environmental variable PROJ_LIB or with `pyproj.datadir.set_data_dir`.
Exception ignored in: 'pyproj._datadir.get_pyproj_context'
Traceback (most recent call last):
  File "site-packages\pyproj\datadir.py", line 101, in get_data_dir
pyproj.exceptions.DataDirError: Valid PROJ data directory not found.Either set the path using the environmental variable PROJ_LIB or with `pyproj.datadir.set_data_dir`.
proj_create: Cannot find proj.db
proj_create: init=epsg:/init=IGNF: syntax not supported in non-PROJ4 emulation mode
Invalid projection: +init=epsg:4326 +type=crs

运行该工具时:
<code that runs fine before>


Traceback (most recent call last):
  File "site-packages\pyproj\datadir.py", line 101, in get_data_dir
pyproj.exceptions.DataDirError: Valid PROJ data directory not found.Either set the path using the environmental variable PROJ_LIB or with `pyproj.datadir.set_data_dir`.
Exception ignored in: 'pyproj._datadir.get_pyproj_context'
Traceback (most recent call last):
  File "site-packages\pyproj\datadir.py", line 101, in get_data_dir
pyproj.exceptions.DataDirError: Valid PROJ data directory not found.Either set the path using the environmental variable PROJ_LIB or with `pyproj.datadir.set_data_dir`.
proj_create: Cannot find proj.db
proj_create: init=epsg:/init=IGNF: syntax not supported in non-PROJ4 emulation mode
Unhandled exception in thread started by <bound method ZipAnalysisGUI.analyze_data of <__main__.ZipAnalysisGUI object at 0x000001DAAD51A668>>
Traceback (most recent call last):
  File "main.py", line 480, in analyze_data
  File "main.py", line 237, in model_distances
  File "main.py", line 157, in projection
  File "site-packages\pyproj\proj.py", line 147, in __init__
  File "site-packages\pyproj\crs.py", line 391, in from_user_input
  File "site-packages\pyproj\crs.py", line 260, in __init__
  File "pyproj/_crs.pyx", line 1292, in pyproj._crs._CRS.__init__
pyproj.exceptions.CRSError: Invalid projection: +init=epsg:26910 +type=crs

我认为根据上述错误,问题出在以下方面:
        def projection(origin_projection, to_projection, geometry_object):
            """Apply a pyproj transform between two projections to a geometry.

            NOTE(review): `partial`, `pyproj` and `transform` are not among
            the imports listed in this post; presumably functools.partial
            and shapely.ops.transform -- confirm.
            """
            # The reported CRSError is raised while constructing
            # pyproj.Proj(init=...). The console output also shows
            # "Cannot find proj.db" and that the init= syntax is rejected,
            # so the failure appears tied to the missing PROJ data
            # directory in the frozen build rather than to this logic.
            project = partial(
                pyproj.transform,
                pyproj.Proj(init=origin_projection),
                pyproj.Proj(init=to_projection)
            )

            return transform(project, geometry_object)

当我从PyCharm运行时,一切都正常。但一旦我尝试将其作为可执行文件运行,它就开始崩溃。我相当确定它会影响上面的函数,但我无法确定原因。如果需要,我可以分享更多的代码或整个文件。


可能。我在一个使用Miniconda作为Python运行环境,并从crontab调用Bash来激活代码的应用程序中遇到了同样的问题。当我以相同的方式调用同样的代码“bash /path/to/my/bas.sh”时,它可以完美地运行。 - Iron Banker Of Braavos
你可以尝试使用 pyproj==2.4.0,看看问题是否仍然存在? - snowman2
1个回答

0

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接