如何使用Python保存已知URL地址的图像到本地？

Question

如何使用Python保存已知URL地址的图像到本地？

python web-scraping

204

我知道一个互联网上的图片URL。例如，http://www.digimouth.com/news/media/2011/09/google-logo.jpg，其中包含Google的标志。

现在，我该如何使用Python下载这张图片而不用实际打开浏览器中的URL并手动保存文件？

- Pankaj Vatsa

1

可能与“如何使用Python通过HTTP下载文件？”这个问题重复。 - Jaydev

18个回答

网页内容由stack overflow 提供, 点击上面的

可以查看英文原文，
原文链接

- Praveen Kumar · Answer 1

这是下载图片最简单的方法。

import requests
from slugify import slugify

img_url = 'https://apod.nasa.gov/apod/image/1701/potw1636aN159_HST_2048.jpg'

# Fetch the whole image into memory. The timeout keeps the script from hanging
# forever on an unresponsive server, and raise_for_status() stops us from
# saving an HTML error page under an image file name.
response = requests.get(img_url, timeout=30)
response.raise_for_status()
img = response.content

# File name = slugified URL + the text after the last dot of the URL
# (the extension). The with-block closes the file even if the write fails.
with open(slugify(img_url) + '.' + str(img_url).split('.')[-1], 'wb') as img_file:
    img_file.write(img)

- Pavel Pančocha · Answer 2

适用于Python 3、基于Requests的新方案：

说明见代码中的注释。该函数可直接使用。


import requests
from os import path

def get_image(image_url):
    """
    Download the image at ``image_url`` into the ``images`` folder that sits
    next to this script.

    :param image_url: URL of the image; the text after the last "/" becomes
        the local file name.
    :return: Image name if everything OK, False otherwise
    :raises OSError: if the target file cannot be written (for example the
        ``images`` folder does not exist).
    """
    image_name = path.split(image_url)[1]
    try:
        image = requests.get(image_url)
    except OSError:  # A little too wide, but works OK and needs no additional imports. Catches all connection problems.
        return False
    if image.status_code != 200:  # we could have retrieved an error page
        # Honour the documented contract: False (not an implicit None) on failure.
        return False
    base_dir = path.join(path.dirname(path.realpath(__file__)), "images")  # Use your own path or "" to use current working directory. Folder must exist.
    with open(path.join(base_dir, image_name), "wb") as f:
        f.write(image.content)
    return image_name

# Example call; the return value is discarded.
# NOTE(review): the host name looks deliberately invalid, presumably to
# exercise the connection-error path (which returns False) — confirm.
get_image("https://apod.nasddfda.gov/apod/image/2003/S106_Mishra_1947.jpg")

- PythonProgrammi · Answer 3

适用于Python 3版本

我调整了@madprops的代码，使其适用于Python 3

# getem.py
# python3 script to download all images in a given url
# use: python getem.py http://url.where.images.are

from bs4 import BeautifulSoup
import urllib.request
import shutil
import requests
from urllib.parse import urljoin
import sys
import time

def make_soup(url):
    """Fetch ``url`` and return its HTML parsed into a BeautifulSoup tree.

    The request is sent with a custom User-Agent header. Errors raised by
    ``urlopen`` are not handled here and propagate to the caller.
    """
    req = urllib.request.Request(url, headers={'User-Agent': "Magic Browser"})
    # Parse inside the with-block so the HTTP response is closed as soon as
    # BeautifulSoup has consumed it, instead of leaking the connection.
    with urllib.request.urlopen(req) as html:
        return BeautifulSoup(html, 'html.parser')

def get_images(url):
    """Download every ``<img>`` referenced by the page at ``url``.

    Each image is saved in the current working directory under the last path
    segment of its ``src`` attribute. A failure on one image is reported and
    skipped so the remaining images are still attempted.

    :param url: address of the page to scan; relative ``src`` values are
        resolved against it.
    :return: None
    """
    soup = make_soup(url)
    images = soup.find_all('img')
    print(str(len(images)) + " images found.")
    print('Downloading images to current working directory.')
    image_links = [each.get('src') for each in images]
    for each in image_links:
        if not each:
            # <img> without a usable src attribute: nothing to download.
            print('  An error occured. Continuing.')
            continue
        try:
            filename = each.strip().split('/')[-1].strip()
            src = urljoin(url, each)
            print('Getting: ' + filename)
            # The with-block releases the streamed connection when done.
            with requests.get(src, stream=True) as response:
                # delay to avoid corrupted previews
                time.sleep(1)
                # Have urllib3 undo gzip/deflate transfer encoding so the
                # bytes written to disk are the actual image.
                response.raw.decode_content = True
                with open(filename, 'wb') as out_file:
                    shutil.copyfileobj(response.raw, out_file)
        except Exception:
            # Best effort per image, but unlike a bare ``except:`` this still
            # lets KeyboardInterrupt / SystemExit stop the run.
            print('  An error occured. Continuing.')
    print('Done.')

if __name__ == '__main__':
    # Demo run: download every <img> found on this page into the current
    # working directory.
    get_images('http://www.wookmark.com')

- Pedro Lobito · Answer 4

虽然回答有点晚，但对于python>=3.6，您可以使用dload。示例如下：

import dload
# Download the image at this URL to a local file.
# NOTE(review): the destination path/file name is chosen by dload and is not
# shown here — check the dload documentation.
dload.save("http://www.digimouth.com/news/media/2011/09/google-logo.jpg")

如果您需要将图像以bytes形式使用，可以使用以下方式：

# Fetch the same image but keep it in memory instead of saving it
# (presumably a bytes object, per the surrounding answer text).
img_bytes = dload.bytes("http://www.digimouth.com/news/media/2011/09/google-logo.jpg")

使用 pip3 install dload 进行安装。

- emehex · Answer 5

如果您还没有图片的URL，您可以使用gazpacho进行爬取：

from gazpacho import Soup
base_url = "http://books.toscrape.com"

soup = Soup.get(base_url)
# mode="all" makes find() always return a list. In the default automatic mode
# it returns a single Soup for exactly one match and None for no match, both
# of which would break this comprehension. Tags without a src are skipped.
links = [
    img.attrs["src"]
    for img in soup.find("img", mode="all")
    if img.attrs and "src" in img.attrs
]

然后，如前所述，使用urllib下载这些资源：

from pathlib import Path
from urllib.parse import urljoin
from urllib.request import urlretrieve as download

# Folder that receives the downloaded files; created on first run.
directory = "images"
Path(directory).mkdir(exist_ok=True)

# Download only the first scraped image; its local name is the last path
# segment of the src value.
link = links[0]
name = link.split("/")[-1]

# urljoin resolves relative src values exactly like plain concatenation did,
# and additionally handles root-relative ("/media/x.jpg") and absolute
# ("http://...") src values, which concatenation turned into broken URLs.
download(urljoin(f"{base_url}/", link), f"{directory}/{name}")

- Ondiek Elijah · Answer 6

# import the required libraries from Python
import pathlib,urllib.request 

# Using pathlib, specify where the image is to be saved
downloads_path = str(pathlib.Path.home() / "Downloads")

# Form a full image path by joining the path to the
# images' new name. pathlib does the joining, so the snippet does not
# depend on an `os` import that was never made.

picture_path = str(pathlib.Path(downloads_path) / "new-image.png")

# "/home/User/Downloads/new-image.png"

# Using "urlretrieve()" from urllib.request save the image.
# The URL must include its scheme; a scheme-less "//host/path" URL is
# rejected by urllib with "unknown url type".
urllib.request.urlretrieve("https://example.com/image.png", picture_path)

# urlretrieve() takes in 2 arguments
# 1. The URL of the image to be downloaded
# 2. The image new name after download. A relative name is resolved against
#    your current working directory; if the name is omitted entirely, the
#    data is written to a temporary file instead.

- JayRizzo · Answer 7

好的，这是我初步尝试，可能有些过度设计。如果需要更新，请更新，因为它没有处理任何超时，但是我为了好玩而使其工作。

代码在此处列出：https://github.com/JayRizzo/JayRizzoTools/blob/master/pyImageDownloader.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# =============================================================================
# Created Syst: MAC OSX High Sierra 21.5.0 (17G65)
# Created Plat: Python 3.9.5 ('v3.9.5:0a7dcbdb13', 'May  3 2021 13:17:02')
# Created By  : Jeromie Kirchoff
# Created Date: Thu Jun 15 23:31:01 2022 CDT
# Last ModDate: Thu Jun 16 01:41:01 2022 CDT
# =============================================================================
# NOTE: Doesn't work on SVG images at this time.
# I will look into this further: https://dev59.com/omw15IYBdhLWcg3weLoF#6599172
# =============================================================================
import requests                                 # to get image from the web
import shutil                                   # to save it locally
import os                                       # needed
from os.path import exists as filepathexist     # check if file paths exist
from os.path import join                        # joins path for different os
from os.path import expanduser                  # expands current home
from pyuser_agent import UA                     # generates random UserAgent

class ImageDownloader(object):
    """URL ImageDownloader.

    Input : Full Image URL
    Output: Image saved to your ~/Pictures/JayRizzoDL folder.

    Constructing an instance performs the download immediately, because
    ``__init__`` finishes by calling ``main()``.
    """

    # Value passed to requests.get() as ``timeout`` (seconds) so a silent
    # server cannot block the download forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, URL: str):
        """Prepare the target folder and download ``URL`` right away.

        Raises whatever ``requests`` raises for connection problems or
        non-success HTTP statuses, and OSError if the file cannot be written.
        """
        self.url = URL
        self.headers = {"User-Agent": UA().random}
        self.currentHome = expanduser('~')
        # The trailing "" keeps the trailing separator these attributes
        # always had, while letting join() build the path itself.
        self.desktop = join(self.currentHome, "Desktop", "")
        self.download = join(self.currentHome, "Downloads", "")
        self.pictures = join(self.currentHome, "Pictures", "JayRizzoDL", "")
        self.outfile = ""
        self.filename = ""
        self.response = ""
        self.rawstream = ""
        self.createdfilepath = ""
        self.imgFileName = ""
        # Make sure the JayRizzoDL folder exists. makedirs also creates a
        # missing ~/Pictures parent and is a no-op when the folder is there.
        os.makedirs(self.pictures, exist_ok=True)
        self.main()

    def getFileNameFromURL(self, URL: str):
        """Return the last path segment of ``URL`` (query/fragment removed)."""
        from urllib.parse import urlsplit

        # Use only the path component so "?size=2" or "#frag" never ends up
        # in the local file name.
        NewFileName = urlsplit(URL.strip()).path.split('/')[-1].strip()
        return NewFileName

    def getResponse(self, URL: str):
        """Open a streaming GET request for ``URL`` and return the response.

        Raises ``requests.HTTPError`` for 4xx/5xx answers so an error page is
        never saved as if it were the image.
        """
        self.response = requests.get(
            URL,
            headers=self.headers,
            stream=True,
            timeout=self.REQUEST_TIMEOUT,
        )
        self.response.raise_for_status()
        return self.response

    def gocreateFile(self, name: str, response):
        """Write the raw body of ``response`` to ``name`` inside the pictures folder.

        Returns the full path of the file that was written.
        """
        self.outfile = join(self.pictures, name)
        # Let urllib3 undo gzip/deflate transfer encoding while copying.
        response.raw.decode_content = True
        with open(self.outfile, 'wb') as outFilePath:
            shutil.copyfileobj(response.raw, outFilePath)
        return self.outfile

    def main(self):
        """Derive the file name, fetch the URL and save it to disk."""
        self.filename = self.getFileNameFromURL(self.url)
        self.rawstream = self.getResponse(self.url)
        try:
            self.createdfilepath = self.gocreateFile(self.filename, self.rawstream)
        finally:
            # Always release the streamed connection, even if writing failed.
            self.rawstream.close()
        print(f"File was created: {self.createdfilepath}")
        return

if __name__ == '__main__':
    # Example when calling the file directly.
    # Creating the instance is enough: __init__ calls main(), which downloads
    # the image and prints the path of the created file.
    ImageDownloader("https://stackoverflow.design/assets/img/logos/so/logo-stackoverflow.png")

- PRINCE ARYA · Answer 8

下载图像文件，避免所有可能的错误：

import requests
import validators
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError


def is_downloadable(url):
  """Return True if ``url`` is well-formed and the server answers a request.

  :param url: address to check.
  :return: False when the URL fails validation or opening it raises
      URLError/HTTPError; True otherwise.
  """
  # validators.url() signals failure with a falsy result object rather than
  # the literal False, so test truthiness; an ``== False`` comparison never
  # matches that object.
  if not validators.url(url):
    return False
  req = Request(url)
  try:
    # Only reachability matters here; the with-block closes the response
    # immediately instead of leaking the connection.
    with urlopen(req):
      return True
  except URLError:
    # HTTPError is a subclass of URLError, so this covers both.
    return False



# File_data contains one row per image: row[0] is the label used as a
# file-name prefix, row[1] is the image address.
for row in File_data:
  url = row[1]
  try:
    if not is_downloadable(url):
      continue
    r = requests.get(url, allow_redirects=True)
    # str.find() returns -1 (truthy) when "/" is absent, so it cannot be
    # used as a yes/no test; the ``in`` operator can.
    if '/' in url:
      fname = url.rsplit('/', 1)[1]
      # Destination to save the image file: <pth><label>$<original name>
      fname = pth + row[0] + "$" + fname
      # The with-block closes the file even if the write fails.
      with open(fname, 'wb') as out_file:
        out_file.write(r.content)
  except Exception as e:
    # Best effort: report the problem and move on to the next image.
    print(e)