如何在 Python 脚本中使用 Google 翻译来翻译一个 URL(网页)?

4

我正在尝试从Python脚本使用Google翻译:

#!/usr/bin/env python
# Python 2 script: builds a Google Translate URL and prints the response body.
from urllib2 import urlopen
from urllib import urlencode

base_url = "http://www.google.com/translate?"
# langpair is "source|target"; u is presumably the page to translate.
params = (('langpair','en|es'), ('u','http://asdf.com'),)
url = base_url+urlencode(params)
print "Encoded URL: %s" % url 
# This is the call that fails with urllib2.HTTPError (403) in the run below.
print urlopen(url).read()

当我使用它时,我遇到了403错误。

# ./1.py 
Encoded URL: http://www.google.com/translate?langpair=en%7Ces&u=http%3A%2F%2Fasdf.com
Traceback (most recent call last):
...
urllib2.HTTPError: HTTP Error 403: Forbidden

然而,当通过浏览器访问相同的URL时却没有问题。有人能找出错误吗?还是说Google不允许这种用法?提前致谢。
5个回答

4
如果 Google 不允许您这样做,您可以通过 Google 的 API 以编程方式翻译普通网站的源代码。我之前写了一个函数来实现这个功能:
def translate(text, src = '', to = 'en'):
  """Translate *text* through the Google AJAX Language API.

  The text is sent in slices of at most 4500 characters and the translated
  slices are concatenated in their original order.

  Args:
    text: The string to translate.
    src: Source language code; the default '' leaves the source side of
      ``langpair`` empty.
    to: Target language code.

  Returns:
    The concatenated translations. A slice whose response carries no
    ``responseData.translatedText`` string contributes nothing.

  Raises:
    Whatever ``urllib.request.urlopen`` or ``json.loads`` raise; only a
    missing translation inside an otherwise decodable response is tolerated.
  """
  parameters = {'langpair': '{0}|{1}'.format(src, to), 'v': '1.0'}
  pieces = []

  for start in range(0, len(text), 4500):
    parameters['q'] = text[start:start + 4500]
    payload = urllib.parse.urlencode(parameters).encode('utf-8')
    # Passing data= makes this a POST; the with-block closes the connection.
    with urllib.request.urlopen('http://ajax.googleapis.com/ajax/services/language/translate', data = payload) as reply:
      response = json.loads(reply.read().decode('utf-8'))

    try:
      piece = response['responseData']['translatedText']
    except (KeyError, TypeError):
      # responseData absent or not a mapping (e.g. null): skip this slice
      # instead of hiding every possible error behind a bare except.
      continue
    if isinstance(piece, str):
      pieces.append(piece)

  return ''.join(pieces)

4

你应该使用谷歌API。我找到并测试了这段代码,它可以正常工作:

#!/usr/bin/env python
from urllib2 import urlopen
from urllib import urlencode
import sys

lang1=sys.argv[1] lang2=sys.argv[2] langpair='%s|%s'%(lang1,lang2) text=' '.join(sys.argv[3:]) base_url='http://ajax.googleapis.com/ajax/services/language/translate?' params=urlencode( (('v',1.0), ('q',text), ('langpair',langpair),) ) url=base_url+params content=urlopen(url).read() start_idx=content.find('"translatedText":"')+18 translation=content[start_idx:] end_idx=translation.find('"}, "') translation=translation[:end_idx] print translation

source


然而,这只能翻译文本,而不能翻译网页(保留 HTML 标签)。是否可以翻译整个网页,而不仅仅是文本? - facha

1

我该如何使用Google API翻译整个网页(而不是文本)? - facha

0

0

你可以使用更好的Python代码来使用Google进行翻译:

来源:https://neculaifantanaru.com/en/python-code-text-google-translate-website-translation-beautifulsoup-new.html

from bs4 import BeautifulSoup
from bs4.formatter import HTMLFormatter
import requests
import sys
import os

class UnsortedAttributes(HTMLFormatter):
    """Formatter that emits a tag's attributes in their stored order."""

    def attributes(self, tag):
        """Yield each ``(name, value)`` pair of *tag* without sorting."""
        yield from tag.attrs.items()

# Folder that holds the HTML files to translate.
files_from_folder = r"c:\Folder2"
# When True, output goes to a "translated" subfolder instead of the source folder.
use_translate_folder = True
destination_language = 'vi'  # change the target language here
# Only file names ending with this suffix are processed.
extension_file = ".html"
# Bytes form of the source folder, as passed to os.listdir by the main loop.
directory = os.fsencode(files_from_folder)

def translate(text, target_language):
    """Translate English *text* into *target_language* via Google Translate.

    Sends one GET request to the ``translate_a/single`` endpoint, with
    headers that identify the client as the iOS Google Translate app, and
    joins the ``trans`` field of every entry in the ``sentences`` list of
    the JSON reply.

    Args:
        text: The text to translate; the source language is fixed to "en".
        target_language: Target language code, sent as ``tl``.

    Returns:
        The translated text as a single string.

    Raises:
        requests.RequestException: On connection problems, on timeout, or
            when the server answers with an HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the reply has no ``sentences`` entry.
    """
    url = "https://translate.google.com/translate_a/single"
    headers = {
        "Host": "translate.google.com",
        "Accept": "*/*",
        "Cookie": "",
        "User-Agent": "GoogleTranslate/5.9.59004 (iPhone; iOS 10.2; ja; iPhone9,1)",
        "Accept-Language": "fr",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
        }
    params = {
        "client": "it",
        "dt": ["t", "rmt", "bd", "rms", "qca", "ss", "md", "ld", "ex"],
        "otf": "2",
        "dj": "1",
        "q": text,
        "hl": "ja",
        "ie": "UTF-8",
        "oe": "UTF-8",
        "sl": "en",
        "tl": target_language,
        }

    # Without a timeout requests waits indefinitely on a stalled connection,
    # which would freeze a whole batch run on a single text node.
    res = requests.get(
        url=url,
        headers=headers,
        params=params,
        timeout=30,
        )
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError later.
    res.raise_for_status()

    data = res.json()

    # Entries without a "trans" field (if the service returns any) add nothing.
    return ''.join(sentence.get("trans", "") for sentence in data["sentences"])

def recursively_translate(node):
    """Translate every non-blank text child of *node* in place, depth first.

    String children are replaced by their translation into the module-level
    ``destination_language``; any other non-None child is processed
    recursively. A string whose translation fails is left untouched and the
    failure is reported on stderr, so one bad request does not abort the run.

    Args:
        node: An object exposing a ``contents`` list (a parsed HTML tag).
    """
    # Iterate over a snapshot: replace_with swaps entries of node.contents.
    for child in list(node.contents):
        if isinstance(child, str):
            if child.strip() == '':
                continue  # whitespace-only text needs no translation
            try:
                child.replace_with(translate(text=child, target_language=destination_language))
            except Exception as error:
                # Best effort, but no longer silent, and Ctrl-C still works
                # because KeyboardInterrupt is not an Exception subclass.
                print(f'translation skipped for {child!r}: {error}', file=sys.stderr)
        elif child is not None:
            recursively_translate(child)

# Elements of the article body that get translated, in processing order:
# (tag name, attribute filter, CSS class).
_ARTICLE_SELECTORS = (
    ('h1', {'itemprop': 'name'}, 'den_articol'),
    ('p', {}, 'text_obisnuit'),
    ('p', {}, 'text_obisnuit2'),
    ('span', {}, 'text_obisnuit2'),
    ('li', {}, 'text_obisnuit'),
    ('a', {}, 'linkMare'),
    ('h4', {}, 'text_obisnuit2'),
    ('h5', {}, 'text_obisnuit2'),
    ('h1', {'itemprop': 'name'}, 'den_webinar'),
)

# Files in the source folder that are never translated.
_SKIPPED_FILES = ('y_key_e479323ce281e459.html', 'directory.html')


def _inside_article(soup, element):
    """Return True if *element* is serialized between the article markers.

    Raises ValueError (from str.index) if either marker comment or the
    element's own markup is not found in the serialized document.
    """
    markup = str(soup)  # serialize once per check instead of three times
    begin_comment = markup.index('<!-- ARTICOL START -->')
    end_comment = markup.index('<!-- ARTICOL FINAL -->')
    return begin_comment < markup.index(str(element)) < end_comment


# Translate every matching HTML file of the source folder and write the
# result next to it (or into the "translated" subfolder).
for file in os.listdir(directory):
    filename = os.fsdecode(file)
    print(filename)
    if filename in _SKIPPED_FILES:
        continue
    if not filename.endswith(extension_file):
        # Everything below needs a freshly parsed document. Without this
        # guard a non-HTML entry (including the "translated" output folder
        # on a second run) reached the write step with no soup, or a stale one.
        continue

    with open(os.path.join(files_from_folder, filename), encoding='utf-8') as html:
        # The <pre> wrapper is stripped again when writing ([5:-6] below).
        soup = BeautifulSoup('<pre>' + html.read() + '</pre>', 'html.parser')

    for title in soup.findAll('title'):
        recursively_translate(title)

    for meta in soup.findAll('meta', {'name': 'description'}):
        try:
            meta['content'] = translate(text=meta['content'], target_language=destination_language)
        except Exception as error:
            # Best effort: keep the original description, but say so.
            print(f'{filename}: meta description not translated: {error}', file=sys.stderr)

    for tag_name, attrs, css_class in _ARTICLE_SELECTORS:
        for element in soup.findAll(tag_name, attrs, class_=css_class):
            if _inside_article(soup, element):
                recursively_translate(element)

    print(f'{filename} translated')
    translated_html = soup.encode(formatter=UnsortedAttributes()).decode('utf-8')
    # splitext keeps the full stem of names such as "a.b.html".
    new_filename = f'{os.path.splitext(filename)[0]}_{destination_language}.html'
    if use_translate_folder:
        output_folder = os.path.join(files_from_folder, 'translated')
        # Create the folder up front instead of reacting to any failed open().
        os.makedirs(output_folder, exist_ok=True)
    else:
        output_folder = files_from_folder
    with open(os.path.join(output_folder, new_filename), 'w', encoding='utf-8') as new_html:
        # Drop the leading '<pre>' (5 chars) and trailing '</pre>' (6 chars).
        new_html.write(translated_html[5:-6])

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接