Python imaplib获取Gmail邮件正文

14

我已经阅读过这篇文章,并编写了以下脚本来获取某个邮箱中以"$"开头且由特定发件人发送的电子邮件正文。

import email, getpass, imaplib, os

# Python 2 script: list the five most recent messages in a Gmail label that
# come from one sender and whose subject starts with "$".
# The parenthesized single-argument print calls below behave the same as
# print statements on Python 2.

# Raw string so the backslash is never read as an escape sequence.
detach_dir = r"F:\PYTHONPROJECTS"  # where you will save attachments
user = raw_input("Enter your GMail username --> ")
pwd = getpass.getpass("Enter your password --> ")

# connect to the gmail imap server
m = imaplib.IMAP4_SSL("imap.gmail.com")
m.login(user, pwd)

m.select("PETROLEUM")  # here you can choose a mailbox like INBOX instead
# use m.list() to get all the mailboxes

resp, items = m.search(None, '(FROM "EIA_eLists@eia.gov")')
items = items[0].split()  # ids of the matching messages

MAX_MSGS = 5  # stop after this many "$" subjects
my_msg = []  # parsed messages whose subject starts with "$"
msg_cnt = 0
for emailid in reversed(items):  # last ids returned by the search first
    # Check the limit before fetching so no extra message is downloaded
    # once enough matches have been collected.
    if msg_cnt >= MAX_MSGS:
        break
    resp, data = m.fetch(emailid, "(RFC822)")
    for response_part in data:
        # Only the tuple entries of the fetch response are parsed; in them,
        # element [1] is handed to the email parser.
        if not isinstance(response_part, tuple):
            continue
        msg = email.message_from_string(response_part[1])
        varSubject = msg['subject']
        # A missing or empty Subject header cannot start with "$".
        if not varSubject or varSubject[0] != '$':
            continue
        msg_cnt += 1
        my_msg.append(msg)
        print(msg_cnt)
        # msg is email.message_from_string(response_part[1]); printing it
        # shows the headers followed by the full body.
        print(msg)
如果我打印 email.message_from_string(response_part[1]),可以看到输出先是头部信息(主题、发件人、收件人、日期等),然后是完整的正文。但是,我无法单独获取正文本身。email.message_from_string(response_part[0]) 会打印出邮件ID,而 email.message_from_string(response_part[2]) 会索引越界。email.message_from_string(response_part[1][0]) 也不起作用。

谢谢和问候。

更新

现在,我几乎可以获得正文文本了。然而,输出的开头仍然多出一行信息。我的结果是

From nobody Tue Dec 25 11:42:58 2012

US=3D$4.030

EastCst=3D$4.036

NewEng=3D$4.205

CenAtl=3D$4.149

LwrAtl=3D$3.921

Midwst=3D$3.984

GulfCst=3D$3.945

RkyMt=3D$4.195

WCst=3D$4.187

CA=3D$4.268

我希望去掉 From nobody Tue Dec 25 11:42:58 2012 这一行。我知道可以自己解析文本并跳过第一行……这个我知道。

实现这个功能的代码(插入我的第一个示例)是:

  # Fragment meant to replace the `if varSubject[0] == '$':` branch of the
  # first script; it relies on m, emailid, msg, my_msg, msg_cnt and
  # varSubject being defined there (Python 2 syntax).
  if varSubject[0] == '$':
      # Second request for the same message, asking only for BODY[TEXT].
      r, d = m.fetch(emailid, "(UID BODY[TEXT])")
      msg_cnt += 1
      my_msg.append(msg)
      # d[0][1] is handed to the email parser and the resulting Message
      # object is printed.
      # NOTE(review): printing a Message goes through Message.__str__, which
      # on Python 2 includes the Unix "From nobody ..." envelope line;
      # printing d[0][1] itself would avoid that line -- confirm.
      # NOTE(review): the "=3D" in the output looks like undecoded
      # quoted-printable for "=" -- confirm against the message encoding.
      print email.message_from_string(d[0][1])

你有更好的方法吗(不带这行信息字符串)?另外:现在获取日期的命令是什么?我知道可以在上面合适的位置使用 varDate = msg['date'],但如何只获取日-月-年?谢谢。

4个回答

14

我已经成功使用Gmail使这个程序工作了,它会提取有用的部分并将其输出到文本文件中:

import datetime
import email
import imaplib
import mailbox


# Script: fetch every UNSEEN message from a Gmail inbox and write its headers
# and plain-text body to email_<n>.txt in the current directory.

EMAIL_ACCOUNT = "your@gmail.com"
PASSWORD = "your password"


def _header_text(raw_value):
    """Return an RFC 2047 encoded header as text, or '' if the header is absent."""
    if raw_value is None:
        return ""
    return str(email.header.make_header(email.header.decode_header(raw_value)))


def _part_text(part):
    """Return the decoded text of a non-multipart message part.

    The charset declared by the part is used, falling back to UTF-8 when it
    is missing or unknown; undecodable bytes are replaced, not raised.
    """
    payload = part.get_payload(decode=True) or b""
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="replace")
    except LookupError:  # charset name Python does not know
        return payload.decode("utf-8", errors="replace")


mail = imaplib.IMAP4_SSL('imap.gmail.com')
mail.login(EMAIL_ACCOUNT, PASSWORD)
mail.list()
mail.select('inbox')
result, data = mail.uid('search', None, "UNSEEN")  # (ALL/UNSEEN)
message_uids = data[0].split()  # split once, not once per iteration
i = len(message_uids)

for x, latest_email_uid in enumerate(message_uids):
    result, email_data = mail.uid('fetch', latest_email_uid, '(RFC822)')
    # NOTE(review): the original carried a commented-out hint here,
    # conn.store(num, '-FLAGS', '\\Seen'), as a possible way to change the
    # Seen flag; `conn` and `num` are not defined in this script.
    if not email_data or not isinstance(email_data[0], tuple):
        continue  # nothing usable came back for this UID
    raw_email = email_data[0][1]
    # Parse the raw bytes directly: decoding the whole message as UTF-8 first
    # fails on messages that contain other 8-bit encodings.
    email_message = email.message_from_bytes(raw_email)

    # Header Details
    # Reset for every message so a missing/unparseable Date header neither
    # raises NameError nor reuses the previous message's date.
    local_message_date = ""
    date_tuple = email.utils.parsedate_tz(email_message['Date'])
    if date_tuple:
        local_date = datetime.datetime.fromtimestamp(email.utils.mktime_tz(date_tuple))
        local_message_date = local_date.strftime("%a, %d %b %Y %H:%M:%S")
    email_from = _header_text(email_message['From'])
    email_to = _header_text(email_message['To'])
    subject = _header_text(email_message['Subject'])

    # Body details
    file_name = "email_" + str(x) + ".txt"
    for part in email_message.walk():
        if part.get_content_type() != "text/plain":
            continue
        body_text = _part_text(part)
        # As in the original, each text/plain part rewrites the same file, so
        # the last such part of the message is what remains on disk.
        with open(file_name, 'w', encoding='utf-8') as output_file:
            output_file.write(
                "From: %s\nTo: %s\nDate: %s\nSubject: %s\n\nBody: \n\n%s"
                % (email_from, email_to, local_message_date, subject, body_text)
            )

6

3
from imap_tools import MailBox

# Print the plain-text and HTML body of every message fetched from INBOX.
with MailBox('imap.my.ru').login('acc', 'pwd', 'INBOX') as box:
    for message in box.fetch():
        for body in (message.text, message.html):
            print(body)
    

这需要外部库的支持,但操作更加简单。

https://github.com/ikvk/imap_tools


-1
import datetime
import email
import imaplib
import mailbox
import re

# Script: look for the message in a Yahoo inbox that matches both a sender
# and a subject, and print its plain-text body when exactly one matches.

EMAIL_ACCOUNT = "yourmail@yahoo.com"
PASSWORD = "password"
mail = imaplib.IMAP4_SSL('imap.mail.yahoo.com')
mail.login(EMAIL_ACCOUNT, PASSWORD)
mail.select('INBOX')

# One search with both criteria (IMAP ANDs multiple search keys).  The
# original ran two searches and overwrote the first result, so the FROM
# filter was silently lost.  The search goes through uid() so that the ids it
# returns are UIDs, matching the uid('fetch', ...) call below.
result, data = mail.uid('search', None, '(FROM "Sender Email")', '(SUBJECT "Message")')
matching_uids = data[0].split()
i = len(matching_uids)

if i == 1:
    latest_email_uid = matching_uids[0]
    result, email_data = mail.uid('fetch', latest_email_uid, '(RFC822)')
    raw_email = email_data[0][1]
    # Parse the bytes directly instead of forcing a UTF-8 decode first.
    email_message = email.message_from_bytes(raw_email)
    for part in email_message.walk():
        if part.get_content_type() == "text/plain":
            # decode=True undoes the transfer encoding; the result is bytes.
            emailBody = part.get_payload(decode=True)
            print(emailBody)
else:
    # Reached when zero or more than one message matched.
    print('Email NOT ' + EMAIL_ACCOUNT)

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接