import email
import imaplib
import logging
import re
from email.header import decode_header

from bs4 import BeautifulSoup
from wordpress_xmlrpc import Client, WordPressPost, WordPressTerm
from wordpress_xmlrpc.methods import posts
from wordpress_xmlrpc.methods.posts import EditPost, NewPost
from wordpress_xmlrpc.methods.users import GetUserInfo
logger = logging.getLogger(__name__)

# Runs of non-breaking spaces inside a table cell are collapsed to one space.
_NBSP_RUN = re.compile('(\xa0)+')


def decode_subject(subject, encoding):
    """Return one header fragment from ``decode_header`` as text.

    This is what lets non-ASCII (e.g. Chinese) subjects come back readable.

    Args:
        subject: The fragment, either ``bytes`` or already-decoded ``str``.
        encoding: Charset name reported by ``decode_header``, or ``None``.

    Returns:
        ``subject`` unchanged when ``encoding`` is ``None`` or when it is
        already a ``str``; otherwise ``subject`` decoded with ``encoding``.
    """
    if encoding is None or isinstance(subject, str):
        return subject
    return subject.decode(encoding)


def showmessage(mail):
    """Return the text of ``mail``, decoding each part with its own charset.

    Same purpose as ``decode_subject``: work around encoding problems.

    Args:
        mail: An ``email.message.Message``, possibly multipart.

    Returns:
        A ``str``. For a multipart message the texts of all leaf parts are
        joined with newlines (previously the recursive results were thrown
        away and ``None`` was returned). A part with no declared charset is
        returned as ``get_payload()`` gives it.
    """
    if mail.is_multipart():
        pieces = (showmessage(part) for part in mail.get_payload())
        return "\n".join(piece for piece in pieces if piece)

    charset = mail.get_content_charset()
    if charset is None:
        return mail.get_payload()

    raw = mail.get_payload(decode=True)
    try:
        return raw.decode(charset)
    except UnicodeDecodeError:
        # Keep whatever is readable instead of handing back a Message object.
        return raw.decode(charset, errors="replace")
    except LookupError:
        # The mail declares a charset Python does not know.
        return raw.decode("utf-8", errors="replace")


def _parse_announcement(table_html):
    """Return ``(publishdate, posttitle, postcontent)`` from one table.

    Each ``<tr>`` is read as a label cell followed by a value cell. A field
    whose label row is absent comes back as ``None``.
    """
    fields = {}
    news = BeautifulSoup(table_html)
    for row in news.find_all('tr'):
        cells = row.find_all("td")
        if len(cells) < 2:
            # Not a label/value row; indexing cells[1] would raise.
            continue
        fields[cells[0].get_text()] = _NBSP_RUN.sub(' ', cells[1].get_text())
    return fields.get('公布日期'), fields.get('標題'), fields.get('詳細內容')


def _publish_announcements(body):
    """Parse the HTML ``body`` and post each announcement table to WordPress."""
    # The original never built ``soup`` from the mail body before using it.
    soup = BeautifulSoup(body)
    # [8:] skips the first eight tables, i.e. starts at the ninth one
    # (presumably the earlier ones are page layout -- TODO confirm).
    for table in soup.find_all("table")[8:]:
        # Render the tag as Big5 bytes and read them back as cp950.
        content = table.encode('big5').decode('cp950')
        if 'class="contentword"' not in content:
            continue
        publishdate, posttitle, postcontent = _parse_announcement(content)
        if posttitle is None or postcontent is None:
            # Previously this raised NameError, or silently reused the values
            # of the preceding table.
            logger.warning("skipping table without title/content rows")
            continue
        logger.info("posting %r (published %s)", posttitle, publishdate)
        # NOTE(review): ``postToWP`` is neither defined nor imported in the
        # visible part of this file -- confirm where it comes from.
        postToWP.post(posttitle, postcontent, "post tag")


def main():
    """Read the newest inbox mail, post its announcements, then archive it.

    NOTE(review): the original formatting was lost, so posting once per
    matching table and archiving once per message is a reconstruction.
    """
    conn = imaplib.IMAP4_SSL("imap.gmail.com", 993)
    try:
        conn.login('account', 'password')
        conn.select("inbox")  # connect to inbox.
        typ, data = conn.search(None, 'ALL')
        if typ != 'OK' or not data or data[0] is None:
            logger.warning("IMAP search failed: %s", typ)
            return
        # Only the newest message is processed.
        for num in data[0].split()[-1:]:
            typ, msg_data = conn.fetch(num, '(RFC822)')
            for response_part in msg_data:
                if not isinstance(response_part, tuple):
                    continue
                # Parse the raw bytes directly; the mail need not be UTF-8.
                msg = email.message_from_bytes(response_part[1])
                raw_subject = msg['subject']
                if raw_subject is not None:
                    subject, encoding = decode_header(raw_subject)[0]
                    logger.info("processing mail %r",
                                decode_subject(subject, encoding))
                _publish_announcements(showmessage(msg))
            # NOTE(review): ``move_mail`` is not defined in the visible part
            # of this file -- confirm where it comes from.
            move_mail(conn, num, 'Archive.Today')
    finally:
        try:
            conn.close()
        except (imaplib.IMAP4.error, OSError) as exc:
            # close() is best-effort (it fails when no mailbox is selected);
            # it must not prevent the logout below.
            logger.debug("ignoring error while closing mailbox: %s", exc)
        conn.logout()


if __name__ == "__main__":
    main()