"""Scrape selected text from a Yahoo Movies Taiwan page using Selenium.

Reconstructed from the whitespace-collapsed patch "Add files via upload"
(commit d53837451873), which created ``main.py``.

The script loads each URL in Chrome, feeds the page source to
``MyHTMLParser``, prints the extracted text and writes it to ``out.txt``.
"""

#-------------------------------------------
# Import required modules
#-------------------------------------------
from html.parser import HTMLParser


#-------------------------------------------
# HTML parser that keeps only selected text
#-------------------------------------------
class MyHTMLParser(HTMLParser):
    """Collect the text nodes that lie between start and stop markers.

    Capturing switches on when a text node, stripped of surrounding
    whitespace, is exactly equal to one of ``start_markers``; that node is
    itself kept.  Capturing switches off when a stripped text node contains
    one of ``stop_markers`` as a substring; that node is discarded.

    Args:
        start_markers: Iterable of strings that turn capturing on.
        stop_markers: Iterable of substrings that turn capturing off.
        convert_charrefs: Forwarded unchanged to ``HTMLParser``.
    """

    # Defaults reproduce the markers the original script hard-coded.
    DEFAULT_START_MARKERS = ('驚奇4超人', '劇情介紹')
    DEFAULT_STOP_MARKERS = ('期待度', '展開劇情簡介')

    def __init__(self, *, start_markers=DEFAULT_START_MARKERS,
                 stop_markers=DEFAULT_STOP_MARKERS, convert_charrefs=True):
        super().__init__(convert_charrefs=convert_charrefs)
        self._start_markers = frozenset(start_markers)
        self._stop_markers = tuple(stop_markers)
        # Per-instance state; the flag no longer shadows the builtin print().
        self._capturing = False
        self._parts = []

    def handle_data(self, data):
        """Update the capture state for one text node and keep it if active."""
        text = data.strip()

        if text in self._start_markers:
            self._capturing = True

        # Stop markers are checked after start markers so that a stop wins.
        if any(marker in text for marker in self._stop_markers):
            self._capturing = False

        if self._capturing:
            # Store the unstripped node, exactly as the parser delivered it.
            self._parts.append(data)

    @property
    def content(self):
        """All captured text joined in document order."""
        return ''.join(self._parts)

    def get_content(self):
        """Return the captured text (same value as ``content``)."""
        return self.content


#-------------------------------------------
# URLs to visit
#-------------------------------------------
urls = [
    'https://movies.yahoo.com.tw/movieinfo_main.html/id=5644',
]


#-------------------------------------------
# Visit each URL and save the extracted text
#-------------------------------------------
def main(targets=None, output_path='out.txt', driver_path="chromedriver.exe"):
    """Load every URL in Chrome and write the extracted text to a file.

    Args:
        targets: Iterable of URLs to visit; defaults to the module-level
            ``urls`` list.
        output_path: File that receives the extracted text (UTF-8).
        driver_path: Path to the chromedriver executable.
    """
    # Imported lazily so MyHTMLParser can be imported without Selenium.
    from selenium import webdriver

    if targets is None:
        targets = urls

    # Load the Chrome driver.
    # NOTE(review): passing the driver path positionally is the older
    # Selenium calling convention; newer releases expect a Service object.
    # Confirm against the installed Selenium version.
    driver = webdriver.Chrome(driver_path)
    try:
        # Open the output once: reopening it with 'w' for every URL would
        # overwrite the text extracted from the previous pages.
        with open(output_path, 'w', encoding='utf-8') as outfile:
            for index, url in enumerate(targets):
                driver.get(url)

                # Extract the tag-free content from the page source.
                parser = MyHTMLParser()
                parser.feed(driver.page_source)

                content = parser.get_content()
                print(content)

                # Separate pages with a newline; a single page is written
                # exactly as before.
                if index:
                    outfile.write('\n')
                outfile.write(content)
    finally:
        # Shut the browser session down even if a page failed to load.
        driver.quit()


if __name__ == "__main__":
    main()