From 745f8206e4ac41c3b6d3c5531b38e2af412e1443 Mon Sep 17 00:00:00 2001 From: cse0518 <60170616+cse0518@users.noreply.github.com> Date: Thu, 3 Dec 2020 23:45:09 +0900 Subject: [PATCH 1/2] =?UTF-8?q?=EA=B3=BC=EC=A0=9C=20commit?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crawler.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++-- today_weather.py | 27 +++++++++++++++++++++++- 2 files changed, 78 insertions(+), 3 deletions(-) diff --git a/crawler.py b/crawler.py index e2803ef..7f71ce6 100644 --- a/crawler.py +++ b/crawler.py @@ -10,6 +10,9 @@ from today_weather import ClimateHumidity from today_weather import ClimateWind from today_weather import TimeClimate +from today_weather import box_item +from today_weather import WeeklyClimate +from today_weather import Weekly def parse_now_temperature(today_weather_tag): now_temperature_tag = today_weather_tag.select_one('div.weather_area > strong') @@ -136,6 +139,53 @@ def parse_time_climate(soup): return TimeClimate(climate_rains,climate_humidities,climate_winds) +def box_today(box_color): + today_tags = box_color.select( + 'li.item > span > span > strong.ttl').text + today_twoone_tags = box_color.select( + 'li.item > span > span > strong.ttl > span.sub').text + today_rain_tags = box_color.select( + 'li.item > span > span > strong.rainfall > span.blind').text + today_rainfall_tags = box_color.select( + 'li.item > span > span > strong.rainfall').text + today_rainfall_i_tags = box_color.select( + 'li.item > span > span > i > span.blind').text + today_temp_tags = box_color.select( + 'li.item > span > span > span.data > span.blind').text + today_temp_de_tags = box_color.select( + 'li.item > span > span > span.data').text + today_temp_o_tags = box_color.select( + 'li.item > span > span > span.data > span.degree').text + return [box_item(today_tags,today_twoone_tags,today_rain_tags,today_rainfall_tags,today_rainfall_i_tags,today_temp_tags,today_temp_de_tags,today_temp_o_tags) + for today_tags,today_twoone_tags,today_rain_tags,today_rainfall_tags,today_rainfall_i_tags,today_temp_tags,today_temp_de_tags,today_temp_o_tags + in zip(today_tags,today_twoone_tags,today_rain_tags,today_rainfall_tags,today_rainfall_i_tags,today_temp_tags,today_temp_de_tags,today_temp_o_tags)] + +def scroll(scroll): + scroll_values = [] + for scroll_value_li in scroll: + cell_date = scroll_value_li.select_one('div > div.cell_date > span') + cell_weather = scroll_value_li.select_one('div > div.cell_weather > span') + cell_temperature = scroll_value_li.select_one('div > div.cell_temperature > span') + scroll_value = scroll_value_tag.text.strip() + if 'colspan' in scroll_value_li.attrs: + for i in range(int(scroll_value_li['colspan'])): + scroll_values.append(scroll_value) + else: + scroll_values.append(scroll_value) + + return [WeeklyClimate(cell_date, cell_weather, cell_temperature) for cell_date, cell_weather, cell_temperature + in zip(cell_date, cell_weather, cell_temperature)] + +def parse_weekly(soup): + weekly_tag = soup.select_one('#weekly') + + box_color = weekly_tag.select_one('ul.box_color') + scroll = weekly_tag.select_one('div.scroll_control end_left > div.scroll_area > ul.week_list') + + week_box_color = box_today(box_color) + week_scroll = scroll(scroll) + + return Weekly(week_box_color,week_scroll) def crawl(): url = 'https://weather.naver.com/' @@ -148,13 +198,13 @@ def crawl(): soup = BeautifulSoup(html,'lxml') today_weather = parse_today_weather(soup) - print(today_weather) time_climate = parse_time_climate(soup) - print(time_climate) + weekly = parse_weekly(soup) + print(weekly) if __name__ == "__main__": crawl() \ No newline at end of file diff --git a/today_weather.py b/today_weather.py index 70e9f28..37498a4 100644 --- a/today_weather.py +++ b/today_weather.py @@ -119,4 +119,29 @@ def __str__(self): f'{self.time_climate}' def add_time_climate(self,time_climate): - self.time_climate = time_climate \ No newline at end of file + self.time_climate = time_climate + +class box_item: + def __init__(self,today_tags,today_twoone_tags,today_chart_level_tags): + self.today_tags = today_tags + self.today_twoone_tags = today_twoone_tags + self.today_chart_level_tags = today_chart_level_tags + + def __str__(self): + return f'{self.today_tags}\t{self.today_twoone_tags}\t{self.today_rain_tags}\t{self.today_rainfall_tags}\t{self.today_rainfall_i_tags}' \ + f'\t{self.today_temp_tags}\t{self.today_temp_de_tags}\t{self.today_temp_o_tags}\n' + +class WeeklyClimate: + def __init__(self, cell_date, cell_weather, cell_temperature): + self.cell_date = cell_date + self.cell_weather = cell_weather + self.cell_temperature = cell_temperature + + def __str__(self): + return f'{self.cell_date}\t{self.cell_weather}\t{self.cell_temperature}\\n' + +class Weekly: + def __init__(self, cell_date, cell_weather, cell_temperature): + self.cell_date = cell_date + self.cell_weather = cell_weather + self.cell_temperature = cell_temperature \ No newline at end of file From 201f8017da005cd682e3ae61bb15dc55e1479863 Mon Sep 17 00:00:00 2001 From: cse0518 <60170616+cse0518@users.noreply.github.com> Date: Fri, 4 Dec 2020 00:00:08 +0900 Subject: [PATCH 2/2] Add files via upload --- __pycache__/today_weather.cpython-38.pyc | Bin 0 -> 6685 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 __pycache__/today_weather.cpython-38.pyc diff --git a/__pycache__/today_weather.cpython-38.pyc b/__pycache__/today_weather.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ffc3c9f69e4cc1ed778764b721ded7c2f5de52c8 GIT binary patch literal 6685 zcmbtY+m9Pp8J~;q_Ih`-yV(Rt71}~$Afeo$Bw=A&dO@-RX%#SwMviBaIC$;t89V7( zyAmQ%DWHmiRMd(tgolc*ghYf&)rbB8Jo7lOJSFS=1Cdbq{k}6}kG(rdw6S$A-0KO4jnivM?1&l;}bn%7aTSv69YYlE^unV?c` z8dMsT1uElaL1jVNpmJ^=R320cRKYERDuPObD!Egjra)ytmECDj)1b1TX53j&v!HUI z4!Com=0N2^&ASVr7C;q19ds8#ErKe#ht3+6!wu|s>}P>mLh(O^$}?)FYt$^)bgk<~ z&30`!g%&(uZYC(a*m3LY%a`h^+n@}hANyQq8O47A)dp5LY-|_~dzSP}^la%_=%u7* zZa5-iq{I}d#$dj zPmV6g=#^%t<#ioZUk_g{`MtJk`kvEmu6pscuGCvSZ`WFSbk|xkT(N_c@3k%j2fGxH z)1Z7DdE~U3es_Fa_A2AshMEN{cTX=rwephhDSu`8Qm46cW}OSHo>864UZd-;G*ta+ z%Tvd%diCxlPp#;Wgw%<(^&sas&33cvI24RuMrBw9Q~uKS=SIhNB6_z$c}73>r%~km zPNU^Wo7`%Q$KcH2~(EOOP=0gyq~ar~}w9P;QBLx$NuFeVgvT-{DnnC<$i=Qu&Z zaaKESuf=}JabE7#Tl$Hb!vboN6&Z?E{FtIJS^N~LoRurdNCU*!kK0&6c@VuT$zIR) zR#)q4Jz4Xwi`aV)LU)3&F*@|)(7wi)(j4ettgnE6c$BEN%~@N&5VYv z*=qC(n1i)dry&{n#fY zM6P5gf)s#dZa|ujC5G9oZo}}=eAY^#HFCP%E8U=o71umf@Ai~eNohPhhIN8$_?mhg z)TEe*gwLY*8B_+4kTLs*Cia;O1Wu9-Q#MIJ*vjDGQ`NgxKHtwA#Ve!>^_f& z(b+BT-q8Jy?r!iQuf?{vYt#?D`H%lia ze$WXIDAM9%cw1FiO<FAiYtWA^d{{|dC!wu@~ z)5bOPujV4mQzc#PA1NG9{uKga%$e~!l=WE@AJLm}5i9!5Iwl4^8T*G_+fi}NcP=t}>IapnaQCr-9Hje5&J zjZQQRa-~Rv)a6dIErP9Bq93)H*nx-^O?+G2%~$wQx<11!S(Z8fF+BT7ksGz#BEzWh9)Sksm}TO`DP0qBuGo- zv>h`XijGZ@`$?$~hfkrzjmCUzXG0`bvEE@W@CjC*WW~9$^+pEwNj&%#iYR@~RHXMM zqv>!b&xOu$U%Dz166PbVq{tGbvBZt@cttA@P3&MK5A`*?vM2jr$Aj;nMC_B^7yA)J z&LK?LH$=d+KqT)r6~f_Wr>$iXhsQU`N)kMtAuBPT62XJJdJu6?)1T%Vv@1XZ>11&{ zKoTX+H$0d9I$zNq^rTO}klWL@v-d zbC~2bc&5>head?o#eWo4JWAFH&Iax`9Gj`6BRO)#s>+!mO`dLdt|CGkW;jYx-J(u) z#igaomGB(t;F*FgyOi_hH zVvUw~kJOkcGOaCJ+~#cM#g_CRDV(~;uh2hokEf>!>bv-0B4FBh7};Q*36D4tC^mRB z(T{x=VT8%PKB`eFW2$E`8b;C5pAcg(Q+HjR+=NH4jtS8Yjz}&Z?O^iFY9+I0(8!El zeV^6HE=o`G^(8#;P(&?qW-ga2DvmW!#(rdM3FSd(7HzkH+9bQe%@LSjrG29rM`p>sIRgbX~Pf_zv5oCe-KC<9Q)`H7uDo+{Ztxd~H8F(QeqWNM1}ibUxW(1=43{S!&a^w>sa%iL{{JE?DbPt$n% ZRHpKLd8V8%7t5J)8l_yGDle7`{|EA-X;lCK literal 0 HcmV?d00001