2 changes: 2 additions & 0 deletions .idea/.gitignore

11 changes: 11 additions & 0 deletions .idea/StockAnalysisInPython.iml

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

7 changes: 7 additions & 0 deletions .idea/misc.xml

8 changes: 8 additions & 0 deletions .idea/modules.xml

7 changes: 7 additions & 0 deletions .idea/other.xml

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default.

93 changes: 93 additions & 0 deletions 01_Stock_Investment/Investar/Analyzer.py
@@ -0,0 +1,93 @@
import pandas as pd
import pymysql
from datetime import datetime
from datetime import timedelta
import re

class MarketDB:
    def __init__(self):
        """Constructor: connect to MariaDB and build the stock-code dictionary."""
        self.conn = pymysql.connect(host='localhost', user='root',
            password='snake.land.', db='INVESTAR', charset='utf8')
        self.codes = {}
        self.get_comp_info()

    def __del__(self):
        """Destructor: close the MariaDB connection."""
        self.conn.close()

    def get_comp_info(self):
        """Read the company_info table and store the codes in self.codes."""
        sql = "SELECT * FROM company_info"
        krx = pd.read_sql(sql, self.conn)
        for idx in range(len(krx)):
            self.codes[krx['code'].values[idx]] = krx['company'].values[idx]

    def get_daily_price(self, code, start_date=None, end_date=None):
        """Return the daily prices of a KRX stock as a DataFrame.
        - code       : KRX stock code ('005930') or listed company name ('삼성전자')
        - start_date : query start date ('2020-01-01'); defaults to one year before today
        - end_date   : query end date ('2020-12-31'); defaults to today
        """
        if start_date is None:
            one_year_ago = datetime.today() - timedelta(days=365)
            start_date = one_year_ago.strftime('%Y-%m-%d')
            print("start_date is initialized to '{}'".format(start_date))
        else:
            start_lst = re.split(r'\D+', start_date)  # raw string avoids an invalid-escape warning
            if start_lst[0] == '':
                start_lst = start_lst[1:]
            start_year = int(start_lst[0])
            start_month = int(start_lst[1])
            start_day = int(start_lst[2])
            if start_year < 1900 or start_year > 2200:
                print(f"ValueError: start_year({start_year:d}) is wrong.")
                return
            if start_month < 1 or start_month > 12:
                print(f"ValueError: start_month({start_month:d}) is wrong.")
                return
            if start_day < 1 or start_day > 31:
                print(f"ValueError: start_day({start_day:d}) is wrong.")
                return
            start_date = f"{start_year:04d}-{start_month:02d}-{start_day:02d}"

        if end_date is None:
            end_date = datetime.today().strftime('%Y-%m-%d')
            print("end_date is initialized to '{}'".format(end_date))
        else:
            end_lst = re.split(r'\D+', end_date)
            if end_lst[0] == '':
                end_lst = end_lst[1:]
            end_year = int(end_lst[0])
            end_month = int(end_lst[1])
            end_day = int(end_lst[2])
            if end_year < 1800 or end_year > 2200:
                print(f"ValueError: end_year({end_year:d}) is wrong.")
                return
            if end_month < 1 or end_month > 12:
                print(f"ValueError: end_month({end_month:d}) is wrong.")
                return
            if end_day < 1 or end_day > 31:
                print(f"ValueError: end_day({end_day:d}) is wrong.")
                return
            end_date = f"{end_year:04d}-{end_month:02d}-{end_day:02d}"

        codes_keys = list(self.codes.keys())
        codes_values = list(self.codes.values())

        if code in codes_keys:
            pass
        elif code in codes_values:
            # A company name was passed in; map it back to its stock code.
            idx = codes_values.index(code)
            code = codes_keys[idx]
        else:
            print(f"ValueError: Code({code}) doesn't exist.")
            return None  # bail out instead of querying with an unknown code
        sql = f"SELECT * FROM daily_price WHERE code = '{code}'"\
            f" and date >= '{start_date}' and date <= '{end_date}'"
        df = pd.read_sql(sql, self.conn)
        df.index = df['date']
        return df
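
Below is a minimal usage sketch (not part of the original diff); it assumes the INVESTAR database has already been populated by DBUpdater and that the same credentials apply:

if __name__ == '__main__':
    # Illustrative only: fetch Samsung Electronics by its listed company name.
    mk = MarketDB()
    df = mk.get_daily_price('삼성전자', '2021-01-04', '2021-06-30')
    print(df[['open', 'high', 'low', 'close', 'volume']].tail())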




170 changes: 170 additions & 0 deletions 01_Stock_Investment/Investar/DBUpdater.py
@@ -0,0 +1,170 @@

import pandas as pd
from bs4 import BeautifulSoup
import urllib, pymysql, calendar, time, json
from urllib.request import urlopen
from datetime import datetime
from threading import Timer

class DBUpdater:
    def __init__(self):
        """Constructor: connect to MariaDB and create the tables if they do not exist."""
        self.conn = pymysql.connect(host='localhost', user='root',
            password='snake.land.', db='INVESTAR', charset='utf8')

        with self.conn.cursor() as curs:
            sql = """
            CREATE TABLE IF NOT EXISTS company_info (
                code VARCHAR(20),
                company VARCHAR(40),
                last_update DATE,
                PRIMARY KEY (code))
            """
            curs.execute(sql)
            sql = """
            CREATE TABLE IF NOT EXISTS daily_price (
                code VARCHAR(20),
                date DATE,
                open BIGINT(20),
                high BIGINT(20),
                low BIGINT(20),
                close BIGINT(20),
                diff BIGINT(20),
                volume BIGINT(20),
                PRIMARY KEY (code, date))
            """
            curs.execute(sql)
        self.conn.commit()
        self.codes = dict()

    def __del__(self):
        """Destructor: close the MariaDB connection."""
        self.conn.close()

    def read_krx_code(self):
        """Download the KRX listed-company file and return it as a DataFrame."""
        url = 'http://kind.krx.co.kr/corpgeneral/corpList.do?method='\
            'download&searchType=13'
        krx = pd.read_html(url, header=0)[0]
        krx = krx[['종목코드', '회사명']]
        krx = krx.rename(columns={'종목코드': 'code', '회사명': 'company'})
        krx.code = krx.code.map('{:06d}'.format)  # zero-pad codes to six digits
        return krx

    def update_comp_info(self):
        """Update the company_info table, then cache the codes in a dictionary."""
        sql = "SELECT * FROM company_info"
        df = pd.read_sql(sql, self.conn)
        for idx in range(len(df)):
            self.codes[df['code'].values[idx]] = df['company'].values[idx]

        with self.conn.cursor() as curs:
            sql = "SELECT max(last_update) FROM company_info"
            curs.execute(sql)
            rs = curs.fetchone()
            today = datetime.today().strftime('%Y-%m-%d')
            # Refresh the listing only once per day.
            if rs[0] is None or rs[0].strftime('%Y-%m-%d') < today:
                krx = self.read_krx_code()
                for idx in range(len(krx)):
                    code = krx.code.values[idx]
                    company = krx.company.values[idx]
                    sql = f"REPLACE INTO company_info (code, company, last"\
                        f"_update) VALUES ('{code}', '{company}', '{today}')"
                    curs.execute(sql)
                    self.codes[code] = company
                    tmnow = datetime.now().strftime('%Y-%m-%d %H:%M')
                    print(f"[{tmnow}] #{idx+1:04d} REPLACE INTO company_info "\
                        f"VALUES ({code}, {company}, {today})")
                self.conn.commit()
                print('')

    def read_naver(self, code, company, pages_to_fetch):
        """Read daily prices from Naver Finance and return them as a DataFrame."""
        try:
            url = f"http://finance.naver.com/item/sise_day.nhn?code={code}"
            with urlopen(url) as doc:
                if doc is None:
                    return None
                html = BeautifulSoup(doc, "lxml")
                pgrr = html.find("td", class_="pgRR")
                if pgrr is None:
                    return None
                s = str(pgrr.a["href"]).split('=')
                lastpage = s[-1]
            df = pd.DataFrame()
            pages = min(int(lastpage), pages_to_fetch)
            for page in range(1, pages + 1):
                pg_url = '{}&page={}'.format(url, page)
                # DataFrame.append was removed in pandas 2.x; concat is the equivalent.
                df = pd.concat([df, pd.read_html(pg_url, header=0)[0]])
                tmnow = datetime.now().strftime('%Y-%m-%d %H:%M')
                print('[{}] {} ({}) : {:04d}/{:04d} pages are downloading...'.
                    format(tmnow, company, code, page, pages), end="\r")
            df = df.rename(columns={'날짜': 'date', '종가': 'close', '전일비': 'diff',
                '시가': 'open', '고가': 'high', '저가': 'low', '거래량': 'volume'})
            # Convert '2021.01.04'-style dates to '2021-01-04'.
            df['date'] = df['date'].str.replace('.', '-', regex=False)
            df = df.dropna()
            df[['close', 'diff', 'open', 'high', 'low', 'volume']] = df[['close',
                'diff', 'open', 'high', 'low', 'volume']].astype(int)
            df = df[['date', 'open', 'high', 'low', 'close', 'diff', 'volume']]
        except Exception as e:
            print('Exception occurred :', str(e))
            return None
        return df

    def replace_into_db(self, df, num, code, company):
        """REPLACE the price rows read from Naver into the daily_price table."""
        with self.conn.cursor() as curs:
            for r in df.itertuples():
                sql = f"REPLACE INTO daily_price VALUES ('{code}', "\
                    f"'{r.date}', {r.open}, {r.high}, {r.low}, {r.close}, "\
                    f"{r.diff}, {r.volume})"
                curs.execute(sql)
            self.conn.commit()
            print('[{}] #{:04d} {} ({}) : {} rows > REPLACE INTO daily_'\
                'price [OK]'.format(datetime.now().strftime('%Y-%m-%d'\
                ' %H:%M'), num+1, company, code, len(df)))

    def update_daily_price(self, pages_to_fetch):
        """Read daily prices of all KRX-listed companies from Naver and update the DB."""
        for idx, code in enumerate(self.codes):
            df = self.read_naver(code, self.codes[code], pages_to_fetch)
            if df is None:
                continue
            self.replace_into_db(df, idx, code, self.codes[code])

    def execute_daily(self):
        """Update the daily_price table immediately and then every day at 17:00."""
        self.update_comp_info()

        try:
            with open('config.json', 'r') as in_file:
                config = json.load(in_file)
                pages_to_fetch = config['pages_to_fetch']
        except FileNotFoundError:
            # First run: fetch 100 pages of history, then write pages_to_fetch=1
            # so that subsequent daily runs only fetch the most recent page.
            with open('config.json', 'w') as out_file:
                pages_to_fetch = 100
                config = {'pages_to_fetch': 1}
                json.dump(config, out_file)
        self.update_daily_price(pages_to_fetch)

        tmnow = datetime.now()
        lastday = calendar.monthrange(tmnow.year, tmnow.month)[1]
        if tmnow.month == 12 and tmnow.day == lastday:
            tmnext = tmnow.replace(year=tmnow.year+1, month=1, day=1,
                hour=17, minute=0, second=0)
        elif tmnow.day == lastday:
            tmnext = tmnow.replace(month=tmnow.month+1, day=1, hour=17,
                minute=0, second=0)
        else:
            tmnext = tmnow.replace(day=tmnow.day+1, hour=17, minute=0,
                second=0)
        tmdiff = tmnext - tmnow
        secs = tmdiff.seconds
        t = Timer(secs, self.execute_daily)
        print("Waiting for next update ({}) ... ".format(tmnext.strftime
            ('%Y-%m-%d %H:%M')))
        t.start()

if __name__ == '__main__':
    dbu = DBUpdater()
    dbu.execute_daily()
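
For reference, a hedged one-off usage sketch (not in the diff) that skips the 17:00 timer; it assumes the same MariaDB credentials plus network access to KRX and Naver. On its first timed run, execute_daily() writes config.json as {"pages_to_fetch": 1}.

dbu = DBUpdater()
dbu.update_comp_info()        # refresh company_info from KRX
dbu.update_daily_price(10)    # fetch the 10 most recent Naver pages per stock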
38 changes: 38 additions & 0 deletions 01_Stock_Investment/Investar/MarketDB.py
@@ -0,0 +1,38 @@
import pandas as pd
#from bs4 import BeautifulSoup
#import urllib
#from urllib.request import urlopen
import pymysql
#import time
#import pandas.io.sql as sql
from datetime import datetime
#from threading import Timer
#import matplotlib.pyplot as plt

class MarketDB:
    def __init__(self):
        """Constructor: connect to MariaDB and build the stock-code dictionary."""
        self.conn = pymysql.connect(host='localhost', user='root', password='snake.land.', db='INVESTAR', charset='utf8')
        self.codes = dict()
        self.getCompanyInfo()

    def __del__(self):
        """Destructor: close the MariaDB connection."""
        self.conn.close()

    def getCompanyInfo(self):
        """Read the company_info table and store the codes in self.codes."""
        sql = "SELECT * FROM company_info"
        companyInfo = pd.read_sql(sql, self.conn)
        for idx in range(len(companyInfo)):
            self.codes[companyInfo['code'].values[idx]] = companyInfo['company'].values[idx]

    def getDailyPrice(self, code, startDate, endDate):
        """Read the daily_price table and return it as a DataFrame."""
        sql = "SELECT * FROM daily_price WHERE code = '{}' and date >= '{}' and date <= '{}'".format(code, startDate, endDate)
        df = pd.read_sql(sql, self.conn)
        df.index = df['date']
        return df
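
A short usage sketch (illustrative only), assuming the tables above have already been populated:

if __name__ == '__main__':
    mk = MarketDB()
    # Unlike Analyzer.MarketDB, getDailyPrice() expects a six-digit KRX code, not a company name.
    df = mk.getDailyPrice('005930', '2021-01-04', '2021-06-30')
    print(df['close'].tail())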



2 changes: 2 additions & 0 deletions 03_NumPy_and_Pandas/ch03_02_DowKospi_Scatter.py
@@ -6,6 +6,8 @@
dow = pdr.get_data_yahoo('^DJI', '2000-01-04')
kospi = pdr.get_data_yahoo('^KS11', '2000-01-04')

dow

df = pd.DataFrame({'DOW': dow['Close'], 'KOSPI': kospi['Close']})
df = df.fillna(method='bfill')
df = df.fillna(method='ffill')
3 changes: 2 additions & 1 deletion README.md
@@ -8,4 +8,5 @@

- The original PowerPoint files for the figures in the book are in PowerPoint_Materials.pptx.

![Portpolio_optimization](./06_Trading_Strategy/imgs/Portpolio_optimization.jpg)
![Portpolio_optimization](06_Trading_Strategy/imgs/Portpolio_optimization.jpg)

3 changes: 3 additions & 0 deletions stockTest/getData.py
@@ -0,0 +1,3 @@
import pandas as pd
import numpy as np