2 changes: 2 additions & 0 deletions .idea/.gitignore

11 changes: 11 additions & 0 deletions .idea/StockAnalysisInPython.iml

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

7 changes: 7 additions & 0 deletions .idea/misc.xml

8 changes: 8 additions & 0 deletions .idea/modules.xml

7 changes: 7 additions & 0 deletions .idea/other.xml

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default.

93 changes: 93 additions & 0 deletions 01_Stock_Investment/Investar/Analyzer.py
@@ -0,0 +1,93 @@
import pandas as pd
import pymysql
from datetime import datetime
from datetime import timedelta
import re

class MarketDB:
    def __init__(self):
        """Constructor: connect to MariaDB and build the stock-code dictionary."""
        self.conn = pymysql.connect(host='localhost', user='root',
            password='snake.land.', db='INVESTAR', charset='utf8')
        self.codes = {}
        self.get_comp_info()

    def __del__(self):
        """Destructor: close the MariaDB connection."""
        self.conn.close()

    def get_comp_info(self):
        """Read the company_info table and store the codes in self.codes."""
        sql = "SELECT * FROM company_info"
        krx = pd.read_sql(sql, self.conn)
        for idx in range(len(krx)):
            self.codes[krx['code'].values[idx]] = krx['company'].values[idx]

    def get_daily_price(self, code, start_date=None, end_date=None):
        """Return the daily prices of a KRX stock as a DataFrame.
        - code       : KRX stock code ('005930') or listed company name ('삼성전자')
        - start_date : query start date ('2020-01-01'); defaults to one year before today
        - end_date   : query end date ('2020-12-31'); defaults to today
        """
        if start_date is None:
            one_year_ago = datetime.today() - timedelta(days=365)
            start_date = one_year_ago.strftime('%Y-%m-%d')
            print("start_date is initialized to '{}'".format(start_date))
        else:
            start_lst = re.split(r'\D+', start_date)  # raw string avoids an invalid-escape warning
            if start_lst[0] == '':
                start_lst = start_lst[1:]
            start_year = int(start_lst[0])
            start_month = int(start_lst[1])
            start_day = int(start_lst[2])
            if start_year < 1900 or start_year > 2200:
                print(f"ValueError: start_year({start_year:d}) is wrong.")
                return
            if start_month < 1 or start_month > 12:
                print(f"ValueError: start_month({start_month:d}) is wrong.")
                return
            if start_day < 1 or start_day > 31:
                print(f"ValueError: start_day({start_day:d}) is wrong.")
                return
            start_date = f"{start_year:04d}-{start_month:02d}-{start_day:02d}"

        if end_date is None:
            end_date = datetime.today().strftime('%Y-%m-%d')
            print("end_date is initialized to '{}'".format(end_date))
        else:
            end_lst = re.split(r'\D+', end_date)
            if end_lst[0] == '':
                end_lst = end_lst[1:]
            end_year = int(end_lst[0])
            end_month = int(end_lst[1])
            end_day = int(end_lst[2])
            if end_year < 1800 or end_year > 2200:
                print(f"ValueError: end_year({end_year:d}) is wrong.")
                return
            if end_month < 1 or end_month > 12:
                print(f"ValueError: end_month({end_month:d}) is wrong.")
                return
            if end_day < 1 or end_day > 31:
                print(f"ValueError: end_day({end_day:d}) is wrong.")
                return
            end_date = f"{end_year:04d}-{end_month:02d}-{end_day:02d}"

        codes_keys = list(self.codes.keys())
        codes_values = list(self.codes.values())

        if code in codes_keys:
            pass
        elif code in codes_values:
            # A company name was passed in; map it back to its stock code.
            idx = codes_values.index(code)
            code = codes_keys[idx]
        else:
            print(f"ValueError: Code({code}) doesn't exist.")
            return None  # bail out instead of querying with an unknown code
        sql = f"SELECT * FROM daily_price WHERE code = '{code}'"\
            f" and date >= '{start_date}' and date <= '{end_date}'"
        df = pd.read_sql(sql, self.conn)
        df.index = df['date']
        return df
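
Below is a minimal usage sketch (not part of the original diff); it assumes the INVESTAR database has already been populated by DBUpdater and that the same credentials apply:

if __name__ == '__main__':
    # Illustrative only: fetch Samsung Electronics by its listed company name.
    mk = MarketDB()
    df = mk.get_daily_price('삼성전자', '2021-01-04', '2021-06-30')
    print(df[['open', 'high', 'low', 'close', 'volume']].tail())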




170 changes: 170 additions & 0 deletions 01_Stock_Investment/Investar/DBUpdater.py
@@ -0,0 +1,170 @@

import pandas as pd
from bs4 import BeautifulSoup
import urllib, pymysql, calendar, time, json
from urllib.request import urlopen
from datetime import datetime
from threading import Timer

class DBUpdater:
    def __init__(self):
        """Constructor: connect to MariaDB and create the tables if they do not exist."""
        self.conn = pymysql.connect(host='localhost', user='root',
            password='snake.land.', db='INVESTAR', charset='utf8')

        with self.conn.cursor() as curs:
            sql = """
            CREATE TABLE IF NOT EXISTS company_info (
                code VARCHAR(20),
                company VARCHAR(40),
                last_update DATE,
                PRIMARY KEY (code))
            """
            curs.execute(sql)
            sql = """
            CREATE TABLE IF NOT EXISTS daily_price (
                code VARCHAR(20),
                date DATE,
                open BIGINT(20),
                high BIGINT(20),
                low BIGINT(20),
                close BIGINT(20),
                diff BIGINT(20),
                volume BIGINT(20),
                PRIMARY KEY (code, date))
            """
            curs.execute(sql)
        self.conn.commit()
        self.codes = dict()

    def __del__(self):
        """Destructor: close the MariaDB connection."""
        self.conn.close()

    def read_krx_code(self):
        """Download the KRX listed-company file and return it as a DataFrame."""
        url = 'http://kind.krx.co.kr/corpgeneral/corpList.do?method='\
            'download&searchType=13'
        krx = pd.read_html(url, header=0)[0]
        krx = krx[['종목코드', '회사명']]
        krx = krx.rename(columns={'종목코드': 'code', '회사명': 'company'})
        krx.code = krx.code.map('{:06d}'.format)  # zero-pad codes to six digits
        return krx

    def update_comp_info(self):
        """Update the company_info table, then cache the codes in a dictionary."""
        sql = "SELECT * FROM company_info"
        df = pd.read_sql(sql, self.conn)
        for idx in range(len(df)):
            self.codes[df['code'].values[idx]] = df['company'].values[idx]

        with self.conn.cursor() as curs:
            sql = "SELECT max(last_update) FROM company_info"
            curs.execute(sql)
            rs = curs.fetchone()
            today = datetime.today().strftime('%Y-%m-%d')
            # Refresh the listing only once per day.
            if rs[0] is None or rs[0].strftime('%Y-%m-%d') < today:
                krx = self.read_krx_code()
                for idx in range(len(krx)):
                    code = krx.code.values[idx]
                    company = krx.company.values[idx]
                    sql = f"REPLACE INTO company_info (code, company, last"\
                        f"_update) VALUES ('{code}', '{company}', '{today}')"
                    curs.execute(sql)
                    self.codes[code] = company
                    tmnow = datetime.now().strftime('%Y-%m-%d %H:%M')
                    print(f"[{tmnow}] #{idx+1:04d} REPLACE INTO company_info "\
                        f"VALUES ({code}, {company}, {today})")
                self.conn.commit()
                print('')

    def read_naver(self, code, company, pages_to_fetch):
        """Read daily prices from Naver Finance and return them as a DataFrame."""
        try:
            url = f"http://finance.naver.com/item/sise_day.nhn?code={code}"
            with urlopen(url) as doc:
                if doc is None:
                    return None
                html = BeautifulSoup(doc, "lxml")
                pgrr = html.find("td", class_="pgRR")
                if pgrr is None:
                    return None
                s = str(pgrr.a["href"]).split('=')
                lastpage = s[-1]
            df = pd.DataFrame()
            pages = min(int(lastpage), pages_to_fetch)
            for page in range(1, pages + 1):
                pg_url = '{}&page={}'.format(url, page)
                # DataFrame.append was removed in pandas 2.x; concat is the equivalent.
                df = pd.concat([df, pd.read_html(pg_url, header=0)[0]])
                tmnow = datetime.now().strftime('%Y-%m-%d %H:%M')
                print('[{}] {} ({}) : {:04d}/{:04d} pages are downloading...'.
                    format(tmnow, company, code, page, pages), end="\r")
            df = df.rename(columns={'날짜': 'date', '종가': 'close', '전일비': 'diff',
                '시가': 'open', '고가': 'high', '저가': 'low', '거래량': 'volume'})
            # Convert '2021.01.04'-style dates to '2021-01-04'.
            df['date'] = df['date'].str.replace('.', '-', regex=False)
            df = df.dropna()
            df[['close', 'diff', 'open', 'high', 'low', 'volume']] = df[['close',
                'diff', 'open', 'high', 'low', 'volume']].astype(int)
            df = df[['date', 'open', 'high', 'low', 'close', 'diff', 'volume']]
        except Exception as e:
            print('Exception occurred :', str(e))
            return None
        return df

    def replace_into_db(self, df, num, code, company):
        """REPLACE the price rows read from Naver into the daily_price table."""
        with self.conn.cursor() as curs:
            for r in df.itertuples():
                sql = f"REPLACE INTO daily_price VALUES ('{code}', "\
                    f"'{r.date}', {r.open}, {r.high}, {r.low}, {r.close}, "\
                    f"{r.diff}, {r.volume})"
                curs.execute(sql)
            self.conn.commit()
            print('[{}] #{:04d} {} ({}) : {} rows > REPLACE INTO daily_'\
                'price [OK]'.format(datetime.now().strftime('%Y-%m-%d'\
                ' %H:%M'), num+1, company, code, len(df)))

    def update_daily_price(self, pages_to_fetch):
        """Read daily prices of all KRX-listed companies from Naver and update the DB."""
        for idx, code in enumerate(self.codes):
            df = self.read_naver(code, self.codes[code], pages_to_fetch)
            if df is None:
                continue
            self.replace_into_db(df, idx, code, self.codes[code])

    def execute_daily(self):
        """Update the daily_price table immediately and then every day at 17:00."""
        self.update_comp_info()

        try:
            with open('config.json', 'r') as in_file:
                config = json.load(in_file)
                pages_to_fetch = config['pages_to_fetch']
        except FileNotFoundError:
            # First run: fetch 100 pages of history, then write pages_to_fetch=1
            # so that subsequent daily runs only fetch the most recent page.
            with open('config.json', 'w') as out_file:
                pages_to_fetch = 100
                config = {'pages_to_fetch': 1}
                json.dump(config, out_file)
        self.update_daily_price(pages_to_fetch)

        tmnow = datetime.now()
        lastday = calendar.monthrange(tmnow.year, tmnow.month)[1]
        if tmnow.month == 12 and tmnow.day == lastday:
            tmnext = tmnow.replace(year=tmnow.year+1, month=1, day=1,
                hour=17, minute=0, second=0)
        elif tmnow.day == lastday:
            tmnext = tmnow.replace(month=tmnow.month+1, day=1, hour=17,
                minute=0, second=0)
        else:
            tmnext = tmnow.replace(day=tmnow.day+1, hour=17, minute=0,
                second=0)
        tmdiff = tmnext - tmnow
        secs = tmdiff.seconds
        t = Timer(secs, self.execute_daily)
        print("Waiting for next update ({}) ... ".format(tmnext.strftime
            ('%Y-%m-%d %H:%M')))
        t.start()

if __name__ == '__main__':
    dbu = DBUpdater()
    dbu.execute_daily()
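
For reference, a hedged one-off usage sketch (not in the diff) that skips the 17:00 timer; it assumes the same MariaDB credentials plus network access to KRX and Naver. On its first timed run, execute_daily() writes config.json as {"pages_to_fetch": 1}.

dbu = DBUpdater()
dbu.update_comp_info()        # refresh company_info from KRX
dbu.update_daily_price(10)    # fetch the 10 most recent Naver pages per stock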
38 changes: 38 additions & 0 deletions 01_Stock_Investment/Investar/MarketDB.py
@@ -0,0 +1,38 @@
import pandas as pd
#from bs4 import BeautifulSoup
#import urllib
#from urllib.request import urlopen
import pymysql
#import time
#import pandas.io.sql as sql
from datetime import datetime
#from threading import Timer
#import matplotlib.pyplot as plt

class MarketDB:
    def __init__(self):
        """Constructor: connect to MariaDB and build the stock-code dictionary."""
        self.conn = pymysql.connect(host='localhost', user='root', password='snake.land.', db='INVESTAR', charset='utf8')
        self.codes = dict()
        self.getCompanyInfo()

    def __del__(self):
        """Destructor: close the MariaDB connection."""
        self.conn.close()

    def getCompanyInfo(self):
        """Read the company_info table and store the codes in self.codes."""
        sql = "SELECT * FROM company_info"
        companyInfo = pd.read_sql(sql, self.conn)
        for idx in range(len(companyInfo)):
            self.codes[companyInfo['code'].values[idx]] = companyInfo['company'].values[idx]

    def getDailyPrice(self, code, startDate, endDate):
        """Read the daily_price table and return it as a DataFrame."""
        sql = "SELECT * FROM daily_price WHERE code = '{}' and date >= '{}' and date <= '{}'".format(code, startDate, endDate)
        df = pd.read_sql(sql, self.conn)
        df.index = df['date']
        return df
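
A short usage sketch (illustrative only), assuming the tables above have already been populated:

if __name__ == '__main__':
    mk = MarketDB()
    # Unlike Analyzer.MarketDB, getDailyPrice() expects a six-digit KRX code, not a company name.
    df = mk.getDailyPrice('005930', '2021-01-04', '2021-06-30')
    print(df['close'].tail())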



2 changes: 2 additions & 0 deletions 03_NumPy_and_Pandas/ch03_02_DowKospi_Scatter.py
@@ -6,6 +6,8 @@
dow = pdr.get_data_yahoo('^DJI', '2000-01-04')
kospi = pdr.get_data_yahoo('^KS11', '2000-01-04')

dow

df = pd.DataFrame({'DOW': dow['Close'], 'KOSPI': kospi['Close']})
df = df.fillna(method='bfill')
df = df.fillna(method='ffill')
3 changes: 2 additions & 1 deletion README.md
@@ -8,4 +8,5 @@

- The original PowerPoint files for the figures in the book are in PowerPoint_Materials.pptx.

![Portpolio_optimization](./06_Trading_Strategy/imgs/Portpolio_optimization.jpg)
![Portpolio_optimization](06_Trading_Strategy/imgs/Portpolio_optimization.jpg)

3 changes: 3 additions & 0 deletions stockTest/getData.py
@@ -0,0 +1,3 @@
import pandas as pd
import numpy as np