From 7b017a4f3c89aee83253b1ed46b5f32f1856d8ac Mon Sep 17 00:00:00 2001
From: darrenlee <dongyup1.lee@samsung.com>
Date: Tue, 4 Aug 2020 16:11:39 +0900
Subject: [PATCH 1/5] StockAnalysis within overture

---
 .idea/.gitignore                               | 2 ++
 .idea/StockAnalysisInPython.iml                | 8 ++++++++
 .idea/inspectionProfiles/profiles_settings.xml | 6 ++++++
 .idea/misc.xml                                 | 6 ++++++
 .idea/modules.xml                              | 8 ++++++++
 .idea/vcs.xml                                  | 6 ++++++
 6 files changed, 36 insertions(+)
 create mode 100644 .idea/.gitignore
 create mode 100644 .idea/StockAnalysisInPython.iml
 create mode 100644 .idea/inspectionProfiles/profiles_settings.xml
 create mode 100644 .idea/misc.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/vcs.xml
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..5c98b42
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,2 @@
+# Default ignored files
+/workspace.xml
\ No newline at end of file
diff --git a/.idea/StockAnalysisInPython.iml b/.idea/StockAnalysisInPython.iml
new file mode 100644
index 0000000..d0876a7
--- /dev/null
+++ b/.idea/StockAnalysisInPython.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..28a804d
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="JavaScriptSettings">
+    <option name="languageLevel" value="ES6" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..3f2e96a
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/StockAnalysisInPython.iml" filepath="$PROJECT_DIR$/.idea/StockAnalysisInPython.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file

From b7197b9a21fcdc9e198b6bb5761cc9b4b780d5c9 Mon Sep 17 00:00:00 2001
From: darrenlee <dongyup1.lee@samsung.com>
Date: Thu, 10 Sep 2020 23:53:05 +0900
Subject: [PATCH 2/5] commit test code

---
 testCode/eda1.py         | 147 +++++++++++++++++++++++++++++++++++++++
 testCode/stockpriceDB.py |  20 ++++++
 2 files changed, 167 insertions(+)
 create mode 100644 testCode/eda1.py
 create mode 100644 testCode/stockpriceDB.py

diff --git a/testCode/eda1.py b/testCode/eda1.py
new file mode 100644
index 0000000..f1c653d
--- /dev/null
+++ b/testCode/eda1.py
@@ -0,0 +1,147 @@
+import pandas as pd
+import numpy as np
+import re
+from collections import Counter
+
+
+movies_col =['movie_id','movie_name','genre']
+movies = pd.read_table('/Users/darrenlee/PycharmProjects/overture/data/pfda/ml-1m/movies.dat',sep = '::',header= None,names = movies_col, engine = 'python')
+
+ratings_col =['user_id','movie_id','rating','timestamp']
+ratings = pd.read_table('/Users/darrenlee/PycharmProjects/overture/data/pfda/ml-1m/ratings.dat',sep = '::',header= None,names = ratings_col, engine = 'python')
+
+users_col = ['user_id','gender','age','occupation','zip_code']
+users = pd.read_table('/Users/darrenlee/PycharmProjects/overture/data/pfda/ml-1m/users.dat',sep = '::',header= None,names= users_col, engine = 'python')
+
+mv_rt = pd.merge(movies, ratings, on = "movie_id", how = "left")
+
+mv_rating = pd.merge(mv_rt, users, on = "user_id", how = 'left' )
+
+
+occupation_dict = {0: "other"
+,1: "academic/educator"
+,2: "artist"
+,3: "clerical/admin"
+,4: "college/grad student"
+,5: "customer service"
+,6: "doctor/health care"
+,7: "executive/managerial"
+,8: "farmer"
+,9: "homemaker"
+,10: "K-12 student"
+,11: "lawyer"
+,12: "programmer"
+,13: "retired"
+,14: "sales/marketing"
+,15: "scientist"
+,16: "self-employed"
+,17: "technician/engineer"
+,18: "tradesman/craftsman"
+,19: "unemployed"
+,20: "writer"}
+
+# occupation title merge
+occupation_df = pd.DataFrame.from_dict(occupation_dict,orient='index', columns =["occupation_title"]).reset_index()
+mv_rating = pd.merge(mv_rating, occupation_df, left_on = 'occupation' , right_on = 'index', how = 'left')
+
+
+bins = [0,18,24,34,44,55,100]
+labels = ['Under 18', '18-24', '25-34', '35-44', '45-55','56+']
+
+mv_rating['age_range'] = pd.cut(x=mv_rating['age'], bins=bins,
+                    labels= labels, include_lowest= True)
+
+mv_rating[['age','age_range']]
+
+
+#4 mv_rating의 영화 제목 컬럼에서 개봉 연도를 분리해서 추가 컬럼을 생성해 주세요.
+# mv_rating["year"]
+
+def split_it(year):
+    return re.findall('\(.*?\)', year)
+
+
+mv_rating["year"] = mv_rating.movie_name.apply(lambda x : re.findall('(?<=\()\d+', x)[0])
+
+mv_rating
+
+#5 전체 영화의 개수와 평균 평점, 평점 개수를 구해 출력해 주세요.
+
+# 전체 영화 갯수
+total_movie_num = mv_rating.movie_name.nunique()
+
+#5
+# 평균 평점
+avg_rating = mv_rating.rating.mean()
+
+# 평균 평점 갯수
+avg_rating_by_mv = mv_rating["movie_name"].value_counts().mean()
+
+print(f"총 영화 갯수 :{total_movie_num} \n 평균 평점 :{avg_rating:.2f} \n 영화 별 평균 평점 갯수: {avg_rating_by_mv:.2f}")
+
+
+#6 영화별 평균 평점/ 평점 갯수 구하고 dataframe칼럼으로 추가
+movies_info = pd.DataFrame()
+
+func_list = ["size","mean"]
+
+movies_info = mv_rating[["movie_name","rating"]].groupby(mv_rating["movie_name"]).agg(func_list)
+movies_info.columns = movies_info.columns.droplevel(0)
+movies_info.reset_index(inplace= True)
+
+movies_info = movies_info.rename(columns = {'' : "movieName","size": "numberOfReviews", "mean":"averageRating"})
+
+# 평균 평점이 가장 높은 영화 TOP10과 가장 낮은 WORST10을 구해 출력해 주세요.
+movies_info.sort_values(by = ["averageRating","numberOfReviews"], ascending = False).head(10)
+movies_info.sort_values(by = ["averageRating","numberOfReviews"], ascending = True).head(10)
+
+# User 당 평균 평점 개수를 구해 출력해 주시고, 평점을 가장 많이 남긴 User TOP10을 출력해 주세요.
+mv_rating.columns
+# 평균 평점 갯수
+mv_rating.groupby(mv_rating["user_id"]).agg("size").mean()
+# top 10 number of reviews
+mv_rating.groupby(mv_rating["user_id"]).agg("size").sort_values(ascending=False).head(10)
+
+# mv_rating의 장르 컬럼에 들어갈 수 있는 장르를 별도의 데이터 프레임으로 생성해주세요.데이터 프레임 이름은 mv_genre_list로 지정해 주세요.
+mv_genre_list = list(mv_rating["genre"].str.split("|", expand =True).unstack().unique())
+
+
+
+#10. mv_genre_list를 활용하여 각 장르에 포함되는 영화의 개수와 평균 평점, 장르별 평점 개수를 구해 출력해 주세요.
+mv_genre_df = pd.DataFrame(mv_genre_list, columns= ["genre"])
+
+# movies_info_genre = pd.merge(movies_info ,mv_rating[["movie_name","genre"]], on = "movie_name" , how = "left")
+movies_info = pd.merge(movies_info, movies, on = "movie_name", how = "inner")
+
+# 장르별 영화 갯수
+movieNumByGenre = movies_info["genre"].str.split("|", expand = True).unstack().value_counts()
+
+# 장르 별 평균 평점
+genre_unstack = mv_rating["genre"].str.split("|", expand = True)
+tmp_df = genre_unstack.rename(columns = {0:"genre1",1:"genre2",2:"genre3",3:"genre4",4:"genre5",5:"genre6"})
+tmp_df.columns
+
+mv_rating2 = pd.concat([mv_rating, tmp_df], axis = 1)
+# melt
+
+melted_rating2 = mv_rating2.melt(id_vars =['movie_id', 'movie_name', 'genre', 'user_id', 'rating', 'timestamp',
+       'gender', 'age', 'occupation', 'zip_code', 'index', 'occupation_title',
+       'age_range', 'year'], value_vars = ['genre1', 'genre2', 'genre3', 'genre4', 'genre5',
+       'genre6'] , var_name = ["genre"])
+
+
+# drop rows with None (genre columns)
+melted_rating2 = melted_rating2.dropna(subset = ['value'],axis = 0)
+
+# value counts
+mv_rating[["movie_name","rating"]].groupby(mv_rating["movie_name"]).agg(func_list)
+
+rating_by_genre = melted_rating2[["movie_name","rating"]].groupby(melted_rating2["value"]).agg(func_list)
+rating_by_genre = rating_by_genre.reset_index()
+rating_by_genre
+
+
+# 장르 별 평점 갯수
+mv_rating_df = mv_rating["genre"].str.split("|", expand = True).unstack().value_counts()
+
+
diff --git a/testCode/stockpriceDB.py b/testCode/stockpriceDB.py
new file mode 100644
index 0000000..2233231
--- /dev/null
+++ b/testCode/stockpriceDB.py
@@ -0,0 +1,20 @@
+from pandas_datareader import data as pdr
+import yfinance as yf
+
+import matplotlib.pyplot as plt
+
+yf.pdr_override()
+
+# samsung electronics
+sec = pdr.get_data_yahoo('005930.KS', start = '2018-05-04')
+# microsoft
+msft = pdr.get_data_yahoo("MSFT",start = '2018-05-04')
+amzn = pdr.get_data_yahoo("AMZN", start = '2018-05-04')
+
+plt.plot(sec.index, sec.Close, 'b', label ="Samsung Electronics")
+plt.plot(amzn.index, amzn.Close, 'r--', label ="Amazon")
+plt.plot(msft.index, msft.Close, 'g--', label = "Microsoft")
+
+plt.legend(loc = 'best')
+plt.show()
+

From c0d73c1052ced2799816a49995ef89b3f0e0e8a5 Mon Sep 17 00:00:00 2001
From: darrenlee <dongyup1.lee@samsung.com>
Date: Thu, 10 Sep 2020 23:58:45 +0900
Subject: [PATCH 3/5] test commit

---
 .idea/StockAnalysisInPython.iml                | 11 +++++++++++
 .idea/inspectionProfiles/profiles_settings.xml |  6 ++++++
 .idea/misc.xml                                 |  7 +++++++
 .idea/modules.xml                              |  8 ++++++++
 .idea/other.xml                                |  7 +++++++
 .idea/vcs.xml                                  |  6 ++++++
 README.md                                      |  1 +
 7 files changed, 46 insertions(+)
 create mode 100644 .idea/StockAnalysisInPython.iml
 create mode 100644 .idea/inspectionProfiles/profiles_settings.xml
 create mode 100644 .idea/misc.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/other.xml
 create mode 100644 .idea/vcs.xml

diff --git a/.idea/StockAnalysisInPython.iml b/.idea/StockAnalysisInPython.iml
new file mode 100644
index 0000000..78f8be5
--- /dev/null
+++ b/.idea/StockAnalysisInPython.iml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="Python 3.8 (default) (2)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="renderExternalDocumentation" value="true" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..ae7c9ee
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="JavaScriptSettings">
+    <option name="languageLevel" value="ES6" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (default) (2)" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..3f2e96a
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/StockAnalysisInPython.iml" filepath="$PROJECT_DIR$/.idea/StockAnalysisInPython.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/other.xml b/.idea/other.xml
new file mode 100644
index 0000000..640fd80
--- /dev/null
+++ b/.idea/other.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="PySciProjectComponent">
+    <option name="PY_SCI_VIEW" value="true" />
+    <option name="PY_SCI_VIEW_SUGGESTED" value="true" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/README.md b/README.md
index 3cdcd0d..7439813 100644
--- a/README.md
+++ b/README.md
@@ -9,3 +9,4 @@
 - 서적에 삽입된 그림의 PPT 원본은 PowerPoint_Materials.pptx 파일에 있습니다.
 
 ![Portpolio_optimization](./06_Trading_Strategy/imgs/Portpolio_optimization.jpg)
+

From a2b05b3de0f320b9ac2df197cc60cf6b5151bc2b Mon Sep 17 00:00:00 2001
From: darrenlee <dongyup1.lee@samsung.com>
Date: Fri, 11 Sep 2020 00:09:10 +0900
Subject: [PATCH 4/5] test commit

---
 testCode/eda1.py         | 147 ---------------------------------------
 testCode/stockpriceDB.py |  20 ------
 2 files changed, 167 deletions(-)
 delete mode 100644 testCode/eda1.py
 delete mode 100644 testCode/stockpriceDB.py

diff --git a/testCode/eda1.py b/testCode/eda1.py
deleted file mode 100644
index f1c653d..0000000
--- a/testCode/eda1.py
+++ /dev/null
@@ -1,147 +0,0 @@
-import pandas as pd
-import numpy as np
-import re
-from collections import Counter
-
-
-movies_col =['movie_id','movie_name','genre']
-movies = pd.read_table('/Users/darrenlee/PycharmProjects/overture/data/pfda/ml-1m/movies.dat',sep = '::',header= None,names = movies_col, engine = 'python')
-
-ratings_col =['user_id','movie_id','rating','timestamp']
-ratings = pd.read_table('/Users/darrenlee/PycharmProjects/overture/data/pfda/ml-1m/ratings.dat',sep = '::',header= None,names = ratings_col, engine = 'python')
-
-users_col = ['user_id','gender','age','occupation','zip_code']
-users = pd.read_table('/Users/darrenlee/PycharmProjects/overture/data/pfda/ml-1m/users.dat',sep = '::',header= None,names= users_col, engine = 'python')
-
-mv_rt = pd.merge(movies, ratings, on = "movie_id", how = "left")
-
-mv_rating = pd.merge(mv_rt, users, on = "user_id", how = 'left' )
-
-
-occupation_dict = {0: "other"
-,1: "academic/educator"
-,2: "artist"
-,3: "clerical/admin"
-,4: "college/grad student"
-,5: "customer service"
-,6: "doctor/health care"
-,7: "executive/managerial"
-,8: "farmer"
-,9: "homemaker"
-,10: "K-12 student"
-,11: "lawyer"
-,12: "programmer"
-,13: "retired"
-,14: "sales/marketing"
-,15: "scientist"
-,16: "self-employed"
-,17: "technician/engineer"
-,18: "tradesman/craftsman"
-,19: "unemployed"
-,20: "writer"}
-
-# occupation title merge
-occupation_df = pd.DataFrame.from_dict(occupation_dict,orient='index', columns =["occupation_title"]).reset_index()
-mv_rating = pd.merge(mv_rating, occupation_df, left_on = 'occupation' , right_on = 'index', how = 'left')
-
-
-bins = [0,18,24,34,44,55,100]
-labels = ['Under 18', '18-24', '25-34', '35-44', '45-55','56+']
-
-mv_rating['age_range'] = pd.cut(x=mv_rating['age'], bins=bins,
-                    labels= labels, include_lowest= True)
-
-mv_rating[['age','age_range']]
-
-
-#4 mv_rating의 영화 제목 컬럼에서 개봉 연도를 분리해서 추가 컬럼을 생성해 주세요.
-# mv_rating["year"]
-
-def split_it(year):
-    return re.findall('\(.*?\)', year)
-
-
-mv_rating["year"] = mv_rating.movie_name.apply(lambda x : re.findall('(?<=\()\d+', x)[0])
-
-mv_rating
-
-#5 전체 영화의 개수와 평균 평점, 평점 개수를 구해 출력해 주세요.
-
-# 전체 영화 갯수
-total_movie_num = mv_rating.movie_name.nunique()
-
-#5
-# 평균 평점
-avg_rating = mv_rating.rating.mean()
-
-# 평균 평점 갯수
-avg_rating_by_mv = mv_rating["movie_name"].value_counts().mean()
-
-print(f"총 영화 갯수 :{total_movie_num} \n 평균 평점 :{avg_rating:.2f} \n 영화 별 평균 평점 갯수: {avg_rating_by_mv:.2f}")
-
-
-#6 영화별 평균 평점/ 평점 갯수 구하고 dataframe칼럼으로 추가
-movies_info = pd.DataFrame()
-
-func_list = ["size","mean"]
-
-movies_info = mv_rating[["movie_name","rating"]].groupby(mv_rating["movie_name"]).agg(func_list)
-movies_info.columns = movies_info.columns.droplevel(0)
-movies_info.reset_index(inplace= True)
-
-movies_info = movies_info.rename(columns = {'' : "movieName","size": "numberOfReviews", "mean":"averageRating"})
-
-# 평균 평점이 가장 높은 영화 TOP10과 가장 낮은 WORST10을 구해 출력해 주세요.
-movies_info.sort_values(by = ["averageRating","numberOfReviews"], ascending = False).head(10)
-movies_info.sort_values(by = ["averageRating","numberOfReviews"], ascending = True).head(10)
-
-# User 당 평균 평점 개수를 구해 출력해 주시고, 평점을 가장 많이 남긴 User TOP10을 출력해 주세요.
-mv_rating.columns
-# 평균 평점 갯수
-mv_rating.groupby(mv_rating["user_id"]).agg("size").mean()
-# top 10 number of reviews
-mv_rating.groupby(mv_rating["user_id"]).agg("size").sort_values(ascending=False).head(10)
-
-# mv_rating의 장르 컬럼에 들어갈 수 있는 장르를 별도의 데이터 프레임으로 생성해주세요.데이터 프레임 이름은 mv_genre_list로 지정해 주세요.
-mv_genre_list = list(mv_rating["genre"].str.split("|", expand =True).unstack().unique())
-
-
-
-#10. mv_genre_list를 활용하여 각 장르에 포함되는 영화의 개수와 평균 평점, 장르별 평점 개수를 구해 출력해 주세요.
-mv_genre_df = pd.DataFrame(mv_genre_list, columns= ["genre"])
-
-# movies_info_genre = pd.merge(movies_info ,mv_rating[["movie_name","genre"]], on = "movie_name" , how = "left")
-movies_info = pd.merge(movies_info, movies, on = "movie_name", how = "inner")
-
-# 장르별 영화 갯수
-movieNumByGenre = movies_info["genre"].str.split("|", expand = True).unstack().value_counts()
-
-# 장르 별 평균 평점
-genre_unstack = mv_rating["genre"].str.split("|", expand = True)
-tmp_df = genre_unstack.rename(columns = {0:"genre1",1:"genre2",2:"genre3",3:"genre4",4:"genre5",5:"genre6"})
-tmp_df.columns
-
-mv_rating2 = pd.concat([mv_rating, tmp_df], axis = 1)
-# melt
-
-melted_rating2 = mv_rating2.melt(id_vars =['movie_id', 'movie_name', 'genre', 'user_id', 'rating', 'timestamp',
-       'gender', 'age', 'occupation', 'zip_code', 'index', 'occupation_title',
-       'age_range', 'year'], value_vars = ['genre1', 'genre2', 'genre3', 'genre4', 'genre5',
-       'genre6'] , var_name = ["genre"])
-
-
-# drop rows with None (genre columns)
-melted_rating2 = melted_rating2.dropna(subset = ['value'],axis = 0)
-
-# value counts
-mv_rating[["movie_name","rating"]].groupby(mv_rating["movie_name"]).agg(func_list)
-
-rating_by_genre = melted_rating2[["movie_name","rating"]].groupby(melted_rating2["value"]).agg(func_list)
-rating_by_genre = rating_by_genre.reset_index()
-rating_by_genre
-
-
-# 장르 별 평점 갯수
-mv_rating_df = mv_rating["genre"].str.split("|", expand = True).unstack().value_counts()
-
-
diff --git a/testCode/stockpriceDB.py b/testCode/stockpriceDB.py
deleted file mode 100644
index 2233231..0000000
--- a/testCode/stockpriceDB.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from pandas_datareader import data as pdr
-import yfinance as yf
-
-import matplotlib.pyplot as plt
-
-yf.pdr_override()
-
-# samsung electronics
-sec = pdr.get_data_yahoo('005930.KS', start = '2018-05-04')
-# microsoft
-msft = pdr.get_data_yahoo("MSFT",start = '2018-05-04')
-amzn = pdr.get_data_yahoo("AMZN", start = '2018-05-04')
-
-plt.plot(sec.index, sec.Close, 'b', label ="Samsung Electronics")
-plt.plot(amzn.index, amzn.Close, 'r--', label ="Amazon")
-plt.plot(msft.index, msft.Close, 'g--', label = "Microsoft")
-
-plt.legend(loc = 'best')
-plt.show()
-

From a53d132f6222c8b1d232536a7035771cbc3c0e85 Mon Sep 17 00:00:00 2001
From: darrenlee <dongyup1.lee@samsung.com>
Date: Sun, 20 Sep 2020 17:52:23 +0900
Subject: [PATCH 5/5] sync 13inch

---
 01_Stock_Investment/Investar/Analyzer.py      |  93 ++++++++++
 01_Stock_Investment/Investar/DBUpdater.py     | 170 ++++++++++++++++++
 01_Stock_Investment/Investar/MarketDB.py      |  38 ++++
 .../ch03_02_DowKospi_Scatter.py               |   2 +
 README.md                                     |   2 +-
 stockTest/getData.py                          |   3 +
 6 files changed, 307 insertions(+), 1 deletion(-)
 create mode 100644 01_Stock_Investment/Investar/Analyzer.py
 create mode 100644 01_Stock_Investment/Investar/DBUpdater.py
 create mode 100644 01_Stock_Investment/Investar/MarketDB.py
 create mode 100644 stockTest/getData.py

diff --git a/01_Stock_Investment/Investar/Analyzer.py b/01_Stock_Investment/Investar/Analyzer.py
new file mode 100644
index 0000000..1aa7ef1
--- /dev/null
+++ b/01_Stock_Investment/Investar/Analyzer.py
@@ -0,0 +1,106 @@
+import pandas as pd
+import pymysql
+from datetime import datetime
+from datetime import timedelta
+import re
+
+
+class MarketDB:
+    """Read-only accessor for the INVESTAR MariaDB stock database."""
+
+    def __init__(self):
+        """Connect to MariaDB and build the code -> company dictionary."""
+        # NOTE(review): credentials are hard-coded; consider loading them
+        # from configuration or the environment instead.
+        self.conn = pymysql.connect(host='localhost', user='root',
+            password='snake.land.', db='INVESTAR', charset='utf8')
+        self.codes = {}
+        self.get_comp_info()
+
+    def __del__(self):
+        """Close the MariaDB connection, if one was opened."""
+        # __init__ may have failed before self.conn was assigned.
+        conn = getattr(self, 'conn', None)
+        if conn is not None:
+            conn.close()
+
+    def get_comp_info(self):
+        """Load the company_info table into self.codes (code -> company)."""
+        sql = "SELECT * FROM company_info"
+        krx = pd.read_sql(sql, self.conn)
+        self.codes.update(zip(krx['code'].values, krx['company'].values))
+
+    @staticmethod
+    def _normalize_date(date_str, label, min_year):
+        """Return date_str as 'YYYY-MM-DD', or None after printing an error.
+
+        - date_str : date whose year, month and day are separated by any
+                     non-digit characters ('2020-01-01', '2020.1.1', ...)
+        - label    : 'start' or 'end', used only in the error messages
+        - min_year : smallest accepted year (the largest is 2200)
+        """
+        parts = [p for p in re.split(r'\D+', date_str) if p]
+        if len(parts) < 3:
+            print(f"ValueError: {label}_date({date_str}) is wrong.")
+            return None
+        year, month, day = (int(p) for p in parts[:3])
+        if year < min_year or year > 2200:
+            print(f"ValueError: {label}_year({year:d}) is wrong.")
+            return None
+        if month < 1 or month > 12:
+            print(f"ValueError: {label}_month({month:d}) is wrong.")
+            return None
+        if day < 1 or day > 31:
+            print(f"ValueError: {label}_day({day:d}) is wrong.")
+            return None
+        return f"{year:04d}-{month:02d}-{day:02d}"
+
+    def get_daily_price(self, code, start_date=None, end_date=None):
+        """Return the daily prices of a KRX stock as a DataFrame.
+
+        - code       : KRX stock code ('005930') or listed company name
+        - start_date : first day to query ('2020-01-01'); defaults to one
+                       year before today
+        - end_date   : last day to query ('2020-12-31'); defaults to today
+
+        Returns a DataFrame indexed by date, or None (after printing a
+        message) when a date or the code is invalid.
+        """
+        if start_date is None:
+            one_year_ago = datetime.today() - timedelta(days=365)
+            start_date = one_year_ago.strftime('%Y-%m-%d')
+            print("start_date is initialized to '{}'".format(start_date))
+        else:
+            start_date = self._normalize_date(start_date, 'start', 1900)
+            if start_date is None:
+                return None
+
+        if end_date is None:
+            end_date = datetime.today().strftime('%Y-%m-%d')
+            print("end_date is initialized to '{}'".format(end_date))
+        else:
+            # NOTE(review): start dates accept years from 1900 but end
+            # dates from 1800; confirm whether the difference is intended.
+            end_date = self._normalize_date(end_date, 'end', 1800)
+            if end_date is None:
+                return None
+
+        if code not in self.codes:
+            # Not a stock code: treat it as a company name and use the
+            # first code registered under that name.
+            for known_code, company in self.codes.items():
+                if company == code:
+                    code = known_code
+                    break
+            else:
+                print(f"ValueError: Code({code}) doesn't exist.")
+                # Do not query the database with an unknown code.
+                return None
+
+        # Parameterized query: the driver escapes the values instead of
+        # them being spliced into the SQL text.
+        sql = "SELECT * FROM daily_price WHERE code = %s"\
+            " and date >= %s and date <= %s"
+        df = pd.read_sql(sql, self.conn, params=(code, start_date, end_date))
+        df.index = df['date']
+        return df
diff --git a/01_Stock_Investment/Investar/DBUpdater.py b/01_Stock_Investment/Investar/DBUpdater.py
new file mode 100644
index 0000000..3e1e204
--- /dev/null
+++ b/01_Stock_Investment/Investar/DBUpdater.py
@@ -0,0 +1,208 @@
+
+import pandas as pd
+from bs4 import BeautifulSoup
+import urllib, pymysql, calendar, time, json
+from urllib.request import urlopen
+from datetime import datetime
+from threading import Timer
+
+class DBUpdater:
+    """Keep the INVESTAR MariaDB tables in sync with KRX and Naver Finance."""
+
+    def __init__(self):
+        """Connect to MariaDB and create the tables if they are missing."""
+        # NOTE(review): credentials are hard-coded; consider loading them
+        # from configuration or the environment instead.
+        self.conn = pymysql.connect(host='localhost', user='root',
+            password='snake.land.', db='INVESTAR', charset='utf8')
+
+        with self.conn.cursor() as curs:
+            sql = """
+            CREATE TABLE IF NOT EXISTS company_info (
+                code VARCHAR(20),
+                company VARCHAR(40),
+                last_update DATE,
+                PRIMARY KEY (code))
+            """
+            curs.execute(sql)
+            sql = """
+            CREATE TABLE IF NOT EXISTS daily_price (
+                code VARCHAR(20),
+                date DATE,
+                open BIGINT(20),
+                high BIGINT(20),
+                low BIGINT(20),
+                close BIGINT(20),
+                diff BIGINT(20),
+                volume BIGINT(20),
+                PRIMARY KEY (code, date))
+            """
+            curs.execute(sql)
+        self.conn.commit()
+        self.codes = dict()
+
+    def __del__(self):
+        """Close the MariaDB connection, if one was opened."""
+        # __init__ may have failed before self.conn was assigned.
+        conn = getattr(self, 'conn', None)
+        if conn is not None:
+            conn.close()
+
+    def read_krx_code(self):
+        """Download the KRX list of listed companies as a DataFrame.
+
+        Returns a DataFrame with the columns 'code' (zero-padded to six
+        digits) and 'company'.
+        """
+        url = 'http://kind.krx.co.kr/corpgeneral/corpList.do?method='\
+            'download&searchType=13'
+        krx = pd.read_html(url, header=0)[0]
+        krx = krx[['종목코드', '회사명']]
+        krx = krx.rename(columns={'종목코드': 'code', '회사명': 'company'})
+        krx.code = krx.code.map('{:06d}'.format)
+        return krx
+
+    def update_comp_info(self):
+        """Load company_info into self.codes, refreshing it once a day.
+
+        The table is refreshed from KRX only when its newest last_update
+        is missing or older than today.
+        """
+        sql = "SELECT * FROM company_info"
+        df = pd.read_sql(sql, self.conn)
+        self.codes.update(zip(df['code'].values, df['company'].values))
+
+        with self.conn.cursor() as curs:
+            sql = "SELECT max(last_update) FROM company_info"
+            curs.execute(sql)
+            rs = curs.fetchone()
+            today = datetime.today().strftime('%Y-%m-%d')
+            if rs[0] is None or rs[0].strftime('%Y-%m-%d') < today:
+                krx = self.read_krx_code()
+                # Parameterized so that a company name containing a quote
+                # cannot break, or inject into, the statement.
+                sql = "REPLACE INTO company_info (code, company, last"\
+                    "_update) VALUES (%s, %s, %s)"
+                pairs = zip(krx.code.values, krx.company.values)
+                for idx, (code, company) in enumerate(pairs):
+                    curs.execute(sql, (code, company, today))
+                    self.codes[code] = company
+                    tmnow = datetime.now().strftime('%Y-%m-%d %H:%M')
+                    print(f"[{tmnow}] #{idx+1:04d} REPLACE INTO company_info "\
+                        f"VALUES ({code}, {company}, {today})")
+                self.conn.commit()
+                print('')
+
+    def read_naver(self, code, company, pages_to_fetch):
+        """Read the daily prices of one stock from Naver Finance.
+
+        - code           : KRX stock code
+        - company        : company name, used only in the progress output
+        - pages_to_fetch : maximum number of result pages to download
+
+        Returns a DataFrame with the columns date, open, high, low, close,
+        diff and volume, or None when the data cannot be read or parsed.
+        """
+        try:
+            url = f"http://finance.naver.com/item/sise_day.nhn?code={code}"
+            with urlopen(url) as doc:
+                if doc is None:
+                    return None
+                html = BeautifulSoup(doc, "lxml")
+                pgrr = html.find("td", class_="pgRR")
+                if pgrr is None:
+                    return None
+                s = str(pgrr.a["href"]).split('=')
+                lastpage = s[-1]
+            pages = min(int(lastpage), pages_to_fetch)
+            frames = []
+            for page in range(1, pages + 1):
+                pg_url = '{}&page={}'.format(url, page)
+                frames.append(pd.read_html(pg_url, header=0)[0])
+                tmnow = datetime.now().strftime('%Y-%m-%d %H:%M')
+                print('[{}] {} ({}) : {:04d}/{:04d} pages are downloading...'.
+                    format(tmnow, company, code, page, pages), end="\r")
+            # DataFrame.append was removed in pandas 2.0; concatenate once.
+            df = pd.concat(frames) if frames else pd.DataFrame()
+            df = df.rename(columns={'날짜':'date','종가':'close','전일비':'diff'
+                ,'시가':'open','고가':'high','저가':'low','거래량':'volume'})
+            df = df.dropna()
+            # Series.replace only matches whole cell values, so use
+            # .str.replace to turn '2020.09.18' into '2020-09-18'.
+            df['date'] = df['date'].astype(str).str.replace(
+                '.', '-', regex=False)
+            df[['close', 'diff', 'open', 'high', 'low', 'volume']] = df[['close',
+                'diff', 'open', 'high', 'low', 'volume']].astype(int)
+            df = df[['date', 'open', 'high', 'low', 'close', 'diff', 'volume']]
+        except Exception as e:
+            print('Exception occured :', str(e))
+            return None
+        return df
+
+    def replace_into_db(self, df, num, code, company):
+        """REPLACE the prices read from Naver into the daily_price table.
+
+        - df      : DataFrame as returned by read_naver
+        - num     : zero-based position of the stock, shown in the log line
+        - code    : KRX stock code
+        - company : company name, shown in the log line
+        """
+        sql = "REPLACE INTO daily_price VALUES (%s, %s, %s, %s, %s, %s, "\
+            "%s, %s)"
+        with self.conn.cursor() as curs:
+            for r in df.itertuples():
+                curs.execute(sql, (code, r.date, int(r.open), int(r.high),
+                    int(r.low), int(r.close), int(r.diff), int(r.volume)))
+            self.conn.commit()
+            print('[{}] #{:04d} {} ({}) : {} rows > REPLACE INTO daily_'\
+                'price [OK]'.format(datetime.now().strftime('%Y-%m-%d'\
+                ' %H:%M'), num+1, company, code, len(df)))
+
+    def update_daily_price(self, pages_to_fetch):
+        """Read prices for every known stock from Naver and store them."""
+        for idx, code in enumerate(self.codes):
+            df = self.read_naver(code, self.codes[code], pages_to_fetch)
+            if df is None:
+                continue
+            self.replace_into_db(df, idx, code, self.codes[code])
+
+    def execute_daily(self):
+        """Update the tables now and schedule the next run with a Timer."""
+        self.update_comp_info()
+
+        try:
+            with open('config.json', 'r') as in_file:
+                config = json.load(in_file)
+                pages_to_fetch = config['pages_to_fetch']
+        except FileNotFoundError:
+            # No config yet: fetch 100 pages on this run, but store 1 so
+            # that runs which find the file fetch a single page.
+            with open('config.json', 'w') as out_file:
+                pages_to_fetch = 100
+                config = {'pages_to_fetch': 1}
+                json.dump(config, out_file)
+        self.update_daily_price(pages_to_fetch)
+
+        tmnow = datetime.now()
+        lastday = calendar.monthrange(tmnow.year, tmnow.month)[1]
+        if tmnow.month == 12 and tmnow.day == lastday:
+            tmnext = tmnow.replace(year=tmnow.year+1, month=1, day=1,
+                hour=17, minute=0, second=0)
+        elif tmnow.day == lastday:
+            tmnext = tmnow.replace(month=tmnow.month+1, day=1, hour=17,
+                minute=0, second=0)
+        else:
+            tmnext = tmnow.replace(day=tmnow.day+1, hour=17, minute=0,
+                second=0)
+        tmdiff = tmnext - tmnow
+        # NOTE(review): timedelta.seconds drops whole days, so before 17:00
+        # the timer fires today although tmnext (printed below) is tomorrow.
+        secs = tmdiff.seconds
+        t = Timer(secs, self.execute_daily)
+        print("Waiting for next update ({}) ... ".format(tmnext.strftime
+            ('%Y-%m-%d %H:%M')))
+        t.start()
+
+if __name__ == '__main__':
+    dbu = DBUpdater()
+    dbu.execute_daily()
diff --git a/01_Stock_Investment/Investar/MarketDB.py b/01_Stock_Investment/Investar/MarketDB.py
new file mode 100644
index 0000000..51966df
--- /dev/null
+++ b/01_Stock_Investment/Investar/MarketDB.py
@@ -0,0 +1,52 @@
+import pandas as pd
+#from bs4 import BeautifulSoup
+#import urllib
+#from urllib.request import urlopen
+import pymysql
+#import time
+#import pandas.io.sql as sql
+from datetime import datetime
+#from threading import Timer
+#import matplotlib.pyplot as plt
+
+class MarketDB:
+    """Read-only accessor for the INVESTAR MariaDB stock database."""
+
+    def __init__(self):
+        """Connect to MariaDB and build the code -> company dictionary."""
+        # NOTE(review): credentials are hard-coded; consider loading them
+        # from configuration or the environment instead.
+        self.conn = pymysql.connect(host='localhost', user='root', password='snake.land.', db='INVESTAR', charset='utf8')
+        self.codes = dict()
+        self.getCompanyInfo()
+
+    def __del__(self):
+        """Close the MariaDB connection, if one was opened."""
+        # __init__ may have failed before self.conn was assigned.
+        conn = getattr(self, 'conn', None)
+        if conn is not None:
+            conn.close()
+
+    def getCompanyInfo(self):
+        """Load the company_info table into self.codes (code -> company)."""
+        sql = "SELECT * FROM company_info"
+        companyInfo = pd.read_sql(sql, self.conn)
+        self.codes.update(
+            zip(companyInfo['code'].values, companyInfo['company'].values))
+
+    def getDailyPrice(self, code, startDate, endDate):
+        """Return daily_price rows for one stock as a DataFrame.
+
+        - code      : stock code to match against daily_price.code
+        - startDate : first date to include (inclusive)
+        - endDate   : last date to include (inclusive)
+
+        The result is indexed by its 'date' column.
+        """
+        # Parameterized query: the driver escapes the caller-supplied
+        # values instead of them being formatted into the SQL text.
+        sql = "SELECT * FROM daily_price WHERE code = %s"\
+            " and date >= %s and date <= %s"
+        df = pd.read_sql(sql, self.conn, params=(code, startDate, endDate))
+        df.index = df['date']
+        return df
diff --git a/03_NumPy_and_Pandas/ch03_02_DowKospi_Scatter.py b/03_NumPy_and_Pandas/ch03_02_DowKospi_Scatter.py
index 4a733bd..5511f63 100644
--- a/03_NumPy_and_Pandas/ch03_02_DowKospi_Scatter.py
+++ b/03_NumPy_and_Pandas/ch03_02_DowKospi_Scatter.py
@@ -6,6 +6,8 @@
 dow = pdr.get_data_yahoo('^DJI', '2000-01-04')
 kospi = pdr.get_data_yahoo('^KS11', '2000-01-04')
 
+dow
+
 df = pd.DataFrame({'DOW' dow['Close'], 'KOSPI' kospi['Close']})
 df = df.fillna(method='bfill')
 df = df.fillna(method='ffill')
diff --git a/README.md b/README.md
index 7439813..4660ec9 100644
--- a/README.md
+++ b/README.md
@@ -8,5 +8,5 @@
 
 - 서적에 삽입된 그림의 PPT 원본은 PowerPoint_Materials.pptx 파일에 있습니다.
 
-![Portpolio_optimization](./06_Trading_Strategy/imgs/Portpolio_optimization.jpg)
+![Portpolio_optimization](06_Trading_Strategy/imgs/Portpolio_optimization.jpg)
 
diff --git a/stockTest/getData.py b/stockTest/getData.py
new file mode 100644
index 0000000..5f27803
--- /dev/null
+++ b/stockTest/getData.py
@@ -0,0 +1,3 @@
+import pandas as pd
+import numpy as np
+