From db12165c8b1ebed4774da1f2a5d6ea1a303a9bb6 Mon Sep 17 00:00:00 2001
From: brandonkw123 <54588120+brandonkw123@users.noreply.github.com>
Date: Sat, 5 Sep 2020 18:21:39 -0400
Subject: [PATCH 1/2] Add files via upload

---
 .../KMeansClustering.py                       | 130 ++++++++++++++
 .../KNearestNeighborClassifier.py             |  67 ++++++++
 .../NearestNeighborClassifier.py              | 112 ++++++++++++
 .../README.md                                 |  97 +++++++++++
 .../KNearestNeighborClassifier.cpython-37.pyc | Bin 0 -> 1500 bytes
 .../NearestNeighborClassifier.cpython-37.pyc  | Bin 0 -> 2756 bytes
 .../NearestNeighborClassifier.cpython-38.pyc  | Bin 0 -> 2761 bytes
 .../ckd.csv                                   | 159 ++++++++++++++++++
 8 files changed, 565 insertions(+)
 create mode 100644 Summer-2020-Data-Analysis-Project-master/KMeansClustering.py
 create mode 100644 Summer-2020-Data-Analysis-Project-master/KNearestNeighborClassifier.py
 create mode 100644 Summer-2020-Data-Analysis-Project-master/NearestNeighborClassifier.py
 create mode 100644 Summer-2020-Data-Analysis-Project-master/README.md
 create mode 100644 Summer-2020-Data-Analysis-Project-master/__pycache__/KNearestNeighborClassifier.cpython-37.pyc
 create mode 100644 Summer-2020-Data-Analysis-Project-master/__pycache__/NearestNeighborClassifier.cpython-37.pyc
 create mode 100644 Summer-2020-Data-Analysis-Project-master/__pycache__/NearestNeighborClassifier.cpython-38.pyc
 create mode 100644 Summer-2020-Data-Analysis-Project-master/ckd.csv

diff --git a/Summer-2020-Data-Analysis-Project-master/KMeansClustering.py b/Summer-2020-Data-Analysis-Project-master/KMeansClustering.py
new file mode 100644
index 0000000..31e4484
--- /dev/null
+++ b/Summer-2020-Data-Analysis-Project-master/KMeansClustering.py
@@ -0,0 +1,130 @@
+# =============================================================================
+# KMeansClustering.py
+# Name: Alycia Wong and Brandon Wong
+# Date: June 2020
+# Description: Process and graph a CSV file containing biomedical data that 
+# relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
+# Randomly generate up to 10 centroids without issue. Each centroid will have a
+# classification. The nearest centroid to a point will determine the point's 
+# classicfication (decide what to do if the distances are equal yourself).
+# Create random test cases until centroids stop mocing and determine whether 
+# each case is likely to have CKD depending on the classification of the
+# nearest centroid.
+# Bonus: Create lines roughly separating each centroid group
+# =============================================================================
+
+# =============================================================================
+# Import statements
+# =============================================================================
+import matplotlib.pyplot as plt
+import numpy as np
+import NearestNeighborClassifier as NNC
+from scipy.spatial import KDTree as kdt
+
+# =============================================================================
+# Functions
+# =============================================================================
+# randomCentroids function takes in an integer number of clusters to be
+# generated. 
+# OR asks for k number of integer clusters
+# Outputs a 2D array filled with random values between 0-1. The 
+# first column represents glucose and the second column represents hemoglobin.
+# There are k number of rows representing the number of centroids and the
+# classification of each centroid (i.e.: row index = classification value).
+# OR you can have a third column with the classification value.
+def randomCentroids(k):
+    return np.random.rand(k,2)
+
+# assignCentroids function takes in an array of normalized x (hemoglobin) and y 
+# (glucose) values from the CSV file and the randomly generated array of 
+# centroids from randomCentroids. Using the findDistance function from 
+# NearestNeighborClassifier, points are assigned the same classification as the 
+# nearest centroid. A 2D array of the normalized data and its classification 
+# are returned.
+def assignToCentroids(normArr, centArr):
+    return kdt(centArr).query(normArr)[1]
+# print(assignToCentroids(NNC.normalizeData(NNC.openCSVFile('ckd.csv')).paras, np.array([[.5, .5],[.25,.25]])))
+
+# updateCentroids function inputs the 2D array of centroid locations and of 
+# classified and normalized CSV data. The average x (hemo) and y (gluc) 
+# positions of all data points for each classifications are found and an 
+# updated 2D array with these average cartesian points as the location for the
+# new centroids is returned along with the original cartesian points. 
+#avg of all 1s will be new cent, avg of all 0s will be new cent
+
+def updateCentroids(centArr, classArr, normArr):
+    upCentArr = centArr.copy()
+    for i in range(len(centArr[:,0])):
+        upCentArr[i,0] = np.mean(normArr.gluc[classArr==i])
+        upCentArr[i,1] = np.mean(normArr.hemo[classArr==i])
+    return upCentArr
+# centArr = np.array([[0.5, 0.5], [.25, .25]])
+# print(updateCentroids(
+#     centArr, assignToCentroids(
+#         NNC.normalizeData(NNC.openCSVFile('ckd.csv')).paras, centArr),
+#         NNC.normalizeData(NNC.openCSVFile('ckd.csv'))
+#     ))
+# print(centArr)
+
+# iterate void function can either
+# a) input information and iterate the original information until centArr ~ 
+#     upCentArr
+def iterate(normArr, centArr):
+    # classArr = np.zeros(len(normArr.gluc))
+    classArr = assignToCentroids(normArr, centArr)
+    upCentArr = updateCentroids(centArr, classArr, normArr)
+    # print(classArr)
+    if (upCentArr != centArr).any():
+        centArr = upCentArr
+        return iterate(normArr, centArr)
+    return centArr
+print(iterate(
+    NNC.normalizeData(NNC.openCSVFile('ckd.csv')), np.array([[.5, .5],[.25,.25]])
+    ))
+
+# graphClusters void function takes in a 1D and a 2D numpy array to graph. The
+# 1D array of centroid locations and classifactions have distinct points on the 
+# graph. The 2D array graphs points of normalized CSV data and colors them the
+# same color as their corresponding centroids. A legend is generated in a
+# reasonable position.
+# Bonus: Create lines roughly separating each centroid group
+def graphClusters():
+    
+    return
+
+# dataAnalysis void function takes in the original parsed CSV classifications 
+# and the final classifications of the data based on K-means clustering (use of
+# centroids) and compares the two to find false/true positives/negatives.
+# Note: This should only run when there are two centroids (i.e.: k = 2)
+# False positive: Percentage of non-CKD were incorrectly labelled by K-Means as
+# being in the CKD cluster
+# True positive (sensitivity): Percentage of CKD patients were correctly 
+# labeled by K-Means 
+# False negative: Percentage of non-CKD were incorrectly labelled by K-Means as
+# being in the CKD cluster
+# True negative (specificity): Percentage of non-CKD patients were correctly 
+# labelled by K-Means 
+# Note: True positive (~93 %) + False positive (~7%) = 100%
+# Note: True Negative (~100%) + False negative (~0%) = 100%
+def dataAnalysis():
+    return 
+# =============================================================================
+# Main Script
+# =============================================================================
+# mainDriver function takes in nothing and graphs both the orginial CSV file,
+# the k number of nearest neighbors, and the test case. This function returns 
+# 0.
+def mainDriver():
+    # Open the CSV file using the parsing method from 
+    # NearestNeighborClassifier. No input, outputs 2D numpy array.
+    NNC.openCSVFile
+    
+    # Normalize data using method from NearestNeighborClassifier. Input and
+    # outputs a 2D numpy array
+    NNC.normalizeData()
+    
+    # Graph CSV file using method from NearestNeighborClassifier. Input 2D 
+    # numpy array. Void function.
+    NNC.graphCSVFile()
+    
+    return 0
\ No newline at end of file
diff --git a/Summer-2020-Data-Analysis-Project-master/KNearestNeighborClassifier.py b/Summer-2020-Data-Analysis-Project-master/KNearestNeighborClassifier.py
new file mode 100644
index 0000000..5f53890
--- /dev/null
+++ b/Summer-2020-Data-Analysis-Project-master/KNearestNeighborClassifier.py
@@ -0,0 +1,67 @@
+# =============================================================================
+# KNearestNeighborClassifier.py
+# Name: Alycia Wong and Brandon Wong
+# Date: June 2020
+# Description: Process and graph a CSV file containing biomedical data that 
+# relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
+# Create a random test case and determine whether the case is
+# likely to have CKD depending on the mode of the classifications of the
+# k number of nearest points.
+# =============================================================================
+
+# =============================================================================
+# Import statements
+# =============================================================================
+import matplotlib.pyplot as plt
+import numpy as np
+import NearestNeighborClassifier as NNC
+from statistics import mode
+
+# =============================================================================
+# Functions
+# =============================================================================
+# findDistanceArray inputs a numpy array, a random point, and an integer k and
+# uses the findDistance function from NearestNeighborClassifier. The function
+# outputs a 1D array containing the k number of nearst points to the random
+# test case.
+def findDistanceArray(normArr, testCase, k):
+    distArr = np.zeros(normArr.len)
+    for i in range(len(distArr)):
+        distArr[i] = NNC.findDistance(normArr.hemo[i], normArr.gluc[i], testCase[1], testCase[0])
+        kindex = np.argsort(distArr)[:k]
+    return kindex
+
+# graphKNearestNeighbor void function takes in two 1D and one 2D numpy arrays
+# to graph. One of the 1D arrays is a random testCase with its own distinct
+# points. The other 1D array is used to circle the k number of points closest 
+# to the test case. The 2D array contains information parsed from the CSV 
+# column. The first column (hemoglobin) is graphed as the x-axis and the second
+# column (glucose) as the y-axis. The third column  (classification) determines
+# the color of the points. A legend is generated in a reasonable position.
+def graphKNearestNeighbor(testCase, normArr, k):
+    kindex = findDistanceArray(normArr, testCase, k)
+    NNC.graphCSVFile(normArr)
+    plt.scatter(testCase[1], testCase[0],
+                c = ('b' if mode(normArr.disease[kindex])==0 else 'r'),
+                label = 'Test Case',
+                marker = "x")
+    plt.scatter(normArr.hemo[kindex], normArr.gluc[kindex],
+                c='y', label = 'Nearest neighbor(s)')
+    print("butts")
+    plt.legend(fontsize="small")
+    plt.show()
+    return
+
+# =============================================================================
+# Main Script
+# =============================================================================
+# mainDriver function takes in nothing and graphs both the orginial CSV file,
+# the k number of nearest neighbors, and the test case. This function returns 
+# 0.5
+def mainDriver():
+    val = int(input("How many neighbors are you looking for: "))
+    test = NNC.createTestCase()
+    normal = NNC.normalizeData(NNC.openCSVFile('ckd.csv'))
+    graphKNearestNeighbor(test, normal, val)
+    return 0
+mainDriver()
\ No newline at end of file
diff --git a/Summer-2020-Data-Analysis-Project-master/NearestNeighborClassifier.py b/Summer-2020-Data-Analysis-Project-master/NearestNeighborClassifier.py
new file mode 100644
index 0000000..cb298ce
--- /dev/null
+++ b/Summer-2020-Data-Analysis-Project-master/NearestNeighborClassifier.py
@@ -0,0 +1,112 @@
+# =============================================================================
+# NearestNeighborClassifier.py
+# Name: Alycia Wong and Brandon Wong
+# Date: June 2020
+# Description: Process and graph a CSV file containing biomedical data that 
+# relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
+# Create n number of random test cases and determine whether the case is
+# likely to have CKD depending on the classification of the nearest point.
+# =============================================================================
+
+# =============================================================================
+# Import statements
+# =============================================================================
+import matplotlib.pyplot as plt
+import numpy as np
+
+# =============================================================================
+# Classes
+# =============================================================================
+class Butts:
+    def __init__(self, data):
+        self.gluc = data[:,0]
+        self.hemo = data[:,1]
+        self.disease = data[:,2]
+        self.len = len(data)
+        self.all = data[:,:3]
+        self.paras = data[:,:2]
+        self.shape = np.shape(data)
+        self.colmax = np.amax(data, axis = 0)
+        self.colmin = np.amin(data, axis = 0)
+
+# =============================================================================
+# Functions
+# =============================================================================
+# Parses in file and turns it into Butts class of data
+def openCSVFile(fileName):
+    return Butts(np.genfromtxt(fileName, delimiter=',',skip_header=1))
+
+# Takes in butts class
+# Loops over data normalizing it for every row
+# returns normalized butts class data
+def normalizeData(dataArr):
+    normArr = np.zeros(dataArr.shape)
+    for i in range(len(normArr)):
+        normArr[i] = (dataArr.all[i] - dataArr.colmin) / (dataArr.colmax - dataArr.colmin)
+    return Butts(normArr)
+
+# graphCSVFile void function takes in a 2D numpy array and graphs with the
+# first column (hemoglobin) as the x-axis and second column (glucose) as the 
+# y-axis. The third column (classification) is used to determine the color of
+# the points on the graph.
+def graphCSVFile(normArr):
+    plt.scatter(normArr.hemo[normArr.disease==0], normArr.gluc[normArr.disease==0],
+                c='b', label='No CKD' )
+    plt.scatter(normArr.hemo[normArr.disease==1], normArr.gluc[normArr.disease==1],
+                c='r', label='CKD')
+    plt.title('Hemoglobin and Glucose levels')
+    plt.xlabel('Hemoglobin')
+    plt.ylabel('Glucose')
+    return
+# findDistance function is either:
+# a) takes in an array and a point and returns an array of distances or the
+# minimum distance or
+# B) takes in cartesian coordinates and uses a simple use of the distance
+# formula to return the distance between the two points.
+def findDistance(x1, y1, x2, y2):
+    return np.sqrt((x1-x2)**2+(y1-y2)**2)
+
+# createTestCase function creates two random test cases (hemoglobin and 
+# glucose) from 0-1 and: 
+# creates a new 1D array with the two points
+# return the points raw
+def createTestCase():
+    return np.random.rand(2)
+
+# nearestNeighborIndex takes in the test case point and returns the index of the
+# nearest point to the test case
+def nearestNeighborIndex(testCase, normArr):
+    distArr = np.zeros(normArr.len)
+    for i in range(len(distArr)):
+        distArr[i] = findDistance(normArr.hemo[i], normArr.gluc[i], testCase[1], testCase[0])
+    nni = distArr.argmin()
+    return nni
+
+# graphNearestNeighbor void function takes in a 2D numpy array (and a cartesian 
+# coordinate depending on createTestCase) and graphs the first column 
+# (hemoglobin) as the x-axis and the second column (glucose) as the y-axis
+# the third column (classification) determines the color of the points. A 
+# randomly generated test case is graphed as a distinct point with a 
+# line connecting it to the nearest neighbor whose classification it takes on.
+# A legend is generated in a reasonable position.
+def graphNearestNeighbor(testCase, normArr):
+    nni = nearestNeighborIndex(testCase, normArr)
+    graphCSVFile(normArr)
+    plt.scatter(testCase[1], testCase[0],
+                c = ('b' if normArr.disease[nni]==0 else 'r'),
+                label = 'Test Case',
+                marker = "x")
+    plt.plot([testCase[1], normArr.hemo[nni]], [testCase[0], normArr.gluc[nni]], 'k-')
+    plt.legend()
+    plt.show()
+    return
+
+# =============================================================================
+# Main Script
+# =============================================================================
+# mainDriver function takes in no inputs and graphs both the orginial CSV
+# file and the test case. This function returns 0.
+def mainDriver():
+    graphNearestNeighbor(createTestCase(), normalizeData(openCSVFile('ckd.csv')))
+    return 0
+# mainDriver()
\ No newline at end of file
diff --git a/Summer-2020-Data-Analysis-Project-master/README.md b/Summer-2020-Data-Analysis-Project-master/README.md
new file mode 100644
index 0000000..d1d53d0
--- /dev/null
+++ b/Summer-2020-Data-Analysis-Project-master/README.md
@@ -0,0 +1,97 @@
+# Summer-2020-ML-Project
+
+# Nearest Neighbor Classifier Script Description:
+Process and graph a CSV file containing biomedical data that relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
+Create n number of random test cases and determine whether the case is likely to have CKD depending on the classification of the nearest point.
+
+# Nearest Neighbor Classifier Function Descriptions:
+openCSVFile function takes in the name of a CSV file and parses/organizes its data into a 2-D numpy array with the columns being: 
+glucose, hemoglobin, classification and each row being a case.
+
+normalizeData function takes in a 2D numpy array and 
+scales down the first and second columns to range from 0-1 and 
+outputs a 2D array with the normalized data.
+
+graphCSVFile void function takes in a 2D numpy array and graphs with:
+the first column (hemoglobin) as the x-axis and second column (glucose) as the y-axis. 
+The third column (classification) is used to determine the color of the points on the graph.
+
+findDistance function takes in the cartesian coordinates of two points and
+uses the distance formula
+to return the distance between the two points.
+
+createTestCase function creates one random test case made of two random values (hemoglobin and glucose) from 0-1 and
+creates/returns a new 1D array with the two values.
+
+graphNearestNeighbor void function takes in a 2D numpy array (and a cartesian 
+coordinate depending on createTestCase) and 
+graphs the first column (hemoglobin) as the x-axis and the second column (glucose) as the y-axis.
+The third column (classification) determines the color of the points. 
+A randomly generated test case is graphed as a distinct point with a line connecting it to the nearest neighbor whose classification it takes on.
+A legend is generated in a reasonable position.
+
+mainDriver function takes in no inputs and graphs both the original CSV file and the test case. 
+This function returns 0.
+
+# K Nearest Neighbor Classifier Script Description:
+
+Process and graph a CSV file containing biomedical data that relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
+Create n number of random test cases and determine whether the case is likely to have CKD depending on the mode of the classifications of the k number of nearest points.
+
+# K Nearest Neighbor Classifier Functions Descriptions:
+
+findDistanceArray inputs a numpy array, a random point, and an integer k and
+uses the findDistance function from NearestNeighborClassifier. 
+The function outputs a 1D array containing the k number of nearest points to the random test case.
+
+graphKNearestNeighbor void function takes in two 1D and one 2D numpy arrays to graph.
+One of the 1D arrays is a random testCase with its own distinct points.
+The other 1D array is used to circle the k number of points closest to the test case.
+The 2D array contains information parsed from the CSV column.
+The first column (hemoglobin) is graphed as the x-axis and the second column (glucose) as the y-axis.
+The third column  (classification) determines the color of the points. 
+A legend is generated in a reasonable position.
+
+mainDriver function takes in nothing and graphs both the original CSV file, the k number of nearest neighbors, and the test case.
+This function returns 0.
+
+# K Means Clustering Script Description:
+
+Process and graph a CSV file containing biomedical data that relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
+Randomly generate up to 10 centroids without issue. 
+Each centroid will have a classification. 
+The nearest centroid to a point will determine the point's classification (decide what to do if the distances are equal yourself).
+Create random test cases until centroids stop moving and determine whether each case is likely to have CKD depending on the classification of the nearest centroid.
+
+# K Means Clustering Functions Descriptions:
+
+randomCentroids function takes in an integer number of clusters to be generated. 
+OR asks for k number of integer clusters
+Outputs a 2D array filled with random values between 0-1. 
+The first column represents hemoglobin and the second column represents glucose.
+There are k number of rows representing the number of centroids and the classification of each centroid (i.e.: row index = classification value).
+OR you can have a third column with the classification value.
+
+assignCentroids function takes in an array of normalized x (hemoglobin) and y (glucose) values from the CSV file and the randomly generated array of centroids from randomCentroids. 
+Using the findDistance function from NearestNeighborClassifier, points are assigned the same classification as the nearest centroid.
+A 2D array of the normalized data and its classification are returned.
+
+updateCentroids function inputs the 2D array of centroid locations and of classified and normalized CSV data.
+The average x (hemo) and y (gluc) positions of all data points for each classifications are found and
+an updated 2D array with these average cartesian points as the location for the new centroids is returned along with the original cartesian points. 
+
+iterate void function can either
+a) input information and iterate the original information until centArr ~ upCentArr
+b) don't input any information and run by itself. Similar to a main script
+The function causes the centroids to reassign points and update their locations until the centroids do not move.
+
+graphClusters void function takes in a 1D and a 2D numpy array to graph. 
+The 1D array of centroid locations and classifications have distinct points on the graph. 
+The 2D array graphs points of normalized CSV data and colors them the same color as their corresponding centroids.
+A legend is generated in a reasonable position.
+
+dataAnalysis void function takes in the original parsed CSV classifications and the final classifications of the data based on K-means clustering (use of centroids) and
+compares the two to find false/true positives/negatives.
+
+mainDriver function takes in nothing and graphs both the original CSV file, the k number of nearest neighbors, and the test case. 
+This function returns 0.
diff --git a/Summer-2020-Data-Analysis-Project-master/__pycache__/KNearestNeighborClassifier.cpython-37.pyc b/Summer-2020-Data-Analysis-Project-master/__pycache__/KNearestNeighborClassifier.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..14a61c6cbb1b28751cf21378d2680a221c1a70ac
GIT binary patch
literal 1500
zcma)6O>Y}T7@pbLFFQ_X+5!zo$P$;VP^zlpRE3J*2qBP6r2v&I^D*9;IGgN;nHi_C
zwLTfR<v+wnE}Z!-b446_=Eec>%-XH0N=S_Ke$33fGyA^J^Uf#R+iimJ=Rbda_j`ws
zKiuTS@Zfw5(Vs&JB4|zuI`HV)X9K@k3kD&qmq$fBh_T0n{}UOsL?A-gwunRwyCt`l
z1iSQvB(3K-kMt92{h|^wWp0WQLSje{qJMxg<c8Md+*{EFyXqTeyqYcPk{*+9A1{34
z-vrnVYk!ZxMvbL{Vn&6xM^=8#YX8uk64z`+1*yHCvAzg?eCH{t1N;f=Kc~1yIhC}m
zS59m&m#Wg1<+8Ma;^kOc*6;TQoztuo$C);~Or`bDWKmgvoKI65aW&SJGD&EovQov7
zQZ_bHn;zHF(u+YP@R?hsnGG*+jJ*1ls9oIo_iw#N!>_eedN{&eV)$h#k5zUlhsRYq
zEo5o*@X5@aRpszxS`<<}QPsIj&BF%|AAEn*d%9UYaPxy>Zur4b$@7`c^uZ?bpy1j_
z_5TCE=u58f^L?3(&qkH%<y`CRG?VK7WM+3?tpZZwvv-k(Za73c^fuk6Z&7szYcEmt
zF^M1zs=vT7z^4WlOSWK!-gq@#QA!Lu_iOOOVd>B_K}~@z+apV$dEu!;!Aei~j#nbO
z4mMv<;Eou?0^>a*V$l-qYahEUv9(;E+ZG+MeckvsTPw#x0_=DC$`P-@-qMk!YTo(^
ziFenqT_w!Y)CM^p$=rqoR~Hfp&gOSE1?rZY3hnD8@zmSytb0&{U^F#G+dvmQ&q4C|
zv;vOVTqd2SGRkq>cE*ZN&Uz=`e46E;*d#YL(kV9pOS#5tSR=t8)p94h3fo|!veMWv
zmt$E9>+7@XS>mg07l7s^YR9QtPC0h1)opn8&E~e(u}!e0-9Hmx*QU4VJ?hbz-i04y
zC8p|q?4>Ti4OgxJ(u3#^p$swTH`kF1V!Ru+A`1%ZLjr-WMc;ZR1lnFrmdw2ksJq{v
z?|)W3>lVD6y{w_`f^^+kHSOkAg%%liPb>APJCD)}aX;0U>kf@9bNL5ZIhmTKjDww2
zNp7SAuEA~Fbs*1CZPyNVtD4AiQ{*NYpvo?JZhhCFHe5%)UafY)v$E+!^^wc<1-)aG
zs6*8USW7x~x8P=yS0>L!XbtDtaBbClh_FF9EhaPN_;2_Av#Zu=wDGPOyb@We8}!#i
man)~bd*ufkU>i*1L%g2TTt0pSVcJ3OAOmo%cqhIahyMc6e1<Ck

literal 0
HcmV?d00001

diff --git a/Summer-2020-Data-Analysis-Project-master/__pycache__/NearestNeighborClassifier.cpython-37.pyc b/Summer-2020-Data-Analysis-Project-master/__pycache__/NearestNeighborClassifier.cpython-37.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e3fd4859571ab5e62655b4f398ec335c4ed1ed4f
GIT binary patch
literal 2756
zcma)8NpBlB6drOGjpQYc<7R2m!Y$H8LG7k)E)9Ypj*X@Zpae+T0ESaxEYkMKvnV9(
z%7)xSogDKIda%(`5B)>D_LOsPIrV!SSzgjZDUk1xd>lSLe&3_+PfRpCls{ho^Yd?)
zJnwJn92XzTJygApMtFi{Ud~oHt8{-Qc;E>x{1=|^5BN$b0ujFORw5CJ8e~nxq7E60
zh8Tmai*Ydl*$|WB0_2#O5*H!I#k9BtIU(3%uQ~G?dwH!6wa0}+xreG}&`58E32%i9
zCOB64!pEw>hWEFP(H%0SLNY*ILe;;c8DNp{USiogzvd0R_Q?nzj<Au9KOEu4AFu-k
zYslexPXsTIR|Ka<{n2P>qQlyN9nrd)iMlbYm<V5S^y}z1PW8ux2lu0vV({^`YsmH2
z6#6E!LDElkbKC|S*>=YUn=&tLO{7{TT3SDoh4qsxvtch$iMG7x*-&pLJ&A>R(zj8k
z%t)e_7F91Tn%o9jX6rT(iAnzS)H~Sg&0kBmSAWz}>D4ur6k_#ZA(vIUBUhKp&UP*f
zqgNm8n$5CUeY~CLQaw^-S9Z+8&4ruo#iggi(K+hRE#td$i$#*{>Qv7SE$8w?8>#-k
zVM``TYSWVG#^ze7ma;_abUl@7zPD@Rb~`On({7XVdK!(#rroc>F5@@Orr6%3!^Hft
zX$vsHx+2MCyKNioc3z6@jP&t#`}uZ~RWs@eKIVdQs<}*KbTb4&O-jsMze9IMg=B>4
z5Jy(VGpvg{C!h-dm486+4q9*6=gQqKw{;=2G*1x~JEphNUVBp}LaL_kzN{wj5!=|1
z#kwkU(>K6Fyq;#Vg#(;(&5o5lSu8#N=^M;n$2#Yt$J8wJk&CqLh!o$WW4uFvw9Aj!
zKI`8yz5$>Q@LTqtUEhc6-EiQ@(v7ZyGTj;~wD&-r0l(utfA^_p0B(0M)@s%(kHbBw
zO06dGeH#Kx8&Xk<QDfB5O{pnrD27cRXa-mom9n*>R5>XuJ%6|fno6>CPZF|jV4;rD
zc(~<hcAcqDppOoD<{ox1W4x|AB%JpJ+o}!}ob>2+MDQwLXD!1smaa#M?bz}#6!8(G
zHHVR?54D<Tkmfi&R&m<0wY{iS&My714C1kl4YOoTW)1?DDeGfm@BME<?2W8kON&{M
zZ}tJ$UTQg;$sL(#+c=rFwc%Vdwtg=&wx&CY0fH1oMAfOOp|PP!O(t#BciY)$*U2VR
zgpTt89^X(&Z*vHrTQJacMIM`F4ObhG4W_=rScd>ag=C28@csdm3QPlj6^Oq?5b-PR
zig!fF=PgA`n!G|upr0!PLh}BXmhYnW7a$kj@@zdV#B!=lQgo!-if{v9e~q?7>Tw}(
zJA-j3bmxc2^=EeWtDPgtZ)I)+lBf2gOrnVAcCw>nV&oHKpCu$(V#+a}?m!<sGIV?j
zw%hgz6c+-J9(>~c+u#!)@FAaW>^pXWQ9hm@){UG*sOT{g0Odh2-~+#6)qo$?j=cR~
z5Fl`V%Yntc5Y5<~L4AnLDpq|&8U>>QfV=xBQ5#73&CteXcm{Xv?&&b#nEDtCtX~vw
zQEjF;n{~e{gzP_rwI&W%jJv^*`xem~1%hHf3dFDI7#s?Tf$wvJSM`8+!n;1@!sFv2
z1UT?D9DR$cTf#3m{e2*Yy$Z?tFdSM_s9?82nUuK8kcM0z{PUe<e1znS6?Kx1d-YQx
zQ=h^gMM-0?PB%JBS87=wS@tuUX`-=Fo~SLU_W0IZv*FI>z^>-W;tOhsSvKfpr8ynw
zOd>rCyr-MxFZkeHvUCKyN18*_39M!u%g#M(&tT~O*LYx9j8|)haY%|WqWl1}9n?`7
zG43jc=!H4J$qn)v!R&zknsEo2y;^5W%y;xo1)IQqNfCNowsh~pDgREKY~)E=xR+lQ
zu-1<5a7ulPUUSl3%o9UV$kH{u!=%}$k}g;_EVlFBZiP~!YPAz(=qjQPs?HBWP~{eV
j`ahkC?^I9pU3yGhl*U;cPEN&9+=ws4wK$9?<BR_S5iM2(

literal 0
HcmV?d00001

diff --git a/Summer-2020-Data-Analysis-Project-master/__pycache__/NearestNeighborClassifier.cpython-38.pyc b/Summer-2020-Data-Analysis-Project-master/__pycache__/NearestNeighborClassifier.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..15a1e1132c9168b61b181b8af373fd7262ae29e3
GIT binary patch
literal 2761
zcma)8OK%%D5GJ_~t>j0Y#7!I&E!-e&qo8(EH>V(Nli0`u1%v=eTfj&mtVLR0d3U97
zY1cO7qJR?g<bTkEeav6twWs`r07d%^SF$Cihpxbx;c~bf4!>`xA7^H2jGpLu^Y=@P
z{pC7i!h?Met=NQ+O!Aavd|mL;_SXGJObY3}V$vIk^+5VEc*WL38OjQDMMkm;9m$%U
zg09MGIRjmjv+@JzDLE%EKu^ny@)Gon<j+`r{w>yGjTXuAN1$^Lt(b>UY@JKCE+m%%
zGd<~Hrf-7>JH`|(aveQn`)I3Z#h(y;OqA?3rf!O>tk0TfZ^ZBoKlSm3Z-nvse8Axi
zJ)F;^|9UjTAA9Saz70%xT<P-@npZJlCxRE3!I40}ihgaZKP4Hq5H>VNjBlNw(BIM#
z)VcNJy`-p5Tfd#|w5-3SvaYSjq)>68te2|XdU2ZCpcm`7up;l-pxBCg3KO$<&xWmT
zN*cW+FMCN|7uGLSx@mnGoA?dWA7ZU{e>}SL;@M7?Dg9J;JE~<?ZmryE-e3KB_;!i<
zOKX_1bU%;N{URxrhMr4VTo|SQzhgtiS{0_DlJ?d{SFfgVQ6!s*(#ySl8#S9ro|tBn
z!YVF8Fn-bfYWy;O(|nE}%(@d_9=SFE0?f<fOf{Rf)@){7xs#GT-E6+xiPLgKU%|)R
zcE`kJdPg}YfE8p!-1S>@8l#8o5Y6F<JPk9vgA*kvO7F-U5QzQ8JN~71XD4h`sx-+G
zWX4Vv+expvrDCac-E&{pGx&(DwN<{UyP4TDIPGXNNmT=zI2WFs>h@H=`s}BN7{88r
zE=<PtBJ7DUY3WngKSal{W5BQ@PWT}=o&jJ7_^q*Dt{)=!PSAH`=!6TPM5lrV>myL3
zFYdCJ3kJ}28{>_7wTw78P`X>_S$x|DI?mflQ!&v~B<QsC90|%}-2+wunEP7WO5W8O
z89X_^xfvRY)8s%Ax^7}(5kW9_($~2D9QMQ}C(dFQqlR_dCK0|T`F6P_|ExzRB>f|w
zPkIIuo~}nFY~*<y$moRAoa0bdhgL<_$a1WmDp_m!#zENVF0THt25Rw^4bpf+r49y`
zYwKa);N!=jZaeL6B>7^T%f(0FbGJ~7soGU(VQXi@wlW;6N7n15##V|}Y=9z7S<zJz
z6$l%c#H7lGdu}-!?mJ!Qnh<h6$<u8e_qK)*x($b70fO;GE|~Zy6eC^Z^wZy9tVKW?
zJ!A)H4*#FPD1p@%3t+${0*qH$SJ(-d@J2K=O{t3#HGc6@8<14&eQm`)+TIHE%6n06
zCb?Wo3KQoo^#JonK)(aKg*NdJ*j+r+FbD(3sP447rVWB;j$$D|D!ZBWNslc^<wX<u
z?QBb{*r?|yNvo)<Pw;NE?zgZf?iI=bW#6s)6o$(KDgg+F+ro&x7!vH}p`#lp=Hb5K
zozQ7yi*6qYj1K(1=zB!7zBsO&utUG^qlS15ha7hy)FZbC{V5hIX?Be)%0`I>2ZS)z
zZPfRAU?Vf!gTsKkDGW%bKf?s;<@tNuo6jdp@snJty#~As_Yc8Q@CHXgq{Em<1SS78
z5x=2huq{*zfi&PE9T0Iu$D;x{+AxxUEnmgvw}rkfy<E^g2cq0jK;Fl}(3_G4zc!Uu
zA~Q#2a(xiebDq%&bGk~}k+C|KHvI+S(Nswes&uN0bgY)|0TMT8qz+-jEY{mfABgRx
zdd=<6p<XYO$5$kXYS!<iT{E8SRH0@|T%lXtU-7~3$<s0JK3NV`XV98=v^#g-y?|pu
z$YY>dgzIdc3oiZ+{hR0^*N-vMqT%u9lzhZnx_k!6m_fNCxDDvPnQ-7asI<1_a;w-a
z0pz={X~MB9r0&WXi}EbgS{5hynof3=&fr}*_9HY+e5lXb3t4O^7iqGAYnm(@mfEGv
z2Ki3b+b{7-)xTVcig%e;2U-^gVW|v@KK-A;M0d*@{5!g5++V+G9tE>=Q5e;t*{Bi)
HkQe?1GD1bi

literal 0
HcmV?d00001

diff --git a/Summer-2020-Data-Analysis-Project-master/ckd.csv b/Summer-2020-Data-Analysis-Project-master/ckd.csv
new file mode 100644
index 0000000..8c30b6b
--- /dev/null
+++ b/Summer-2020-Data-Analysis-Project-master/ckd.csv
@@ -0,0 +1,159 @@
+Glucose,Hemoglobin,Class
+117,11.2,1
+70,9.5,1
+380,10.8,1
+157,5.6,1
+173,7.7,1
+95,9.8,1
+264,12.5,1
+70,10,1
+253,10.5,1
+163,9.8,1
+129,9.1,1
+133,10.3,1
+76,7.1,1
+280,13,1
+210,16.1,1
+219,10.4,1
+295,9.2,1
+118,11.4,1
+224,8.1,1
+128,8.2,1
+118,12,1
+105,11.1,1
+288,7.9,1
+273,8.3,1
+122,12.6,1
+303,10.4,1
+102,8.7,1
+107,8.3,1
+117,10,1
+239,9.5,1
+94,9.9,1
+129,8.1,1
+252,11.2,1
+255,7.3,1
+253,10.9,1
+214,10.9,1
+490,11.5,1
+163,7.9,1
+241,9.6,1
+214,9.4,1
+106,8.6,1
+424,12.6,1
+176,3.1,1
+140,15,0
+70,17,0
+82,15.9,0
+119,15.4,0
+99,13,0
+121,13.6,0
+131,14.5,0
+91,14,0
+98,13.9,0
+104,16.1,0
+131,14.1,0
+122,17,0
+118,15.5,0
+117,16.2,0
+132,14.4,0
+97,14.2,0
+133,13.2,0
+122,13.9,0
+121,15,0
+111,14.3,0
+96,13.8,0
+139,14.8,0
+125,16.5,0
+123,15.7,0
+112,14.5,0
+140,16.3,0
+130,15.5,0
+123,14.6,0
+100,16.9,0
+94,16,0
+81,14.7,0
+93,16.6,0
+124,14.9,0
+89,16.7,0
+125,16.8,0
+91,13.5,0
+127,15.1,0
+96,16.9,0
+128,13.1,0
+122,17.1,0
+128,15.2,0
+137,13.6,0
+81,13.9,0
+102,13.2,0
+132,13.7,0
+104,17.3,0
+131,15.6,0
+102,15,0
+120,17.4,0
+105,15.7,0
+109,13.9,0
+130,15.9,0
+100,14,0
+109,15.8,0
+120,13.4,0
+80,14.1,0
+130,13.5,0
+99,17.7,0
+134,14.2,0
+92,14,0
+132,17.8,0
+88,13.3,0
+100,14.3,0
+130,13.4,0
+95,15,0
+111,16.2,0
+106,14.4,0
+97,13.5,0
+108,17.8,0
+99,13.6,0
+83,17.5,0
+109,15,0
+86,13.6,0
+102,14.6,0
+95,15,0
+87,17.1,0
+107,13.6,0
+117,13,0
+88,17.2,0
+105,14.7,0
+70,13.7,0
+89,15,0
+118,14.8,0
+81,15,0
+125,17.4,0
+82,14.9,0
+107,13.6,0
+83,16.2,0
+79,17.6,0
+109,15,0
+133,13.7,0
+111,16.3,0
+74,15.1,0
+88,16.4,0
+97,13.8,0
+78,16.1,0
+113,15.3,0
+75,16.8,0
+119,13.9,0
+132,15.4,0
+113,16.5,0
+100,16.4,0
+93,16.7,0
+94,15.5,0
+112,17,0
+99,15,0
+85,15.6,0
+133,14.8,0
+117,13,0
+137,14.1,0
+140,15.7,0
+75,16.5,0
+100,15.8,0
+114,14.2,0
+131,15.8,0

From 9ea6086d66bbb319a46358fa2c24058d7e84e8bf Mon Sep 17 00:00:00 2001
From: awong15 <46977254+awong15@users.noreply.github.com>
Date: Sun, 6 Sep 2020 10:10:01 -0400
Subject: [PATCH 2/2] Changed folder name

---
 .../KMeansClustering.py                       | 258 +++++++-------
 .../KNearestNeighborClassifier.py             | 132 ++++----
 .../NearestNeighborClassifier.py              | 222 ++++++------
 .../README.md                                 |   0
 .../KNearestNeighborClassifier.cpython-37.pyc | Bin
 .../NearestNeighborClassifier.cpython-37.pyc  | Bin
 .../NearestNeighborClassifier.cpython-38.pyc  | Bin
 .../ckd.csv                                   | 318 +++++++++---------
 8 files changed, 465 insertions(+), 465 deletions(-)
 rename {Summer-2020-Data-Analysis-Project-master => Summer-2020-Data-Analysis-Project-Brandon-Branch}/KMeansClustering.py (97%)
 rename {Summer-2020-Data-Analysis-Project-master => Summer-2020-Data-Analysis-Project-Brandon-Branch}/KNearestNeighborClassifier.py (97%)
 rename {Summer-2020-Data-Analysis-Project-master => Summer-2020-Data-Analysis-Project-Brandon-Branch}/NearestNeighborClassifier.py (97%)
 rename {Summer-2020-Data-Analysis-Project-master => Summer-2020-Data-Analysis-Project-Brandon-Branch}/README.md (100%)
 rename {Summer-2020-Data-Analysis-Project-master => Summer-2020-Data-Analysis-Project-Brandon-Branch}/__pycache__/KNearestNeighborClassifier.cpython-37.pyc (100%)
 rename {Summer-2020-Data-Analysis-Project-master => Summer-2020-Data-Analysis-Project-Brandon-Branch}/__pycache__/NearestNeighborClassifier.cpython-37.pyc (100%)
 rename {Summer-2020-Data-Analysis-Project-master => Summer-2020-Data-Analysis-Project-Brandon-Branch}/__pycache__/NearestNeighborClassifier.cpython-38.pyc (100%)
 rename {Summer-2020-Data-Analysis-Project-master => Summer-2020-Data-Analysis-Project-Brandon-Branch}/ckd.csv (91%)

diff --git a/Summer-2020-Data-Analysis-Project-master/KMeansClustering.py b/Summer-2020-Data-Analysis-Project-Brandon-Branch/KMeansClustering.py
similarity index 97%
rename from Summer-2020-Data-Analysis-Project-master/KMeansClustering.py
rename to Summer-2020-Data-Analysis-Project-Brandon-Branch/KMeansClustering.py
index 31e4484..0edbcf2 100644
--- a/Summer-2020-Data-Analysis-Project-master/KMeansClustering.py
+++ b/Summer-2020-Data-Analysis-Project-Brandon-Branch/KMeansClustering.py
@@ -1,130 +1,130 @@
-# =============================================================================
-# KMeansClustering.py
-# Name: Alycia Wong and Brandon Wong
-# Date: June 2020
-# Description: Process and graph a CSV file containing biomedical data that 
-# relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
-# Randomly generate up to 10 centroids without issue. Each centroid will have a
-# classification. The nearest centroid to a point will determine the point's 
-# classicfication (decide what to do if the distances are equal yourself).
-# Create random test cases until centroids stop mocing and determine whether 
-# each case is likely to have CKD depending on the classification of the
-# nearest centroid.
-# Bonus: Create lines roughly separating each centroid group
-# =============================================================================
-
-# =============================================================================
-# Import statements
-# =============================================================================
-import matplotlib.pyplot as plt
-import numpy as np
-import NearestNeighborClassifier as NNC
-from scipy.spatial import KDTree as kdt
-
-# =============================================================================
-# Functions
-# =============================================================================
-# randomCentroids function takes in an integer number of clusters to be
-# generated. 
-# OR asks for k number of integer clusters
-# Outputs a 2D array filled with random values between 0-1. The 
-# first column represents glucose and the second column represents hemoglobin.
-# There are k number of rows representing the number of centroids and the
-# classification of each centroid (i.e.: row index = classification value).
-# OR you can have a third column with the classification value.
-def randomCentroids(k):
-    return np.random.rand(k,2)
-
-# assignCentroids function takes in an array of normalized x (hemoglobin) and y 
-# (glucose) values from the CSV file and the randomly generated array of 
-# centroids from randomCentroids. Using the findDistance function from 
-# NearestNeighborClassifier, points are assigned the same classification as the 
-# nearest centroid. A 2D array of the normalized data and its classification 
-# are returned.
-def assignToCentroids(normArr, centArr):
-    return kdt(centArr).query(normArr)[1]
-# print(assignToCentroids(NNC.normalizeData(NNC.openCSVFile('ckd.csv')).paras, np.array([[.5, .5],[.25,.25]])))
-
-# updateCentroids function inputs the 2D array of centroid locations and of 
-# classified and normalized CSV data. The average x (hemo) and y (gluc) 
-# positions of all data points for each classifications are found and an 
-# updated 2D array with these average cartesian points as the location for the
-# new centroids is returned along with the original cartesian points. 
-#avg of all 1s will be new cent, avg of all 0s will be new cent
-
-def updateCentroids(centArr, classArr, normArr):
-    upCentArr = centArr.copy()
-    for i in range(len(centArr[:,0])):
-        upCentArr[i,0] = np.mean(normArr.gluc[classArr==i])
-        upCentArr[i,1] = np.mean(normArr.hemo[classArr==i])
-    return upCentArr
-# centArr = np.array([[0.5, 0.5], [.25, .25]])
-# print(updateCentroids(
-#     centArr, assignToCentroids(
-#         NNC.normalizeData(NNC.openCSVFile('ckd.csv')).paras, centArr),
-#         NNC.normalizeData(NNC.openCSVFile('ckd.csv'))
-#     ))
-# print(centArr)
-
-# iterate void function can either
-# a) input information and iterate the original information until centArr ~ 
-#     upCentArr
-def iterate(normArr, centArr):
-    # classArr = np.zeros(len(normArr.gluc))
-    classArr = assignToCentroids(normArr, centArr)
-    upCentArr = updateCentroids(centArr, classArr, normArr)
-    # print(classArr)
-    if (upCentArr != centArr).any():
-        centArr = upCentArr
-        return iterate(normArr, centArr)
-    return centArr
-print(iterate(
-    NNC.normalizeData(NNC.openCSVFile('ckd.csv')), np.array([[.5, .5],[.25,.25]])
-    ))
-
-# graphClusters void function takes in a 1D and a 2D numpy array to graph. The
-# 1D array of centroid locations and classifactions have distinct points on the 
-# graph. The 2D array graphs points of normalized CSV data and colors them the
-# same color as their corresponding centroids. A legend is generated in a
-# reasonable position.
-# Bonus: Create lines roughly separating each centroid group
-def graphClusters():
-    
-    return
-
-# dataAnalysis void function takes in the original parsed CSV classifications 
-# and the final classifications of the data based on K-means clustering (use of
-# centroids) and compares the two to find false/true positives/negatives.
-# Note: This should only run when there are two centroids (i.e.: k = 2)
-# False positive: Percentage of non-CKD were incorrectly labelled by K-Means as
-# being in the CKD cluster
-# True positive (sensitivity): Percentage of CKD patients were correctly 
-# labeled by K-Means 
-# False negative: Percentage of non-CKD were incorrectly labelled by K-Means as
-# being in the CKD cluster
-# True negative (specificity): Percentage of non-CKD patients were correctly 
-# labelled by K-Means 
-# Note: True positive (~93 %) + False positive (~7%) = 100%
-# Note: True Negative (~100%) + False negative (~0%) = 100%
-def dataAnalysis():
-    return 
-# =============================================================================
-# Main Script
-# =============================================================================
-# mainDriver function takes in nothing and graphs both the orginial CSV file,
-# the k number of nearest neighbors, and the test case. This function returns 
-# 0.
-def mainDriver():
-    # Open the CSV file using the parsing method from 
-    # NearestNeighborClassifier. No input, outputs 2D numpy array.
-    NNC.openCSVFile
-    
-    # Normalize data using method from NearestNeighborClassifier. Input and
-    # outputs a 2D numpy array
-    NNC.normalizeData()
-    
-    # Graph CSV file using method from NearestNeighborClassifier. Input 2D 
-    # numpy array. Void function.
-    NNC.graphCSVFile()
-    
+# =============================================================================
+# KMeansClustering.py
+# Name: Alycia Wong and Brandon Wong
+# Date: June 2020
+# Description: Process and graph a CSV file containing biomedical data that 
+# relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
+# Randomly generate up to 10 centroids without issue. Each centroid will have a
+# classification. The nearest centroid to a point will determine the point's 
+# classification (decide what to do if the distances are equal yourself).
+# Create random test cases until centroids stop moving and determine whether 
+# each case is likely to have CKD depending on the classification of the
+# nearest centroid.
+# Bonus: Create lines roughly separating each centroid group
+# =============================================================================
+
+# =============================================================================
+# Import statements
+# =============================================================================
+import matplotlib.pyplot as plt
+import numpy as np
+import NearestNeighborClassifier as NNC
+from scipy.spatial import KDTree as kdt
+
+# =============================================================================
+# Functions
+# =============================================================================
+# randomCentroids function takes in an integer number of clusters to be
+# generated. 
+# OR asks for k number of integer clusters
+# Outputs a 2D array filled with random values between 0-1. The 
+# first column represents glucose and the second column represents hemoglobin.
+# There are k number of rows representing the number of centroids and the
+# classification of each centroid (i.e.: row index = classification value).
+# OR you can have a third column with the classification value.
+def randomCentroids(k):
+    return np.random.rand(k,2)
+
+# assignToCentroids function takes in an array of normalized data points and
+# the array of centroid locations (e.g. from randomCentroids). A scipy KDTree
+# is built from the centroids and queried with the data points, so each point
+# is assigned the classification (row index) of its nearest centroid. The
+# array of nearest-centroid indices produced by the query (one per point) is
+# returned.
+def assignToCentroids(normArr, centArr):
+    return kdt(centArr).query(normArr)[1]
+# print(assignToCentroids(NNC.normalizeData(NNC.openCSVFile('ckd.csv')).paras, np.array([[.5, .5],[.25,.25]])))
+
+# updateCentroids function inputs the 2D array of centroid locations, the
+# array of classifications, and the normalized CSV data. The average glucose
+# (column 0) and hemoglobin (column 1) values of all data points in each
+# classification are found and an updated copy of the centroid array with
+# these averages as the new centroid locations is returned.
+# The avg of all 0s will be new centroid 0, avg of all 1s will be new centroid 1
+
+def updateCentroids(centArr, classArr, normArr):
+    upCentArr = centArr.copy()
+    for i in range(len(centArr[:,0])):
+        upCentArr[i,0] = np.mean(normArr.gluc[classArr==i])
+        upCentArr[i,1] = np.mean(normArr.hemo[classArr==i])
+    return upCentArr
+# centArr = np.array([[0.5, 0.5], [.25, .25]])
+# print(updateCentroids(
+#     centArr, assignToCentroids(
+#         NNC.normalizeData(NNC.openCSVFile('ckd.csv')).paras, centArr),
+#         NNC.normalizeData(NNC.openCSVFile('ckd.csv'))
+#     ))
+# print(centArr)
+
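+# iterate function takes in the normalized data and the centroid array and
+# recursively reassigns points and updates centroids until the centroids stop
+# changing (upCentArr == centArr), then returns the final centroid array.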
+def iterate(normArr, centArr):
+    # classArr = np.zeros(len(normArr.gluc))
+    classArr = assignToCentroids(normArr, centArr)
+    upCentArr = updateCentroids(centArr, classArr, normArr)
+    # print(classArr)
+    if (upCentArr != centArr).any():
+        centArr = upCentArr
+        return iterate(normArr, centArr)
+    return centArr
+print(iterate(
+    NNC.normalizeData(NNC.openCSVFile('ckd.csv')), np.array([[.5, .5],[.25,.25]])
+    ))
+
+# graphClusters void function takes in a 1D and a 2D numpy array to graph. The
+# 1D array of centroid locations and classifications have distinct points on the 
+# graph. The 2D array graphs points of normalized CSV data and colors them the
+# same color as their corresponding centroids. A legend is generated in a
+# reasonable position.
+# Bonus: Create lines roughly separating each centroid group
+def graphClusters():
+    
+    return
+
+# dataAnalysis void function takes in the original parsed CSV classifications 
+# and the final classifications of the data based on K-means clustering (use of
+# centroids) and compares the two to find false/true positives/negatives.
+# Note: This should only run when there are two centroids (i.e.: k = 2)
+# False positive: Percentage of non-CKD were incorrectly labelled by K-Means as
+# being in the CKD cluster
+# True positive (sensitivity): Percentage of CKD patients were correctly 
+# labeled by K-Means 
+# False negative: Percentage of CKD patients were incorrectly labelled by 
+# K-Means as being in the non-CKD cluster
+# True negative (specificity): Percentage of non-CKD patients were correctly 
+# labelled by K-Means 
+# Note: True positive (~93 %) + False negative (~7%) = 100%
+# Note: True Negative (~100%) + False positive (~0%) = 100%
+def dataAnalysis():
+    return 
+# =============================================================================
+# Main Script
+# =============================================================================
+# mainDriver function takes in nothing and graphs both the original CSV file,
+# the k number of nearest neighbors, and the test case. This function returns 
+# 0.
+def mainDriver():
+    # Open the CSV file using the parsing method from 
+    # NearestNeighborClassifier. No input, outputs 2D numpy array.
+    NNC.openCSVFile
+    
+    # Normalize data using method from NearestNeighborClassifier. Input and
+    # outputs a 2D numpy array
+    NNC.normalizeData()
+    
+    # Graph CSV file using method from NearestNeighborClassifier. Input 2D 
+    # numpy array. Void function.
+    NNC.graphCSVFile()
+    
     return 0
\ No newline at end of file
diff --git a/Summer-2020-Data-Analysis-Project-master/KNearestNeighborClassifier.py b/Summer-2020-Data-Analysis-Project-Brandon-Branch/KNearestNeighborClassifier.py
similarity index 97%
rename from Summer-2020-Data-Analysis-Project-master/KNearestNeighborClassifier.py
rename to Summer-2020-Data-Analysis-Project-Brandon-Branch/KNearestNeighborClassifier.py
index 5f53890..382bd59 100644
--- a/Summer-2020-Data-Analysis-Project-master/KNearestNeighborClassifier.py
+++ b/Summer-2020-Data-Analysis-Project-Brandon-Branch/KNearestNeighborClassifier.py
@@ -1,67 +1,67 @@
-# =============================================================================
-# KNearestNeighborClassifier.py
-# Name: Alycia Wong and Brandon Wong
-# Date: June 2020
-# Description: Process and graph a CSV file containing biomedical data that 
-# relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
-# Create a random test case and determine whether the case is
-# likely to have CKD depending on the mode of the classifications of the
-# k number of nearest points.
-# =============================================================================
-
-# =============================================================================
-# Import statements
-# =============================================================================
-import matplotlib.pyplot as plt
-import numpy as np
-import NearestNeighborClassifier as NNC
-from statistics import mode
-
-# =============================================================================
-# Functions
-# =============================================================================
-# findDistanceArray inputs a numpy array, a random point, and an integer k and
-# uses the findDistance function from NearestNeighborClassifier. The function
-# outputs a 1D array containing the k number of nearst points to the random
-# test case.
-def findDistanceArray(normArr, testCase, k):
-    distArr = np.zeros(normArr.len)
-    for i in range(len(distArr)):
-        distArr[i] = NNC.findDistance(normArr.hemo[i], normArr.gluc[i], testCase[1], testCase[0])
-        kindex = np.argsort(distArr)[:k]
-    return kindex
-
-# graphKNearestNeighbor void function takes in two 1D and one 2D numpy arrays
-# to graph. One of the 1D arrays is a random testCase with its own distinct
-# points. The other 1D array is used to circle the k number of points closest 
-# to the test case. The 2D array contains information parsed from the CSV 
-# column. The first column (hemoglobin) is graphed as the x-axis and the second
-# column (glucose) as the y-axis. The third column  (classification) determines
-# the color of the points. A legend is generated in a reasonable position.
-def graphKNearestNeighbor(testCase, normArr, k):
-    kindex = findDistanceArray(normArr, testCase, k)
-    NNC.graphCSVFile(normArr)
-    plt.scatter(testCase[1], testCase[0],
-                c = ('b' if mode(normArr.disease[kindex])==0 else 'r'),
-                label = 'Test Case',
-                marker = "x")
-    plt.scatter(normArr.hemo[kindex], normArr.gluc[kindex],
-                c='y', label = 'Nearest neighbor(s)')
-    print("butts")
-    plt.legend(fontsize="small")
-    plt.show()
-    return
-
-# =============================================================================
-# Main Script
-# =============================================================================
-# mainDriver function takes in nothing and graphs both the orginial CSV file,
-# the k number of nearest neighbors, and the test case. This function returns 
-# 0.5
-def mainDriver():
-    val = int(input("How many neighbors are you looking for: "))
-    test = NNC.createTestCase()
-    normal = NNC.normalizeData(NNC.openCSVFile('ckd.csv'))
-    graphKNearestNeighbor(test, normal, val)
-    return 0
+# =============================================================================
+# KNearestNeighborClassifier.py
+# Name: Alycia Wong and Brandon Wong
+# Date: June 2020
+# Description: Process and graph a CSV file containing biomedical data that 
+# relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
+# Create a random test case and determine whether the case is
+# likely to have CKD depending on the mode of the classifications of the
+# k number of nearest points.
+# =============================================================================
+
+# =============================================================================
+# Import statements
+# =============================================================================
+import matplotlib.pyplot as plt
+import numpy as np
+import NearestNeighborClassifier as NNC
+from statistics import mode
+
+# =============================================================================
+# Functions
+# =============================================================================
+# findDistanceArray inputs a numpy array, a random point, and an integer k and
+# uses the findDistance function from NearestNeighborClassifier. The function
+# outputs a 1D array containing the indices of the k nearest points to the
+# random test case.
+def findDistanceArray(normArr, testCase, k):
+    distArr = np.zeros(normArr.len)
+    for i in range(len(distArr)):
+        distArr[i] = NNC.findDistance(normArr.hemo[i], normArr.gluc[i], testCase[1], testCase[0])
+        kindex = np.argsort(distArr)[:k]
+    return kindex
+
+# graphKNearestNeighbor void function takes in two 1D and one 2D numpy arrays
+# to graph. One of the 1D arrays is a random testCase with its own distinct
+# points. The other 1D array is used to circle the k number of points closest 
+# to the test case. The 2D array contains information parsed from the CSV 
+# column. The first column (hemoglobin) is graphed as the x-axis and the second
+# column (glucose) as the y-axis. The third column  (classification) determines
+# the color of the points. A legend is generated in a reasonable position.
+def graphKNearestNeighbor(testCase, normArr, k):
+    kindex = findDistanceArray(normArr, testCase, k)
+    NNC.graphCSVFile(normArr)
+    plt.scatter(testCase[1], testCase[0],
+                c = ('b' if mode(normArr.disease[kindex])==0 else 'r'),
+                label = 'Test Case',
+                marker = "x")
+    plt.scatter(normArr.hemo[kindex], normArr.gluc[kindex],
+                c='y', label = 'Nearest neighbor(s)')
+    print("butts")
+    plt.legend(fontsize="small")
+    plt.show()
+    return
+
+# =============================================================================
+# Main Script
+# =============================================================================
+# mainDriver function takes in nothing and graphs both the original CSV file,
+# the k number of nearest neighbors, and the test case. This function returns 
+# 0.
+def mainDriver():
+    val = int(input("How many neighbors are you looking for: "))
+    test = NNC.createTestCase()
+    normal = NNC.normalizeData(NNC.openCSVFile('ckd.csv'))
+    graphKNearestNeighbor(test, normal, val)
+    return 0
 mainDriver()
\ No newline at end of file
diff --git a/Summer-2020-Data-Analysis-Project-master/NearestNeighborClassifier.py b/Summer-2020-Data-Analysis-Project-Brandon-Branch/NearestNeighborClassifier.py
similarity index 97%
rename from Summer-2020-Data-Analysis-Project-master/NearestNeighborClassifier.py
rename to Summer-2020-Data-Analysis-Project-Brandon-Branch/NearestNeighborClassifier.py
index cb298ce..6d0e806 100644
--- a/Summer-2020-Data-Analysis-Project-master/NearestNeighborClassifier.py
+++ b/Summer-2020-Data-Analysis-Project-Brandon-Branch/NearestNeighborClassifier.py
@@ -1,112 +1,112 @@
-# =============================================================================
-# NearestNeighborClassifier.py
-# Name: Alycia Wong and Brandon Wong
-# Date: June 2020
-# Description: Process and graph a CSV file containing biomedical data that 
-# relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
-# Create n number of random test cases and determine whether the case is
-# likely to have CKD depending on the classification of the nearest point.
-# =============================================================================
-
-# =============================================================================
-# Import statements
-# =============================================================================
-import matplotlib.pyplot as plt
-import numpy as np
-
-# =============================================================================
-# Classes
-# =============================================================================
-class Butts:
-    def __init__(self, data):
-        self.gluc = data[:,0]
-        self.hemo = data[:,1]
-        self.disease = data[:,2]
-        self.len = len(data)
-        self.all = data[:,:3]
-        self.paras = data[:,:2]
-        self.shape = np.shape(data)
-        self.colmax = np.amax(data, axis = 0)
-        self.colmin = np.amin(data, axis = 0)
-
-# =============================================================================
-# Functions
-# =============================================================================
-# Parses in file and turns it into Butts class of data
-def openCSVFile(fileName):
-    return Butts(np.genfromtxt(fileName, delimiter=',',skip_header=1))
-
-# Takes in butts class
-# Loops over data normalizing it for every row
-# returns normalized butts class data
-def normalizeData(dataArr):
-    normArr = np.zeros(dataArr.shape)
-    for i in range(len(normArr)):
-        normArr[i] = (dataArr.all[i] - dataArr.colmin) / (dataArr.colmax - dataArr.colmin)
-    return Butts(normArr)
-
-# graphCSVFile void function takes in a 2D numpy array and graphs with the
-# first column (hemoglobin) as the x-axis and second column (glucose) as the 
-# y-axis. The third column (classification) is used to determine the color of
-# the points on the graph.
-def graphCSVFile(normArr):
-    plt.scatter(normArr.hemo[normArr.disease==0], normArr.gluc[normArr.disease==0],
-                c='b', label='No CKD' )
-    plt.scatter(normArr.hemo[normArr.disease==1], normArr.gluc[normArr.disease==1],
-                c='r', label='CKD')
-    plt.title('Hemoglobin and Glucose levels')
-    plt.xlabel('Hemoglobin')
-    plt.ylabel('Glucose')
-    return
-# findDistance function is either:
-# a) takes in an array and a point and returns an array of distances or the
-# minimum distance or
-# B) takes in cartesian coordinates and uses a simple use of the distance
-# formula to return the distance between the two points.
-def findDistance(x1, y1, x2, y2):
-    return np.sqrt((x1-x2)**2+(y1-y2)**2)
-
-# createTestCase function creates two random test cases (hemoglobin and 
-# glucose) from 0-1 and: 
-# creates a new 1D array with the two points
-# return the points raw
-def createTestCase():
-    return np.random.rand(2)
-
-# nearestNeighborIndex takes in the test case point and returns the index of the
-# nearest point to the test case
-def nearestNeighborIndex(testCase, normArr):
-    distArr = np.zeros(normArr.len)
-    for i in range(len(distArr)):
-        distArr[i] = findDistance(normArr.hemo[i], normArr.gluc[i], testCase[1], testCase[0])
-    nni = distArr.argmin()
-    return nni
-
-# graphNearestNeighbor void function takes in a 2D numpy array (and a cartesian 
-# coordinate depending on createTestCase) and graphs the first column 
-# (hemoglobin) as the x-axis and the second column (glucose) as the y-axis
-# the third column (classification) determines the color of the points. A 
-# randomly generated test case is graphed as a distinct point with a 
-# line connecting it to the nearest neighbor whose classification it takes on.
-# A legend is generated in a reasonable position.
-def graphNearestNeighbor(testCase, normArr):
-    nni = nearestNeighborIndex(testCase, normArr)
-    graphCSVFile(normArr)
-    plt.scatter(testCase[1], testCase[0],
-                c = ('b' if normArr.disease[nni]==0 else 'r'),
-                label = 'Test Case',
-                marker = "x")
-    plt.plot([testCase[1], normArr.hemo[nni]], [testCase[0], normArr.gluc[nni]], 'k-')
-    plt.legend()
-    plt.show()
-    return
-
-# =============================================================================
-# Main Script
-# =============================================================================
-# mainDriver function takes in no inputs and graphs both the orginial CSV
-# file and the test case. This function returns 0.
-def mainDriver():
-    graphNearestNeighbor(createTestCase(), normalizeData(openCSVFile('ckd.csv')))
-    return 0
+# =============================================================================
+# NearestNeighborClassifier.py
+# Name: Alycia Wong and Brandon Wong
+# Date: June 2020
+# Description: Process and graph a CSV file containing biomedical data that 
+# relates hemoglobin levels, glucose levels, and chronic kidney disease (CKD).
+# Create n number of random test cases and determine whether the case is
+# likely to have CKD depending on the classification of the nearest point.
+# =============================================================================
+
+# =============================================================================
+# Import statements
+# =============================================================================
+import matplotlib.pyplot as plt
+import numpy as np
+
+# =============================================================================
+# Classes
+# =============================================================================
+class Butts:
+    def __init__(self, data):
+        self.gluc = data[:,0]
+        self.hemo = data[:,1]
+        self.disease = data[:,2]
+        self.len = len(data)
+        self.all = data[:,:3]
+        self.paras = data[:,:2]
+        self.shape = np.shape(data)
+        self.colmax = np.amax(data, axis = 0)
+        self.colmin = np.amin(data, axis = 0)
+
+# =============================================================================
+# Functions
+# =============================================================================
+# Parses in file and turns it into Butts class of data
+def openCSVFile(fileName):
+    return Butts(np.genfromtxt(fileName, delimiter=',',skip_header=1))
+
+# Takes in butts class
+# Loops over data normalizing it for every row
+# returns normalized butts class data
+def normalizeData(dataArr):
+    normArr = np.zeros(dataArr.shape)
+    for i in range(len(normArr)):
+        normArr[i] = (dataArr.all[i] - dataArr.colmin) / (dataArr.colmax - dataArr.colmin)
+    return Butts(normArr)
+
+# graphCSVFile void function takes in a 2D numpy array and graphs with the
+# first column (hemoglobin) as the x-axis and second column (glucose) as the 
+# y-axis. The third column (classification) is used to determine the color of
+# the points on the graph.
+def graphCSVFile(normArr):
+    plt.scatter(normArr.hemo[normArr.disease==0], normArr.gluc[normArr.disease==0],
+                c='b', label='No CKD' )
+    plt.scatter(normArr.hemo[normArr.disease==1], normArr.gluc[normArr.disease==1],
+                c='r', label='CKD')
+    plt.title('Hemoglobin and Glucose levels')
+    plt.xlabel('Hemoglobin')
+    plt.ylabel('Glucose')
+    return
+# findDistance function is either:
+# a) takes in an array and a point and returns an array of distances or the
+# minimum distance or
+# B) takes in cartesian coordinates and uses a simple use of the distance
+# formula to return the distance between the two points.
+def findDistance(x1, y1, x2, y2):
+    return np.sqrt((x1-x2)**2+(y1-y2)**2)
+
+# createTestCase function creates two random test cases (hemoglobin and 
+# glucose) from 0-1 and: 
+# creates a new 1D array with the two points
+# return the points raw
+def createTestCase():
+    return np.random.rand(2)
+
+# nearestNeighborIndex takes in the test case point and returns the index of the
+# nearest point to the test case
+def nearestNeighborIndex(testCase, normArr):
+    distArr = np.zeros(normArr.len)
+    for i in range(len(distArr)):
+        distArr[i] = findDistance(normArr.hemo[i], normArr.gluc[i], testCase[1], testCase[0])
+    nni = distArr.argmin()
+    return nni
+
+# graphNearestNeighbor void function takes in a 2D numpy array (and a cartesian 
+# coordinate depending on createTestCase) and graphs the first column 
+# (hemoglobin) as the x-axis and the second column (glucose) as the y-axis
+# the third column (classification) determines the color of the points. A 
+# randomly generated test case is graphed as a distinct point with a 
+# line connecting it to the nearest neighbor whose classification it takes on.
+# A legend is generated in a reasonable position.
+def graphNearestNeighbor(testCase, normArr):
+    nni = nearestNeighborIndex(testCase, normArr)
+    graphCSVFile(normArr)
+    plt.scatter(testCase[1], testCase[0],
+                c = ('b' if normArr.disease[nni]==0 else 'r'),
+                label = 'Test Case',
+                marker = "x")
+    plt.plot([testCase[1], normArr.hemo[nni]], [testCase[0], normArr.gluc[nni]], 'k-')
+    plt.legend()
+    plt.show()
+    return
+
+# =============================================================================
+# Main Script
+# =============================================================================
+# mainDriver function takes in no inputs and graphs both the original CSV
+# file and the test case. This function returns 0.
+def mainDriver():
+    graphNearestNeighbor(createTestCase(), normalizeData(openCSVFile('ckd.csv')))
+    return 0
 # mainDriver()
\ No newline at end of file
diff --git a/Summer-2020-Data-Analysis-Project-master/README.md b/Summer-2020-Data-Analysis-Project-Brandon-Branch/README.md
similarity index 100%
rename from Summer-2020-Data-Analysis-Project-master/README.md
rename to Summer-2020-Data-Analysis-Project-Brandon-Branch/README.md
diff --git a/Summer-2020-Data-Analysis-Project-master/__pycache__/KNearestNeighborClassifier.cpython-37.pyc b/Summer-2020-Data-Analysis-Project-Brandon-Branch/__pycache__/KNearestNeighborClassifier.cpython-37.pyc
similarity index 100%
rename from Summer-2020-Data-Analysis-Project-master/__pycache__/KNearestNeighborClassifier.cpython-37.pyc
rename to Summer-2020-Data-Analysis-Project-Brandon-Branch/__pycache__/KNearestNeighborClassifier.cpython-37.pyc
diff --git a/Summer-2020-Data-Analysis-Project-master/__pycache__/NearestNeighborClassifier.cpython-37.pyc b/Summer-2020-Data-Analysis-Project-Brandon-Branch/__pycache__/NearestNeighborClassifier.cpython-37.pyc
similarity index 100%
rename from Summer-2020-Data-Analysis-Project-master/__pycache__/NearestNeighborClassifier.cpython-37.pyc
rename to Summer-2020-Data-Analysis-Project-Brandon-Branch/__pycache__/NearestNeighborClassifier.cpython-37.pyc
diff --git a/Summer-2020-Data-Analysis-Project-master/__pycache__/NearestNeighborClassifier.cpython-38.pyc b/Summer-2020-Data-Analysis-Project-Brandon-Branch/__pycache__/NearestNeighborClassifier.cpython-38.pyc
similarity index 100%
rename from Summer-2020-Data-Analysis-Project-master/__pycache__/NearestNeighborClassifier.cpython-38.pyc
rename to Summer-2020-Data-Analysis-Project-Brandon-Branch/__pycache__/NearestNeighborClassifier.cpython-38.pyc
diff --git a/Summer-2020-Data-Analysis-Project-master/ckd.csv b/Summer-2020-Data-Analysis-Project-Brandon-Branch/ckd.csv
similarity index 91%
rename from Summer-2020-Data-Analysis-Project-master/ckd.csv
rename to Summer-2020-Data-Analysis-Project-Brandon-Branch/ckd.csv
index 8c30b6b..d071373 100644
--- a/Summer-2020-Data-Analysis-Project-master/ckd.csv
+++ b/Summer-2020-Data-Analysis-Project-Brandon-Branch/ckd.csv
@@ -1,159 +1,159 @@
-Glucose,Hemoglobin,Class
-117,11.2,1
-70,9.5,1
-380,10.8,1
-157,5.6,1
-173,7.7,1
-95,9.8,1
-264,12.5,1
-70,10,1
-253,10.5,1
-163,9.8,1
-129,9.1,1
-133,10.3,1
-76,7.1,1
-280,13,1
-210,16.1,1
-219,10.4,1
-295,9.2,1
-118,11.4,1
-224,8.1,1
-128,8.2,1
-118,12,1
-105,11.1,1
-288,7.9,1
-273,8.3,1
-122,12.6,1
-303,10.4,1
-102,8.7,1
-107,8.3,1
-117,10,1
-239,9.5,1
-94,9.9,1
-129,8.1,1
-252,11.2,1
-255,7.3,1
-253,10.9,1
-214,10.9,1
-490,11.5,1
-163,7.9,1
-241,9.6,1
-214,9.4,1
-106,8.6,1
-424,12.6,1
-176,3.1,1
-140,15,0
-70,17,0
-82,15.9,0
-119,15.4,0
-99,13,0
-121,13.6,0
-131,14.5,0
-91,14,0
-98,13.9,0
-104,16.1,0
-131,14.1,0
-122,17,0
-118,15.5,0
-117,16.2,0
-132,14.4,0
-97,14.2,0
-133,13.2,0
-122,13.9,0
-121,15,0
-111,14.3,0
-96,13.8,0
-139,14.8,0
-125,16.5,0
-123,15.7,0
-112,14.5,0
-140,16.3,0
-130,15.5,0
-123,14.6,0
-100,16.9,0
-94,16,0
-81,14.7,0
-93,16.6,0
-124,14.9,0
-89,16.7,0
-125,16.8,0
-91,13.5,0
-127,15.1,0
-96,16.9,0
-128,13.1,0
-122,17.1,0
-128,15.2,0
-137,13.6,0
-81,13.9,0
-102,13.2,0
-132,13.7,0
-104,17.3,0
-131,15.6,0
-102,15,0
-120,17.4,0
-105,15.7,0
-109,13.9,0
-130,15.9,0
-100,14,0
-109,15.8,0
-120,13.4,0
-80,14.1,0
-130,13.5,0
-99,17.7,0
-134,14.2,0
-92,14,0
-132,17.8,0
-88,13.3,0
-100,14.3,0
-130,13.4,0
-95,15,0
-111,16.2,0
-106,14.4,0
-97,13.5,0
-108,17.8,0
-99,13.6,0
-83,17.5,0
-109,15,0
-86,13.6,0
-102,14.6,0
-95,15,0
-87,17.1,0
-107,13.6,0
-117,13,0
-88,17.2,0
-105,14.7,0
-70,13.7,0
-89,15,0
-118,14.8,0
-81,15,0
-125,17.4,0
-82,14.9,0
-107,13.6,0
-83,16.2,0
-79,17.6,0
-109,15,0
-133,13.7,0
-111,16.3,0
-74,15.1,0
-88,16.4,0
-97,13.8,0
-78,16.1,0
-113,15.3,0
-75,16.8,0
-119,13.9,0
-132,15.4,0
-113,16.5,0
-100,16.4,0
-93,16.7,0
-94,15.5,0
-112,17,0
-99,15,0
-85,15.6,0
-133,14.8,0
-117,13,0
-137,14.1,0
-140,15.7,0
-75,16.5,0
-100,15.8,0
-114,14.2,0
-131,15.8,0
+Glucose,Hemoglobin,Class
+117,11.2,1
+70,9.5,1
+380,10.8,1
+157,5.6,1
+173,7.7,1
+95,9.8,1
+264,12.5,1
+70,10,1
+253,10.5,1
+163,9.8,1
+129,9.1,1
+133,10.3,1
+76,7.1,1
+280,13,1
+210,16.1,1
+219,10.4,1
+295,9.2,1
+118,11.4,1
+224,8.1,1
+128,8.2,1
+118,12,1
+105,11.1,1
+288,7.9,1
+273,8.3,1
+122,12.6,1
+303,10.4,1
+102,8.7,1
+107,8.3,1
+117,10,1
+239,9.5,1
+94,9.9,1
+129,8.1,1
+252,11.2,1
+255,7.3,1
+253,10.9,1
+214,10.9,1
+490,11.5,1
+163,7.9,1
+241,9.6,1
+214,9.4,1
+106,8.6,1
+424,12.6,1
+176,3.1,1
+140,15,0
+70,17,0
+82,15.9,0
+119,15.4,0
+99,13,0
+121,13.6,0
+131,14.5,0
+91,14,0
+98,13.9,0
+104,16.1,0
+131,14.1,0
+122,17,0
+118,15.5,0
+117,16.2,0
+132,14.4,0
+97,14.2,0
+133,13.2,0
+122,13.9,0
+121,15,0
+111,14.3,0
+96,13.8,0
+139,14.8,0
+125,16.5,0
+123,15.7,0
+112,14.5,0
+140,16.3,0
+130,15.5,0
+123,14.6,0
+100,16.9,0
+94,16,0
+81,14.7,0
+93,16.6,0
+124,14.9,0
+89,16.7,0
+125,16.8,0
+91,13.5,0
+127,15.1,0
+96,16.9,0
+128,13.1,0
+122,17.1,0
+128,15.2,0
+137,13.6,0
+81,13.9,0
+102,13.2,0
+132,13.7,0
+104,17.3,0
+131,15.6,0
+102,15,0
+120,17.4,0
+105,15.7,0
+109,13.9,0
+130,15.9,0
+100,14,0
+109,15.8,0
+120,13.4,0
+80,14.1,0
+130,13.5,0
+99,17.7,0
+134,14.2,0
+92,14,0
+132,17.8,0
+88,13.3,0
+100,14.3,0
+130,13.4,0
+95,15,0
+111,16.2,0
+106,14.4,0
+97,13.5,0
+108,17.8,0
+99,13.6,0
+83,17.5,0
+109,15,0
+86,13.6,0
+102,14.6,0
+95,15,0
+87,17.1,0
+107,13.6,0
+117,13,0
+88,17.2,0
+105,14.7,0
+70,13.7,0
+89,15,0
+118,14.8,0
+81,15,0
+125,17.4,0
+82,14.9,0
+107,13.6,0
+83,16.2,0
+79,17.6,0
+109,15,0
+133,13.7,0
+111,16.3,0
+74,15.1,0
+88,16.4,0
+97,13.8,0
+78,16.1,0
+113,15.3,0
+75,16.8,0
+119,13.9,0
+132,15.4,0
+113,16.5,0
+100,16.4,0
+93,16.7,0
+94,15.5,0
+112,17,0
+99,15,0
+85,15.6,0
+133,14.8,0
+117,13,0
+137,14.1,0
+140,15.7,0
+75,16.5,0
+100,15.8,0
+114,14.2,0
+131,15.8,0