forked from dychen/cs156b
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlinear_regression.py
More file actions
49 lines (41 loc) · 1.28 KB
/
linear_regression.py
File metadata and controls
49 lines (41 loc) · 1.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from sklearn import linear_model
import time
# Fits a scikit-learn LinearRegression model that predicts a rating from the
# (user id, movie id) pair, then writes one predicted rating per line for
# every row of the prediction input file.
#
# Every print call takes a single pre-formatted string so the script prints
# the same text under Python 2.7 and Python 3.

# Learn #
input_vector = []   # one [uid, mid] feature row per training example
output_vector = []  # the rating paired with each feature row
num_iter = 0        # number of training rows consumed so far
LIMIT = 10000000    # maximum number of training rows to read
# The with-block closes the file even when a malformed row raises.
with open("all_um.dta", 'r') as f_learn:
    start_time = time.time()
    for line in f_learn:
        # Each row must hold exactly four whitespace-separated fields
        # (otherwise the unpacking raises ValueError); the date is parsed
        # but not used as a feature.
        uid, mid, date, rating = line.strip().split()
        input_vector.append([int(uid), int(mid)])
        output_vector.append(int(rating))
        num_iter += 1
        if num_iter >= LIMIT:
            break
    print("Read input time: %s s." % (time.time() - start_time))

start_time = time.time()
clf = linear_model.LinearRegression()
clf.fit(input_vector, output_vector)
print("Learn time: %s s." % (time.time() - start_time))

# Predict #
target_input_vector = []  # one [uid, mid] row per line of the input file
# Both handles are managed together so the submission file is flushed and
# closed even if parsing or prediction fails part-way through.
with open("qual_mu.dta", 'r') as f_predict, \
        open("submission2.dta", 'w') as f_predict_out:
    start_time = time.time()
    for line in f_predict:
        # Prediction rows carry three fields: there is no rating column.
        uid, mid, date = line.strip().split()
        target_input_vector.append([int(uid), int(mid)])
    # Predict once for the whole batch, after every row has been read.
    target_output_vector = clf.predict(target_input_vector)
    for predicted_rating in target_output_vector:
        f_predict_out.write(str(predicted_rating) + '\n')
    # Reported before the files are closed, so the figure covers reading,
    # predicting and writing but not the final flush.
    print("Predict time: %s s." % (time.time() - start_time))
# Timings recorded for LIMIT = 1000000 data points (note: this differs from
# the LIMIT = 10000000 currently set in the code above):
# Read input time: 5.19124603271 s.
# Learn time: 4.09771299362 s.
# Predict time: 52.6327300072 s.