-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtask4.py
More file actions
94 lines (73 loc) · 3.32 KB
/
task4.py
File metadata and controls
94 lines (73 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import csv
import json
import matplotlib.pyplot as plt
import numpy as np
def _summarise_sources(reviews):
    """Build ``[news_source, num_articles, avg_rating]`` rows from reviews.

    Each review is expected to be a mapping with ``'news_source'`` and
    ``'rating'`` keys. Reviews whose news source is ``None`` or an empty
    string are skipped. The returned rows are sorted by news source name in
    ascending order.
    """
    # Maps news source -> [article count, sum of ratings]; a dict lookup
    # replaces the nested list scans per review.
    totals = {}
    for review in reviews:
        source = review['news_source']
        # NOTE(review): the previous version dropped the first sorted row
        # unconditionally, presumably to discard a blank news source. Skipping
        # blank sources explicitly avoids losing a real source when no blank
        # entry exists - confirm against the dataset.
        if not source:
            continue
        stats = totals.setdefault(source, [0, 0])
        stats[0] += 1
        stats[1] += review['rating']

    return [
        [source, totals[source][0], totals[source][1] / totals[source][0]]
        for source in sorted(totals)
    ]


def _plot_average_ratings(rows, min_articles=5):
    """Save a horizontal bar chart of average ratings to ``task4b.png``.

    Only sources with at least ``min_articles`` articles are plotted, ordered
    by ascending average rating.
    """
    frequent = [(row[0], row[2]) for row in rows if row[1] >= min_articles]
    # Sort numerically by average rating (ties keep their alphabetical order).
    frequent.sort(key=lambda pair: pair[1])

    source_names = [name for name, _ in frequent]
    average_ratings = [round(float(rating), 2) for _, rating in frequent]

    plt.figure(figsize=(20, 10))
    plt.barh(source_names, average_ratings)
    plt.grid()
    # In a horizontal bar chart the bar lengths (ratings) run along the
    # x-axis and the categories (news sources) sit on the y-axis.
    plt.xlabel('Average Ratings')
    plt.ylabel('News Sources')
    plt.xticks(np.arange(0, 5, 0.25))
    plt.title('Average rating of articles published by each news source')
    plt.savefig("task4b.png")


def task4():
    """Summarise HealthStory reviews per news source.

    Reads ``/course/data/a1/reviews/HealthStory.json``, writes ``task4a.csv``
    with the columns ``news_source``, ``num_articles`` and ``avg_rating``
    (sorted by news source), and saves ``task4b.png``, a bar chart of the
    average rating of every source that published at least 5 articles.

    Returns:
        None
    """
    fields = ['news_source', 'num_articles', 'avg_rating']

    # A context manager guarantees the input file is closed after loading.
    with open('/course/data/a1/reviews/HealthStory.json',
              encoding='utf-8') as f:
        data = json.load(f)

    rows = _summarise_sources(data)

    # newline='' lets the csv module control line endings, so no blank rows
    # appear between records on platforms that translate '\n'.
    with open('task4a.csv', 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(fields)
        csvwriter.writerows(rows)

    # Plot straight from the in-memory rows rather than re-reading the CSV.
    _plot_average_ratings(rows)