-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcreate-graphs.py
More file actions
98 lines (78 loc) · 3.32 KB
/
create-graphs.py
File metadata and controls
98 lines (78 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3
import matplotlib.pyplot as plt
import pandas as pd
import sys
def main():
try:
# If a results file is specified, use this.
results_file = sys.argv[1]
except:
# Otherwise, use a default file name.
results_file = 'results.csv'
print(f"Parsing '{results_file}'...")
df = parse_data(results_file)
print(df)
print('Parsing completed.')
# Now time to graph!
print('Creating graphs...')
# List the tests used.
tests = [col for col in df.columns if col not in ['test', 'cmdline', 'params']]
# To display only specific tests, you may comment this above line and replace it
# with something like the following:
# tests = ['test1', 'test2', ...']
# This list can use any of the test names as listed in the original CSV file.
# Create a box plot.
# Set some styling. This is how to arrange the results for each test.
rows = 1
columns = len(tests)
row_width_in = 7
col_width_in = 5
# Set up the figure layout.
fig, ax = plt.subplots(rows, columns, figsize=(columns * col_width_in, rows * row_width_in))
fig.set_tight_layout(True)
for i, test in enumerate(tests):
try:
axis = ax[i]
except TypeError:
# There will only be one axis if there is one test, so `ax` is not indexable in this case.
axis = ax
df.boxplot(column=test, by='params', rot=90, layout=(1, len(tests)), ax=axis)
axis.set_xlabel('Parameters')
match test:
case 'sysbench':
axis.set_ylabel('Events per Second')
case str(test) if 'crypt' in test:
axis.set_ylabel('Speed (MiB/s)')
case str(test) if 'zip' in test:
axis.set_ylabel('Average MIPS')
case str(test) if 'boot' in test:
axis.set_ylabel('Time (s)')
case _:
axis.set_ylabel('Score')
# Save the figure.
plt.suptitle('')
plt.savefig('results.pdf', bbox_inches='tight')
plt.savefig('results.svg', bbox_inches='tight')
print('Graphs created successfully. See results.pdf and results.svg.')
def parse_data(results_file):
    """Extract data from the results CSV file and produce a pandas DataFrame.

    Args:
        results_file: Path of the CSV file to read. Its first column becomes
            the index, and it must contain a 'cmdline' column holding the
            whitespace-separated boot parameters of each run.

    Returns:
        The DataFrame read from the file, with 'cmdline' converted to the
        pandas string dtype and a 'params' column appended. For each run,
        'params' is the sorted, de-duplicated, space-joined list of its
        parameters that are not present in every run, or the text 'default'
        when there are none.

    Raises:
        KeyError: If the file has no 'cmdline' column.
    """
    # Read data from CSV file, closing the handle as soon as parsing is done.
    with open(results_file, newline='') as handle:
        df = pd.read_csv(handle, index_col=0)

    # Set the 'cmdline' column to string types.
    df['cmdline'] = df['cmdline'].astype('string')

    # Split the boot parameters of each run into a set. A missing command line
    # is treated as a run with no parameters rather than raising on iteration.
    run_params = [set(tokens) for tokens in df['cmdline'].fillna('').str.split()]

    # A 'trivial' boot parameter is one that is in every run. Intersecting the
    # per-run sets means a parameter repeated within a single run is not
    # mistaken for one that is shared by all runs.
    trivial = set.intersection(*run_params) if run_params else set()

    # Place the non-trivial parameters of each run into a new column. (Used for
    # graphing.) Sorting keeps the label stable regardless of set ordering; if
    # there are no non-trivial parameters, put default text here.
    df['params'] = [
        ' '.join(sorted(params - trivial)) or 'default'
        for params in run_params
    ]
    return df
# Run the graphing workflow only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()