-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcachesim.cpp
More file actions
423 lines (399 loc) · 13.2 KB
/
cachesim.cpp
File metadata and controls
423 lines (399 loc) · 13.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
#include "cachesim.hpp"
#include <map>
#include <vector>
#include <assert.h>
#include <algorithm>
#include <iostream>
#include <stdio.h>
#include <string.h>
//block structure that will be used for the whole cache
struct Block_ {
int valid;
uint64_t tag;
int dirty;
uint64_t lru;
};
//each block in the prefetch buffer has a tag, valid bit, and timestamp for FIFO implementation
struct p_block {
int valid;
uint64_t tag;
uint64_t fifo;
};
//each row of markov will have a timestamp for LRU and a table of addresses & uses
struct markov_row {
uint64_t pre_miss;
uint64_t table[2][4]; //[0][i] is the ID of the i'th block, [1][i] is the count
uint64_t timestamp;
};
uint64_t accessNum = 0;
//array for cache
struct Block_ **cache;
struct markov_row *markov;
struct p_block *p_buffer;
int numWays;
int numSets;
uint64_t C;
uint64_t B;
uint64_t S;
int markovDepth = 0;
uint64_t numHits = 0;
uint64_t numMiss = 0;
uint64_t numWrites = 0;
uint64_t numWriteMiss = 0;
uint64_t numWriteHits = 0;
uint64_t numWriteBack = 0;
uint64_t numReads = 0;
uint64_t numReadMiss = 0;
uint64_t numReadHits = 0;
uint64_t prefetchHits = 0;
uint64_t numPrefetch = 0;
uint64_t pbufferMiss = 0;
uint64_t prefetchIssued = 0;
uint64_t numMissL1 = 0;
int pbufferlength = 32;
double hitTime = 0.0;
double missTime = 0.0;
double currTime = 0.0;
uint64_t preType = 0;
uint64_t prevMiss = 0;
int firstMiss = 1;
/**
* Subroutine for initializing the cache. You many add and initialize any global or heap
* variables as needed.
* XXX: You're responsible for completing this routine
*
* @c1 The total number of bytes for data storage in L1 is 2^c
* @b1 The size of L1's blocks in bytes: 2^b-byte blocks.
* @s1 The number of blocks in each set of L1: 2^s blocks per set.
*/
void setup_cache(uint64_t c1, uint64_t b1, uint64_t s1, uint64_t p1, uint64_t prefetcher_type) {
C = c1; B=b1; S=s1; markovDepth=(int) p1; preType = prefetcher_type;
//number of ways is equal to 2^S
numWays = 1 << S;
//number of sets is equal to 2^(C-B-S)
numSets = 1 << (C-B-S);
//create 2D array of blocks for cache
cache = (Block_**)malloc(numSets * sizeof(Block_*));
for(int i = 0; i < numSets; i++){
cache[i] = (Block_*)malloc(numWays * sizeof(Block_));
for(int j = 0; j < numWays; j++){
cache[i][j] = {.valid = 0, .tag = 0, .dirty = 0, .lru=0};
}
}
//create markov and prefetch buffer objects
markov = (markov_row*)malloc(markovDepth * sizeof(markov_row));
memset(markov, 0, markovDepth*sizeof(markov_row));
p_buffer = (p_block*)malloc(pbufferlength * sizeof(p_block));
for(int i = 0; i < pbufferlength; i++){
p_buffer[i] = {.valid = 0, .tag = 0, .fifo = 0};
}
//set the hit time and initialize the time as 0
hitTime = 2 + 0.2*(double)S;
missTime = 20.0;
currTime = 0.0;
}
/**
* Subroutine that simulates the cache one trace event at a time.
* XXX: You're responsible for completing this routine
*
* @type The type of event, can be READ or WRITE.
* @arg The target memory address
* @p_stats Pointer to the statistics structure
*/
void cache_access(char type, uint64_t arg, cache_stats_t* p_stats) {
//printf("Time: %d\n",accessNum);
//increment access number
accessNum += 1;
//increment number of reads or writes
if(type == READ){
numReads += 1;
}
else numWrites += 1;
int cacheHit = 0;
int preHit = 0;
//set mask for index, which will be all 1's at the index bit and 0 everywhere else
uint64_t indexmask = ((1 << (C-B-S)) - 1) << B;
int index = (int)((arg & indexmask) >> B);
//set mask for tag, which will be all 1's at the bits before the index bits and 0 everywhere else. This could also be shifted all the way right, but is unnecessary since we are using uint64_t
uint64_t tagmask = ~((1 << (B + (C-B-S))) - 1);
uint64_t tag = arg & tagmask;
uint64_t blockid = (arg & tagmask) | (arg & indexmask); //this will be used in the FA prefetch buffer
uint64_t nPlusOne = blockid + (1 << B);
int prefetchIndex = -1;
//printf("Curr Addr without offset: %llx\n",blockid);
//loop through all the ways in the indexed set to see if the tag matches.
for(int j = 0; j < numWays; j++){
if((cache[index][j].tag == tag) && (cache[index][j].valid==1)) {
//if the tag matches, print hit and increment the number of hits based on the type
printf("H\n");
cacheHit = 1;
numHits += 1;
//set lru to the access number to record when it was last used
cache[index][j].lru = accessNum;
if(type == WRITE){
//if the hit is a write, the dirty bit need to be set
cache[index][j].dirty = 1;
numWriteHits += 1;
}
else numReadHits += 1;
}
//if it misses in the cache, check prefetch buffer (unless there is no prefetching)
else if (preType > 0) {
for(int i=0; i<pbufferlength; i++){
if((p_buffer[i].tag == blockid) && (p_buffer[i].valid == 1)){
prefetchIndex = i;
prefetchHits += 1;
preHit = 1; //flag set
p_buffer[i].valid = 0; //if it is found in the prefetch buffer, move to cache
printf("PH\n");
break; //no reason to continue loop
}
}
}
}
uint64_t p_tag;
if(cacheHit == 0){
if (preHit == 0){ //if it isn't found in either cache or prefetch
pbufferMiss += 1;
int preReplace = -1;
//this loop finds the prefetch block that will be replaced if prefetching is required
uint64_t fifotest = std::numeric_limits<uint64_t>::max();
for(int i=0; i<pbufferlength; i++){
if (p_buffer[i].valid == 0){
preReplace = i;
break;
}
else if (p_buffer[i].fifo < fifotest){
preReplace = i;
fifotest = p_buffer[i].fifo;
}
}
int replacing = 0;
//work here
int tempIndex = -1;
//this searches the markov table if markov or hybrid is enabled
if((preType == 1)||(preType == 3)){
int mfuIndex = 0;
uint64_t mfuValue = 0;
uint64_t mfuAddr = 0;
replacing = 0;
//first search the rows to see if any match the current block
for(int i=0; i<markovDepth; i++){
if(markov[i].pre_miss == blockid){
tempIndex = i;
//if the row is found, find the MFU to prefetch
for(int j=0; j<4; j++){
if(markov[i].table[1][j] > mfuValue){
mfuIndex = j;
mfuAddr = markov[i].table[0][j];
mfuValue = markov[i].table[1][j];
replacing = 1;
}
//if the MFU's are equal, prefetch the higher address
else if(markov[i].table[1][j] == mfuValue){
if((mfuAddr < markov[i].table[0][j]) && (mfuValue > 0)){
mfuAddr = markov[i].table[0][j];
mfuIndex = j;
mfuValue = markov[i].table[1][j];
replacing = 1;
}
}
}
}
}
p_tag = mfuAddr;
if(p_tag == blockid) replacing = 0;
//printf("%d,%d,%d\n",tempIndex,mfuIndex,replacing);
}
//if n+1, or hybrid and not found in markov table, prefetch n+1
if((preType == 2)||((preType == 3)&& (replacing == 0))){
p_tag = nPlusOne;
replacing = 1;
}
//if it is already in the prefetch buffer, don't prefetch
for (int i = 0; i<pbufferlength; i++){
if ((p_buffer[i].valid == 1) && (p_buffer[i].tag == p_tag)){
replacing = 0;
//printf("in prefetch buffer\n");
}
}
//mask the prefetch index to search cache
int p_index = (int)((p_tag & indexmask) >> B);
uint64_t p_cachetag = p_tag & tagmask;
//if it is found in the cache, don't prefetch
for(int j=0; j < numWays; j++){
if((cache[p_index][j].tag == p_cachetag) && (cache[p_index][j].valid == 1)){
replacing = 0;
//printf("in cache\n");
}
}
//always print miss if it is not found in prefetch buffer or
printf("M\n");
//if we are prefetching, prefetch
if(replacing == 1){
if(preType == 1){
//markov[tempIndex].timestamp = accessNum;
//printf("%d\n",tempIndex);
//printf("Issue prefetch for addr: %llx\n",p_tag);
}
prefetchIssued += 1;
//set fifo bit and tag, make valid
p_buffer[preReplace] = {.valid = 1, .tag = p_tag, .fifo = accessNum};
}
numMiss += 1;
currTime = currTime + missTime;
//if markov or hybrid, and this is not the first miss, update markov table
if(((preType == 1)||(preType == 3))&&(firstMiss == 0)){
//printf("Prev Miss Addr without offset: %llx\n",prevMiss);
int row = -1;
int column = -1;
uint64_t lfuVal = std::numeric_limits<uint64_t>::max();
int lfuIndex = -1;
uint64_t lfuAddr = std::numeric_limits<uint64_t>::max();
//search for row
for (int i=0 ; i<markovDepth ; i++){
if (markov[i].pre_miss == prevMiss){
row = i;
//printf("Row hit\n");
//search for column
for (int j=0; j<4; j++){
if (markov[i].table[0][j] == blockid){ //row exists, column exists
column = j;
//markov[i].table[1][j] += 1;
//printf("Column hit\n");
}
}
//if column not found, find one to replace
if (column < 0){ //column doesnt exist
for(int j=0; j<4; j++){
if(markov[i].table[1][j] < lfuVal){
lfuVal = markov[i].table[1][j];
lfuIndex = j;
lfuAddr = markov[i].table[0][j];
}
else if(markov[i].table[1][j] == lfuVal){
if(markov[i].table[0][j] < lfuAddr){
lfuVal = markov[i].table[1][j];
lfuIndex = j;
lfuAddr = markov[i].table[0][j];
}
}
}
//replace column
column = lfuIndex;
markov[row].table[0][column] = blockid;
markov[row].table[1][column] = 0;
//printf("Column miss\n");
}
}
else;
}
int lruRow = -1;
uint64_t lruTime = std::numeric_limits<uint64_t>::max();
//if row not found, find one to replace
if (row < 0){ //row and column dont exist
for(int i=0; i<markovDepth; i++){
if(markov[i].timestamp < lruTime){
lruTime = markov[i].timestamp;
row = i;
}
}
//replace row
markov[row].pre_miss = prevMiss;
//kill old row
for(int i=0; i<4; i++){
markov[row].table[0][i] = 0;
markov[row].table[1][i] = 0;
}
column = 0;
//markov[row].table[0][column] = blockid;
//rintf("Row miss\nColumn miss\n");
}
markov[row].table[0][column] = blockid;
markov[row].timestamp = accessNum;
markov[row].table[1][column] += 1;
//printf("markov:%d,%d,%d,%d\n",row,column,lfuVal,lfuAddr);
}
prevMiss = blockid;
firstMiss = 0;
}
numMissL1 += 1;
//now update the cache
int lruIndex = 0;
//set current lru to the highest possible value of uint64_t
uint64_t currMin = std::numeric_limits<uint64_t>::max();
//loop through the LRU values. If any value is lower, replace
for(int j=0; j < numWays; j++){
uint64_t jlru = cache[index][j].lru;
if(jlru < currMin){ //could add in a check for valid bits, but unnecessry since all new blocks had lru time set to 0.
currMin = jlru;
lruIndex = j;
}
}
if(cache[index][lruIndex].dirty == 1){
//if the replaced block has a dirty bit of 1, write back
numWriteBack += 1;
}
cache[index][lruIndex] = {.valid = 1, .tag = tag, .dirty = 0, .lru=accessNum};
if(type==WRITE){
//if write, set dity bit
cache[index][lruIndex].dirty = 1;
numWriteMiss += 1;
}
else {
numReadMiss += 1;
}
}
//add hit time to all
currTime = currTime + hitTime;
}
/**
* Subroutine for cleaning up any outstanding memory operations and calculating overall statistics
* such as miss rate or average access time.
* XXX: You're responsible for completing this routine
*
* @p_stats Pointer to the statistics structure
*/
void complete_cache(cache_stats_t *p_stats) {
//free cache
for (int i=0; i<numSets; i++){
free(cache[i]);
}
free(cache);
free(markov);
free(p_buffer);
//set all of the values in the p_stats structure
p_stats -> accesses = numHits + numMiss + prefetchHits;
p_stats -> prefetch_issued = prefetchIssued;
p_stats -> prefetch_hits = prefetchHits;
p_stats -> prefetch_misses = pbufferMiss;
p_stats -> accesses_l2 = 0; //no L2
p_stats -> accesses_vc = 0; //no victim cache
p_stats -> reads = numReads;
p_stats -> read_hits_l1 = numReadHits;
p_stats -> read_misses_l1 = numReadMiss;
p_stats -> read_misses_l2 = 0; //no L2
p_stats -> writes = numWrites;
p_stats -> write_hits_l1 = numWriteHits;
p_stats -> write_misses_l1 = numWriteMiss;
p_stats -> write_misses_l2 = 0; //no L2
p_stats -> write_back_l1 = numWriteBack;
p_stats -> write_back_l2 = 0; //no L2
p_stats -> total_hits_l1 = numHits;
p_stats -> total_misses_l1 = numWriteMiss + numReadMiss;
p_stats -> l1_hit_ratio = ((double)numHits)/((double)(numHits+numMiss+prefetchHits));
p_stats -> l1_miss_ratio = ((double)numMissL1)/((double)(numHits+numMiss+prefetchHits));
p_stats -> overall_miss_ratio = (double)(numHits+numMiss+prefetchHits-numHits-prefetchHits)/(double)(numHits+numMiss+prefetchHits);
p_stats -> read_hit_ratio = ((double)numReadHits)/((double)(numReadHits+numReadMiss));
p_stats -> read_miss_ratio = ((double)numReadMiss)/((double)(numReadHits+numReadMiss));
p_stats -> write_hit_ratio = ((double)numWriteHits)/((double)(numWriteHits+numWriteMiss));
p_stats -> write_miss_ratio = ((double)numWriteMiss)/((double)(numWriteHits+numWriteMiss));
p_stats -> prefetch_hit_ratio = (double)prefetchHits / (double)prefetchIssued;
p_stats -> victim_hits = 0;
p_stats -> avg_access_time_l1 = currTime / (double) accessNum;
if(preType == 0){
p_stats -> prefetch_hit_ratio = 0.00;
p_stats -> prefetch_misses = 0;
p_stats -> prefetch_hits = 0;
}
}