lab从这里开始变得难了起来了,这次要模拟计算机里的一个硬件-cache的工作,关于cache,百度百科介绍的比我好,这边请———>

然后咱们就先拿到实验用的文件以及他的writeup,看完一会之后(long long after)就知道了此次实验的目的。然后文件也知道的一清二楚了。

csim.c:用来做part A的主要文件。

trans.c:用来做part B的主要文件。

Part A

writeup里面已经提到了,cache.h头文件里面有所需的函数并且要在csim.c里面完成cache的模拟过程,那么首先我们看看cache.h头文件提供了哪些东西

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/* 
* cachelab.h - Prototypes for Cache Lab helper functions
*/

#ifndef CACHELAB_TOOLS_H
#define CACHELAB_TOOLS_H

#define MAX_TRANS_FUNCS 100

typedef struct trans_func{
void (*func_ptr)(int M,int N,int[N][M],int[M][N]);
char* description;
char correct;
unsigned int num_hits;
unsigned int num_misses;
unsigned int num_evictions;
} trans_func_t;

/*
* printSummary - This function provides a standard way for your cache
* simulator * to display its final hit and miss statistics
*/
void printSummary(int hits, /* number of hits */
int misses, /* number of misses */
int evictions); /* number of evictions */

/* Fill the matrix with data */
void initMatrix(int M, int N, int A[N][M], int B[M][N]);

/* The baseline trans function that produces correct results. */
void correctTrans(int M, int N, int A[N][M], int B[M][N]);

/* Add the given function to the function list */
void registerTransFunction(
void (*trans)(int M,int N,int[N][M],int[M][N]), char* desc);

#endif /* CACHELAB_TOOLS_H */

用我那-4级的英语水平来翻译大概就是定义了一个结构体,结构体里有测试cache的一些指标,诸如命中次数这些的东西,所以这个结构体就当成cache吧。

那么就很好写(chao)csim.c的代码了,如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#include "cachelab.h"
#include<stdlib.h>
#include<unistd.h>
#include<stdio.h>
#include<limits.h>
#include<getopt.h>
#include<string.h>
int help_mode, verbose_mode, s, E, b, S,number_hits, number_miss, number_eviction;
//S is the number of sets, E is the associativity, b is number of block bits
char filename[1000];//The file name
char buffer[1000];//The buffer of input
typedef struct
{
int valid_bit, tag, stamp;//cold miss
}cache_line;
cache_line **cache = NULL;
void update(unsigned int address)
{
int max_stamp = INT_MIN, max_stamp_id = -1;
int t_address, s_address;// The t value and s value of address
s_address = (address >> b) & ((-1U) >> (32 - s));//use bit manipulation to get s_address, -1U equals to INT_MAX
t_address = address >> (s + b);
for(int i = 0; i < E; i++)//check whether there is a hit
if(cache[s_address][i].tag == t_address)//which means a hit
{
//is_placed = 1;
cache[s_address][i].stamp = 0;//restart the time stamp control unit
number_hits++;
//printf("hit\n");
return ;//just return now
}
//to check whether is an empty line
for(int i = 0; i < E; i++)
if(cache[s_address][i].valid_bit == 0)
{
cache[s_address][i].valid_bit = 1;
cache[s_address][i].tag = t_address;
cache[s_address][i].stamp = 0;
number_miss++;//compulsory miss
//printf("miss\n");
return;
}
//If there is not any empty line, then an eviction will occur
number_eviction++;
number_miss++;
for(int i = 0; i < E; i++)
if(cache[s_address][i].stamp > max_stamp)
{
max_stamp = cache[s_address][i].stamp;
max_stamp_id = i;
}
cache[s_address][max_stamp_id].tag = t_address;
cache[s_address][max_stamp_id].stamp = 0;
return;
}

void update_time(void)//update the time stamp of each cache line
{
for(int i = 0; i < S; i++)
for(int j = 0; j < E; j++)
if(cache[i][j].valid_bit == 1)//if valid
cache[i][j].stamp++;
}

int main(int argc,char* argv[])
{
int opt, temp;//The getopt return value
char type;//type of a single trace record
unsigned int address;//address of memory
number_hits = number_miss = number_eviction = 0;//initialization
while(-1 != (opt = (getopt(argc, argv, "hvs:E:b:t:"))))
{
switch(opt)
{
case 'h':help_mode = 1;
break;
case 'v':verbose_mode = 1;
break;
case 's':s = atoi(optarg);
break;
case 'E':E = atoi(optarg);
break;
case 'b':b = atoi(optarg);
break;
case 't':strcpy(filename, optarg);
break;
}
}
if(help_mode == 1)
{
system("cat help_info");//"help_info" is a text file containing help information
exit(0);
}
FILE* fp = fopen(filename,"r");
if(fp == NULL)
{
fprintf(stderr,"The File is wrong!\n");
exit(-1);
}
S = (1 << s); // S equals to 2^s
cache = (cache_line**)malloc(sizeof(cache_line*) * S);
for(int i = 0; i < S; i++)
cache[i] = (cache_line*)malloc(sizeof(cache_line) * E);//Important! malloc each row of cache
for(int i = 0; i < S; i++)
for(int j = 0; j < E; j++)
{
cache[i][j].valid_bit = 0;
cache[i][j].tag = cache[i][j].stamp = -1;
}//initialization
while(fgets(buffer,1000,fp))//get a whole line
{
sscanf(buffer," %c %xu,%d", &type, &address, &temp);//hexdecimal
switch(type)
{
case 'L':update(address);
break;
case 'M':update(address);//just let it fall through, do twice
case 'S':update(address);
break;
}
update_time();
}
for(int i = 0; i < S; i++)
free(cache[i]);//free allocated space first
free(cache);
fclose(fp);
printSummary(number_hits, number_miss, number_eviction);
return 0;
}

测试:

Part B

优化矩阵转置函数,使得cache miss尽可能少,不超过12个临时变量

32 x 32

直接转置是肯定不行的,这都不用去试(实则败而归来)。众所周知,分块是一个很好用的算法

32位字节的数据,一个int4字节,每行/列有8个int,我们将其分块为8x8,进行处理

充分利用这些变量即可写出

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
if(M == 32)
{
int i, j, k, v1, v2, v3, v4, v5, v6, v7, v8;
for (i = 0; i < 32; i += 8)
for(j = 0; j < 32; j += 8)
for(k = i; k < (i + 8); ++k)
{
v1 = A[k][j];
v2 = A[k][j+1];
v3 = A[k][j+2];
v4 = A[k][j+3];
v5 = A[k][j+4];
v6 = A[k][j+5];
v7 = A[k][j+6];
v8 = A[k][j+7];
B[j][k] = v1;
B[j+1][k] = v2;
B[j+2][k] = v3;
B[j+3][k] = v4;
B[j+4][k] = v5;
B[j+5][k] = v6;
B[j+6][k] = v7;
B[j+7][k] = v8;
}
}

这个暂时先立个flag在这里,等之后会做了再做吧,后面的真的做的不太行了。。。