下载文件之后：

脑瓜子嗡嗡的，writeup看不懂，只能先去看看已有的实验报告，然后先做个总结——

先把里面的sim.tar解压，进入解压出来的目录之后先执行make clean，再执行make，就会把里面所有该生成的文件都生成出来。

part A

这个部分的文件全在misc文件夹。

意思大概是用Y86-64指令集实现examples.c文件里面的三个函数，那也太棒了呀，我最擅长手写汇编了，汇编它虽然码起来效率低，但是能直接对底层进行操作，想想都激动awa

要用到的指令:

./yas xxx.ys
./yis xxx.yo

第一条可以理解为汇编（yas把.ys源文件汇编成.yo目标文件），第二条可以理解为运行（yis是指令集模拟器，直接模拟执行.yo文件）。

sum_list

/* sum_list - Sum the elements of a linked list */
long sum_list(list_ptr ls)
{
    long total = 0;
    list_ptr node;

    /* Walk the chain until the null terminator, adding up each node's value. */
    for (node = ls; node; node = node->next)
        total += node->val;

    return total;
}

就是一个很朴素的链表求和，用循环实现。书上有例子的，那就直接把代码写下来吧，注释上写的挺清晰了。

    .pos 0                      # execution begins at address 0
    irmovq stack,%rsp           # set up the stack pointer
    call main                   # run the test driver
    halt                        # stop the simulator

# Sample three-element linked list; each node is { value, next }
    .align 8
ele1:                           # list head
    .quad 0x00a
    .quad ele2
ele2:
    .quad 0x0b0
    .quad ele3
ele3:
    .quad 0xc00
    .quad 0                     # NULL terminates the list

# main: calls sum_list(ele1); the sum is left in %rax (0xcba for this list)
main:
    irmovq ele1,%rdi            # argument: pointer to the first node
    call sum_list
    ret

# long sum_list(list_ptr ls)
#   In:     %rdi = ls (may be NULL)
#   Out:    %rax = sum of all node values
#   Clobb:  %rdi, %r8, condition codes (caller-saved registers only)
sum_list:
    xorq %rax,%rax              # val = 0
    andq %rdi,%rdi              # sets ZF when ls == NULL, no zero constant needed
    je done                     # empty list: return 0
loop:
    mrmovq (%rdi),%r8           # %r8 = ls->val
    addq %r8,%rax               # val += ls->val
    mrmovq 8(%rdi),%rdi         # ls = ls->next
    andq %rdi,%rdi              # reached the NULL terminator?
    jne loop
done:
    ret

# stack starts here and grows to lower addresses
    .pos 0x200
stack:

运行结果:

while实现方式有多种，下面几种都可以，

L2:
    # loop body; its last instruction must set ZF for the test below
    # (note: this shape always executes the body at least once)
    jne L2              # repeat while the result is non-zero
    ret

L2:
    je L4               # leave the loop once the preceding test set ZF
    # loop body; it must redo the test before jumping back
    jmp L2
L4:
    ret

……接下来读者自己想象吧

rsum_list

这一次也是要一个链表求和，只是函数需要递归。

那么这次我们稍微改一下，把跳转到L2改成重新call一次就好了呗，这里不过多解释了。

    .pos 0                      # execution begins at address 0
    irmovq stack,%rsp           # set up the stack pointer
    call main                   # run the test driver
    halt                        # stop the simulator

# Sample three-element linked list; each node is { value, next }
    .align 8
ele1:                           # list head
    .quad 0x00a
    .quad ele2
ele2:
    .quad 0x0b0
    .quad ele3
ele3:
    .quad 0xc00
    .quad 0                     # NULL terminates the list

# main: calls rsum_list(ele1); the sum is left in %rax (0xcba for this list)
main:
    irmovq ele1,%rdi            # argument: pointer to the first node
    irmovq $0,%rax              # rsum_list accumulates into %rax, so clear it first
    call rsum_list
    ret

# rsum_list: recursive list sum that accumulates on the way down.
#   In:     %rdi = ls (may be NULL), %rax = running total (caller must zero it)
#   Out:    %rax = running total plus the values of all remaining nodes
#   Clobb:  %rdi, %r8, condition codes (caller-saved registers only)
# Each recursion level uses 8 bytes of stack for its return address.
rsum_list:
    andq %rdi,%rdi              # sets ZF when ls == NULL
    je done                     # end of list: total is already in %rax
    mrmovq (%rdi),%r8           # %r8 = ls->val
    addq %r8,%rax               # total += ls->val
    mrmovq 8(%rdi),%rdi         # ls = ls->next
    call rsum_list              # recurse on the rest of the list
done:
    ret

# stack starts here and grows to lower addresses
    .pos 0x400
stack:

但是我这个做法在编译器生成的代码里应该是不存在的：真正的递归每次调用都应该用rax保存返回值，再由调用者把返回值加上当前节点的值，但是我没有这么做，而是一路把节点的值累加进rax。因为我们是直接写汇编指令的，所以不必那么麻烦（其实我也不知道符不符合要求，反正是能过的程序）。

贴一个运行结果吧:

(PS:就感觉这个lab我写的挺水的，分析的东西比较少，可能还是我菜吧qwq)

copy_block

/* copy_block - Copy src to dest and return xor checksum of src */
long copy_block(long *src, long *dest, long len)
{
    long checksum = 0;
    long i;

    /* A non-positive len copies nothing and yields a checksum of 0. */
    for (i = 0; i < len; i++) {
        long word = src[i];

        dest[i] = word;
        checksum ^= word;
    }

    return checksum;
}

这第三个函数跟链表关系不大了，给出源地址和目的地址，源地址保存了一些值，要将源地址开始的len长度的数据拷贝到目的地址，并且把拷贝的值异或起来并且返回。首先len为循环次数没得跑，那么就先可以构建出它循环的基本框架

# Loop skeleton for copy_block; assumes len (%rdx) is > 0 on entry.
    irmovq $1,%r9       # loop-invariant constant 1, loaded once outside the loop
L2:
    # ... loop body: copy one word and fold it into the checksum ...
    subq %r9,%rdx       # len--
    jg L2               # keep looping only while len > 0
    ret

然后略去的内容无非就是赋值，异或，然后就完了…最后注意一下在main函数把三个参数传好，64位的程序前六个参数依次给rdi,rsi,rdx,rcx,r8,r9寄存器，那么main函数就应该是

# main: load the three arguments of copy into %rdi, %rsi, %rdx and call it.
main:
    irmovq xxx,%rdx     # 3rd argument (len): replace xxx with the number of words stored at src
    irmovq dest,%rsi    # 2nd argument: destination address
    irmovq src,%rdi     # 1st argument: source address
    call copy
    ret

那么自己再随便取一下dest和src汇编运行后就可以看到结果。

完整代码:

    .pos 0                      # execution begins at address 0
    irmovq stack,%rsp           # set up the stack pointer
    call main                   # run the test driver
    halt                        # stop the simulator

    .align 8
# Source block: three quad words to copy
src:
    .quad 0x00a
    .quad 0x0b0
    .quad 0xc00
# Destination block: pre-filled with marker values so the copy is visible
dest:
    .quad 0x111
    .quad 0x222
    .quad 0x333

# main: calls copy(src, dest, 3); the checksum is left in %rax (0xcba here)
main:
    irmovq src,%rdi             # 1st argument: source address
    irmovq dest,%rsi            # 2nd argument: destination address
    irmovq $3,%rdx              # 3rd argument: len, in quad words
    call copy
    ret

# long copy(long *src, long *dest, long len)
#   In:     %rdi = src, %rsi = dest, %rdx = len (number of quad words)
#   Out:    %rax = xor of all copied words (0 when len <= 0)
#   Clobb:  %rdi, %rsi, %rdx, %r8, %r9, %r10, condition codes
copy:
    irmovq $1,%r9               # constant 1 for len--
    irmovq $8,%r10              # constant 8: size of one quad word
    xorq %rax,%rax              # result = 0
    andq %rdx,%rdx              # test len
    jle done                    # len <= 0: copy nothing, as in the C while (len > 0)
loop:
    mrmovq (%rdi),%r8           # val = *src
    rmmovq %r8,(%rsi)           # *dest = val
    xorq %r8,%rax               # result ^= val
    addq %r10,%rdi              # src++
    addq %r10,%rsi              # dest++
    subq %r9,%rdx               # len--
    jg loop                     # continue while len > 0
done:
    ret

# stack starts here and grows to lower addresses
    .pos 0x200
stack:

运行结果我们具体关注两点就行了：一是dest处原本存着0x111、0x222、0x333的内存有没有分别被修改为src里对应的值（0x00a、0x0b0、0xc00），二是%rax里返回的异或值是否与自己计算的结果（0xcba）相等。

这样的话Part A就撒花啦

Part B

测评方式:在ptest目录下执行以下命令。

make SIM=../seq/ssim

这个Part B大概意思就是在hcl文件添加一个指令iaddq，这个咱们照本宣科在seq-full.hcl文件对应位置加上这个指令的名称就行了，最后文件修改成这个样子

#/* $begin seq-all-hcl */
####################################################################
#  HCL Description of Control for Single Cycle Y86-64 Processor SEQ   #
#  Copyright (C) Randal E. Bryant, David R. O'Hallaron, 2010       #
####################################################################

## Your task is to implement the iaddq instruction
## The file contains a declaration of the icodes
## for iaddq (IIADDQ)
## Your job is to add the rest of the logic to make it work

####################################################################
#    C Include's.  Don't alter these                               #
####################################################################

quote '#include <stdio.h>'
quote '#include "isa.h"'
quote '#include "sim.h"'
quote 'int sim_main(int argc, char *argv[]);'
quote 'word_t gen_pc(){return 0;}'
quote 'int main(int argc, char *argv[])'
quote '  {plusmode=0;return sim_main(argc,argv);}'

####################################################################
#    Declarations.  Do not change/remove/delete any of these       #
####################################################################

##### Symbolic representation of Y86-64 Instruction Codes #############
wordsig INOP     'I_NOP'
wordsig IHALT    'I_HALT'
wordsig IRRMOVQ    'I_RRMOVQ'
wordsig IIRMOVQ    'I_IRMOVQ'
wordsig IRMMOVQ    'I_RMMOVQ'
wordsig IMRMOVQ    'I_MRMOVQ'
wordsig IOPQ    'I_ALU'
wordsig IJXX    'I_JMP'
wordsig ICALL    'I_CALL'
wordsig IRET    'I_RET'
wordsig IPUSHQ    'I_PUSHQ'
wordsig IPOPQ    'I_POPQ'
# Instruction code for iaddq instruction
wordsig IIADDQ    'I_IADDQ'

##### Symbolic represenations of Y86-64 function codes                  #####
wordsig FNONE    'F_NONE'        # Default function code

##### Symbolic representation of Y86-64 Registers referenced explicitly #####
wordsig RRSP     'REG_RSP'        # Stack Pointer
wordsig RNONE    'REG_NONE'       # Special value indicating "no register"

##### ALU Functions referenced explicitly                            #####
wordsig ALUADD    'A_ADD'        # ALU should add its arguments

##### Possible instruction status values                             #####
wordsig SAOK    'STAT_AOK'    # Normal execution
wordsig SADR    'STAT_ADR'    # Invalid memory address
wordsig SINS    'STAT_INS'    # Invalid instruction
wordsig SHLT    'STAT_HLT'    # Halt instruction encountered

##### Signals that can be referenced by control logic ####################

##### Fetch stage inputs        #####
wordsig pc 'pc'                # Program counter
##### Fetch stage computations        #####
wordsig imem_icode 'imem_icode'        # icode field from instruction memory
wordsig imem_ifun  'imem_ifun'         # ifun field from instruction memory
wordsig icode      'icode'        # Instruction control code
wordsig ifun      'ifun'        # Instruction function
wordsig rA      'ra'            # rA field from instruction
wordsig rB      'rb'            # rB field from instruction
wordsig valC      'valc'        # Constant from instruction
wordsig valP      'valp'        # Address of following instruction
boolsig imem_error 'imem_error'        # Error signal from instruction memory
boolsig instr_valid 'instr_valid'    # Is fetched instruction valid?

##### Decode stage computations        #####
wordsig valA    'vala'            # Value from register A port
wordsig valB    'valb'            # Value from register B port

##### Execute stage computations    #####
wordsig valE    'vale'            # Value computed by ALU
boolsig Cnd    'cond'            # Branch test

##### Memory stage computations        #####
wordsig valM    'valm'            # Value read from memory
boolsig dmem_error 'dmem_error'        # Error signal from data memory


####################################################################
#    Control Signal Definitions.                                   #
####################################################################

################ Fetch Stage     ###################################

# Determine instruction code
# If the instruction fetch itself failed, substitute INOP instead of the
# value read from instruction memory.
word icode = [
    imem_error: INOP;
    1: imem_icode;        # Default: get from instruction memory
];

# Determine instruction function
# If the instruction fetch itself failed, substitute FNONE instead of the
# value read from instruction memory.
word ifun = [
    imem_error: FNONE;
    1: imem_ifun;        # Default: get from instruction memory
];

# Is the fetched icode one of the implemented instructions?
# IIADDQ is listed so that iaddq is accepted; Stat reports SINS for any
# icode that is not in this set.
bool instr_valid = icode in 
    { INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
           IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ, IIADDQ };

# Does fetched instruction require a regid byte?
# IIADDQ is included because iaddq names a register (rB) as its operand.
bool need_regids =
    icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ, 
             IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ };

# Does fetched instruction require a constant word?
# IIADDQ is included because iaddq carries the constant (valC) that is
# fed to the ALU.
bool need_valC =
    icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL, IIADDQ };

################ Decode Stage    ###################################

## What register should be used as the A source?
## IIADDQ is not listed: its ALU A input is the constant valC, not a
## register value, so it falls through to RNONE.
word srcA = [
    icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ  } : rA;
    icode in { IPOPQ, IRET } : RRSP;
    1 : RNONE; # Don't need register
];

## What register should be used as the B source?
## iaddq reads its register operand through the B port (rB), like OPq.
word srcB = [
    icode in { IOPQ, IRMMOVQ, IMRMOVQ, IIADDQ  } : rB;
    icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
    1 : RNONE;  # Don't need register
];

## What register should be used as the E destination?
## iaddq writes the ALU result (valE) back to rB, the register it also
## reads as its B source.
word dstE = [
    icode in { IRRMOVQ } && Cnd : rB;
    icode in { IIRMOVQ, IOPQ, IIADDQ} : rB;
    icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
    1 : RNONE;  # Don't write any register
];

## What register should be used as the M destination?
## IIADDQ is not listed: iaddq writes its result through dstE only.
word dstM = [
    icode in { IMRMOVQ, IPOPQ } : rA;
    1 : RNONE;  # Don't write any register
];

################ Execute Stage   ###################################

## Select input A to ALU
## For iaddq the A input is the instruction's constant (valC).
word aluA = [
    icode in { IRRMOVQ, IOPQ } : valA;
    icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : valC;
    icode in { ICALL, IPUSHQ } : -8;
    icode in { IRET, IPOPQ } : 8;
    # Other instructions don't need ALU
];

## Select input B to ALU
## For iaddq the B input is the value read from rB (valB), so the ALU
## computes valB + valC.
word aluB = [
    icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, 
              IPUSHQ, IRET, IPOPQ, IIADDQ } : valB;
    icode in { IRRMOVQ, IIRMOVQ } : 0;
    # Other instructions don't need ALU
];

## Set the ALU function
## Only OPq takes its operation from ifun; every other instruction,
## including iaddq, uses the default ALUADD.
word alufun = [
    icode == IOPQ : ifun;
    1 : ALUADD;
];

## Should the condition codes be updated?
## iaddq is an arithmetic instruction, so it updates the condition codes
## just like OPq does.
bool set_cc = icode in { IOPQ, IIADDQ };

################ Memory Stage    ###################################

## Set read control signal
## IIADDQ is not listed: iaddq never reads data memory.
bool mem_read = icode in { IMRMOVQ, IPOPQ, IRET };

## Set write control signal
## IIADDQ is not listed: iaddq never writes data memory.
bool mem_write = icode in { IRMMOVQ, IPUSHQ, ICALL };

## Select memory address
## The first group addresses memory with the ALU result (valE); popq and
## ret use valA, which srcA sets to the stack pointer for them.
word mem_addr = [
    icode in { IRMMOVQ, IPUSHQ, ICALL, IMRMOVQ } : valE;
    icode in { IPOPQ, IRET } : valA;
    # Other instructions don't need address
];

## Select memory input data
## Chooses what gets stored for the instructions that write memory.
word mem_data = [
    # Value from register
    icode in { IRMMOVQ, IPUSHQ } : valA;
    # Return PC
    icode == ICALL : valP;
    # Default: Don't write anything
];

## Determine instruction status
## The cases are tested in order: memory errors first, then an invalid
## instruction, then halt; anything else is normal execution.
word Stat = [
    imem_error || dmem_error : SADR;
    !instr_valid: SINS;
    icode == IHALT : SHLT;
    1 : SAOK;
];

################ Program Counter Update ############################

## What address should instruction be fetched at
## iaddq has no case of its own here, so it uses the default: the
## incremented PC (valP).

word new_pc = [
    # Call.  Use instruction constant
    icode == ICALL : valC;
    # Taken branch.  Use instruction constant
    icode == IJXX && Cnd : valC;
    # Completion of RET instruction.  Use value from stack
    icode == IRET : valM;
    # Default: Use incremented PC
    1 : valP;
];
#/* $end seq-all-hcl */

PS:咱这个实验没有自己做过，上面的修改方式参照了别的师傅的wp。然后我其实突然发现，我不做修改直接测评也是直接满分的，不知道为啥，所以这个还是不能算我自己写的。我也是真不会：哪里需要添加、为什么要添加、以及它工作的原理，我是真的不知道。以后变强了再看看这里能不能更新一下，解决这个问题吧。qwq

Part C

Part C在寒假的逆向培训中我还是记得很清楚的，循环次数缩减可以用每次移动八个字节，循环执行[len/8]次，然后再一字节一字节移动len%8次，利用这个思路去优化memcpy函数来着的。

然后写出优化的思路：

# You can modify this portion
    # Loop header
    xorq %rax,%rax        # count = 0;
    iaddq $-4, %rdx
    jle EQ0

Npos0:
    mrmovq (%rdi), %r10
    mrmovq 8(%rdi), %r11
    mrmovq 16(%rdi), %r12
    mrmovq 24(%rdi), %r13
    mrmovq 32(%rdi), %r14
    rmmovq %r10, (%rsi)
    andq %r10, %r10        # val <= 0?
    jle Npos1
    iaddq $1, %rax

Npos1:
    rmmovq %r11, 8(%rsi)
    andq %r11, %r11        # val <= 0?
    jle Npos2
    iaddq $1, %rax

Npos2:
    rmmovq %r12, 16(%rsi)
    andq %r12, %r12        # val <= 0?
    jle Npos3
    iaddq $1, %rax

Npos3:
    rmmovq %r13, 24(%rsi)
    andq %r13, %r13    # val <= 0?
    jle Npos4
    iaddq $1, %rax

Npos4:
    rmmovq %r14, 32(%rsi)
    andq %r14, %r14    # val <= 0?
    jle Tail
    iaddq $1, %rax

Tail:
    iaddq $40, %rsi
    iaddq $40, %rdi
    iaddq $-5, %rdx
    jg Npos0

EQ0:
    iaddq $4, %rdx
    jle Done
    mrmovq (%rdi), %r10
    mrmovq 8(%rdi), %r11
    rmmovq %r10, (%rsi)
    andq %r10, %r10
    jle EQ1
    iaddq $1, %rax

EQ1:
    iaddq $-1, %rdx
    jle Done
    rmmovq %r11, 8(%rsi)
    andq %r11, %r11
    jle EQ2
    iaddq $1, %rax

EQ2:
    iaddq $-1, %rdx
    jle Done
    mrmovq 16(%rdi), %r12
    rmmovq %r12, 16(%rsi)
    andq %r12, %r12
    jle EQ3
    iaddq $1, %rax

EQ3:
    iaddq $-1, %rdx
    jle Done
    mrmovq 24(%rdi), %r13
    rmmovq %r13, 24(%rsi)
    andq %r13, %r13
    jle Done
    iaddq $1, %rax