这个实验让我们用Y86语言写程序,然后用HCL语言写CPU的逻辑,可玩性很高。实验文件可以从CSAPP的网站上下载,我完成的答案在GitHub上。

Part A

Part A让我们用Y86写几个程序,对于被Bomb Lab和Buffer Lab虐过的我来说还是比较容易的。

sum

就是计算一个链表中所有数值的和。

## Execution begins at address 0 
			.pos 0
init: 		irmovl Stack, %esp	 	## set up stack pointer 
			irmovl Stack, %ebp 		## set up base pointer 
			call Main 				## execute main program 
			halt 					## terminate program 

## Sample linked list
			.align 4
ele1:
			.long 0X00a
			.long ele2
ele2:
			.long 0x0b0
			.long ele3
ele3:
			.long 0xc00
			.long 0

Main: 		pushl %ebp 
			rrmovl %esp,%ebp
			irmovl ele1,%eax
			pushl %eax 				## push ele1
			call sum_list			## sum_list(ele1) 
			rrmovl %ebp,%esp
			popl %ebp
			ret

## int sum_list(list_ptr ls)
sum_list: 	pushl %ebp 
			rrmovl %esp,%ebp
			mrmovl 8(%ebp),%ecx 	## ecx = ele1
			xorl %eax, %eax			## eax = 0
			andl %ecx, %ecx 		## test ecx
			je End
Loop:		mrmovl (%ecx),%esi		## get *ls
			addl %esi,%eax			## add to sum
			mrmovl 4(%ecx), %ecx 	## ls = ls->next
			andl %ecx, %ecx 		## test ecx
			jne Loop
End: 		rrmovl %ebp,%esp
			popl %ebp
			ret

## The stack starts here and grows to lower addresses 
			.pos 0x100
Stack:
rsum

使用递归的方法计算一个链表中所有数值的和。

## Execution begins at address 0 
			.pos 0
init: 		irmovl Stack, %esp	 	## set up stack pointer 
			irmovl Stack, %ebp 		## set up base pointer 
			call Main 				## execute main program 
			halt 					## terminate program 

## Sample linked list
			.align 4
ele1:
			.long 0X00a
			.long ele2
ele2:
			.long 0x0b0
			.long ele3
ele3:
			.long 0xc00
			.long 0

Main: 		pushl %ebp 
			rrmovl %esp,%ebp
			irmovl ele1,%eax
			pushl %eax 				## push ele1
			call rsum_list			## rsum_list(ele1) 
			rrmovl %ebp,%esp
			popl %ebp
			ret

## int rsum_list(list_ptr ls)
rsum_list: 	pushl %ebp 
			rrmovl %esp,%ebp
			irmovl $-4,%ebx
			addl %ebx,%esp
			mrmovl 8(%ebp),%ecx 	## ecx = ele1
			andl %ecx,%ecx 			## test ecx
			jne Recursion
			xorl %eax,%eax			## eax = 0
			jmp End
Recursion:	mrmovl (%ecx),%ebx		## ebx = *ls
			rmmovl %ebx,(%esp)		## *esp = ebx
			mrmovl 4(%ecx),%ebx
			pushl %ebx				## push ls->next
			call rsum_list			## rsum_list(ls->next)
			mrmovl 4(%esp),%ebx
			addl %ebx,%eax			## eax = eax + ebx
End: 		rrmovl %ebp,%esp
			popl %ebp
			ret

## The stack starts here and grows to lower addresses 
			.pos 0x100
Stack:
copy

把一个数组中的值复制到另一个另外一个数组中,同时还要计算所有数值的异或和。

## Execution begins at address 0 
			.pos 0
init: 		irmovl Stack, %esp	 	## set up stack pointer 
			irmovl Stack, %ebp 		## set up base pointer 
			call Main 				## execute main program 
			halt 					## terminate program 

			.align 4
## Source block
src:		
			.long 0x000a
			.long 0x00b0
			.long 0x0c00

## Destination block
dest:		
			.long 0x111
			.long 0x222
			.long 0x333

Main: 		pushl %ebp 
			rrmovl %esp,%ebp
			irmovl $3,%eax
			pushl %eax				## push 3
			irmovl dest,%eax
			pushl %eax 				## push dest
			irmovl src,%eax
			pushl %eax				## push src
			call copy_block			## copy_block(src, dest, 3) 
			rrmovl %ebp,%esp
			popl %ebp
			ret

## int copy_block(int *src, int *dest, int len)
copy_block: pushl %ebp 
			rrmovl %esp,%ebp
			mrmovl 8(%ebp),%edx 	## edx = src
			mrmovl 12(%ebp),%ecx	## ecx = dest
			mrmovl 16(%ebp),%ebx	## ebx = 3
			xorl %eax,%eax			## eax = 0
			andl %ebx,%ebx			## test ebx
			je End
Loop:		mrmovl (%edx),%esi
			rmmovl %esi,(%ecx)
			xorl %esi,%eax 			## eax ^= esi
			irmovl $4,%esi
			addl %esi,%edx 			## edx -= 4
			addl %esi,%ecx 			## ecx -= 4
			irmovl $-1,%esi
			addl %esi,%ebx			## ebx--
			jne Loop
End: 		rrmovl %ebp,%esp
			popl %ebp
			ret

## The stack starts here and grows to lower addresses 
			.pos 0x100
Stack:

Part B

这部分需要我们修改HCL代码实现iaddl和leave两个指令,iaddl指令让我们可以直接把一个立即数加到一个寄存器上,而leave指令则实现在ret之前清除函数创建的栈空间。

iaddl指令相当于irmovl指令和addl指令的混合,所以想办法把这两个指令合并到一起。

ileave指令相当于pop指令和mrmovl指令的混合,所以操作需要参考这两个指令的代码实现。

其中容易忘记的就是在iaddl指令计算结束后需要设置状状态码。

################### Fetch Stage     ###################################

bool instr_valid = icode in 
	{ INOP, IHALT, IRRMOVL, IIRMOVL, IRMMOVL, IMRMOVL,
	       IOPL, IJXX, ICALL, IRET, IPUSHL, IPOPL, IIADDL, ILEAVE };

## Does fetched instruction require a regid byte?
bool need_regids =
	icode in { IRRMOVL, IOPL, IPUSHL, IPOPL, 
		     IIRMOVL, IRMMOVL, IMRMOVL, IIADDL };

## Does fetched instruction require a constant word?
bool need_valC =
	icode in { IIRMOVL, IRMMOVL, IMRMOVL, IJXX, ICALL, IIADDL };

################### Decode Stage    ###################################

##### What register should be used as the B source?
int srcB = [
	icode in { IOPL, IRMMOVL, IMRMOVL, IIADDL  } : rB;
	icode in { IPUSHL, IPOPL, ICALL, IRET } : RESP;
	icode == ILEAVE : REBP;
	1 : RNONE;  ## Don't need register
];

##### What register should be used as the B source?
int srcB = [
	icode in { IOPL, IRMMOVL, IMRMOVL, IIADDL  } : rB;
	icode in { IPUSHL, IPOPL, ICALL, IRET } : RESP;
	icode == ILEAVE : REBP;
	1 : RNONE;  ## Don't need register
];

##### What register should be used as the M destination?
int dstM = [
	icode in { IMRMOVL, IPOPL } : rA;
	icode == ILEAVE : REBP;
	1 : RNONE;  ## Don't write any register
];

################### Execute Stage   ###################################

##### Select input A to ALU
int aluA = [
	icode in { IRRMOVL, IOPL } : valA;
	icode in { IIRMOVL, IRMMOVL, IMRMOVL, IIADDL } : valC;
	icode in { ICALL, IPUSHL } : -4;
	icode in { IRET, IPOPL, ILEAVE } : 4;
	## Other instructions don't need ALU
];

##### Select input B to ALU
int aluB = [
	icode in { IRMMOVL, IMRMOVL, IOPL, ICALL, 
		      IPUSHL, IRET, IPOPL, IIADDL, ILEAVE } : valB;
	icode in { IRRMOVL, IIRMOVL } : 0;
	## Other instructions don't need ALU
];

################### Memory Stage    ###################################

##### Select memory address
int mem_addr = [
	icode in { IRMMOVL, IPUSHL, ICALL, IMRMOVL } : valE;
	icode in { IPOPL, IRET, ILEAVE } : valA;
	## Other instructions don't need address
];

Part C

这部分需要我们尽可能降低程序运行过程中的CPI,主要是两种思路:

1. 首先是好好利用iaddl指令,这样的话我们可以轻松地把多个指令合并为一个指令,想要使用iaddl指令,只要在pip-full.hcl文件里添加代码实现就好了。

2. Unrolled loop。

这是pipe-full.hcl文件,添加iaal指令的具体过程和Part B类似。

################### Fetch Stage     ###################################

## Is instruction valid?
bool instr_valid = f_icode in 
	{ INOP, IHALT, IRRMOVL, IIRMOVL, IRMMOVL, IMRMOVL,
	  IOPL, IJXX, ICALL, IRET, IPUSHL, IPOPL, IIADDL };

## Does fetched instruction require a regid byte?
bool need_regids =
	f_icode in { IRRMOVL, IOPL, IPUSHL, IPOPL, 
		     IIRMOVL, IRMMOVL, IMRMOVL, IIADDL };

## Does fetched instruction require a constant word?
bool need_valC =
	f_icode in { IIRMOVL, IRMMOVL, IMRMOVL, IJXX, ICALL, IIADDL };

################### Decode Stage ######################################

##### What register should be used as the B source?
int d_srcB = [
	D_icode in { IOPL, IRMMOVL, IMRMOVL, IIADDL  } : D_rB;
	D_icode in { IPUSHL, IPOPL, ICALL, IRET } : RESP;
	1 : RNONE;  ## Don't need register
];

##### What register should be used as the E destination?
int d_dstE = [
	D_icode in { IRRMOVL, IIRMOVL, IOPL, IIADDL} : D_rB;
	D_icode in { IPUSHL, IPOPL, ICALL, IRET } : RESP;
	1 : RNONE;  ## Don't write any register
];

################### Execute Stage #####################################

##### Select input A to ALU
int aluA = [
	E_icode in { IRRMOVL, IOPL } : E_valA;
	E_icode in { IIRMOVL, IRMMOVL, IMRMOVL, IIADDL } : E_valC;
	E_icode in { ICALL, IPUSHL } : -4;
	E_icode in { IRET, IPOPL } : 4;
	## Other instructions don't need ALU
];

##### Select input B to ALU
int aluB = [
	E_icode in { IRMMOVL, IMRMOVL, IOPL, ICALL, 
		     IPUSHL, IRET, IPOPL, IIADDL } : E_valB;
	E_icode in { IRRMOVL, IIRMOVL } : 0;
	## Other instructions don't need ALU
];

##### Should the condition codes be updated?
bool set_cc = E_icode in { IOPL, IIADDL } &&
	## State changes only during normal operation
	!m_stat in { SADR, SINS, SHLT } && !W_stat in { SADR, SINS, SHLT };

这是ncopy.ys文件,有了iaddl指令,我们少了很多的指令,由于iaddl会更新状态码,所以计算结束就可以直接进行条件跳转了。

#/* $begin ncopy-ys */
##################################################################
# ncopy.ys - Copy a src block of len ints to dst.
# Return the number of positive ints (>0) contained in src.
#
# Include your name and ID here.
#
# Describe how and why you modified the baseline code.
# 1. replace all addl with iaddl;
# 2. unrolled loop (copy 8 elements each loop);
##################################################################
# Do not modify this portion
# Function prologue.
ncopy:	pushl %ebp			# Save old frame pointer
	rrmovl %esp,%ebp		# Set up new frame pointer
	pushl %esi				# Save callee-save regs
	pushl %ebx
	pushl %edi
	mrmovl 8(%ebp),%ebx		# src
	mrmovl 16(%ebp),%edx	# len
	mrmovl 12(%ebp),%ecx	# dst

##################################################################
# You can modify this portion
	# Loop header
	xorl %eax,%eax			# count = 0;
	rrmovl %edx, %edi
	iaddl $-8, %edi
	jl Left1				# if len < 8, goto EleLeft:
Ele1:	
	mrmovl (%ebx), %esi	# read val from src...
	rmmovl %esi, (%ecx)		# ...and store it to dst
	andl %esi, %esi			# val <= 0?
	jle Ele2				# if so, goto Ele2
	iaddl $1, %eax			# count++
Ele2:	
	mrmovl 4(%ebx), %esi# read val from src...
	rmmovl %esi, 4(%ecx)	# ...and store it to dst
	andl %esi, %esi			# val <= 0?
	jle Ele3				# if so, goto Ele3
	iaddl $1, %eax			# count++
Ele3:	
	mrmovl 8(%ebx), %esi# read val from src...
	rmmovl %esi, 8(%ecx)	# ...and store it to dst
	andl %esi, %esi			# val <= 0?
	jle Ele4				# if so, goto Ele4
	iaddl $1, %eax			# count
Ele4:	
	mrmovl 12(%ebx), %esi# read val from src...
	rmmovl %esi, 12(%ecx)	# ...and store it to dst
	andl %esi, %esi			# val <= 0?
	jle Ele5				# if so, goto Ele5
	iaddl $1, %eax			# count
Ele5:	
	mrmovl 16(%ebx), %esi	# read val from src...
	rmmovl %esi, 16(%ecx)	# ...and store it to dst
	andl %esi, %esi			# val <= 0?
	jle Ele6				# if so, goto Ele6
	iaddl $1, %eax			# count++
Ele6:	
	mrmovl 20(%ebx), %esi# read val from src...
	rmmovl %esi, 20(%ecx)	# ...and store it to dst
	andl %esi, %esi			# val <= 0?
	jle Ele7				# if so, goto Ele7
	iaddl $1, %eax			# count++
Ele7:	
	mrmovl 24(%ebx), %esi# read val from src...
	rmmovl %esi, 24(%ecx)	# ...and store it to dst
	andl %esi, %esi			# val <= 0?
	jle Ele8				# if so, goto Ele8
	iaddl $1, %eax			# count++
Ele8:	
	mrmovl 28(%ebx), %esi# read val from src...
	rmmovl %esi, 28(%ecx)	# ...and store it to dst
	andl %esi, %esi			# val <= 0?
	jle Reapt				# if so, goto Reapt
	iaddl $1, %eax			# count++
Reapt:
	iaddl $-8, %edx			# len-=8
	iaddl $32, %ebx			# src+=8
	iaddl $32, %ecx			# dst+=8
	iaddl $-8, %edi
	jge Ele1				# if len >= 8, goto Ele1:
Left1:	
	iaddl $-1, %edx
	jl Done
	mrmovl (%ebx), %esi		# read val from src...
	rmmovl %esi, (%ecx)		# ...and store it to dst
	andl %esi, %esi			# val <= 0?
	jle Left2				# if so, goto Left2
	iaddl $1, %eax			# count++
Left2:
	iaddl $-1, %edx
	jl Done
	mrmovl 4(%ebx), %esi	# read val from src...
	rmmovl %esi, 4(%ecx)	# ...and store it to dst
	andl %esi, %esi			# val <= 0?
	jle Left3				# if so, goto Left3
	iaddl $1, %eax			# count++
Left3:
	iaddl $-1, %edx
	jl Done
	mrmovl 8(%ebx), %esi	# read val from src...
	rmmovl %esi, 8(%ecx)	# ...and store it to dst
	andl %esi, %esi			# val <= 0?
	jle Left4				# if so, goto Left4
	iaddl $1, %eax			# count++
Left4:
	iaddl $-1, %edx
	jl Done
	mrmovl 12(%ebx), %esi	# read val from src...
	rmmovl %esi, 12(%ecx)	# ...and store it to dst
	andl %esi, %esi			# val <= 0?
	jle Left5				# if so, goto Left5
	iaddl $1, %eax			# count++
Left5:
	iaddl $-1, %edx
	jl Done
	mrmovl 16(%ebx), %esi	# read val from src...
	rmmovl %esi, 16(%ecx)	# ...and store it to dst
	andl %esi, %esi			# val <= 0?
	jle Left6				# if so, goto Left6
	iaddl $1, %eax			# count++
Left6:
	iaddl $-1, %edx
	jl Done
	mrmovl 20(%ebx), %esi	# read val from src...
	rmmovl %esi, 20(%ecx)	# ...and store it to dst
	andl %esi, %esi			# val <= 0?
	jle Left7				# if so, goto Left7
	iaddl $1, %eax			# count++
Left7:
	iaddl $-1, %edx
	jl Done
	mrmovl 24(%ebx), %esi	# read val from src...
	rmmovl %esi, 24(%ecx)	# ...and store it to dst
	andl %esi, %esi			# val <= 0?
	jle Done				# if so, goto Done
	iaddl $1, %eax			# count++
##################################################################
# Do not modify the following section of code
# Function epilogue.
Done:
	popl %edi               # Restore callee-save registers
	popl %ebx
	popl %esi
	rrmovl %ebp, %esp
	popl %ebp
	ret
##################################################################
# Keep the following label at the end of your function
End:
#/* $end ncopy-ys */