上次课内容:
C程序如何从源代码生成指令序列(二进制可执行文件)
本次课内容:
我们之前接触的是宿主操作系统(Linux)之上的运行时环境
printf()的代码在哪里 😂
RTFM: C99
5.1.2.1 Freestanding environment
2 The effect of program termination in a freestanding environment is
implementation-defined.
QEMU虽然是个开源项目, 但还挺复杂, 不利于我们理解细节
让我们来设计一个面向RISC-V程序的简单freestanding运行时环境!
程序从地址0开始执行
支持addi指令和ebreak指令
ebreak指令: a0=0时, 输出寄存器a1低8位的字符
ebreak指令: a0=1时, 结束运行
/*
 * Issue a request to the runtime environment through the ebreak instruction.
 * arg0 is placed in a0 and arg1 in a1 by two addi instructions, then ebreak
 * is executed.
 * Both operands use the "i" (immediate) constraint, so every call must be
 * reducible to compile-time constants by the compiler.
 * NOTE(review): the asm writes a0 and a1 but lists no clobbers -- confirm
 * that this is acceptable for the compiler in use.
 */
static void ebreak(long arg0, long arg1) {
asm volatile("addi a0, x0, %0;"
"addi a1, x0, %1;"
"ebreak" : : "i"(arg0), "i"(arg1));
}
/* Output one character: issues ebreak with request 0 and the character as the argument. */
static void putch(char ch) { ebreak(0, ch); }
/* Stop the program: issues ebreak with request 1 and the exit code, then spins forever in case execution continues. */
static void halt(int code) { ebreak(1, code); while (1); }
/* Program entry point: prints the character 'A', then halts with code 0. */
void _start() {
putch('A');
halt(0);
}
rv32gcc -ffreestanding -nostdlib -static -Wl,-Ttext=0 -O2 -o prog a.c
rvobjdump -M no-aliases -d prog
看看反汇编中的addi和ebreak指令
但怎么让这个程序运行呢?
我们只要把这个状态机实现出来, 就可以用它来执行指令了!
| | 程序 | 抽象计算机 | CPU |
|---|---|---|---|
| 状态 | \(\{<V, PC>\}\) | \(\{<R, M>\}\) | \(\{时序逻辑电路\}\) |
| 状态转移规则 | C语言语句的语义 | 指令的语义 | 组合逻辑电路 |
| FM | C语言标准手册 | 指令集手册 | 架构设计文档 |
#include <stdint.h>
// State of the emulated machine: 32 registers of 32 bits each, plus the program counter.
uint32_t R[32], PC; // according to the RISC-V manual
// Byte-addressed memory of the emulated machine.
uint8_t M[64]; // 64-Byte memory
Q: 为什么不使用int32_t和int8_t?
A: C语言标准规定, 有符号数溢出是undefined behavior, 但无符号数不会溢出
指令周期(instruction cycle): 执行一条指令的步骤
RTFM后得知:
31 20 19 15 14 12 11 7 6 0
+---------------+-----+-----+-----+---------+
| imm[11:0] | rs1 | 000 | rd | 0010011 | ADDI
+---------------+-----+-----+-----+---------+
+---------------+-----+-----+-----+---------+
| 000000000001 |00000| 000 |00000| 1110011 | EBREAK
+---------------+-----+-----+-----+---------+
一个简单的实现:
/*
 * Execute one instruction cycle: fetch the 32-bit instruction at PC, decode
 * it, execute it, and advance PC by 4.
 *
 * Supported instructions:
 *   - addi:   R[rd] = R[rs1] + sign-extended imm[11:0]; a write to register 0
 *             is dropped.
 *   - ebreak: R[10] (a0) selects the request: 0 prints the low 8 bits of
 *             R[11] (a1); 1 sets `halt`; anything else is reported.
 * Any other encoding is reported as unsupported and skipped.
 */
void inst_cycle() {
  // Stop instead of fetching past the end of memory.
  if (PC > sizeof(M) - 4) {
    printf("PC out of bounds\n");
    halt = true;
    return;
  }

  // RISC-V instructions are little-endian; assembling the word byte by byte
  // avoids the unaligned, aliasing-unsafe cast and is host-endian independent.
  uint32_t inst = (uint32_t)M[PC]
                | ((uint32_t)M[PC + 1] << 8)
                | ((uint32_t)M[PC + 2] << 16)
                | ((uint32_t)M[PC + 3] << 24);

  uint32_t opcode = inst & 0x7f;
  uint32_t funct3 = (inst >> 12) & 0x7;
  uint32_t rd     = (inst >> 7) & 0x1f;
  uint32_t rs1    = (inst >> 15) & 0x1f;
  // imm[11:0] lives in bits 31:20. Take all 12 bits, then subtract 2^12 when
  // the sign bit (bit 31) is set; unsigned wrap-around gives the sign extension.
  uint32_t imm    = ((inst >> 20) & 0xfff) - ((inst & 0x80000000u) ? 4096u : 0u);

  if (opcode == 0x13 && funct3 == 0) { // addi
    if (rd != 0) {
      R[rd] = R[rs1] + imm;
    }
  } else if (inst == 0x00100073) { // ebreak
    if (R[10] == 0) { putchar(R[11] & 0xff); }
    else if (R[10] == 1) { halt = true; }
    else { printf("Unsupported ebreak command\n"); }
  } else { printf("Unsupported instuction\n"); }
  PC += 4;
}
RTFM: 《The RISC-V Instruction Set Manual - Volume II: Privileged Architecture》
3.4 Reset
The pc is set to an implementation-defined reset vector... All other hart state is
unspecified.
注意这里的unspecified和C语言标准的含义不同
根据手册, 初始状态如下:
R[0] = 0, 0号寄存器恒为0
PC = 0, 与自制运行时环境共同约定
M中存放程序, 与自制运行时环境共同约定, 由模拟器加载程序
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
// State of the emulated machine: 32 registers of 32 bits each, plus the program counter.
uint32_t R[32], PC;
// 64-byte memory preloaded with the program, stored as little-endian words:
//   addi a0, x0, 0 ; addi a1, x0, 0x41 ; ebreak   -> print 'A'
//   addi a0, x0, 1 ; addi a1, x0, 0    ; ebreak   -> halt
//   0x0000006f                                    -> not supported by inst_cycle()
uint8_t M[64] = {
  0x13, 0x05, 0x00, 0x00, 0x93, 0x05, 0x10, 0x04, 0x73, 0x00, 0x10, 0x00,
  0x13, 0x05, 0x10, 0x00, 0x93, 0x05, 0x00, 0x00, 0x73, 0x00, 0x10, 0x00,
  0x6f, 0x00, 0x00, 0x00,
};
// Set to true when the program asks to stop (ebreak with a0 == 1).
bool halt = false;

/*
 * Execute one instruction cycle: fetch the 32-bit instruction at PC, decode
 * it, execute it, and advance PC by 4.
 *
 * Supported instructions:
 *   - addi:   R[rd] = R[rs1] + sign-extended imm[11:0]; a write to register 0
 *             is dropped.
 *   - ebreak: R[10] (a0) selects the request: 0 prints the low 8 bits of
 *             R[11] (a1); 1 sets `halt`; anything else is reported.
 * Any other encoding is reported as unsupported and skipped.
 */
void inst_cycle() {
  // Stop instead of fetching past the end of memory.
  if (PC > sizeof(M) - 4) {
    printf("PC out of bounds\n");
    halt = true;
    return;
  }

  // RISC-V instructions are little-endian; assembling the word byte by byte
  // avoids the unaligned, aliasing-unsafe cast and is host-endian independent.
  uint32_t inst = (uint32_t)M[PC]
                | ((uint32_t)M[PC + 1] << 8)
                | ((uint32_t)M[PC + 2] << 16)
                | ((uint32_t)M[PC + 3] << 24);

  uint32_t opcode = inst & 0x7f;
  uint32_t funct3 = (inst >> 12) & 0x7;
  uint32_t rd     = (inst >> 7) & 0x1f;
  uint32_t rs1    = (inst >> 15) & 0x1f;
  // imm[11:0] lives in bits 31:20. Take all 12 bits, then subtract 2^12 when
  // the sign bit (bit 31) is set; unsigned wrap-around gives the sign extension.
  uint32_t imm    = ((inst >> 20) & 0xfff) - ((inst & 0x80000000u) ? 4096u : 0u);

  if (opcode == 0x13 && funct3 == 0) { // addi
    if (rd != 0) {
      R[rd] = R[rs1] + imm;
    }
  } else if (inst == 0x00100073) { // ebreak
    if (R[10] == 0) { putchar(R[11] & 0xff); }
    else if (R[10] == 1) { halt = true; }
    else { printf("Unsupported ebreak command\n"); }
  } else { printf("Unsupported instuction\n"); }
  PC += 4;
}
// Emulator entry point: reset the machine, then step it until the program requests a halt.
int main() {
  // Global variables already start at 0; the explicit reset only documents the initial state.
  R[0] = 0;
  PC = 0;
  for (;;) {
    if (halt) { break; }
    inst_cycle();
  }
  return 0;
}
// ...
uint8_t M[1024];
// Emulator entry point: load the program image named by argv[1] into M, then run it.
// Nothing is validated here: argc and the results of fopen() and fread() are not checked.
int main(int argc, char *argv[]) {
  R[0] = 0;
  PC = 0;

  FILE *image = fopen(argv[1], "r");
  fread(M, 1, 1024, image);
  fclose(image);

  for (; !halt; ) { inst_cycle(); }
  return 0;
}
# 将可执行文件prog中的指令序列抽取到prog.bin
riscv64-linux-gnu-objcopy -j .text -O binary prog prog.bin
gcc -o yemu yemu.c && ./yemu prog.bin
YEMU很小, 能很快定位问题; 但如何从大项目中存活?
调试的最高境界: 不用调试
诀窍: 编写可读可维护的代码
/*
 * Deliberately obfuscated C program, reproduced verbatim; presumably shown as
 * a counter-example to readable, maintainable code.
 * Visible structure: a stringizing macro C(c), global int and char state,
 * functions O() and Q(), a 999-byte buffer n initialised through C(), and main().
 * The layout and the embedded size/crc comments are part of the exhibit.
 */
#\
define C(c /**/)#c
/*size=3173*/#include<stdio.h>
/*crc=b7f9ecff.*/#include<stdlib.h>
/*Mile/Adele_von_Ascham*/#include<time.h>
typedef/**/int(I);I/*:3*/d,i,j,a,b,l,u[16],v
[18],w[36],x,y,z,k;char*P="\n\40(),",*p,*q,*t[18],m[4];
void/**/O(char*q){for(;*q;q++)*q>32?z=111-*q?z=(z+*q)%185,(k?
k--:(y=z%37,(x=z/37%7)?printf(*t,t[x],y?w[y-1]:95):y>14&&y<33?x
=y>15,printf(t[15+x],x?2<<y%16:l,x?(1<<y%16)-1:1):puts(t[y%28])))
,0:z+82:0;}void/**/Q(I(p),I*q){for(x=0;x<p;x++){q[x]=x;}for(;--p
>1;q[p]=y)y =q[x=rand()%-~p],q[x]=q[p];}char/**/n[999]=C(Average?!nQVQd%R>Rd%
R% %RNIPRfi#VQ}R;TtuodtsRUd%RUd%RUOSetirwf!RnruterR{RTSniamRtniQ>h.oidts<edulc
ni #V>rebmun<=NIPD-RhtiwRelipmocResaelPRrorre#QNIPRfednfi#V__ELIF__R_
Re nifed#V~-VU0V;}V{R= R][ORrahcRdengisnuRtsnocRcitatsVesle#Vfidne#V53556
. .1RfoRegnarRehtRniRre getniRnaRsiR]NIP[R erehwQQc.tuptuoR>Rtxt.tupniR
< R]NIP[R:egasuV_Redulcn i#VfednfiVfednuVenife dVfedfiVQc%Rs%#V);I/**/main(
I( f),char**e){if(f){for(i= time(NULL),p=n,q= n+997,x=18;x;p++){*p>32&&!(
*--q=*p>80&&*p<87?P[*p- 81]:* p)?t [( -- x)]=q+1:q;}if(f-2||(d=atoi
(e[1]))<1||65536<d){;O(" \""); goto O;}srand(i);Q(16,u);i=0;Q(
36,w);for(;i<36; i++){w[i] +=w [i]<26 ? 97:39; }O(C(ouoo9oBotoo%]#
ox^#oy_#ozoou#o{ a#o|b#o}c# o~d#oo-e #oo. f#oo/g#oo0h#oo1i#oo
2j#oo3k#oo4l#o p));for(j =8;EOF -(i= getchar());l+=1){a=1+
rand()%16;for(b =0;b<a||i- main (0,e);b++)x=d^d/4^d/8^d/
32,d= (d/ 2|x<<15)&65535; b|= !l<<17;Q(18,v);for(a=0;a<18;
a++ ){if( (b&(1<<(i=v[a] ))))* m=75+i,O(m),j=i<17&&j<i?i:j;}O(C(
!) ); }O(C(oqovoo97o /n!));i= 0;for(;i<8;O(m))m[2]=35,*m=56+u[i],m[1
]= 75 +i++;O(C(oA!oro oqoo9) );k=112-j*7;O(C(6o.!Z!Z#5o-!Y!Y#4~!X!X#3}
!W !W #2 |!V!V#1{!U!U#0z! T!T#/y!S!S#.x!R!R#-w!Q!Q#ooAv!P!P#+o#!O!O#*t!N!
N# oo >s!M!M#oo=r!L!L#oo<q!K!K# &pIo@:;= oUm#oo98m##oo9=8m#oo9oUm###oo9;=8m#o
o9 oUm##oo9=oUm#oo98m#### o09] #o1:^#o2;_#o3<o ou#o4=a#o5>b#o6?c#o
7@d#o8A e#o 9B f#o:Cg#o; D h#o<Ei #o=Fj#o> Gk#o?Hl#oo9os#####
));d=0 ;} O: for(x=y=0;x<8;++
x)y|= d&(1<<u[x])?
1<< x:0;return
/* :9 */
y ; }
YEMU v1.0其实也做得不够好, 让我们来改进它
不相信外界的输入/其他函数传递的参数, 通过断言提前拦截非预期情况
#include <assert.h>
// ...
/*
 * Emulator entry point: load the program image named by argv[1] into M and
 * run it until the program requests a halt.
 * Every assumption about the command line and the file is checked with
 * assert(), so an unexpected situation stops the emulator at the failed check.
 * Returns 0 after the emulated program halts.
 */
int main(int argc, char *argv[]) {
  PC = 0; R[0] = 0;
  assert(argc >= 2); // at least one argument is required
  FILE *fp = fopen(argv[1], "rb"); // the image is binary data, so open it in binary mode
  assert(fp != NULL); // argv[1] must be a file that can be opened
  int ret = fseek(fp, 0, SEEK_END);
  assert(ret == 0); // fseek() reports failure with any nonzero value
  long fsize = ftell(fp);
  assert(fsize != -1); // ftell() must succeed
  rewind(fp);
  assert(fsize <= 1024); // the program must not exceed 1024 bytes; exactly 1024 still fits
  size_t nread = fread(M, 1, 1024, fp); // fread() returns a size_t item count
  assert(nread == (size_t)fsize); // the whole program must have been read
  fclose(fp);
  while (!halt) { inst_cycle(); }
  return 0;
}
将预期的正确行为直接写到程序中
segmentation fault -> yemu.c:27: main: ...
程序中的断言足够多 -> 近似于证明了程序的正确性
IC验证教大家写SVA(SystemVerilog Assertion), 也是类似的道理
/*
 * Assert(cond, format, ...): when cond is false, print a printf-style message
 * followed by a newline to stderr, then hand cond to assert().
 * Note that cond is evaluated a second time by assert() on the failure path.
 */
#define Assert(cond, format, ...) \
  do { \
    if (cond) break; \
    fprintf(stderr, format "\n", ## __VA_ARGS__); \
    assert(cond); \
  } while (0)
/*
 * Emulator entry point: load the program image named by argv[1] into M and
 * run it until the program requests a halt.
 * Each assumption is checked with Assert(), which prints a readable message
 * before the assertion fails.
 * Returns 0 after the emulated program halts.
 */
int main(int argc, char *argv[]) {
  PC = 0; R[0] = 0;
  Assert(argc >= 2, "Program is not given"); // at least one argument is required
  FILE *fp = fopen(argv[1], "rb"); // the image is binary data, so open it in binary mode
  Assert(fp != NULL, "Fail to open %s", argv[1]); // argv[1] must be a file that can be opened
  int ret = fseek(fp, 0, SEEK_END);
  Assert(ret == 0, "Fail to seek the end of the file"); // fseek() reports failure with any nonzero value
  long fsize = ftell(fp);
  Assert(fsize != -1, "Fail to return the file position"); // ftell() must succeed
  rewind(fp);
  Assert(fsize <= 1024, "Program size exceeds 1024 Bytes"); // exactly 1024 bytes still fits in M
  size_t nread = fread(M, 1, 1024, fp); // fread() returns a size_t item count
  Assert(nread == (size_t)fsize, "Fail to load the whole program"); // the whole program must have been read
  fclose(fp);
  while (!halt) { inst_cycle(); }
  return 0;
}
#include <string.h>
#include <errno.h>
/*
 * Perror(cond, format, ...): same as Assert(), but the message is extended
 * with ": " and the strerror() text for the current errno.
 */
#define Perror(cond, format, ...) \
  Assert(cond, \
         format ": %s", \
         ## __VA_ARGS__, \
         strerror(errno))
/*
 * Emulator entry point: load the program image named by argv[1] into M and
 * run it until the program requests a halt.
 * Checks on library calls that set errno use Perror(), so the system's
 * reason for the failure is printed; the remaining checks use Assert().
 * Returns 0 after the emulated program halts.
 */
int main(int argc, char *argv[]) {
  PC = 0; R[0] = 0;
  Assert(argc >= 2, "Program is not given"); // at least one argument is required
  FILE *fp = fopen(argv[1], "rb"); // the image is binary data, so open it in binary mode
  Perror(fp != NULL, "Fail to open %s", argv[1]); // argv[1] must be a file that can be opened
  int ret = fseek(fp, 0, SEEK_END);
  Perror(ret == 0, "Fail to seek the end of the file"); // fseek() reports failure with any nonzero value
  long fsize = ftell(fp);
  Perror(fsize != -1, "Fail to return the file position"); // ftell() must succeed
  rewind(fp);
  Assert(fsize <= 1024, "Program size exceeds 1024 Bytes"); // exactly 1024 bytes still fits in M
  size_t nread = fread(M, 1, 1024, fp); // fread() returns a size_t item count
  Assert(nread == (size_t)fsize, "Fail to load the whole program"); // the whole program must have been read
  fclose(fp);
  while (!halt) { inst_cycle(); }
  return 0;
}
RTFM: man errno
破坏隐含依赖 = bug (例如这里改了, 那里忘了改):
随着项目规模增长, 需要分成多个文件来管理
// Decode/execute chain for the immediate ALU instructions, written in the
// copy-paste style that the surrounding text criticises.
// The immediate is imm[11:0] (instruction bits 31:20): all 12 bits are
// extracted, and 4096 is subtracted when bit 31 (the sign bit) is set, which
// sign-extends it.
// NOTE: the one copy-paste mistake this listing asks the reader to find is
// intentionally left in place.
if (((inst & 0x7f) == 0x13) && ((inst >> 12) & 0x7) == 0) { // addi
  if (((inst >> 7) & 0x1f) != 0) {
    R[(inst >> 7) & 0x1f] = R[(inst >> 15) & 0x1f] +
      (((inst >> 20) & 0xfff) - ((inst & 0x80000000) ? 4096 : 0));
  }
} else if (((inst & 0x7f) == 0x13) && ((inst >> 12) & 0x7) == 0x4) { // xori
  if (((inst >> 7) & 0x1f) != 0) {
    R[(inst >> 7) & 0x1f] = R[(inst >> 15) & 0x1f] ^
      (((inst >> 20) & 0xfff) - ((inst & 0x80000000) ? 4096 : 0));
  }
} else if (((inst & 0x7f) == 0x13) && ((inst >> 12) & 0x7) == 0x6) { // ori
  if (((inst >> 7) & 0x1f) != 0) {
    R[(inst >> 7) & 0x1f] = R[(inst >> 15) & 0x1f] |
      (((inst >> 20) & 0xfff) - ((inst & 0x80000000) ? 4096 : 0));
  }
} else if (((inst & 0x7f) == 0x13) && ((inst >> 12) & 0x7) == 0x4) { // andi
  if (((inst >> 7) & 0x1f) != 0) {
    R[(inst >> 7) & 0x1f] = R[(inst >> 15) & 0x1f] &
      (((inst >> 20) & 0xfff) - ((inst & 0x80000000) ? 4096 : 0));
  }
} else if (...) { ... }
上述代码有一处错误, 你找到了吗?
Copy-Paste = 编写相似代码时, 复制旧代码并稍作修改
上述代码不言自明本身就不怎么样, 不言自证就更难了
粘贴一时爽, 调试火葬场 😈
通过变量, 函数, 宏等方式消除重复/相似的代码
// Fetch the instruction at PC and split it into named fields once, so each
// instruction below is a single readable line.
uint32_t inst = *(uint32_t *)&M[PC];
uint32_t opcode = inst & 0x7f;
uint32_t funct3 = (inst >> 12) & 0x7;
uint32_t rd = (inst >> 7 ) & 0x1f;
uint32_t rs1 = (inst >> 15) & 0x1f;
// imm[11:0] lives in bits 31:20. Take all 12 bits, then subtract 4096 when
// the sign bit (bit 31) is set; unsigned wrap-around gives the sign extension.
uint32_t imm = ((inst >> 20) & 0xfff) - ((inst & 0x80000000) ? 4096 : 0);
if (opcode == 0x13) {
  if (funct3 == 0x0) { R[rd] = R[rs1] + imm; } // addi
  else if (funct3 == 0x4) { R[rd] = R[rs1] ^ imm; } // xori
  else if (funct3 == 0x6) { R[rd] = R[rs1] | imm; } // ori
  else if (funct3 == 0x7) { R[rd] = R[rs1] & imm; } // andi
  else { panic("Unsupported funct3 = %d", funct3); }
  R[0] = 0; // if the instruction wrote R[0], reset it to 0 here
} else if (...) { ... }
PC += 4;
// One 32-bit instruction, viewable either as raw bits or as named fields.
// The field order and widths of I follow the I-type format (opcode, rd,
// funct3, rs1, imm[11:0]); how bit-fields are laid out in memory is
// implementation-defined, so this presumably relies on the compiler
// allocating them from the least significant bit -- TODO confirm.
typedef union {
struct {
uint32_t opcode : 7;
uint32_t rd : 5;
uint32_t funct3 : 3;
uint32_t rs1 : 5;
int32_t imm11_0 : 12; // declared with a signed type so the immediate reads as a signed value
} I;
struct { /* ... */ } R;
uint32_t bytes; // the whole instruction as one 32-bit value
} inst_t;
// Decode the instruction at M[PC] by viewing its bytes through the inst_t bit-fields.
inst_t *inst = (inst_t *)&M[PC];
uint32_t rd = inst->I.rd;
uint32_t rs1 = inst->I.rs1;
// Convert the 12-bit field to int32_t before storing it in an unsigned variable.
uint32_t imm = (int32_t)inst->I.imm11_0;
if (inst->I.opcode == 0b0010011) {
switch (inst->I.funct3) {
case 0b000: R[rd] = R[rs1] + imm; break; // addi
case 0b100: R[rd] = R[rs1] ^ imm; break; // xori
case 0b110: R[rd] = R[rs1] | imm; break; // ori
case 0b111: R[rd] = R[rs1] & imm; break; // andi
default: panic("Unsupported funct3 = %d", inst->I.funct3);
}
R[0] = 0; // if the instruction wrote R[0], reset it to 0 here
} else if (inst->bytes == 0x00100073) { ... }
struct和位域(bit field)
union
switch-case语句
正确的代码 != 好代码
好代码的两条重要准则
使用正确的编程模式写出好代码
assert检查非预期行为
与YEMU类似, 我们可以把这个状态机实现出来, 用它来执行C程序!
import sys,re
# CEMU: a tiny interpreter for a one-statement-per-line subset of C read from
# stdin. The machine state is the `state` dict: the program's variables plus
# 'PC', the index into `srcs` of the statement to execute next.

# prepend an empty line to let PC start from 1
srcs = [''] + list(map(lambda s: s.strip(), sys.stdin.read().split('\n')))

# set PC to the line following "int main"
main_lines = [i for i, line in enumerate(srcs) if line.startswith('int main')]
if not main_lines:
    sys.exit('No "int main" found in the input program')
state = {'PC': main_lines[-1] + 1}

labels = {}  # record mappings of label -> PC
for i, line in enumerate(srcs):
    if re.match(r'^\w+:', line) is not None:
        labels.setdefault(line.rstrip(':'), i)  # the first definition of a label wins

# Each entry pairs a statement pattern with the function that executes it.
# Entries are tried in order; the last one catches everything else.
semantics = [
    (r'^int\s+(\w+)\s*;$', lambda s, p: exec(re.sub(p, r'\1 = 0xdeadbeef', s), {}, state)),
    (r'^int\s+(\w+)\s*=\s*(.+)?;$', lambda s, p: exec(re.sub(p, r'\1 = \2', s), {}, state)),
    (r'^\w+\s*=.+\s*;$', lambda s, p: exec(s, {}, state)),
    (r'^printf\s*\(.+\)\s*;$', lambda s, p: exec(s, {'printf': lambda fmt, *args: print(fmt % args, end='')}, state)),
    (r'^return\s+(.+)\s*;$', lambda s, p: (print('Exit with %d' % eval(re.sub(p, r'\1', s), {}, state)), sys.exit())),
    (r'^\w+:$', lambda s, p: 0),  # do nothing
    (r'^if\s*\((.+)\)\s*goto\s+(\w+)\s*;$',
        lambda s, p: exec(re.sub(p, r'if \1: PC = labels["\2"]', s), {'labels': labels}, state)),
    (r'^.*$', lambda s, p: print("Not implement: " + s)),
]

# Stop when PC runs past the last line instead of failing with an IndexError.
while state['PC'] < len(srcs):
    print(state)
    stmt = srcs[state['PC']]  # read one line of statement
    for pattern, fn in semantics:
        if re.match(pattern, stmt) is not None:  # parse it with regular expression
            fn(stmt, pattern)  # execute according to the semantics
            break
    state['PC'] = state['PC'] + 1  # read PC again, since it may be changed by the if statement
通过各种高级语言特性轻松实现CEMU
strip(), split(), startswith()
for ... in ..., map
exec()和eval()
如果用C语言来实现, 代码量至少翻10倍
状态机的4个要素同样存在
state字典
while的循环体
semantics列表
state的初值
int main() {
int s = 0;
int i = 1;
loop:
s = s + i;
i = i + 1;
if (i <= 100) goto loop;
printf("s = %d\n", s);
return 0;
}
我们用代码展示了\(s_{compile}(next(S_c, 语句)) = next(S_{isa}, 指令序列)\)
编写可读可维护的代码
使用正确的编程模式写出好代码