hariboteは、はりぼて言語の処理系です。
gitを使わなくてもhariboteのhistブランチ(Commits on Mar 22, 2024)のソースコードが利用できるように、ここに置いておきます。
haribote HL-8c
haribote HL-8cは、ifやforなどの制御構文と配列や文字列リテラルをサポートしています。
他のバージョンのソースコードを読む
if文のサポート
参考: ifgoto()の仕組みについて
for文のサポート
for文の実装が分かれば、while文の実装も分かると思います(2da9c49)。
処理系の改造
はりぼて言語の公式処理系の構造をベースに処理系を改造する際は、上で示したコミットとHL-7で示した論理否定演算子を追加するコミット(525c1e4)が参考になると思います。
haribote HL-8cを前提とすると、次の順で改造すると理解しやすいのではないかと考えています。
- デクリメント演算子の追加
- 中置演算子の追加
- 複合代入演算子の追加
- 残された課題への挑戦
- まずintだけではなくdoubleも使えるようにする(参考: a22_txt03)。
- 引数なしの関数を作って呼び出せるようにする(参考: a22_txt03_2)。
- ローカル変数に対応する。
- 引数ありの関数を作って呼び出せるようにする。
- 構造体なども使えるようにする。
処理系のどこで何をしているかを確認することが目的なので、すべての演算子を追加する必要はないでしょう。
もし余力があれば残された課題へ挑戦する前後で、else ifなどもサポートしてみるとたのしいと思います。
- else ifのサポート
- 条件演算子の追加
余談ですが、処理系の改造の手がかりとして構文を追加したコミットをひとつだけ残しておこうと決めたものの、それをwhile文にするかelse ifにするか迷いました。
(switch文の実装は、一身上の都合で候補から外しています。)
最終的にwhile文を実装することに決めたのは、かんたんだと思い込んでいたelse ifの実装に意外と苦戦してしまったことに起因します。
意外と苦戦してしまったelse ifが実装できてよろこぶ筆者1
意外と苦戦してしまったelse ifが実装できてよろこぶ筆者2
苦戦はしてしまいましたが、else ifが実装できたら、挙動が似ている条件演算子やnull合体演算子も実装できるようになったのです。
この「たのしみ」を味わってもらいたくて、else ifではなくwhile文を実装したのでした。
他のバージョンのソースコードを読む
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <assert.h>
#if defined(__APPLE__) || defined(__linux__)
#include <unistd.h>
#include <termios.h>
#endif
typedef unsigned char *String;
int loadText(String path, String text, int size)
{
unsigned char buf[1000];
int startPos = path[0] == '"';
int i = 0;
while (path[startPos + i] != 0 && path[startPos + i] != '"') {
buf[i] = path[startPos + i];
++i;
}
buf[i] = 0;
FILE *fp = fopen(buf, "rt");
if (fp == NULL) {
printf("Failed to open %s\n", path);
return 1;
}
int nItems = fread(text, 1, size - 1, fp);
fclose(fp);
text[nItems] = 0;
return 0;
}
#define MAX_TOKEN_CODE 1000
String tokenStrs[MAX_TOKEN_CODE + 1];
int tokenLens[MAX_TOKEN_CODE + 1];
intptr_t vars[MAX_TOKEN_CODE + 1];
int getTokenCode(String str, int len)
{
static unsigned char tokenBuf[(MAX_TOKEN_CODE + 1) * 10];
static int nTokens = 0, unusedHead = 0;
int i;
for (i = 0; i < nTokens; ++i) {
if (len == tokenLens[i] && strncmp(str, tokenStrs[i], len) == 0)
break;
}
if (i == nTokens) {
if (nTokens >= MAX_TOKEN_CODE) {
printf("Too many tokens\n");
exit(1);
}
strncpy(&tokenBuf[ unusedHead ], str, len);
tokenBuf[ unusedHead + len ] = 0;
tokenStrs[i] = &tokenBuf[ unusedHead ];
tokenLens[i] = len;
unusedHead += len + 1;
++nTokens;
vars[i] = strtol(tokenStrs[i], NULL, 0);
if (tokenStrs[i][0] == '"') {
char *p = malloc(len - 1);
if (p == NULL) {
printf("Failed to allocate memory\n");
exit(1);
}
vars[i] = (intptr_t) p;
memcpy(p, tokenStrs[i] + 1, len - 2);
p[len - 2] = 0;
}
}
return i;
}
inline static int isAlphabet(unsigned char ch)
{
return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ch == '_';
}
inline static int isNumber(unsigned char ch)
{
return '0' <= ch && ch <= '9';
}
int lexer(String str, int *tc)
{
int pos = 0, nTokens = 0;
int len;
for (;;) {
while (str[pos] == ' ' || str[pos] == '\t' || str[pos] == '\n' || str[pos] == '\r')
++pos;
if (str[pos] == 0)
return nTokens;
len = 0;
if (strchr("(){}[];,", str[pos]) != NULL)
len = 1;
else if (isAlphabet(str[pos]) || isNumber(str[pos])) {
while (isAlphabet(str[pos + len]) || isNumber(str[pos + len]))
++len;
}
else if (strchr("=+-*/!%&~|<>?:.#", str[pos]) != NULL) {
while (strchr("=+-*/!%&~|<>?:.#", str[pos + len]) != NULL && str[pos + len] != 0)
++len;
}
else if (str[pos] == '"') {
len = 1;
while (str[pos + len] != str[pos] && str[pos + len] >= ' ')
++len;
if (str[pos + len] == str[pos])
++len;
}
else {
printf("Lexing error: %.10s\n", &str[pos]);
exit(1);
}
tc[nTokens] = getTokenCode(&str[pos], len);
pos += len;
++nTokens;
}
}
int tc[10000];
enum {
PlusPlus,
Equal,
NotEq,
Les,
GtrEq,
LesEq,
Gtr,
Plus,
Minus,
Multi,
Divi,
Mod,
And,
ShiftRight,
Assign,
Ex,
Lparen,
Rparen,
Lbracket,
Rbracket,
Lbrace,
Rbrace,
Period,
Comma,
Semicolon,
Colon,
Print,
Time,
Goto,
If,
Else,
For,
While,
Continue,
Break,
Prints,
Wildcard,
Expr,
Expr0,
Zero,
One,
Two,
Three,
Four,
Five,
Six,
Seven,
Eight,
Nine,
Tmp0,
Tmp1,
Tmp2,
Tmp3,
Tmp4,
Tmp5,
Tmp6,
Tmp7,
Tmp8,
Tmp9,
EndOfKeys
};
String defaultTokens[] = {
"++",
"==",
"!=",
"<",
">=",
"<=",
">",
"+",
"-",
"*",
"/",
"%",
"&",
">>",
"=",
"!",
"(",
")",
"[",
"]",
"{",
"}",
".",
",",
";",
":",
"print",
"time",
"goto",
"if",
"else",
"for",
"while",
"continue",
"break",
"prints",
"!!*",
"!!**",
"!!***",
"0",
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
"_t0",
"_t1",
"_t2",
"_t3",
"_t4",
"_t5",
"_t6",
"_t7",
"_t8",
"_t9",
};
void initTc(String *defaultTokens, int len)
{
assert(len == EndOfKeys);
for (int i = 0; i < len; ++i)
tc[i] = getTokenCode(defaultTokens[i], strlen(defaultTokens[i]));
}
typedef enum {
Prefix_PlusPlus = 2,
Prefix_Minus = 2,
Prefix_Ex = 2,
Infix_Multi = 4,
Infix_Divi = 4,
Infix_Mod = 4,
Infix_Plus = 5,
Infix_Minus = 5,
Infix_ShiftRight = 6,
Infix_LesEq = 7,
Infix_GtrEq = 7,
Infix_Les = 7,
Infix_Gtr = 7,
Infix_Equal = 8,
Infix_NotEq = 8,
Infix_And = 9,
Infix_Assign = 15,
LowestPrecedence = 99,
NoPrecedence = 100
} Precedence;
Precedence precedenceTable[][2] = {
[PlusPlus] = {NoPrecedence, Prefix_PlusPlus},
[Equal] = {Infix_Equal, NoPrecedence},
[NotEq] = {Infix_NotEq, NoPrecedence},
[LesEq] = {Infix_LesEq, NoPrecedence},
[GtrEq] = {Infix_GtrEq, NoPrecedence},
[Les] = {Infix_Les, NoPrecedence},
[Gtr] = {Infix_Gtr, NoPrecedence},
[Plus] = {Infix_Plus, NoPrecedence},
[Minus] = {Infix_Minus, Prefix_Minus},
[Multi] = {Infix_Multi, NoPrecedence},
[Divi] = {Infix_Divi, NoPrecedence},
[Mod] = {Infix_Mod, NoPrecedence},
[And] = {Infix_And, NoPrecedence},
[ShiftRight] = {Infix_ShiftRight, NoPrecedence},
[Assign] = {Infix_Assign, NoPrecedence},
[Ex] = {NoPrecedence, Prefix_Ex},
};
enum { Infix, Prefix, EndOfStyles };
inline static Precedence getPrecedence(int style, int operator)
{
assert(0 <= style && style < EndOfStyles);
return 0 <= operator && operator < Lparen
? precedenceTable[operator][style]
: NoPrecedence;
}
#define MAX_PHRASE_LEN 31
#define N_WILDCARDS 10
int phraseTc[(MAX_PHRASE_LEN + 1) * 100];
int wpc[N_WILDCARDS * 2];
int nextPc;
inline static int End_(int num)
{
return N_WILDCARDS + num;
}
int match(int id, String phrase, int pc)
{
int head = id * (MAX_PHRASE_LEN + 1), phraseLen;
if (phraseTc[head + MAX_PHRASE_LEN] == 0) {
phraseLen = lexer(phrase, &phraseTc[head]);
assert(phraseLen <= MAX_PHRASE_LEN);
phraseTc[head + MAX_PHRASE_LEN] = phraseLen;
}
phraseLen = phraseTc[head + MAX_PHRASE_LEN];
for (int pos = 0; pos < phraseLen; ++pos) {
int phraTc = phraseTc[head + pos];
if (phraTc == Wildcard || phraTc == Expr || phraTc == Expr0) {
++pos;
int num = phraseTc[head + pos] - Zero;
wpc[num] = pc;
if (phraTc == Wildcard) {
++pc;
continue;
}
int depth = 0;
for (;;) {
if (tc[pc] == Semicolon)
break;
if (tc[pc] == Comma && depth == 0)
break;
if (tc[pc] == Lparen || tc[pc] == Lbracket)
++depth;
if (tc[pc] == Rparen || tc[pc] == Rbracket)
--depth;
if (depth < 0)
break;
++pc;
}
wpc[End_(num)] = pc;
if (phraTc == Expr && wpc[num] == pc)
return 0;
if (depth > 0)
return 0;
continue;
}
if (phraTc != tc[pc])
return 0;
++pc;
}
nextPc = pc;
return 1;
}
typedef intptr_t *IntPtr;
IntPtr internalCodes[10000];
IntPtr *icp;
typedef enum {
OpEnd,
OpCpy,
OpCeq,
OpCne,
OpClt,
OpCge,
OpCle,
OpCgt,
OpAdd,
OpSub,
OpMul,
OpDiv,
OpMod,
OpBand,
OpShr,
OpAdd1,
OpNot,
OpNeg,
OpGoto,
OpJeq,
OpJne,
OpJlt,
OpJge,
OpJle,
OpJgt,
OpLop,
OpPrint,
OpTime,
OpPrints,
OpAryNew,
OpAryInit,
OpAryGet,
OpArySet,
OpPrm,
} Opcode;
void putIc(Opcode op, IntPtr p1, IntPtr p2, IntPtr p3, IntPtr p4)
{
icp[0] = (IntPtr) op;
icp[1] = p1;
icp[2] = p2;
icp[3] = p3;
icp[4] = p4;
icp += 5;
}
#define N_TMPS 10
char tmpFlags[N_TMPS];
int tmpAlloc()
{
for (int i = 0; i < N_TMPS; ++i) {
if (!tmpFlags[i]) {
tmpFlags[i] = 1;
return Tmp0 + i;
}
}
printf("Register allocation failed\n");
return -1;
}
void tmpFree(int i)
{
if (Tmp0 <= i && i <= Tmp9)
tmpFlags[i - Tmp0] = 0;
}
int epc, epcEnd;
int evalExpression(Precedence precedence);
int expression(int num);
inline static Opcode getOpcode(int i)
{
assert(Equal <= i && i < Assign);
return OpCeq + i - Equal;
}
int evalInfixExpression(int lhs, Precedence precedence, int op)
{
++epc;
int rhs = evalExpression(precedence);
int res = tmpAlloc();
putIc(getOpcode(op), &vars[res], &vars[lhs], &vars[rhs], 0);
tmpFree(lhs);
tmpFree(rhs);
if (lhs < 0 || rhs < 0)
return -1;
return res;
}
int evalExpression(Precedence precedence)
{
int res = -1, e0 = 0, e1 = 0, e2 = 0;
nextPc = 0;
if (match(99, "( !!**0 )", epc)) {
res = expression(0);
}
else if (match(72, "!!*0!!*1[!!**2]", epc) && tc[wpc[0]] == PlusPlus) {
e2 = expression(2);
res = tmpAlloc();
putIc(OpAryGet, &vars[tc[wpc[1]]], &vars[e2], &vars[res], 0);
putIc(OpAdd1, &vars[res], 0, 0, 0);
putIc(OpArySet, &vars[tc[wpc[1]]], &vars[e2], &vars[res], 0);
}
else if (tc[epc] == PlusPlus) {
++epc;
res = evalExpression(getPrecedence(Prefix, PlusPlus));
putIc(OpAdd1, &vars[res], 0, 0, 0);
}
else if (tc[epc] == Minus) {
++epc;
e0 = evalExpression(getPrecedence(Prefix, Minus));
res = tmpAlloc();
putIc(OpNeg, &vars[res], &vars[e0], 0, 0);
}
else if (tc[epc] == Ex) {
++epc;
e0 = evalExpression(getPrecedence(Prefix, Ex));
res = tmpAlloc();
putIc(OpNot, &vars[res], &vars[e0], 0, 0);
}
else {
res = tc[epc];
++epc;
}
if (nextPc > 0)
epc = nextPc;
for (;;) {
tmpFree(e0);
tmpFree(e1);
tmpFree(e2);
if (res < 0 || e0 < 0 || e1 < 0 || e2 < 0)
return -1;
if (epc >= epcEnd)
break;
Precedence encountered;
e0 = 0, e1 = 0, e2 = 0;
if (tc[epc] == PlusPlus) {
++epc;
e0 = res;
res = tmpAlloc();
putIc(OpCpy, &vars[res], &vars[e0], 0, 0);
putIc(OpAdd1, &vars[e0], 0, 0, 0);
}
else if (match(73, "[!!**0]!!*1", epc) && tc[wpc[1]] == PlusPlus) {
e1 = res;
res = tmpAlloc();
e0 = expression(0);
epc = nextPc;
putIc(OpAryGet, &vars[e1], &vars[e0], &vars[res], 0);
e2 = tmpAlloc();
putIc(OpAdd, &vars[e2], &vars[res], &vars[One], 0);
putIc(OpArySet, &vars[e1], &vars[e0], &vars[e2], 0);
}
else if (match(70, "[!!**0]=", epc)) {
e1 = res;
e0 = expression(0);
epc = nextPc;
res = evalExpression(getPrecedence(Infix, Assign));
putIc(OpArySet, &vars[e1], &vars[e0], &vars[res], 0);
}
else if (match(71, "[!!**0]", epc)) {
e1 = res;
res = tmpAlloc();
e0 = expression(0);
putIc(OpAryGet, &vars[e1], &vars[e0], &vars[res], 0);
epc = nextPc;
}
else if (precedence >= (encountered = getPrecedence(Infix, tc[epc]))) {
switch (tc[epc]) {
case Multi: case Divi: case Mod:
case Plus: case Minus:
case ShiftRight:
case Les: case LesEq: case Gtr: case GtrEq:
case Equal: case NotEq:
case And:
res = evalInfixExpression(res, encountered - 1, tc[epc]);
break;
case Assign:
++epc;
e0 = evalExpression(encountered);
putIc(OpCpy, &vars[res], &vars[e0], 0, 0);
break;
}
}
else
break;
}
return res;
}
int expression(int num)
{
if (wpc[num] == wpc[End_(num)])
return 0;
int i, end = N_WILDCARDS * 2, buf[end + 1];
for (i = 0; i < end; ++i)
buf[i] = wpc[i];
buf[i] = nextPc;
int oldEpc = epc, oldEpcEnd = epcEnd;
epc = wpc[num]; epcEnd = wpc[End_(num)];
int res = evalExpression(LowestPrecedence);
if (epc < epcEnd)
return -1;
for (i = 0; i < end; ++i)
wpc[i] = buf[i];
nextPc = buf[i];
epc = oldEpc; epcEnd = oldEpcEnd;
return res;
}
enum { ConditionIsTrue, ConditionIsFalse };
void ifgoto(int i, int not, int label)
{
int begin = wpc[i];
if (begin + 3 == wpc[End_(i)] && Equal <= tc[begin + 1] && tc[begin + 1] <= Gtr) {
Opcode op = OpJeq + ((tc[begin + 1] - Equal) ^ not);
putIc(op, &vars[label], &vars[tc[begin]], &vars[tc[begin + 2]], 0);
}
else {
i = expression(i);
putIc(OpJne - not, &vars[label], &vars[i], &vars[Zero], 0);
tmpFree(i);
}
}
int tmpLabelNo;
int tmpLabelAlloc()
{
char str[10];
sprintf(str, "_l%d", tmpLabelNo);
++tmpLabelNo;
return getTokenCode(str, strlen(str));
}
#define BLOCK_INFO_SIZE 10
int blockInfo[BLOCK_INFO_SIZE * 100], blockDepth, loopDepth;
enum { BlockType, IfBlock, ForBlock, WhileBlock };
enum { IfLabel0 = 1, IfLabel1 };
enum { LoopBegin = 1, LoopContinue, LoopBreak, LoopDepth, LoopWpc1, LoopWpcEnd1, LoopWpc2, LoopWpcEnd2 };
inline static int *initBlockInfo()
{
blockDepth = loopDepth = 0;
return blockInfo;
}
inline static int *beginBlock()
{
blockDepth += BLOCK_INFO_SIZE;
return &blockInfo[blockDepth];
}
inline static int *endBlock()
{
blockDepth -= BLOCK_INFO_SIZE;
return &blockInfo[blockDepth];
}
inline static void startLoop(int **loopBlock)
{
blockInfo[blockDepth + LoopDepth] = loopDepth;
loopDepth = blockDepth;
*loopBlock = &blockInfo[loopDepth];
}
inline static void stopLoop(int **loopBlock)
{
loopDepth = blockInfo[blockDepth + LoopDepth];
*loopBlock = loopDepth == 0 ? NULL : &blockInfo[loopDepth];
}
inline static void saveExpr(int i)
{
int type = blockInfo[blockDepth + BlockType], head;
#if defined(NDEBUG)
head = LoopWpc1 - 2;
#else
if (type == ForBlock && (i == 1 || i == 2))
head = LoopWpc1 - 2;
else if (type == WhileBlock && i == 1)
head = LoopWpc1 - 2;
else {
printf("%s:%s:%d: ", __FILE__, __FUNCTION__, __LINE__);
printf("Not implemented error: BlockType=%d (i=%d) is not availavle\n", type, i);
exit(1);
}
#endif
blockInfo[blockDepth + head + 2 * i] = wpc[i];
blockInfo[blockDepth + head + 2 * i + 1] = wpc[End_(i)];
}
inline static void restoreExpr(int i)
{
int type = blockInfo[blockDepth + BlockType], head;
#if defined(NDEBUG)
head = LoopWpc1 - 2;
#else
if (type == ForBlock && (i == 1 || i == 2))
head = LoopWpc1 - 2;
else if (type == WhileBlock && i == 1)
head = LoopWpc1 - 2;
else {
printf("%s:%s:%d: ", __FILE__, __FUNCTION__, __LINE__);
printf("Not implemented error: BlockType=%d (i=%d) is not availavle\n", type, i);
exit(1);
}
#endif
wpc[i] = blockInfo[blockDepth + head + 2 * i];
wpc[End_(i)] = blockInfo[blockDepth + head + 2 * i + 1];
}
int exprsPutIc(int times, Opcode op, int tmpReg, int *err)
{
assert(tmpReg >= 0);
int hasTmpAlloced = tmpReg != 0, e[8] = {0};
if (hasTmpAlloced)
e[0] = tmpReg;
for (int i = hasTmpAlloced; i < times; ++i)
e[i] = expression(i);
putIc(op, &vars[e[0]], &vars[e[1]], &vars[e[2]], &vars[e[3]]);
if (times > 4)
putIc(OpPrm, &vars[e[4]], &vars[e[5]], &vars[e[6]], &vars[e[7]]);
for (int i = hasTmpAlloced; i < times; ++i) {
if (e[i] < 0)
*err = -1;
tmpFree(e[i]);
}
return tmpReg;
}
int compile(String src)
{
int nTokens = lexer(src, tc);
tc[nTokens++] = Semicolon;
tc[nTokens] = tc[nTokens + 1] = tc[nTokens + 2] = tc[nTokens + 3] = Period;
icp = internalCodes;
for (int i = 0; i < N_TMPS; ++i)
tmpFlags[i] = 0;
tmpLabelNo = 0;
int *curBlock = initBlockInfo(), *loopBlock = NULL;
int pc;
for (pc = 0; pc < nTokens;) {
int e0 = 0, e2 = 0;
if (match(0, "!!*0 = !!*1;", pc)) {
putIc(OpCpy, &vars[tc[wpc[0]]], &vars[tc[wpc[1]]], 0, 0);
}
else if (match(10, "!!*0 = !!*1 + 1; if (!!*2 < !!*3) goto !!*4;", pc) && tc[wpc[0]] == tc[wpc[1]] && tc[wpc[0]] == tc[wpc[2]]) {
putIc(OpLop, &vars[tc[wpc[4]]], &vars[tc[wpc[0]]], &vars[tc[wpc[3]]], 0);
}
else if (match(9, "!!*0 = !!*1 + 1;", pc) && tc[wpc[0]] == tc[wpc[1]]) {
putIc(OpAdd1, &vars[tc[wpc[0]]], 0, 0, 0);
}
else if (match(1, "!!*0 = !!*1 !!*2 !!*3;", pc) && Equal <= tc[wpc[2]] && tc[wpc[2]] < Assign) {
putIc(OpCeq + tc[wpc[2]] - Equal, &vars[tc[wpc[0]]], &vars[tc[wpc[1]]], &vars[tc[wpc[3]]], 0);
}
else if (match(3, "print !!**0;", pc)) {
exprsPutIc(1, OpPrint, 0, &e0);
}
else if (match(4, "!!*0:", pc)) {
vars[tc[wpc[0]]] = icp - internalCodes;
}
else if (match(5, "goto !!*0;", pc)) {
putIc(OpGoto, &vars[tc[wpc[0]]], &vars[tc[wpc[0]]], 0, 0);
}
else if (match(6, "if (!!**0) goto !!*1;", pc)) {
ifgoto(0, ConditionIsTrue, tc[wpc[1]]);
}
else if (match(7, "time;", pc)) {
putIc(OpTime, 0, 0, 0, 0);
}
else if (match(11, "if (!!**0) {", pc)) {
curBlock = beginBlock();
curBlock[ BlockType ] = IfBlock;
curBlock[ IfLabel0 ] = tmpLabelAlloc();
curBlock[ IfLabel1 ] = 0;
ifgoto(0, ConditionIsFalse, curBlock[IfLabel0]);
}
else if (match(13, "} else {", pc) && curBlock[BlockType] == IfBlock) {
curBlock[IfLabel1] = tmpLabelAlloc();
putIc(OpGoto, &vars[curBlock[IfLabel1]], &vars[curBlock[IfLabel1]], 0, 0);
vars[curBlock[IfLabel0]] = icp - internalCodes;
}
else if (match(12, "}", pc) && curBlock[BlockType] == IfBlock) {
int ifLabel = curBlock[IfLabel1] ? IfLabel1 : IfLabel0;
vars[curBlock[ifLabel]] = icp - internalCodes;
curBlock = endBlock();
}
else if (match(14, "for (!!***0; !!***1; !!***2) {", pc)) {
curBlock = beginBlock();
curBlock[ BlockType ] = ForBlock;
curBlock[ LoopBegin ] = tmpLabelAlloc();
curBlock[ LoopContinue ] = tmpLabelAlloc();
curBlock[ LoopBreak ] = tmpLabelAlloc();
startLoop(&loopBlock);
e0 = expression(0);
saveExpr(1);
if (wpc[1] < wpc[End_(1)])
ifgoto(1, ConditionIsFalse, curBlock[LoopBreak]);
saveExpr(2);
vars[curBlock[LoopBegin]] = icp - internalCodes;
}
else if (match(12, "}", pc) && curBlock[BlockType] == ForBlock) {
vars[curBlock[LoopContinue]] = icp - internalCodes;
restoreExpr(1);
restoreExpr(2);
int begin1 = wpc[1], end1 = wpc[End_(1)], begin2 = wpc[2], end2 = wpc[End_(2)];
int shouldOptimize =
begin1 + 3 == end1 && tc[begin1 + 1] == Les && begin2 + 2 == end2 &&
(tc[begin1] == tc[begin2] && tc[begin2 + 1] == PlusPlus ||
tc[begin1] == tc[begin2 + 1] && tc[begin2] == PlusPlus);
if (shouldOptimize)
putIc(OpLop, &vars[curBlock[LoopBegin]], &vars[tc[begin1]], &vars[tc[begin1 + 2]], 0);
else {
e2 = expression(2);
if (begin1 < end1)
ifgoto(1, ConditionIsTrue, curBlock[LoopBegin]);
else
putIc(OpGoto, &vars[curBlock[LoopBegin]], &vars[curBlock[LoopBegin]], 0, 0);
}
vars[curBlock[LoopBreak]] = icp - internalCodes;
stopLoop(&loopBlock);
curBlock = endBlock();
}
else if (match(22, "while (!!**1) {", pc)) {
curBlock = beginBlock();
curBlock[ BlockType ] = WhileBlock;
curBlock[ LoopBegin ] = tmpLabelAlloc();
curBlock[ LoopContinue ] = tmpLabelAlloc();
curBlock[ LoopBreak ] = tmpLabelAlloc();
startLoop(&loopBlock);
saveExpr(1);
ifgoto(1, ConditionIsFalse, curBlock[LoopBreak]);
vars[curBlock[LoopBegin]] = icp - internalCodes;
}
else if (match(12, "}", pc) && curBlock[BlockType] == WhileBlock) {
vars[curBlock[LoopContinue]] = icp - internalCodes;
restoreExpr(1);
ifgoto(1, ConditionIsTrue, curBlock[LoopBegin]);
vars[curBlock[LoopBreak]] = icp - internalCodes;
stopLoop(&loopBlock);
curBlock = endBlock();
}
else if (match(15, "continue;", pc) && loopBlock) {
putIc(OpGoto, &vars[loopBlock[LoopContinue]], &vars[loopBlock[LoopContinue]], 0, 0);
}
else if (match(16, "break;", pc) && loopBlock) {
putIc(OpGoto, &vars[loopBlock[LoopBreak]], &vars[loopBlock[LoopBreak]], 0, 0);
}
else if (match(17, "if (!!**0) continue;", pc) && loopBlock) {
ifgoto(0, ConditionIsTrue, loopBlock[LoopContinue]);
}
else if (match(18, "if (!!**0) break;", pc) && loopBlock) {
ifgoto(0, ConditionIsTrue, loopBlock[LoopBreak]);
}
else if (match(19, "prints !!**0;", pc)) {
exprsPutIc(1, OpPrints, 0, &e0);
}
else if (match(20, "int !!*0[!!**2];", pc)) {
e2 = expression(2);
putIc(OpAryNew, &vars[tc[wpc[0]]], &vars[e2], 0, 0);
}
else if (match(21, "int !!*0[!!**2] = {", pc)) {
e2 = expression(2);
putIc(OpAryNew, &vars[tc[wpc[0]]], &vars[e2], 0, 0);
int pc, nElems = 0;
for (pc = nextPc; tc[pc] != Rbrace; ++pc) {
if (pc >= nTokens)
goto err;
if (tc[pc] != Comma)
++nElems;
}
intptr_t *ary = malloc(nElems * sizeof(intptr_t));
if (ary == NULL) {
printf("Failed to allocate memory\n");
exit(1);
}
nElems = 0;
for (pc = nextPc; tc[pc] != Rbrace; ++pc) {
if (tc[pc] == Comma)
continue;
ary[nElems] = vars[tc[pc]];
++nElems;
}
putIc(OpAryInit, &vars[tc[wpc[0]]], (IntPtr) ary, (IntPtr) nElems, 0);
nextPc = pc + 2;
}
else if (match(8, "!!***0;", pc)) {
e0 = expression(0);
}
else {
goto err;
}
tmpFree(e0);
tmpFree(e2);
if (e0 < 0 || e2 < 0)
goto err;
pc = nextPc;
}
if (blockDepth > 0) {
printf("Block nesting error: blockDepth=%d loopDepth=%d", blockDepth, loopDepth);
return -1;
}
putIc(OpEnd, 0, 0, 0, 0);
IntPtr *end = icp, *tmpDest;
Opcode op;
for (icp = internalCodes; icp < end; icp += 5) {
op = (Opcode) icp[0];
if (OpGoto <= op && op <= OpLop) {
tmpDest = internalCodes + *icp[1];
while ((Opcode) tmpDest[0] == OpGoto)
tmpDest = internalCodes + *tmpDest[2];
icp[1] = (IntPtr) tmpDest;
}
}
return end - internalCodes;
err:
printf("Syntax error: %s %s %s %s\n", tokenStrs[tc[pc]], tokenStrs[tc[pc + 1]], tokenStrs[tc[pc + 2]], tokenStrs[tc[pc + 3]]);
return -1;
}
void exec()
{
clock_t begin = clock();
icp = internalCodes;
intptr_t i, *obj;
for (;;) {
switch ((Opcode) icp[0]) {
case OpEnd:
return;
case OpNeg: *icp[1] = -*icp[2]; icp += 5; continue;
case OpNot: *icp[1] = !*icp[2]; icp += 5; continue;
case OpAdd1: ++(*icp[1]); icp += 5; continue;
case OpMul: *icp[1] = *icp[2] * *icp[3]; icp += 5; continue;
case OpDiv: *icp[1] = *icp[2] / *icp[3]; icp += 5; continue;
case OpMod: *icp[1] = *icp[2] % *icp[3]; icp += 5; continue;
case OpAdd: *icp[1] = *icp[2] + *icp[3]; icp += 5; continue;
case OpSub: *icp[1] = *icp[2] - *icp[3]; icp += 5; continue;
case OpShr: *icp[1] = *icp[2] >> *icp[3]; icp += 5; continue;
case OpClt: *icp[1] = *icp[2] < *icp[3]; icp += 5; continue;
case OpCle: *icp[1] = *icp[2] <= *icp[3]; icp += 5; continue;
case OpCgt: *icp[1] = *icp[2] > *icp[3]; icp += 5; continue;
case OpCge: *icp[1] = *icp[2] >= *icp[3]; icp += 5; continue;
case OpCeq: *icp[1] = *icp[2] == *icp[3]; icp += 5; continue;
case OpCne: *icp[1] = *icp[2] != *icp[3]; icp += 5; continue;
case OpBand: *icp[1] = *icp[2] & *icp[3]; icp += 5; continue;
case OpCpy: *icp[1] = *icp[2]; icp += 5; continue;
case OpPrint:
printf("%d\n", *icp[1]);
icp += 5;
continue;
case OpGoto: icp = (IntPtr *) icp[1]; continue;
case OpJeq: if (*icp[2] == *icp[3]) { icp = (IntPtr *) icp[1]; continue; } icp += 5; continue;
case OpJne: if (*icp[2] != *icp[3]) { icp = (IntPtr *) icp[1]; continue; } icp += 5; continue;
case OpJle: if (*icp[2] <= *icp[3]) { icp = (IntPtr *) icp[1]; continue; } icp += 5; continue;
case OpJge: if (*icp[2] >= *icp[3]) { icp = (IntPtr *) icp[1]; continue; } icp += 5; continue;
case OpJlt: if (*icp[2] < *icp[3]) { icp = (IntPtr *) icp[1]; continue; } icp += 5; continue;
case OpJgt: if (*icp[2] > *icp[3]) { icp = (IntPtr *) icp[1]; continue; } icp += 5; continue;
case OpTime:
printf("time: %.3f[sec]\n", (clock() - begin) / (double) CLOCKS_PER_SEC);
icp += 5;
continue;
case OpLop:
i = *icp[2];
++i;
*icp[2] = i;
if (i < *icp[3]) {
icp = (IntPtr *) icp[1];
continue;
}
icp += 5;
continue;
case OpPrints:
printf("%s\n", (char *) *icp[1]);
icp += 5;
continue;
case OpAryNew:
*icp[1] = (intptr_t) malloc(*icp[2] * sizeof(intptr_t));
if (*icp[1] == (intptr_t) NULL) {
printf("Failed to allocate memory\n");
exit(1);
}
memset((char *) *icp[1], 0, *icp[2] * sizeof(intptr_t));
icp += 5;
continue;
case OpAryInit:
memcpy((char *) *icp[1], (char *) icp[2], ((int) icp[3]) * sizeof(intptr_t));
icp += 5;
continue;
case OpArySet:
obj = (intptr_t *) *icp[1];
i = *icp[2];
obj[i] = *icp[3];
icp += 5;
continue;
case OpAryGet:
obj = (intptr_t *) *icp[1];
i = *icp[2];
*icp[3] = obj[i];
icp += 5;
continue;
case OpPrm:
printf("%s:%s:%d: ", __FILE__, __FUNCTION__, __LINE__);
printf("Should not reach here\n");
exit(1);
}
}
}
int run(String src)
{
if (compile(src) < 0)
return 1;
exec();
return 0;
}
String removeTrailingSemicolon(String str, size_t len)
{
String rv = NULL;
for (int i = len - 1; i >= 0; --i) {
if (str[i] != ';')
break;
str[i] = 0;
rv = &str[i];
}
return rv;
}
#if defined(__APPLE__) || defined(__linux__)
struct termios initial_term;
void loadHistory();
void initTerm()
{
tcgetattr(0, &initial_term);
loadHistory();
}
void saveHistory();
void destroyTerm()
{
saveHistory();
}
inline static void setNonCanonicalMode()
{
struct termios new_term;
new_term = initial_term;
new_term.c_oflag |= ONOCR;
new_term.c_lflag &= ~(ICANON | ECHO);
new_term.c_cc[VMIN] = 1;
new_term.c_cc[VTIME] = 0;
tcsetattr(0, TCSANOW, &new_term);
}
inline static void setCanonicalMode()
{
tcsetattr(0, TCSANOW, &initial_term);
}
int cursorX;
inline static void eraseLine()
{
printf("\e[2K\r");
cursorX = 0;
}
inline static void eraseAll()
{
printf("\e[2J\e[H");
cursorX = 0;
}
#else
void initTerm() {}
void destroyTerm() {}
#endif
#define LINE_SIZE 100
#if defined(__APPLE__) || defined(__linux__)
#define HISTORY_SIZE 100
typedef struct { char str[LINE_SIZE]; int len; } Command;
typedef struct {
Command buf[HISTORY_SIZE];
int head, tail, count, pos;
} CmdHistory;
CmdHistory cmdHist;
void setHistory(char *cmd, int len)
{
if (len == 0)
return;
strncpy((char *) &cmdHist.buf[cmdHist.tail].str, cmd, len);
cmdHist.buf[cmdHist.tail].str[len] = 0;
cmdHist.buf[cmdHist.tail].len = len;
if (cmdHist.count < HISTORY_SIZE)
++cmdHist.count;
else
cmdHist.head = (cmdHist.head + 1) % HISTORY_SIZE;
cmdHist.tail = (cmdHist.tail + 1) % HISTORY_SIZE;
}
enum { Prev, Next };
void showHistory(int dir, char *buf)
{
if (cmdHist.count == 0)
return;
if (dir == Prev && cmdHist.pos < cmdHist.count)
++cmdHist.pos;
else if (dir == Prev && cmdHist.pos >= cmdHist.count)
cmdHist.pos = cmdHist.count;
else if (dir == Next && cmdHist.pos > 1)
--cmdHist.pos;
else if (dir == Next && cmdHist.pos <= 1) {
cmdHist.pos = 0;
return;
}
int i = (cmdHist.head + cmdHist.count - cmdHist.pos) % cmdHist.count;
Command *cmd = &cmdHist.buf[i];
printf("%s", cmd->str);
strncpy(buf, cmd->str, cmd->len + 1);
cursorX = cmd->len;
}
void saveHistory()
{
FILE *fp;
if (cmdHist.count == 0 || (fp = fopen(".haribote_history", "wt")) == NULL)
return;
int counter = cmdHist.count;
while (counter > 0) {
int i = (cmdHist.head + cmdHist.count - counter) % cmdHist.count;
fprintf(fp, "%s\n", cmdHist.buf[i].str);
--counter;
}
fclose(fp);
}
void loadHistory()
{
FILE *fp;
if ((fp = fopen(".haribote_history", "rt")) == NULL)
return;
for (int i = 0; i < HISTORY_SIZE; ++i) {
char line[LINE_SIZE];
if (fgets(line, LINE_SIZE, fp) == NULL)
break;
setHistory(line, strlen(line) - 1);
}
fclose(fp);
}
char *readLine(char *str, int size, FILE *stream)
{
assert(size > 0);
setNonCanonicalMode();
int i = cursorX ? cursorX : 0, end = size - 1, ch, nDeleted = 0, nInserted = 0;
char insertBuf[LINE_SIZE + 1] = "";
while ((i < end) && ((ch = fgetc(stream)) != EOF)) {
if (ch == 4)
break;
if (nInserted > 0 && (ch < 32 || ch == 127)) {
memmove(&str[cursorX], &str[cursorX - nInserted], insertBuf[LINE_SIZE]);
strncpy(&str[cursorX - nInserted], insertBuf, nInserted);
insertBuf[0] = nInserted = 0;
}
if (ch == 8 || ch == 127) {
if (cursorX == 0)
continue;
write(0, "\e[D\e[K", 6);
if (cursorX < i) {
str[i] = 0;
printf("\e7%s\e8", &str[cursorX + nDeleted]);
}
--cursorX;
++nDeleted;
continue;
}
else if (nDeleted > 0) {
memmove(&str[cursorX], &str[cursorX + nDeleted], i - cursorX - nDeleted);
i -= nDeleted;
str[i] = nDeleted = 0;
}
if (ch >= 32) {
putchar(ch);
if (cursorX < i) {
if (nInserted == 0) {
insertBuf[LINE_SIZE] = i - cursorX;
str[i] = 0;
}
printf("\e7%s\e8", &str[cursorX - nInserted]);
insertBuf[nInserted] = ch;
++nInserted;
}
else
str[cursorX] = ch;
++cursorX;
++i;
continue;
}
if (ch == '\n') {
putchar(ch);
str[i] = ch; str[i + 1] = 0;
setHistory(str, i);
cmdHist.pos = 0;
cursorX = 0;
setCanonicalMode();
return str;
}
else if (ch == 27) {
if ((ch = fgetc(stream)) == EOF)
break;
else if (ch != 91) {
ungetc(ch, stream);
continue;
}
if ((ch = fgetc(stream)) == EOF)
break;
switch (ch) {
case 67: if (cursorX < i) { write(0, "\e[C", 3); ++cursorX; } continue;
case 68: if (cursorX > 0) { write(0, "\e[D", 3); --cursorX; } continue;
case 65: strncpy(str, "prevhist", 9); break;
case 66: strncpy(str, "nexthist", 9); break;
}
setCanonicalMode();
return str;
}
else if (ch == 6) {
if (cursorX < i) {
write(0, "\e[C", 3);
++cursorX;
}
}
else if (ch == 2) {
if (cursorX > 0) {
write(0, "\e[D", 3);
--cursorX;
}
}
else if (ch == 1) {
if (cursorX <= 0)
continue;
printf("\e[%dD", cursorX);
cursorX = 0;
}
else if (ch == 5) {
if (cursorX >= i)
continue;
printf("\e[%dC", i - cursorX);
cursorX = i;
}
else if (ch == 21) {
if (cursorX <= 0)
continue;
str[i] = 0;
printf("\e[%dD\e[K\e7%s\e8", cursorX, &str[cursorX]);
memmove(str, &str[cursorX], i - cursorX);
i -= cursorX;
cursorX = 0;
}
else if (ch == 11) {
write(0, "\e[K", 3);
str[cursorX] = 0;
i -= i - cursorX;
}
else if (ch == 12) {
strncpy(str, "clear", 6);
setCanonicalMode();
return str;
}
}
if (nInserted > 0) {
memmove(&str[cursorX], &str[cursorX - nInserted], insertBuf[LINE_SIZE]);
strncpy(&str[cursorX - nInserted], insertBuf, nInserted);
}
str[i] = 0;
cursorX = 0;
setCanonicalMode();
return NULL;
}
#else
#define readLine fgets
#endif
int main(int argc, const char **argv)
{
unsigned char text[10000];
initTc(defaultTokens, sizeof defaultTokens / sizeof defaultTokens[0]);
if (argc >= 2) {
if (loadText((String) argv[1], text, 10000) != 0)
exit(1);
run(text);
exit(0);
}
int status = 0;
initTerm();
for (int next = 1, nLines = 0;;) {
if (next)
printf("[%d]> ", ++nLines);
if (readLine(text, LINE_SIZE, stdin) == NULL) {
printf("\n");
goto exit;
}
int inputLen = strlen(text);
if (text[inputLen - 1] == '\n')
text[--inputLen] = 0;
next = 1;
String semicolonPos = removeTrailingSemicolon(text, inputLen);
if (strcmp(text, "exit") == 0)
goto exit;
#if defined(__APPLE__) || defined(__linux__)
else if (strcmp(text, "prevhist") == 0) {
eraseLine();
printf("[%d]> ", nLines);
showHistory(Prev, text);
next = 0;
continue;
}
else if (strcmp(text, "nexthist") == 0) {
eraseLine();
printf("[%d]> ", nLines);
showHistory(Next, text);
next = 0;
continue;
}
#endif
else if (strncmp(text, "run ", 4) == 0) {
if (loadText(&text[4], text, 10000) != 0)
continue;
run(text);
}
#if defined(__APPLE__) || defined(__linux__)
else if (strcmp(text, "clear") == 0) {
eraseAll();
printf("[%d]> ", nLines);
next = 0;
continue;
}
#endif
else {
if (semicolonPos)
*semicolonPos = ';';
run(text);
}
}
exit:
destroyTerm();
exit(status);
}
他のバージョンのソースコードを読む
おわりに
プログラミング言語自作入門に取り組む、誰かの役に立つといいなぁ、と思いながらhariboteをリライトしました。
もしほんとうにそうなれば、とてもうれしいです。
参考
「10日くらいでできる!プログラミング言語自作入門」
インタプリタをC言語で実装しながらプログラミング言語自作に入門するテキストです。無料で読めます。続編ではインタプリタをJITコンパイラに改造します。
「10日くらいでできる!プログラミング言語自作入門」の続編#1-1
プログラミング言語自作入門で実装したインタプリタを、x86用またはx64用のJITコンパイラに改造します。後半では、レジスタ変数の導入や無駄なメモリアクセスの削減、コンパイル時の定数計算などの最適化に取り組みます。
「10日くらいでできる!プログラミング言語自作入門」の「残された課題」に関する補足
作者の川合秀実さんが、a21_txt01_10の「残された課題」に取り組む順序について、再検討した内容を共有してくれました。残された課題は、記号表やスタックを自前で実装する必要があるため、はりぼて言語の公式処理系の構造をベースに言語開発をしたい人にとって良い課題となるでしょう。