/*
 * Minimal Forth interpreter.
 *
 * Reads lines from stdin, interprets words immediately or compiles them
 * into colon definitions stored in a byte-addressed compile buffer.
 */
#include <ctype.h>
#include <errno.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define CONTROL_STACK_DEPTH 1024
#define DATA_STACK_DEPTH 1024
#define RETURN_STACK_DEPTH 1024
#define MAX_NAME_LEN 64
#define DICT_SIZE 1024
#define COMPILED_BUFFER_SIZE (1024*1024)
#define HEAP_SIZE (1024*1024)
#define MAXLINE 65535

struct fh_thread_s;
struct fh_word_s;
struct fh_instruction_s;

/* if the return address is this, we should drop back to interactive mode */
#define MAGICADDR_INTERACTIVE 0xFFFFFFFFULL

/* Round an integer lvalue up to the next multiple of 4. */
#define ALIGNWORD(var) \
    do { \
        while (((var) % 4) != 0) { (var)++; } \
    } while (0)

/** Result codes returned by every interpreter function. */
enum fh_error {
    FH_OK = 0,
    FH_ERR_CS_OVERFLOW = -1,
    FH_ERR_DS_OVERFLOW = -2,
    FH_ERR_RS_OVERFLOW = -3,
    FH_ERR_CS_UNDERFLOW = -4,
    FH_ERR_DS_UNDERFLOW = -5,
    FH_ERR_RS_UNDERFLOW = -6,
    FH_ERR_HEAP_FULL = -7,
    FH_ERR_DICT_FULL = -8,
    FH_ERR_COMPILE_FULL = -9,
    FH_ERR_NAME_TOO_LONG = -10,
    FH_ERR_INVALID_STATE = -11,
    FH_ERR_INTERNAL = -12,
    /* An address/length pair points outside the data heap */
    FH_ERR_BAD_ADDRESS = -13,
    /* Token is neither a dictionary word nor a valid number */
    FH_ERR_UNKNOWN_WORD = -14,
};

typedef enum fh_error (*word_exec_t)(struct fh_thread_s *fh);

/** One dictionary entry. */
struct fh_word_s {
    /** NUL-terminated word name */
    char name[MAX_NAME_LEN];
    /** Function called to execute the word */
    word_exec_t handler;
    /** True for words implemented in C, false for colon definitions */
    bool builtin;
    /** Offset of the first instruction in the compile buffer */
    uint32_t start;
    /** Offset one past the last instruction in the compile buffer */
    uint32_t end;
    /** True if the word is executed even while compiling */
    bool immediate;
};

enum fb_instruction_kind {
    /* Data is a word number in the dict */
    FH_INSTR_WORD,
    /* Data is a numeric value to push on the data stack */
    FH_INSTR_NUMBER,
};

struct fh_instruction_s {
    enum fb_instruction_kind kind;
    uint32_t data;
};

/** words that are not in the dict, have special effect */
enum compiler_word {
    CPLWORD_ENDWORD = DICT_SIZE + 1,
    CPLWORD_ALLOCSTR,
    CPLWORD_TYPESTR,
};

_Static_assert(sizeof(struct fh_instruction_s) % 4 == 0, "Instruction struct is aligned");

enum fh_state {
    FH_STATE_INTERPRET = 0,
    FH_STATE_COMPILE,
    FH_STATE_SHUTDOWN,
};

enum fh_substate {
    FH_SUBSTATE_NONE = 0,
    FH_SUBSTATE_COLONNAME,
    FH_SUBSTATE_SQUOTE,
    FH_SUBSTATE_DOTQUOTE,
    FH_SUBSTATE_PARENCOMMENT,
    FH_SUBSTATE_LINECOMMENT,
};

struct fh_thread_s {
    /** Control stack */
    uint32_t control_stack[CONTROL_STACK_DEPTH];
    size_t control_stack_top;

    /** Data stack */
    uint32_t data_stack[DATA_STACK_DEPTH];
    size_t data_stack_top;

    /** Return stack */
    uint32_t return_stack[RETURN_STACK_DEPTH];
    size_t return_stack_top;

    /** Data heap */
    uint8_t heap[HEAP_SIZE];
    size_t heap_top;

    /** Compile buffer, used for both word data and literals */
    uint8_t compile[COMPILED_BUFFER_SIZE];
    size_t compile_top;

    /** Pointer into the compile buffer for execution */
    uint32_t execptr;

    /** Word dict */
    struct fh_word_s dict[DICT_SIZE];
    uint32_t dict_top;

    /** Forth state */
    enum fh_state state;

    /** Forth sub-state */
    enum fh_substate substate;

    /** Word currently being executed - a pointer is placed here
     * before calling the handler */
    struct fh_word_s *exec_word;

    /** Current input line, its length, and the parse position within it */
    char linebuf[MAXLINE];
    size_t linebuf_len;
    size_t linebuf_readptr;
};

/* Evaluate x; on failure store the code in the local `rv` and return it. */
#define TRY(x) \
    do { \
        if (FH_OK != (rv = (x))) return rv; \
    } while (0)

/* Evaluate x; on failure store the code in the local `rv` and jump to `fail`. */
#define TRY_FAIL(x) \
    do { \
        if (FH_OK != (rv = (x))) goto fail; \
    } while (0)

/**
 * Add a word to the dictionary.
 *
 * @param w  word to copy into the next free dictionary slot
 * @param fh interpreter instance
 * @return FH_OK, or FH_ERR_DICT_FULL when no slot is left
 */
enum fh_error fh_add_word(const struct fh_word_s *w, struct fh_thread_s *fh)
{
    if (fh->dict_top == DICT_SIZE) {
        return FH_ERR_DICT_FULL;
    }
    memcpy(&fh->dict[fh->dict_top++], w, sizeof(struct fh_word_s));
    return FH_OK;
}

//region Push & Pop

/** Pop the top of the data stack into *out. */
static inline enum fh_error ds_pop(struct fh_thread_s *fh, uint32_t *out)
{
    if (fh->data_stack_top == 0) {
        return FH_ERR_DS_UNDERFLOW;
    }
    *out = fh->data_stack[--fh->data_stack_top];
    return FH_OK;
}

/** Pop the top of the return stack into *out. */
static inline enum fh_error rs_pop(struct fh_thread_s *fh, uint32_t *out)
{
    if (fh->return_stack_top == 0) {
        return FH_ERR_RS_UNDERFLOW;
    }
    *out = fh->return_stack[--fh->return_stack_top];
    return FH_OK;
}

/** Pop the top of the control stack into *out. */
static inline enum fh_error cs_pop(struct fh_thread_s *fh, uint32_t *out)
{
    if (fh->control_stack_top == 0) {
        return FH_ERR_CS_UNDERFLOW;
    }
    *out = fh->control_stack[--fh->control_stack_top];
    return FH_OK;
}

/** Push a value on the data stack. */
static inline enum fh_error ds_push(struct fh_thread_s *fh, uint32_t in)
{
    if (fh->data_stack_top == DATA_STACK_DEPTH) {
        return FH_ERR_DS_OVERFLOW;
    }
    fh->data_stack[fh->data_stack_top++] = in;
    return FH_OK;
}

/** Push a value on the return stack. */
static inline enum fh_error rs_push(struct fh_thread_s *fh, uint32_t in)
{
    if (fh->return_stack_top == RETURN_STACK_DEPTH) {
        return FH_ERR_RS_OVERFLOW;
    }
    fh->return_stack[fh->return_stack_top++] = in;
    return FH_OK;
}

/** Push a value on the control stack. */
static inline enum fh_error cs_push(struct fh_thread_s *fh, uint32_t in)
{
    if (fh->control_stack_top == CONTROL_STACK_DEPTH) {
        return FH_ERR_CS_OVERFLOW;
    }
    fh->control_stack[fh->control_stack_top++] = in;
    return FH_OK;
}

//endregion Push & Pop

/**
 * Reserve `len` bytes on the data heap.
 *
 * Both the returned address and the new heap top are 4-byte aligned.
 *
 * @param fh   interpreter instance
 * @param len  number of bytes to reserve
 * @param addr receives the heap offset of the reserved region
 * @return FH_OK, or FH_ERR_HEAP_FULL if the region does not fit
 */
enum fh_error fh_allot(
    struct fh_thread_s *fh,
    size_t len,
    uint32_t *addr
)
{
    size_t p = fh->heap_top;
    ALIGNWORD(p);

    /* compare against the remaining space so a huge `len` cannot wrap */
    if (p > HEAP_SIZE || len > HEAP_SIZE - p) {
        return FH_ERR_HEAP_FULL;
    }

    *addr = (uint32_t) p;

    size_t next = p + len;
    ALIGNWORD(next);
    fh->heap_top = next;

    return FH_OK;
}

/**
 * Reserve `len` bytes in the compile buffer.
 *
 * Both the returned address and the new buffer top are 4-byte aligned.
 *
 * @param fh   interpreter instance
 * @param len  number of bytes to reserve
 * @param addr receives the compile-buffer offset of the reserved region
 * @return FH_OK, or FH_ERR_COMPILE_FULL if the region does not fit
 */
enum fh_error fh_compile_reserve(
    struct fh_thread_s *fh,
    size_t len,
    uint32_t *addr
)
{
    size_t p = fh->compile_top;
    ALIGNWORD(p);

    /* compare against the remaining space so a huge `len` cannot wrap */
    if (p > COMPILED_BUFFER_SIZE || len > COMPILED_BUFFER_SIZE - p) {
        return FH_ERR_COMPILE_FULL;
    }

    *addr = (uint32_t) p;

    size_t next = p + len;
    ALIGNWORD(next);
    fh->compile_top = next;

    return FH_OK;
}

/** Append one instruction to the compile buffer. */
static enum fh_error fh_compile_instruction(
    struct fh_thread_s *fh,
    enum fb_instruction_kind kind,
    uint32_t data
)
{
    enum fh_error rv;
    uint32_t addr = 0;
    const struct fh_instruction_s instr = { .kind = kind, .data = data };

    TRY(fh_compile_reserve(fh, sizeof(instr), &addr));
    memcpy(&fh->compile[addr], &instr, sizeof(instr));
    return FH_OK;
}

//region Builtin Words

/** `+` ( a b -- a+b ) */
enum fh_error w_add(struct fh_thread_s *fh)
{
    enum fh_error rv;
    uint32_t a = 0, b = 0;
    TRY(ds_pop(fh, &a));
    TRY(ds_pop(fh, &b));
    TRY(ds_push(fh, a + b));
    return FH_OK;
}

/** `-` ( a b -- a-b ) */
enum fh_error w_sub(struct fh_thread_s *fh)
{
    enum fh_error rv;
    uint32_t top = 0, below = 0;
    TRY(ds_pop(fh, &top));
    TRY(ds_pop(fh, &below));
    /* the subtrahend is the value on top of the stack */
    TRY(ds_push(fh, below - top));
    return FH_OK;
}

/** `*` ( a b -- a*b ) */
enum fh_error w_mul(struct fh_thread_s *fh)
{
    enum fh_error rv;
    uint32_t a = 0, b = 0;
    TRY(ds_pop(fh, &a));
    TRY(ds_pop(fh, &b));
    TRY(ds_push(fh, a * b));
    return FH_OK;
}

/**
 * Execute an inline string instruction (CPLWORD_ALLOCSTR / CPLWORD_TYPESTR).
 *
 * On entry execptr points at the 32-bit length that precedes the text; on
 * success it points just past the text (alignment is restored by the caller).
 */
static enum fh_error fh_exec_inline_string(struct fh_thread_s *fh, uint32_t op)
{
    enum fh_error rv;
    uint32_t strl = 0;
    uint32_t addr = 0;

    if (fh->execptr > COMPILED_BUFFER_SIZE - sizeof(strl)) {
        return FH_ERR_INTERNAL;
    }
    memcpy(&strl, &fh->compile[fh->execptr], sizeof(strl));
    fh->execptr += sizeof(strl);

    if (strl > COMPILED_BUFFER_SIZE - fh->execptr) {
        return FH_ERR_INTERNAL;
    }
    const uint8_t *text = &fh->compile[fh->execptr];

    if (op == CPLWORD_ALLOCSTR) {
        /* copy the literal to the heap and leave ( addr len ) on the stack */
        TRY(fh_allot(fh, strl, &addr));
        memcpy(&fh->heap[addr], text, strl);
        TRY(ds_push(fh, addr));
        TRY(ds_push(fh, strl));
    } else {
        printf("%.*s", (int) strl, (const char *) text);
    }

    /* skip the text in both cases, otherwise it would run as instructions */
    fh->execptr += strl;
    return FH_OK;
}

/**
 * Handler of every colon definition: runs the instructions of
 * fh->exec_word until its end marker returns to the caller's address.
 *
 * Calls to other colon definitions are handled inside the loop using the
 * return stack, not by C recursion. On any error the return stack and
 * execptr are restored to their values on entry.
 *
 * @return FH_OK, the error of a failing instruction, or FH_ERR_INTERNAL
 *         if the compiled code is malformed
 */
enum fh_error w_user_word(struct fh_thread_s *fh)
{
    enum fh_error rv;
    const struct fh_word_s *w = fh->exec_word;
    const size_t rs_entry = fh->return_stack_top;
    const uint32_t execptr_entry = fh->execptr;

    if (!w) {
        return FH_ERR_INTERNAL;
    }

    TRY(rs_push(fh, fh->execptr));
    fh->execptr = w->start;

    for (;;) {
        struct fh_instruction_s instr;

        // make sure it's aligned
        ALIGNWORD(fh->execptr);
        if (fh->execptr > fh->compile_top
            || fh->compile_top - fh->execptr < sizeof(instr)) {
            rv = FH_ERR_INTERNAL;
            goto fail;
        }
        /* copy out instead of casting: the buffer is a plain byte array */
        memcpy(&instr, &fh->compile[fh->execptr], sizeof(instr));
        fh->execptr += sizeof(instr);

        if (instr.kind == FH_INSTR_NUMBER) {
            TRY_FAIL(ds_push(fh, instr.data));
            continue;
        }
        if (instr.kind != FH_INSTR_WORD) {
            rv = FH_ERR_INTERNAL;
            goto fail;
        }

        switch (instr.data) {
            case CPLWORD_ALLOCSTR:
            case CPLWORD_TYPESTR:
                TRY_FAIL(fh_exec_inline_string(fh, instr.data));
                break;

            case CPLWORD_ENDWORD:
                TRY_FAIL(rs_pop(fh, &fh->execptr));
                if (fh->execptr == MAGICADDR_INTERACTIVE
                    || fh->return_stack_top == rs_entry) {
                    return FH_OK;
                }
                break;

            default: {
                if (instr.data >= fh->dict_top) {
                    rv = FH_ERR_INTERNAL;
                    goto fail;
                }
                struct fh_word_s *callee = &fh->dict[instr.data];
                if (callee->builtin) {
                    if (!callee->handler) {
                        rv = FH_ERR_INTERNAL;
                        goto fail;
                    }
                    TRY_FAIL(callee->handler(fh));
                } else {
                    /* nested colon definition: remember where to return */
                    fh->exec_word = callee;
                    TRY_FAIL(rs_push(fh, fh->execptr));
                    fh->execptr = callee->start;
                }
                break;
            }
        }
    }

fail:
    fh->return_stack_top = rs_entry;
    fh->execptr = execptr_entry;
    return rv;
}

/**
 * `:` - start a colon definition. The next token becomes the name.
 *
 * @return FH_ERR_INVALID_STATE when not interpreting, FH_ERR_DICT_FULL
 *         when the dictionary has no free slot
 */
enum fh_error w_colon(struct fh_thread_s *fh)
{
    if (fh->state != FH_STATE_INTERPRET) {
        return FH_ERR_INVALID_STATE;
    }

    /* check before touching the state so a failure leaves it unchanged */
    if (fh->dict_top >= DICT_SIZE) {
        return FH_ERR_DICT_FULL;
    }

    struct fh_word_s *w = &fh->dict[fh->dict_top];
    /* the slot may hold leftovers of an earlier, aborted definition */
    memset(w, 0, sizeof(*w));
    w->start = (uint32_t) fh->compile_top;
    w->handler = w_user_word;

    fh->state = FH_STATE_COMPILE;
    fh->substate = FH_SUBSTATE_COLONNAME;
    return FH_OK;
}

/**
 * `;` - finish the current colon definition and make it visible.
 *
 * @return FH_ERR_INVALID_STATE when not compiling
 */
enum fh_error w_semicolon(struct fh_thread_s *fh)
{
    enum fh_error rv;

    if (fh->state != FH_STATE_COMPILE) {
        return FH_ERR_INVALID_STATE;
    }
    if (fh->dict_top >= DICT_SIZE) {
        return FH_ERR_DICT_FULL;
    }

    TRY(fh_compile_instruction(fh, FH_INSTR_WORD, CPLWORD_ENDWORD));

    /* Return to interpret state */
    fh->state = FH_STATE_INTERPRET;
    fh->dict[fh->dict_top].end = (uint32_t) fh->compile_top; /* one past the end cell */
    fh->dict_top++;
    return FH_OK;
}

/** `.` ( n -- ) print the top of the stack as a signed number. */
enum fh_error w_dot(struct fh_thread_s *fh)
{
    enum fh_error rv;
    uint32_t a = 0;
    TRY(ds_pop(fh, &a));

    printf("%" PRId32 " ", (int32_t) a);
    return FH_OK;
}

/**
 * `type` ( addr len -- ) print `len` bytes of the heap starting at `addr`.
 *
 * @return FH_ERR_BAD_ADDRESS if the range is not inside the heap
 */
enum fh_error w_type(struct fh_thread_s *fh)
{
    enum fh_error rv;
    uint32_t count = 0, addr = 0;
    TRY(ds_pop(fh, &count));
    TRY(ds_pop(fh, &addr));

    if (addr > (uint32_t) HEAP_SIZE || count > (uint32_t) HEAP_SIZE - addr) {
        return FH_ERR_BAD_ADDRESS;
    }

    printf("%.*s", (int) count, (const char *) &fh->heap[addr]);
    return FH_OK;
}

/** `cr` - print a line break. */
enum fh_error w_cr(struct fh_thread_s *fh)
{
    (void) fh;
    printf("\r\n");
    return FH_OK;
}

/** `space` - print one space. */
enum fh_error w_space(struct fh_thread_s *fh)
{
    (void) fh;
    printf(" ");
    return FH_OK;
}

/** `s"` - the following text up to `"` is a string literal. */
enum fh_error w_s_quote(struct fh_thread_s *fh)
{
    fh->substate = FH_SUBSTATE_SQUOTE;
    return FH_OK;
}

/** `."` - the following text up to `"` is printed. */
enum fh_error w_dot_quote(struct fh_thread_s *fh)
{
    fh->substate = FH_SUBSTATE_DOTQUOTE;
    return FH_OK;
}

/** `\` - the rest of the line is a comment. */
enum fh_error w_backslash(struct fh_thread_s *fh)
{
    fh->substate = FH_SUBSTATE_LINECOMMENT;
    return FH_OK;
}

/** `(` - the following text up to `)` is a comment. */
enum fh_error w_paren(struct fh_thread_s *fh)
{
    fh->substate = FH_SUBSTATE_PARENCOMMENT;
    return FH_OK;
}

/** `bye` - leave the interpreter. */
enum fh_error w_bye(struct fh_thread_s *fh)
{
    fh->state = FH_STATE_SHUTDOWN;
    return FH_OK;
}

/**
 * Fill the dictionary with the builtin words.
 *
 * @return FH_OK, or FH_ERR_DICT_FULL if the dictionary is too small
 */
enum fh_error register_builtin_words(struct fh_thread_s *fh)
{
    struct name_and_handler {
        const char *name;
        word_exec_t handler;
        /* parsing words must run even inside a colon definition */
        bool immediate;
    };

    static const struct name_and_handler builtins[] = {
        {"s\"", w_s_quote, true},
        {".\"", w_dot_quote, true},
        /* Compiler control words */
        {"bye", w_bye, false},
        /* Basic arithmetics */
        {"+", w_add, false},
        {"-", w_sub, false},
        {"*", w_mul, false},
        /* Control words */
        {":", w_colon, false},
        {";", w_semicolon, true},
        {".", w_dot, false},
        {"type", w_type, false},
        {"cr", w_cr, false},
        {"space", w_space, false},
        {"\\", w_backslash, true}, // line comment
        {"(", w_paren, true}, // enclosed comment
    };

    enum fh_error rv;
    for (size_t i = 0; i < sizeof(builtins) / sizeof(builtins[0]); i++) {
        struct fh_word_s w = {0};
        snprintf(w.name, sizeof(w.name), "%s", builtins[i].name);
        w.handler = builtins[i].handler;
        w.builtin = true;
        w.immediate = builtins[i].immediate;
        TRY(fh_add_word(&w, fh));
    }
    return FH_OK;
}

//endregion Builtin Words

/**
 * Reset an interpreter instance and register the builtin words.
 *
 * @param fh instance to initialise; all previous content is erased
 */
enum fh_error fh_init_thread(struct fh_thread_s *fh)
{
    enum fh_error rv;

    /* Make sure we have a clean state */
    memset(fh, 0, sizeof(struct fh_thread_s));

    TRY(register_builtin_words(fh));

    fh->execptr = MAGICADDR_INTERACTIVE;
    return FH_OK;
}

/**
 * Process the text of a `s"` or `."` string.
 *
 * While interpreting, `s"` copies the text to the heap and pushes
 * ( addr len ), and `."` prints it. While compiling, the text is stored
 * in the compile buffer as: instruction, 32-bit length, bytes.
 *
 * @param fh    interpreter instance; fh->substate selects the string kind
 * @param start first character of the text (need not be NUL-terminated)
 * @param len   number of characters
 * @return FH_ERR_INVALID_STATE if the substate is not a string substate
 */
enum fh_error fh_handle_quoted_string(
    struct fh_thread_s *fh,
    char *start,
    size_t len
)
{
    enum fh_error rv;
    uint32_t addr = 0;

    if (fh->substate != FH_SUBSTATE_SQUOTE && fh->substate != FH_SUBSTATE_DOTQUOTE) {
        return FH_ERR_INVALID_STATE;
    }

    if (fh->state == FH_STATE_INTERPRET) {
        if (fh->substate == FH_SUBSTATE_SQUOTE) {
            TRY(fh_allot(fh, len, &addr));
            memcpy(&fh->heap[addr], start, len);
            TRY(ds_push(fh, addr));
            TRY(ds_push(fh, (uint32_t) len));
        } else {
            printf("%.*s", (int) len, start);
        }
        return FH_OK;
    }

    /* compile */
    if (len > COMPILED_BUFFER_SIZE) {
        return FH_ERR_COMPILE_FULL;
    }

    const struct fh_instruction_s instr = {
        .kind = FH_INSTR_WORD,
        .data = fh->substate == FH_SUBSTATE_SQUOTE ? CPLWORD_ALLOCSTR : CPLWORD_TYPESTR,
    };
    const uint32_t len32 = (uint32_t) len;

    /* string is encoded as a special compiler command, the size,
     * and then the string, all 4-byte aligned. One reservation covers
     * all three parts so a failure cannot leave a lone instruction. */
    TRY(fh_compile_reserve(fh, sizeof(instr) + sizeof(len32) + len, &addr));
    memcpy(&fh->compile[addr], &instr, sizeof(instr));
    memcpy(&fh->compile[addr + sizeof(instr)], &len32, sizeof(len32));
    memcpy(&fh->compile[addr + sizeof(instr) + sizeof(len32)], start, len);
    return FH_OK;
}

/** Case-insensitive comparison of a `len`-byte token with a word name. */
static bool fh_name_equals(const char *token, size_t len, const char *name)
{
    for (size_t i = 0; i < len; i++) {
        if (name[i] == '\0') {
            return false;
        }
        if (tolower((unsigned char) token[i]) != tolower((unsigned char) name[i])) {
            return false;
        }
    }
    return name[len] == '\0';
}

/**
 * Look a token up in the dictionary, newest definition first.
 *
 * While compiling, the word under construction is searched too, which
 * lets a definition call itself.
 *
 * @param len   token length, must be smaller than MAX_NAME_LEN
 * @param index receives the dictionary index on success
 * @return true if the word was found
 */
static bool fh_find_word(
    const struct fh_thread_s *fh,
    const char *token,
    size_t len,
    uint32_t *index
)
{
    uint32_t count = fh->dict_top;
    if (fh->state == FH_STATE_COMPILE && count < DICT_SIZE) {
        count++;
    }

    for (uint32_t i = count; i-- > 0;) {
        const struct fh_word_s *w = &fh->dict[i];
        if (w->handler && fh_name_equals(token, len, w->name)) {
            *index = i;
            return true;
        }
    }
    return false;
}

/**
 * Parse a whole token as a number (decimal, 0x hex or 0 octal).
 *
 * @return FH_OK, or FH_ERR_UNKNOWN_WORD if the token is not entirely a
 *         number or does not fit in 32 bits
 */
static enum fh_error fh_parse_number(const char *start, size_t len, uint32_t *out)
{
    char buf[MAX_NAME_LEN];
    char *endp = NULL;

    if (len == 0 || len >= sizeof(buf)) {
        return FH_ERR_UNKNOWN_WORD;
    }
    /* work on a terminated copy so parsing cannot run past the token */
    memcpy(buf, start, len);
    buf[len] = '\0';

    errno = 0;
    long long v = strtoll(buf, &endp, 0);
    if (endp != buf + len || errno == ERANGE || v < INT32_MIN || v > UINT32_MAX) {
        return FH_ERR_UNKNOWN_WORD;
    }

    *out = (uint32_t) v;
    return FH_OK;
}

/**
 * Interpret or compile one whitespace-delimited token.
 *
 * A dictionary word is executed when interpreting or when it is immediate,
 * otherwise a call to it is compiled. Anything else must be a number,
 * which is pushed or compiled as a literal.
 *
 * @param fh    interpreter instance
 * @param start first character of the token (need not be NUL-terminated)
 * @param len   token length
 * @return FH_ERR_NAME_TOO_LONG, FH_ERR_UNKNOWN_WORD, or the result of the
 *         executed word / compile step
 */
enum fh_error fh_handle_word(
    struct fh_thread_s *fh,
    char *start,
    size_t len
)
{
    enum fh_error rv;
    uint32_t index = 0;
    uint32_t value = 0;

    if (len >= MAX_NAME_LEN) {
        return FH_ERR_NAME_TOO_LONG;
    }

    /* First, try if it's a known word */
    if (fh_find_word(fh, start, len, &index)) {
        struct fh_word_s *w = &fh->dict[index];
        if (fh->state == FH_STATE_COMPILE && !w->immediate) {
            return fh_compile_instruction(fh, FH_INSTR_WORD, index);
        }
        fh->exec_word = w;
        return w->handler(fh);
    }

    /* word not found, try parsing as number */
    TRY(fh_parse_number(start, len, &value));

    if (fh->state == FH_STATE_COMPILE) {
        return fh_compile_instruction(fh, FH_INSTR_NUMBER, value);
    }
    return ds_push(fh, value);
}

/** True for the characters that separate tokens. */
static bool iswhite(char c)
{
    return c == ' ' || c == '\n' || c == '\t' || c == '\r';
}

/** Length of the token starting at the read position. */
static size_t fh_token_length(const struct fh_thread_s *fh)
{
    size_t pos = fh->linebuf_readptr;
    while (pos < fh->linebuf_len && !iswhite(fh->linebuf[pos])) {
        pos++;
    }
    return pos - fh->linebuf_readptr;
}

/**
 * Search for `delim` from the read position to the end of the line.
 *
 * @param len receives the number of characters before the delimiter
 * @return true if the delimiter was found
 */
static bool fh_find_delim(const struct fh_thread_s *fh, char delim, size_t *len)
{
    const char *from = &fh->linebuf[fh->linebuf_readptr];
    const char *hit = memchr(from, delim, fh->linebuf_len - fh->linebuf_readptr);
    if (!hit) {
        return false;
    }
    *len = (size_t) (hit - from);
    return true;
}

/**
 * Process the line stored in fh->linebuf, starting at linebuf_readptr.
 *
 * Stops at the end of the line, on the first error, or when a word
 * requests shutdown. An unterminated string or `( ... )` comment keeps
 * its substate, so it continues on the next line; a `\` comment always
 * ends with the line.
 *
 * @return FH_OK or the error of the failing token
 */
enum fh_error fh_process_input(struct fh_thread_s *fh)
{
    enum fh_error rv;

    while (fh->linebuf_readptr < fh->linebuf_len && fh->state != FH_STATE_SHUTDOWN) {
        char *rp = &fh->linebuf[fh->linebuf_readptr];
        size_t len = 0;

        /* handled before whitespace skipping, which would eat the newline */
        if (fh->substate == FH_SUBSTATE_LINECOMMENT) {
            if (fh_find_delim(fh, '\n', &len)) {
                fh->linebuf_readptr += len + 1;
            } else {
                fh->linebuf_readptr = fh->linebuf_len;
            }
            fh->substate = FH_SUBSTATE_NONE;
            continue;
        }

        /* skip whitespace */
        if (iswhite(*rp)) {
            fh->linebuf_readptr++;
            continue;
        }

        switch (fh->substate) {
            case FH_SUBSTATE_NONE:
                /* try read a word */
                len = fh_token_length(fh);
                fh->linebuf_readptr += len;
                TRY(fh_handle_word(fh, rp, len));
                break;

            case FH_SUBSTATE_COLONNAME:
                /* the token is the name of the word being defined */
                len = fh_token_length(fh);
                if (len >= MAX_NAME_LEN) {
                    return FH_ERR_NAME_TOO_LONG;
                }
                if (fh->dict_top >= DICT_SIZE) {
                    return FH_ERR_DICT_FULL;
                }
                memcpy(fh->dict[fh->dict_top].name, rp, len);
                fh->dict[fh->dict_top].name[len] = '\0';
                fh->substate = FH_SUBSTATE_NONE;
                fh->linebuf_readptr += len;
                break;

            case FH_SUBSTATE_SQUOTE:
            case FH_SUBSTATE_DOTQUOTE:
                if (!fh_find_delim(fh, '"', &len)) {
                    /* no end, discard all */
                    fh->linebuf_readptr = fh->linebuf_len;
                    return FH_OK;
                }
                /* len excludes the closing quote */
                TRY(fh_handle_quoted_string(fh, rp, len));
                fh->substate = FH_SUBSTATE_NONE;
                fh->linebuf_readptr += len + 1;
                break;

            case FH_SUBSTATE_PARENCOMMENT:
                if (!fh_find_delim(fh, ')', &len)) {
                    /* no end, discard all */
                    fh->linebuf_readptr = fh->linebuf_len;
                    return FH_OK;
                }
                fh->substate = FH_SUBSTATE_NONE;
                fh->linebuf_readptr += len + 1;
                break;

            default:
                return FH_ERR_INTERNAL;
        }
    }

    /* a `\` at the very end of a line must not swallow the next line */
    if (fh->substate == FH_SUBSTATE_LINECOMMENT) {
        fh->substate = FH_SUBSTATE_NONE;
    }
    return FH_OK;
}

/**
 * Read-eval-print loop: processes stdin line by line until end of input
 * or `bye`.
 */
int main(void)
{
    /* static: the instance is a few megabytes, too large for the stack */
    static struct fh_thread_s fh;

    enum fh_error rv = fh_init_thread(&fh);
    if (rv != FH_OK) {
        printf("Error %d\r\n", (int) rv);
        return 1;
    }

    while (fh.state != FH_STATE_SHUTDOWN && fgets(fh.linebuf, (int) sizeof(fh.linebuf), stdin)) {
        fh.linebuf_len = strlen(fh.linebuf);
        fh.linebuf_readptr = 0;
        rv = fh_process_input(&fh);
        if (rv == FH_OK) {
            printf("ok\r\n");
        } else {
            printf("ERROR %d\r\n", (int) rv);
            /* drop a half-finished definition, string or comment */
            if (fh.state != FH_STATE_SHUTDOWN) {
                fh.state = FH_STATE_INTERPRET;
            }
            fh.substate = FH_SUBSTATE_NONE;
        }
    }

    printf("Bye.\r\n");
    return 0;
}