完整代码

这里是上一章和这章完整的代码。注意,这里的代码并不依赖任何外部库:你不需要LLVM或者其它外部链接库(当然,除了C和C++的标准库)。编译命令如下:

  1. # Compile
  2. clang++ -g -O3 toy.cpp
  3. # Run
  4. ./a.out

这里是完整代码:

  1. #include <cctype>
  2. #include <cstdio>
  3. #include <cstdlib>
  4. #include <map>
  5. #include <string>
  6. #include <vector>
  7.  
  8. //===----------------------------------------------------------------------===//
  9. // Lexer
  10. //===----------------------------------------------------------------------===//
  11.  
  12. // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
  13. // of these for known things.
  14. enum Token {
  15. tok_eof = -1,
  16.  
  17. // commands
  18. tok_def = -2, tok_extern = -3,
  19.  
  20. // primary
  21. tok_identifier = -4, tok_number = -5
  22. };
  23.  
  24. static std::string IdentifierStr; // Filled in if tok_identifier
  25. static double NumVal; // Filled in if tok_number
  26.  
  27. /// gettok - Return the next token from standard input.
  28. static int gettok() {
  29. static int LastChar = ' ';
  30.  
  31. // Skip any whitespace.
  32. while (isspace(LastChar))
  33. LastChar = getchar();
  34.  
  35. if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
  36. IdentifierStr = LastChar;
  37. while (isalnum((LastChar = getchar())))
  38. IdentifierStr += LastChar;
  39.  
  40. if (IdentifierStr == "def") return tok_def;
  41. if (IdentifierStr == "extern") return tok_extern;
  42. return tok_identifier;
  43. }
  44.  
  45. if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
  46. std::string NumStr;
  47. do {
  48. NumStr += LastChar;
  49. LastChar = getchar();
  50. } while (isdigit(LastChar) || LastChar == '.');
  51.  
  52. NumVal = strtod(NumStr.c_str(), 0);
  53. return tok_number;
  54. }
  55.  
  56. if (LastChar == '#') {
  57. // Comment until end of line.
  58. do LastChar = getchar();
  59. while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
  60.  
  61. if (LastChar != EOF)
  62. return gettok();
  63. }
  64.  
  65. // Check for end of file. Don't eat the EOF.
  66. if (LastChar == EOF)
  67. return tok_eof;
  68.  
  69. // Otherwise, just return the character as its ascii value.
  70. int ThisChar = LastChar;
  71. LastChar = getchar();
  72. return ThisChar;
  73. }
  74.  
  75. //===----------------------------------------------------------------------===//
  76. // Abstract Syntax Tree (aka Parse Tree)
  77. //===----------------------------------------------------------------------===//
  78. namespace {
  79. /// ExprAST - Base class for all expression nodes.
  80. class ExprAST {
  81. public:
  82. virtual ~ExprAST() {}
  83. };
  84.  
  85. /// NumberExprAST - Expression class for numeric literals like "1.0".
  86. class NumberExprAST : public ExprAST {
  87. public:
  88. NumberExprAST(double val) {}
  89. };
  90.  
  91. /// VariableExprAST - Expression class for referencing a variable, like "a".
  92. class VariableExprAST : public ExprAST {
  93. std::string Name;
  94. public:
  95. VariableExprAST(const std::string &name) : Name(name) {}
  96. };
  97.  
  98. /// BinaryExprAST - Expression class for a binary operator.
  99. class BinaryExprAST : public ExprAST {
  100. public:
  101. BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) {}
  102. };
  103.  
  104. /// CallExprAST - Expression class for function calls.
  105. class CallExprAST : public ExprAST {
  106. std::string Callee;
  107. std::vector<ExprAST*> Args;
  108. public:
  109. CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
  110. : Callee(callee), Args(args) {}
  111. };
  112.  
  113. /// PrototypeAST - This class represents the "prototype" for a function,
  114. /// which captures its name, and its argument names (thus implicitly the number
  115. /// of arguments the function takes).
  116. class PrototypeAST {
  117. std::string Name;
  118. std::vector<std::string> Args;
  119. public:
  120. PrototypeAST(const std::string &name, const std::vector<std::string> &args)
  121. : Name(name), Args(args) {}
  122.  
  123. };
  124.  
  125. /// FunctionAST - This class represents a function definition itself.
  126. class FunctionAST {
  127. public:
  128. FunctionAST(PrototypeAST *proto, ExprAST *body) {}
  129. };
  130. } // end anonymous namespace
  131.  
  132. //===----------------------------------------------------------------------===//
  133. // Parser
  134. //===----------------------------------------------------------------------===//
  135.  
  136. /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
  137. /// token the parser is looking at. getNextToken reads another token from the
  138. /// lexer and updates CurTok with its results.
  139. static int CurTok;
  140. static int getNextToken() {
  141. return CurTok = gettok();
  142. }
  143.  
  144. /// BinopPrecedence - This holds the precedence for each binary operator that is
  145. /// defined.
  146. static std::map<char, int> BinopPrecedence;
  147.  
  148. /// GetTokPrecedence - Get the precedence of the pending binary operator token.
  149. static int GetTokPrecedence() {
  150. if (!isascii(CurTok))
  151. return -1;
  152.  
  153. // Make sure it's a declared binop.
  154. int TokPrec = BinopPrecedence[CurTok];
  155. if (TokPrec <= 0) return -1;
  156. return TokPrec;
  157. }
  158.  
  159. /// Error* - These are little helper functions for error handling.
  160. ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
  161. PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
  162.  
  163. static ExprAST *ParseExpression();
  164.  
  165. /// identifierexpr
  166. /// ::= identifier
  167. /// ::= identifier '(' expression* ')'
  168. static ExprAST *ParseIdentifierExpr() {
  169. std::string IdName = IdentifierStr;
  170.  
  171. getNextToken(); // eat identifier.
  172.  
  173. if (CurTok != '(') // Simple variable ref.
  174. return new VariableExprAST(IdName);
  175.  
  176. // Call.
  177. getNextToken(); // eat (
  178. std::vector<ExprAST*> Args;
  179. if (CurTok != ')') {
  180. while (1) {
  181. ExprAST *Arg = ParseExpression();
  182. if (!Arg) return 0;
  183. Args.push_back(Arg);
  184.  
  185. if (CurTok == ')') break;
  186.  
  187. if (CurTok != ',')
  188. return Error("Expected ')' or ',' in argument list");
  189. getNextToken();
  190. }
  191. }
  192.  
  193. // Eat the ')'.
  194. getNextToken();
  195.  
  196. return new CallExprAST(IdName, Args);
  197. }
  198.  
  199. /// numberexpr ::= number
  200. static ExprAST *ParseNumberExpr() {
  201. ExprAST *Result = new NumberExprAST(NumVal);
  202. getNextToken(); // consume the number
  203. return Result;
  204. }
  205.  
  206. /// parenexpr ::= '(' expression ')'
  207. static ExprAST *ParseParenExpr() {
  208. getNextToken(); // eat (.
  209. ExprAST *V = ParseExpression();
  210. if (!V) return 0;
  211.  
  212. if (CurTok != ')')
  213. return Error("expected ')'");
  214. getNextToken(); // eat ).
  215. return V;
  216. }
  217.  
  218. /// primary
  219. /// ::= identifierexpr
  220. /// ::= numberexpr
  221. /// ::= parenexpr
  222. static ExprAST *ParsePrimary() {
  223. switch (CurTok) {
  224. default: return Error("unknown token when expecting an expression");
  225. case tok_identifier: return ParseIdentifierExpr();
  226. case tok_number: return ParseNumberExpr();
  227. case '(': return ParseParenExpr();
  228. }
  229. }
  230.  
  231. /// binoprhs
  232. /// ::= ('+' primary)*
  233. static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
  234. // If this is a binop, find its precedence.
  235. while (1) {
  236. int TokPrec = GetTokPrecedence();
  237.  
  238. // If this is a binop that binds at least as tightly as the current binop,
  239. // consume it, otherwise we are done.
  240. if (TokPrec < ExprPrec)
  241. return LHS;
  242.  
  243. // Okay, we know this is a binop.
  244. int BinOp = CurTok;
  245. getNextToken(); // eat binop
  246.  
  247. // Parse the primary expression after the binary operator.
  248. ExprAST *RHS = ParsePrimary();
  249. if (!RHS) return 0;
  250.  
  251. // If BinOp binds less tightly with RHS than the operator after RHS, let
  252. // the pending operator take RHS as its LHS.
  253. int NextPrec = GetTokPrecedence();
  254. if (TokPrec < NextPrec) {
  255. RHS = ParseBinOpRHS(TokPrec+1, RHS);
  256. if (RHS == 0) return 0;
  257. }
  258.  
  259. // Merge LHS/RHS.
  260. LHS = new BinaryExprAST(BinOp, LHS, RHS);
  261. }
  262. }
  263.  
  264. /// expression
  265. /// ::= primary binoprhs
  266. ///
  267. static ExprAST *ParseExpression() {
  268. ExprAST *LHS = ParsePrimary();
  269. if (!LHS) return 0;
  270.  
  271. return ParseBinOpRHS(0, LHS);
  272. }
  273.  
  274. /// prototype
  275. /// ::= id '(' id* ')'
  276. static PrototypeAST *ParsePrototype() {
  277. if (CurTok != tok_identifier)
  278. return ErrorP("Expected function name in prototype");
  279.  
  280. std::string FnName = IdentifierStr;
  281. getNextToken();
  282.  
  283. if (CurTok != '(')
  284. return ErrorP("Expected '(' in prototype");
  285.  
  286. std::vector<std::string> ArgNames;
  287. while (getNextToken() == tok_identifier)
  288. ArgNames.push_back(IdentifierStr);
  289. if (CurTok != ')')
  290. return ErrorP("Expected ')' in prototype");
  291.  
  292. // success.
  293. getNextToken(); // eat ')'.
  294.  
  295. return new PrototypeAST(FnName, ArgNames);
  296. }
  297.  
  298. /// definition ::= 'def' prototype expression
  299. static FunctionAST *ParseDefinition() {
  300. getNextToken(); // eat def.
  301. PrototypeAST *Proto = ParsePrototype();
  302. if (Proto == 0) return 0;
  303.  
  304. if (ExprAST *E = ParseExpression())
  305. return new FunctionAST(Proto, E);
  306. return 0;
  307. }
  308.  
  309. /// toplevelexpr ::= expression
  310. static FunctionAST *ParseTopLevelExpr() {
  311. if (ExprAST *E = ParseExpression()) {
  312. // Make an anonymous proto.
  313. PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
  314. return new FunctionAST(Proto, E);
  315. }
  316. return 0;
  317. }
  318.  
  319. /// external ::= 'extern' prototype
  320. static PrototypeAST *ParseExtern() {
  321. getNextToken(); // eat extern.
  322. return ParsePrototype();
  323. }
  324.  
  325. //===----------------------------------------------------------------------===//
  326. // Top-Level parsing
  327. //===----------------------------------------------------------------------===//
  328.  
  329. static void HandleDefinition() {
  330. if (ParseDefinition()) {
  331. fprintf(stderr, "Parsed a function definition.\n");
  332. } else {
  333. // Skip token for error recovery.
  334. getNextToken();
  335. }
  336. }
  337.  
  338. static void HandleExtern() {
  339. if (ParseExtern()) {
  340. fprintf(stderr, "Parsed an extern\n");
  341. } else {
  342. // Skip token for error recovery.
  343. getNextToken();
  344. }
  345. }
  346.  
  347. static void HandleTopLevelExpression() {
  348. // Evaluate a top-level expression into an anonymous function.
  349. if (ParseTopLevelExpr()) {
  350. fprintf(stderr, "Parsed a top-level expr\n");
  351. } else {
  352. // Skip token for error recovery.
  353. getNextToken();
  354. }
  355. }
  356.  
  357. /// top ::= definition | external | expression | ';'
  358. static void MainLoop() {
  359. while (1) {
  360. fprintf(stderr, "ready> ");
  361. switch (CurTok) {
  362. case tok_eof: return;
  363. case ';': getNextToken(); break; // ignore top-level semicolons.
  364. case tok_def: HandleDefinition(); break;
  365. case tok_extern: HandleExtern(); break;
  366. default: HandleTopLevelExpression(); break;
  367. }
  368. }
  369. }
  370.  
  371. //===----------------------------------------------------------------------===//
  372. // Main driver code.
  373. //===----------------------------------------------------------------------===//
  374.  
  375. int main() {
  376. // Install standard binary operators.
  377. // 1 is lowest precedence.
  378. BinopPrecedence['<'] = 10;
  379. BinopPrecedence['+'] = 20;
  380. BinopPrecedence['-'] = 20;
  381. BinopPrecedence['*'] = 40; // highest.
  382.  
  383. // Prime the first token.
  384. fprintf(stderr, "ready> ");
  385. getNextToken();
  386.  
  387. // Run the main "interpreter loop" now.
  388. MainLoop();
  389.  
  390. return 0;
  391. }