00001
00002
00003
00004
00005
00006
00007
00008 #include <ctype.h>
00009 #include <locale.h>
00010 #include <string.h>
00011
00012 #define llex_c
00013 #define LUA_CORE
00014
00015 #include "lua.h"
00016
00017 #include "ldo.h"
00018 #include "llex.h"
00019 #include "lobject.h"
00020 #include "lparser.h"
00021 #include "lstate.h"
00022 #include "lstring.h"
00023 #include "ltable.h"
00024 #include "lzio.h"
00025
00026
00027
/*
** Advance the lexer by one character: store the value produced by
** zgetc(ls->z) in ls->current and evaluate to that value.
** The argument is parenthesized so that any pointer expression (not just a
** plain identifier) can be passed; note that it is evaluated twice.
*/
#define next(ls)	((ls)->current = zgetc((ls)->z))
00029
00030
00031
00032
/*
** True when the current character is a line break ('\n' or '\r').
** The argument is parenthesized so that any pointer expression can be
** passed; it is evaluated more than once.
*/
#define currIsNewline(ls)	((ls)->current == '\n' || (ls)->current == '\r')
00034
00035
00036
/*
** Textual form of every token kind, terminated by a NULL entry.
** luaX_init registers the first NUM_RESERVED entries as reserved words and
** luaX_token2str indexes this table with `token - FIRST_RESERVED', so the
** order of the entries is significant.
*/
const char *const luaX_tokens [] = {
  /* words */
  "and", "break", "do", "else", "elseif", "end", "false",
  "for", "function", "if", "in", "local", "nil", "not",
  "or", "repeat", "return", "then", "true", "until", "while",
  /* multi-character operators */
  "..", "...", "==", ">=", "<=", "~=",
  /* token classes */
  "<number>", "<name>", "<string>", "<eof>",
  NULL
};
00046
00047
/*
** Append the current character to the token buffer and then advance to
** the next input character; evaluates to the result of next().
** The argument is parenthesized so that any pointer expression can be
** passed; it is evaluated more than once.
*/
#define save_and_next(ls)	(save((ls), (ls)->current), next((ls)))
00049
00050
00051 static void save (LexState *ls, int c) {
00052 Mbuffer *b = ls->buff;
00053 if (b->n + 1 > b->buffsize) {
00054 size_t newsize;
00055 if (b->buffsize >= MAX_SIZET/2)
00056 luaX_lexerror(ls, "lexical element too long", 0);
00057 newsize = b->buffsize * 2;
00058 luaZ_resizebuffer(ls->L, b, newsize);
00059 }
00060 b->buffer[b->n++] = cast(char, c);
00061 }
00062
00063
00064 void luaX_init (lua_State *L) {
00065 int i;
00066 for (i=0; i<NUM_RESERVED; i++) {
00067 TString *ts = luaS_new(L, luaX_tokens[i]);
00068 luaS_fix(ts);
00069 lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN);
00070 ts->tsv.reserved = cast_byte(i+1);
00071 }
00072 }
00073
00074
/* Size of the buffer luaX_lexerror uses for the chunk id in error messages. */
#define MAXSRC		80
00076
00077
00078 const char *luaX_token2str (LexState *ls, int token) {
00079 if (token < FIRST_RESERVED) {
00080 lua_assert(token == cast(unsigned char, token));
00081 return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) :
00082 luaO_pushfstring(ls->L, "%c", token);
00083 }
00084 else
00085 return luaX_tokens[token-FIRST_RESERVED];
00086 }
00087
00088
00089 static const char *txtToken (LexState *ls, int token) {
00090 switch (token) {
00091 case TK_NAME:
00092 case TK_STRING:
00093 case TK_NUMBER:
00094 save(ls, '\0');
00095 return luaZ_buffer(ls->buff);
00096 default:
00097 return luaX_token2str(ls, token);
00098 }
00099 }
00100
00101
00102 void luaX_lexerror (LexState *ls, const char *msg, int token) {
00103 char buff[MAXSRC];
00104 luaO_chunkid(buff, getstr(ls->source), MAXSRC);
00105 msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg);
00106 if (token)
00107 luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token));
00108 luaD_throw(ls->L, LUA_ERRSYNTAX);
00109 }
00110
00111
00112 void luaX_syntaxerror (LexState *ls, const char *msg) {
00113 luaX_lexerror(ls, msg, ls->t.token);
00114 }
00115
00116
00117 TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
00118 lua_State *L = ls->L;
00119 TString *ts = luaS_newlstr(L, str, l);
00120 TValue *o = luaH_setstr(L, ls->fs->h, ts);
00121 if (ttisnil(o)) {
00122 setbvalue(o, 1);
00123 luaC_checkGC(L);
00124 }
00125 return ts;
00126 }
00127
00128
00129 static void inclinenumber (LexState *ls) {
00130 int old = ls->current;
00131 lua_assert(currIsNewline(ls));
00132 next(ls);
00133 if (currIsNewline(ls) && ls->current != old)
00134 next(ls);
00135 if (++ls->linenumber >= MAX_INT)
00136 luaX_syntaxerror(ls, "chunk has too many lines");
00137 }
00138
00139
00140 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {
00141 ls->decpoint = '.';
00142 ls->L = L;
00143 ls->lookahead.token = TK_EOS;
00144 ls->z = z;
00145 ls->fs = NULL;
00146 ls->linenumber = 1;
00147 ls->lastline = 1;
00148 ls->source = source;
00149 luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);
00150 next(ls);
00151 }
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163 static int check_next (LexState *ls, const char *set) {
00164 if (!strchr(set, ls->current))
00165 return 0;
00166 save_and_next(ls);
00167 return 1;
00168 }
00169
00170
00171 static void buffreplace (LexState *ls, char from, char to) {
00172 size_t n = luaZ_bufflen(ls->buff);
00173 char *p = luaZ_buffer(ls->buff);
00174 while (n--)
00175 if (p[n] == from) p[n] = to;
00176 }
00177
00178
00179 static void trydecpoint (LexState *ls, SemInfo *seminfo) {
00180
00181 struct lconv *cv = localeconv();
00182 char old = ls->decpoint;
00183 ls->decpoint = (cv ? cv->decimal_point[0] : '.');
00184 buffreplace(ls, old, ls->decpoint);
00185 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) {
00186
00187 buffreplace(ls, ls->decpoint, '.');
00188 luaX_lexerror(ls, "malformed number", TK_NUMBER);
00189 }
00190 }
00191
00192
00193
00194 static void read_numeral (LexState *ls, SemInfo *seminfo) {
00195 lua_assert(isdigit(ls->current));
00196 do {
00197 save_and_next(ls);
00198 } while (isdigit(ls->current) || ls->current == '.');
00199 if (check_next(ls, "Ee"))
00200 check_next(ls, "+-");
00201 while (isalnum(ls->current) || ls->current == '_')
00202 save_and_next(ls);
00203 save(ls, '\0');
00204 buffreplace(ls, '.', ls->decpoint);
00205 if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r))
00206 trydecpoint(ls, seminfo);
00207 }
00208
00209
00210 static int skip_sep (LexState *ls) {
00211 int count = 0;
00212 int s = ls->current;
00213 lua_assert(s == '[' || s == ']');
00214 save_and_next(ls);
00215 while (ls->current == '=') {
00216 save_and_next(ls);
00217 count++;
00218 }
00219 return (ls->current == s) ? count : (-count) - 1;
00220 }
00221
00222
00223 static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
00224 int cont = 0;
00225 (void)(cont);
00226 save_and_next(ls);
00227 if (currIsNewline(ls))
00228 inclinenumber(ls);
00229 for (;;) {
00230 switch (ls->current) {
00231 case EOZ:
00232 luaX_lexerror(ls, (seminfo) ? "unfinished long string" :
00233 "unfinished long comment", TK_EOS);
00234 break;
00235 #if defined(LUA_COMPAT_LSTR)
00236 case '[': {
00237 if (skip_sep(ls) == sep) {
00238 save_and_next(ls);
00239 cont++;
00240 #if LUA_COMPAT_LSTR == 1
00241 if (sep == 0)
00242 luaX_lexerror(ls, "nesting of [[...]] is deprecated", '[');
00243 #endif
00244 }
00245 break;
00246 }
00247 #endif
00248 case ']': {
00249 if (skip_sep(ls) == sep) {
00250 save_and_next(ls);
00251 #if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2
00252 cont--;
00253 if (sep == 0 && cont >= 0) break;
00254 #endif
00255 goto endloop;
00256 }
00257 break;
00258 }
00259 case '\n':
00260 case '\r': {
00261 save(ls, '\n');
00262 inclinenumber(ls);
00263 if (!seminfo) luaZ_resetbuffer(ls->buff);
00264 break;
00265 }
00266 default: {
00267 if (seminfo) save_and_next(ls);
00268 else next(ls);
00269 }
00270 }
00271 } endloop:
00272 if (seminfo)
00273 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
00274 luaZ_bufflen(ls->buff) - 2*(2 + sep));
00275 }
00276
00277
00278 static void read_string (LexState *ls, int del, SemInfo *seminfo) {
00279 save_and_next(ls);
00280 while (ls->current != del) {
00281 switch (ls->current) {
00282 case EOZ:
00283 luaX_lexerror(ls, "unfinished string", TK_EOS);
00284 continue;
00285 case '\n':
00286 case '\r':
00287 luaX_lexerror(ls, "unfinished string", TK_STRING);
00288 continue;
00289 case '\\': {
00290 int c;
00291 next(ls);
00292 switch (ls->current) {
00293 case 'a': c = '\a'; break;
00294 case 'b': c = '\b'; break;
00295 case 'f': c = '\f'; break;
00296 case 'n': c = '\n'; break;
00297 case 'r': c = '\r'; break;
00298 case 't': c = '\t'; break;
00299 case 'v': c = '\v'; break;
00300 case '\n':
00301 case '\r': save(ls, '\n'); inclinenumber(ls); continue;
00302 case EOZ: continue;
00303 default: {
00304 if (!isdigit(ls->current))
00305 save_and_next(ls);
00306 else {
00307 int i = 0;
00308 c = 0;
00309 do {
00310 c = 10*c + (ls->current-'0');
00311 next(ls);
00312 } while (++i<3 && isdigit(ls->current));
00313 if (c > UCHAR_MAX)
00314 luaX_lexerror(ls, "escape sequence too large", TK_STRING);
00315 save(ls, c);
00316 }
00317 continue;
00318 }
00319 }
00320 save(ls, c);
00321 next(ls);
00322 continue;
00323 }
00324 default:
00325 save_and_next(ls);
00326 }
00327 }
00328 save_and_next(ls);
00329 seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
00330 luaZ_bufflen(ls->buff) - 2);
00331 }
00332
00333
00334 static int llex (LexState *ls, SemInfo *seminfo) {
00335 luaZ_resetbuffer(ls->buff);
00336 for (;;) {
00337 switch (ls->current) {
00338 case '\n':
00339 case '\r': {
00340 inclinenumber(ls);
00341 continue;
00342 }
00343 case '-': {
00344 next(ls);
00345 if (ls->current != '-') return '-';
00346
00347 next(ls);
00348 if (ls->current == '[') {
00349 int sep = skip_sep(ls);
00350 luaZ_resetbuffer(ls->buff);
00351 if (sep >= 0) {
00352 read_long_string(ls, NULL, sep);
00353 luaZ_resetbuffer(ls->buff);
00354 continue;
00355 }
00356 }
00357
00358 while (!currIsNewline(ls) && ls->current != EOZ)
00359 next(ls);
00360 continue;
00361 }
00362 case '[': {
00363 int sep = skip_sep(ls);
00364 if (sep >= 0) {
00365 read_long_string(ls, seminfo, sep);
00366 return TK_STRING;
00367 }
00368 else if (sep == -1) return '[';
00369 else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING);
00370 }
00371 case '=': {
00372 next(ls);
00373 if (ls->current != '=') return '=';
00374 else { next(ls); return TK_EQ; }
00375 }
00376 case '<': {
00377 next(ls);
00378 if (ls->current != '=') return '<';
00379 else { next(ls); return TK_LE; }
00380 }
00381 case '>': {
00382 next(ls);
00383 if (ls->current != '=') return '>';
00384 else { next(ls); return TK_GE; }
00385 }
00386 case '~': {
00387 next(ls);
00388 if (ls->current != '=') return '~';
00389 else { next(ls); return TK_NE; }
00390 }
00391 case '"':
00392 case '\'': {
00393 read_string(ls, ls->current, seminfo);
00394 return TK_STRING;
00395 }
00396 case '.': {
00397 save_and_next(ls);
00398 if (check_next(ls, ".")) {
00399 if (check_next(ls, "."))
00400 return TK_DOTS;
00401 else return TK_CONCAT;
00402 }
00403 else if (!isdigit(ls->current)) return '.';
00404 else {
00405 read_numeral(ls, seminfo);
00406 return TK_NUMBER;
00407 }
00408 }
00409 case EOZ: {
00410 return TK_EOS;
00411 }
00412 default: {
00413 if (isspace(ls->current)) {
00414 lua_assert(!currIsNewline(ls));
00415 next(ls);
00416 continue;
00417 }
00418 else if (isdigit(ls->current)) {
00419 read_numeral(ls, seminfo);
00420 return TK_NUMBER;
00421 }
00422 else if (isalpha(ls->current) || ls->current == '_') {
00423
00424 TString *ts;
00425 do {
00426 save_and_next(ls);
00427 } while (isalnum(ls->current) || ls->current == '_');
00428 ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
00429 luaZ_bufflen(ls->buff));
00430 if (ts->tsv.reserved > 0)
00431 return ts->tsv.reserved - 1 + FIRST_RESERVED;
00432 else {
00433 seminfo->ts = ts;
00434 return TK_NAME;
00435 }
00436 }
00437 else {
00438 int c = ls->current;
00439 next(ls);
00440 return c;
00441 }
00442 }
00443 }
00444 }
00445 }
00446
00447
00448 void luaX_next (LexState *ls) {
00449 ls->lastline = ls->linenumber;
00450 if (ls->lookahead.token != TK_EOS) {
00451 ls->t = ls->lookahead;
00452 ls->lookahead.token = TK_EOS;
00453 }
00454 else
00455 ls->t.token = llex(ls, &ls->t.seminfo);
00456 }
00457
00458
00459 void luaX_lookahead (LexState *ls) {
00460 lua_assert(ls->lookahead.token == TK_EOS);
00461 ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
00462 }