scripttokeniser.h
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #if !defined(INCLUDED_SCRIPT_SCRIPTTOKENISER_H)
00023 #define INCLUDED_SCRIPT_SCRIPTTOKENISER_H
00024
00025 #include "iscriplib.h"
00026
00027 class ScriptTokeniser: public Tokeniser
00028 {
00029 enum CharType
00030 {
00031 eWhitespace, eCharToken, eNewline, eCharQuote, eCharSolidus, eCharStar, eCharSpecial,
00032 };
00033
00034 typedef bool (ScriptTokeniser::*Tokenise) (char c);
00035
00036 Tokenise m_stack[3];
00037 Tokenise* m_state;
00038 SingleCharacterInputStream<TextInputStream> m_istream;
00039 std::size_t m_scriptline;
00040 std::size_t m_scriptcolumn;
00041
00042 char m_token[MAXTOKEN];
00043 char* m_write;
00044
00045 char m_current;
00046 bool m_eof;
00047 bool m_unget;
00048 bool m_emit;
00049
00050 bool m_special;
00051
00052 CharType charType (const char c)
00053 {
00054 switch (c) {
00055 case '\n':
00056 return eNewline;
00057 case '"':
00058 return eCharQuote;
00059 case '/':
00060 return eCharSolidus;
00061 case '*':
00062 return eCharStar;
00063 case '{':
00064 case '(':
00065 case '}':
00066 case ')':
00067 case '[':
00068 case ']':
00069 case ',':
00070 case ':':
00071 return (m_special) ? eCharSpecial : eCharToken;
00072 }
00073
00074 if (c > 32) {
00075 return eCharToken;
00076 }
00077 return eWhitespace;
00078 }
00079
00080 Tokenise state ()
00081 {
00082 return *m_state;
00083 }
00084 void push (Tokenise state)
00085 {
00086 ASSERT_MESSAGE(m_state != m_stack + 2, "token parser: illegal stack push");
00087 *(++m_state) = state;
00088 }
00089 void pop ()
00090 {
00091 ASSERT_MESSAGE(m_state != m_stack, "token parser: illegal stack pop");
00092 --m_state;
00093 }
00094 void add (const char c)
00095 {
00096 if (m_write < m_token + MAXTOKEN - 1) {
00097 *m_write++ = c;
00098 }
00099 }
00100 void remove ()
00101 {
00102 ASSERT_MESSAGE(m_write > m_token, "no char to remove");
00103 --m_write;
00104 }
00105
00106 bool tokeniseDefault (char c)
00107 {
00108 switch (charType(c)) {
00109 case eNewline:
00110 break;
00111 case eCharToken:
00112 case eCharStar:
00113 push(Tokenise(&ScriptTokeniser::tokeniseToken));
00114 add(c);
00115 break;
00116 case eCharSpecial:
00117 push(Tokenise(&ScriptTokeniser::tokeniseSpecial));
00118 add(c);
00119 break;
00120 case eCharQuote:
00121 push(Tokenise(&ScriptTokeniser::tokeniseQuotedToken));
00122 break;
00123 case eCharSolidus:
00124 push(Tokenise(&ScriptTokeniser::tokeniseSolidus));
00125 break;
00126 default:
00127 break;
00128 }
00129 return true;
00130 }
00131 bool tokeniseToken (char c)
00132 {
00133 switch (charType(c)) {
00134 case eNewline:
00135 case eWhitespace:
00136 case eCharQuote:
00137 case eCharSpecial:
00138 pop();
00139 m_emit = true;
00140 break;
00141 case eCharSolidus:
00142 case eCharToken:
00143 case eCharStar:
00144 add(c);
00145 break;
00146 default:
00147 break;
00148 }
00149 return true;
00150 }
00151 bool tokeniseQuotedToken (char c)
00152 {
00153 switch (charType(c)) {
00154 case eNewline:
00155 break;
00156 case eWhitespace:
00157 case eCharToken:
00158 case eCharSolidus:
00159 case eCharStar:
00160 case eCharSpecial:
00161 add(c);
00162 break;
00163 case eCharQuote:
00164 pop();
00165 push(Tokenise(&ScriptTokeniser::tokeniseEndQuote));
00166 break;
00167 default:
00168 break;
00169 }
00170 return true;
00171 }
00172 bool tokeniseSolidus (char c)
00173 {
00174 switch (charType(c)) {
00175 case eNewline:
00176 case eWhitespace:
00177 case eCharQuote:
00178 case eCharSpecial:
00179 pop();
00180 add('/');
00181 m_emit = true;
00182 break;
00183 case eCharToken:
00184 pop();
00185 add('/');
00186 add(c);
00187 break;
00188 case eCharSolidus:
00189 pop();
00190 push(Tokenise(&ScriptTokeniser::tokeniseComment));
00191 break;
00192 case eCharStar:
00193 pop();
00194 push(Tokenise(&ScriptTokeniser::tokeniseBlockComment));
00195 break;
00196 default:
00197 break;
00198 }
00199 return true;
00200 }
00201 bool tokeniseComment (char c)
00202 {
00203 if (c == '\n') {
00204 pop();
00205 if (state() == Tokenise(&ScriptTokeniser::tokeniseToken)) {
00206 pop();
00207 m_emit = true;
00208 }
00209 }
00210 return true;
00211 }
00212 bool tokeniseBlockComment (char c)
00213 {
00214 if (c == '*') {
00215 pop();
00216 push(Tokenise(&ScriptTokeniser::tokeniseEndBlockComment));
00217 }
00218 return true;
00219 }
00220 bool tokeniseEndBlockComment (char c)
00221 {
00222 switch (c) {
00223 case '/':
00224 pop();
00225 if (state() == Tokenise(&ScriptTokeniser::tokeniseToken)) {
00226 pop();
00227 m_emit = true;
00228 }
00229 break;
00230 case '*':
00231 break;
00232 default:
00233 pop();
00234 push(Tokenise(&ScriptTokeniser::tokeniseBlockComment));
00235 break;
00236 }
00237 return true;
00238 }
00239 bool tokeniseEndQuote (char c)
00240 {
00241 pop();
00242 m_emit = true;
00243 return true;
00244 }
00245 bool tokeniseSpecial (char c)
00246 {
00247 pop();
00248 m_emit = true;
00249 return true;
00250 }
00251
00253 bool tokenise ()
00254 {
00255 m_write = m_token;
00256 while (!eof()) {
00257 char c = m_current;
00258
00259 if (!((*this).*state())(c)) {
00260
00261 m_eof = true;
00262 return false;
00263 }
00264 if (m_emit) {
00265 m_emit = false;
00266 return true;
00267 }
00268
00269 if (c == '\n') {
00270 ++m_scriptline;
00271 m_scriptcolumn = 1;
00272 } else {
00273 ++m_scriptcolumn;
00274 }
00275
00276 m_eof = !m_istream.readChar(m_current);
00277 }
00278 return m_write != m_token;
00279 }
00280
00281 const char* fillToken ()
00282 {
00283 if (!tokenise()) {
00284 return 0;
00285 }
00286
00287 add('\0');
00288 return m_token;
00289 }
00290
00291 bool eof ()
00292 {
00293 return m_eof;
00294 }
00295
00296 public:
00297 ScriptTokeniser (TextInputStream& istream, bool special) :
00298 m_state(m_stack), m_istream(istream), m_scriptline(1), m_scriptcolumn(1), m_unget(false), m_emit(false),
00299 m_special(special)
00300 {
00301 m_stack[0] = Tokenise(&ScriptTokeniser::tokeniseDefault);
00302 m_eof = !m_istream.readChar(m_current);
00303 m_token[MAXTOKEN - 1] = '\0';
00304 }
00305
00306 const std::string getToken ()
00307 {
00308 if (m_unget) {
00309 m_unget = false;
00310 return m_token;
00311 }
00312
00313 const char *token = fillToken();
00314 if (token)
00315 return std::string(token);
00316
00317 return "";
00318 }
00319
00320 void ungetToken ()
00321 {
00322 ASSERT_MESSAGE(!m_unget, "can't unget more than one token");
00323 m_unget = true;
00324 }
00325 std::size_t getLine () const
00326 {
00327 return m_scriptline;
00328 }
00329 std::size_t getColumn () const
00330 {
00331 return m_scriptcolumn;
00332 }
00333 };
00334
00335 inline Tokeniser* NewScriptTokeniser (TextInputStream& istream)
00336 {
00337 return new ScriptTokeniser(istream, true);
00338 }
00339
00340 inline Tokeniser* NewSimpleTokeniser (TextInputStream& istream)
00341 {
00342 return new ScriptTokeniser(istream, false);
00343 }
00344
00345 #endif