scripttokeniser.h

Go to the documentation of this file.
00001 /*
00002  Copyright (C) 2001-2006, William Joseph.
00003  All Rights Reserved.
00004 
00005  This file is part of GtkRadiant.
00006 
00007  GtkRadiant is free software; you can redistribute it and/or modify
00008  it under the terms of the GNU General Public License as published by
00009  the Free Software Foundation; either version 2 of the License, or
00010  (at your option) any later version.
00011 
00012  GtkRadiant is distributed in the hope that it will be useful,
00013  but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  GNU General Public License for more details.
00016 
00017  You should have received a copy of the GNU General Public License
00018  along with GtkRadiant; if not, write to the Free Software
00019  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00020  */
00021 
00022 #if !defined(INCLUDED_SCRIPT_SCRIPTTOKENISER_H)
00023 #define INCLUDED_SCRIPT_SCRIPTTOKENISER_H
00024 
00025 #include "iscriplib.h"
00026 
00027 class ScriptTokeniser: public Tokeniser
00028 {
00029         enum CharType
00030         {
00031             eWhitespace, eCharToken, eNewline, eCharQuote, eCharSolidus, eCharStar, eCharSpecial,
00032         };
00033 
00034         typedef bool (ScriptTokeniser::*Tokenise) (char c);
00035 
00036         Tokenise m_stack[3];
00037         Tokenise* m_state;
00038         SingleCharacterInputStream<TextInputStream> m_istream;
00039         std::size_t m_scriptline;
00040         std::size_t m_scriptcolumn;
00041 
00042         char m_token[MAXTOKEN];
00043         char* m_write;
00044 
00045         char m_current;
00046         bool m_eof;
00047         bool m_unget;
00048         bool m_emit;
00049 
00050         bool m_special;
00051 
00052         CharType charType (const char c)
00053         {
00054             switch (c) {
00055             case '\n':
00056                 return eNewline;
00057             case '"':
00058                 return eCharQuote;
00059             case '/':
00060                 return eCharSolidus;
00061             case '*':
00062                 return eCharStar;
00063             case '{':
00064             case '(':
00065             case '}':
00066             case ')':
00067             case '[':
00068             case ']':
00069             case ',':
00070             case ':':
00071                 return (m_special) ? eCharSpecial : eCharToken;
00072             }
00073 
00074             if (c > 32) {
00075                 return eCharToken;
00076             }
00077             return eWhitespace;
00078         }
00079 
00080         Tokenise state ()
00081         {
00082             return *m_state;
00083         }
00084         void push (Tokenise state)
00085         {
00086             ASSERT_MESSAGE(m_state != m_stack + 2, "token parser: illegal stack push");
00087             *(++m_state) = state;
00088         }
00089         void pop ()
00090         {
00091             ASSERT_MESSAGE(m_state != m_stack, "token parser: illegal stack pop");
00092             --m_state;
00093         }
00094         void add (const char c)
00095         {
00096             if (m_write < m_token + MAXTOKEN - 1) {
00097                 *m_write++ = c;
00098             }
00099         }
00100         void remove ()
00101         {
00102             ASSERT_MESSAGE(m_write > m_token, "no char to remove");
00103             --m_write;
00104         }
00105 
00106         bool tokeniseDefault (char c)
00107         {
00108             switch (charType(c)) {
00109             case eNewline:
00110                 break;
00111             case eCharToken:
00112             case eCharStar:
00113                 push(Tokenise(&ScriptTokeniser::tokeniseToken));
00114                 add(c);
00115                 break;
00116             case eCharSpecial:
00117                 push(Tokenise(&ScriptTokeniser::tokeniseSpecial));
00118                 add(c);
00119                 break;
00120             case eCharQuote:
00121                 push(Tokenise(&ScriptTokeniser::tokeniseQuotedToken));
00122                 break;
00123             case eCharSolidus:
00124                 push(Tokenise(&ScriptTokeniser::tokeniseSolidus));
00125                 break;
00126             default:
00127                 break;
00128             }
00129             return true;
00130         }
00131         bool tokeniseToken (char c)
00132         {
00133             switch (charType(c)) {
00134             case eNewline:
00135             case eWhitespace:
00136             case eCharQuote:
00137             case eCharSpecial:
00138                 pop();
00139                 m_emit = true; // emit token
00140                 break;
00141             case eCharSolidus:
00142             case eCharToken:
00143             case eCharStar:
00144                 add(c);
00145                 break;
00146             default:
00147                 break;
00148             }
00149             return true;
00150         }
00151         bool tokeniseQuotedToken (char c)
00152         {
00153             switch (charType(c)) {
00154             case eNewline:
00155                 break;
00156             case eWhitespace:
00157             case eCharToken:
00158             case eCharSolidus:
00159             case eCharStar:
00160             case eCharSpecial:
00161                 add(c);
00162                 break;
00163             case eCharQuote:
00164                 pop();
00165                 push(Tokenise(&ScriptTokeniser::tokeniseEndQuote));
00166                 break;
00167             default:
00168                 break;
00169             }
00170             return true;
00171         }
00172         bool tokeniseSolidus (char c)
00173         {
00174             switch (charType(c)) {
00175             case eNewline:
00176             case eWhitespace:
00177             case eCharQuote:
00178             case eCharSpecial:
00179                 pop();
00180                 add('/');
00181                 m_emit = true; // emit single slash
00182                 break;
00183             case eCharToken:
00184                 pop();
00185                 add('/');
00186                 add(c);
00187                 break;
00188             case eCharSolidus:
00189                 pop();
00190                 push(Tokenise(&ScriptTokeniser::tokeniseComment));
00191                 break; // don't emit single slash
00192             case eCharStar:
00193                 pop();
00194                 push(Tokenise(&ScriptTokeniser::tokeniseBlockComment));
00195                 break; // don't emit single slash
00196             default:
00197                 break;
00198             }
00199             return true;
00200         }
00201         bool tokeniseComment (char c)
00202         {
00203             if (c == '\n') {
00204                 pop();
00205                 if (state() == Tokenise(&ScriptTokeniser::tokeniseToken)) {
00206                     pop();
00207                     m_emit = true; // emit token immediately preceding comment
00208                 }
00209             }
00210             return true;
00211         }
00212         bool tokeniseBlockComment (char c)
00213         {
00214             if (c == '*') {
00215                 pop();
00216                 push(Tokenise(&ScriptTokeniser::tokeniseEndBlockComment));
00217             }
00218             return true;
00219         }
00220         bool tokeniseEndBlockComment (char c)
00221         {
00222             switch (c) {
00223             case '/':
00224                 pop();
00225                 if (state() == Tokenise(&ScriptTokeniser::tokeniseToken)) {
00226                     pop();
00227                     m_emit = true; // emit token immediately preceding comment
00228                 }
00229                 break; // don't emit comment
00230             case '*':
00231                 break; // no state change
00232             default:
00233                 pop();
00234                 push(Tokenise(&ScriptTokeniser::tokeniseBlockComment));
00235                 break;
00236             }
00237             return true;
00238         }
00239         bool tokeniseEndQuote (char c)
00240         {
00241             pop();
00242             m_emit = true; // emit quoted token
00243             return true;
00244         }
00245         bool tokeniseSpecial (char c)
00246         {
00247             pop();
00248             m_emit = true; // emit single-character token
00249             return true;
00250         }
00251 
00253         bool tokenise ()
00254         {
00255             m_write = m_token;
00256             while (!eof()) {
00257                 char c = m_current;
00258 
00259                 if (!((*this).*state())(c)) {
00260                     // parse error
00261                     m_eof = true;
00262                     return false;
00263                 }
00264                 if (m_emit) {
00265                     m_emit = false;
00266                     return true;
00267                 }
00268 
00269                 if (c == '\n') {
00270                     ++m_scriptline;
00271                     m_scriptcolumn = 1;
00272                 } else {
00273                     ++m_scriptcolumn;
00274                 }
00275 
00276                 m_eof = !m_istream.readChar(m_current);
00277             }
00278             return m_write != m_token;
00279         }
00280 
00281         const char* fillToken ()
00282         {
00283             if (!tokenise()) {
00284                 return 0;
00285             }
00286 
00287             add('\0');
00288             return m_token;
00289         }
00290 
00291         bool eof ()
00292         {
00293             return m_eof;
00294         }
00295 
00296     public:
00297         ScriptTokeniser (TextInputStream& istream, bool special) :
00298             m_state(m_stack), m_istream(istream), m_scriptline(1), m_scriptcolumn(1), m_unget(false), m_emit(false),
00299                     m_special(special)
00300         {
00301             m_stack[0] = Tokenise(&ScriptTokeniser::tokeniseDefault);
00302             m_eof = !m_istream.readChar(m_current);
00303             m_token[MAXTOKEN - 1] = '\0';
00304         }
00305 
00306         const std::string getToken ()
00307         {
00308             if (m_unget) {
00309                 m_unget = false;
00310                 return m_token;
00311             }
00312 
00313             const char *token = fillToken();
00314             if (token)
00315                 return std::string(token);
00316 
00317             return "";
00318         }
00319 
00320         void ungetToken ()
00321         {
00322             ASSERT_MESSAGE(!m_unget, "can't unget more than one token");
00323             m_unget = true;
00324         }
00325         std::size_t getLine () const
00326         {
00327             return m_scriptline;
00328         }
00329         std::size_t getColumn () const
00330         {
00331             return m_scriptcolumn;
00332         }
00333 };
00334 
00335 inline Tokeniser* NewScriptTokeniser (TextInputStream& istream)
00336 {
00337     return new ScriptTokeniser(istream, true);
00338 }
00339 
00340 inline Tokeniser* NewSimpleTokeniser (TextInputStream& istream)
00341 {
00342     return new ScriptTokeniser(istream, false);
00343 }
00344 
00345 #endif