fast math expression parser
#1
Posted 15 October 2009 - 02:00 PM
Its a math parser, i am working for adding support for math function and maybe in the future
more advanced like integration and differentiation but this goes outside my main game development interest :P
I would love to know how and if you are using in your pojects, there shouldn't be any bugs ,
i spent much time working on error detection but something may always slip from hands.
[code=cpp]
#include "stdafx.h"
#include <string>
#include <iostream>
#include <fstream>
#include <cstdarg>
#include <assert.h>
using namespace std;
/////////////////////////////////////////////////////////////////////////
#define MAXIDSIZE 256
enum EToken
{
TEnd,
TValue,
TPlus,
TMinus,
TMul,
TDiv,
TMod,
TExp,
TLparen,
TRparen,
TAssign,
TIdent
};
enum EError
{
ETextExpected,
EUnknownSymbol,
EIncompleteExpression,
EExpectedIdentifierOrNumeric,
EExpectedParenBalance,
EExpectedIdentifier,
EExpcetedNumeric,
EExpectedLParen,
EExpectedRParen,
EIdentifierTooLong
};
//////////////////////////////////////////////////////////////////////////
class CExprParser
{
char *Look;
double Value;
EToken Token;
char Identifier[MAXIDSIZE];
bool EatSymbol( void )
{
int Lenght=0;
memset( Identifier,0,sizeof(char)*MAXIDSIZE );
if ( isalpha( *Look ) || *Look=='_' )
{
Token=TIdent;
do
{
Identifier[ Lenght++ ]=*Look;
if ( Lenght>=MAXIDSIZE)
return Error( EIdentifierTooLong );
Look++;
}
while ( isalnum( *Look ) || *Look =='_' );
}
else
{
return false;
}
return true;
};
bool GetNextToken( void )
{
if ( *Look==NULL )
{
Token=TEnd;
return true; }
// Skip over white space.
while ( isspace( *Look ) )
Look++;
switch ( *Look )
{
// check for delimiter
case '+' : *Look++ ; Token= TPlus; break;
case '-' : *Look++ ; Token= TMinus; break;
case '*' : *Look++ ; Token= TMul; break;
case '/' : *Look++ ; Token= TDiv; break;
case '=' : *Look++ ; Token= TAssign; break;
case '(' : *Look++ ; Token= TLparen; break;
case ')' : *Look++ ; Token= TRparen; break;
case '%' : *Look++ ; Token= TMod; break;
case '^' : *Look++ ; Token= TExp; break;
// check for numeric
case '0' :
case '1' :
case '2' :
case '3' :
case '4' :
case '5' :
case '6' :
case '7' :
case '8' :
case '9' :
case '.' :
//////////////////////////////////////////
// eat numeric value
char *p;
Value=strtod( Look,&p );
Look=p;
Token=TValue;
break;
// end of text
case '\0' : Token=TEnd; break;
// check for symbolic
default :
if ( !EatSymbol() )
return Error( EUnknownSymbol );
Token=TIdent ;
break;
}
return true;
}
bool Error( int err )
{
switch ( err )
{
case ETextExpected: ErrorString="Error: No Text\n"; break;
case EIncompleteExpression: ErrorString="Error: Syntax error\n"; break;
case EUnknownSymbol: ErrorString="Error: Unknown identifier\n" ; break;
case EExpectedIdentifierOrNumeric: ErrorString="Expected Identifier or numeric\n"; break;
case EExpectedIdentifier: ErrorString="Expected Identifier\n"; break;
case EExpcetedNumeric: ErrorString="Expected numeric\n"; break;
case EExpectedLParen: ErrorString="Error: expected '('"; break;
case EExpectedRParen: ErrorString="Error: expected ')'"; break;
case EExpectedParenBalance: ErrorString="Error: Parenthesys unbalanced\n"; break;
case EIdentifierTooLong : ErrorString="Error: Identifier too long\n"; break;
};
return false;
}
bool VerifyParenthesysBalance( void )
{
int count=0;
char *ptr;
ptr=Look;
do
{
if ( *ptr=='(' ) count++;
else
if ( *ptr==')' ) count--;
ptr++;
}
while ( *ptr!=0 ) ;
return ( count==0 ) ;
}
bool Factor ( double &c )
{
if ( Token==TValue )
{
c=Value;
}
else if ( Token==TMinus )
{
if ( GetNextToken()==false )
return false;
if ( Token==TValue )
c=-Value;
else if ( Token==TLparen )
{
double b;
if ( GetNextToken()==false )
return false;
Expr( b );
c=-b;
if ( Token!=TRparen && Token!=TEnd )
return Error( EExpectedRParen );
}
else
{
return Error( EIncompleteExpression );
}
}
else if ( Token==TPlus )
{
if ( GetNextToken()==false )
return false;
if ( Token==TValue )
c=Value;
else if ( Token==TLparen )
{
double b;
if ( GetNextToken()==false )
return false;
Expr( b );
c=b;
if ( Token!=TRparen && Token!=TEnd )
return Error( EExpectedRParen );
}
else
{
return Error( EIncompleteExpression );
}
}
else if ( Token==TLparen )
{
double b;
if ( GetNextToken()==false )
return false;
Expr( b );
c=b;
if ( Token!=TRparen && Token!=TEnd )
return Error( EExpectedRParen );
}
else if ( Token==TRparen )
{
// () condition, no value returned
c=0.0f;
return true;
}
else if ( Token==TEnd )
{
return Error( EIncompleteExpression );
}
else
{
return Error( EUnknownSymbol );
}
return true;
}
bool Term ( double &c )
{
double b;
if ( Factor( c )==false )
return false;
if ( GetNextToken()==false )
return false;
while ( Token==TMul || Token==TDiv )
{
if ( Token==TMul )
{
if ( GetNextToken()==false )
return false;
if ( Factor ( b )==false )
return false;
c*=b;
}
else if ( Token==TDiv )
{
if ( GetNextToken()==false )
return false;
if ( Factor ( b )==false )
return false;
c/=b;
}
if ( GetNextToken()==false )
return false;
}
return true;
}
bool Expr( double &c )
{
double b;
if ( Term ( c )==false )
return false;
while ( Token==TPlus || Token==TMinus )
{
if ( Token == TPlus )
{
if ( GetNextToken()==false )
return false;
if ( Term( b )==false )
return false;
c+=b;
}
else if ( Token == TMinus )
{
if ( GetNextToken()==false )
return false;
if ( Term( b )==false )
return false;
c-=b;
}
if ( Token!=TPlus &&
Token!=TMinus &&
Token!=TRparen &&
Token!=TEnd )
{
return Error( EIncompleteExpression );
}
}
return true;
}
public :
bool Parse( char *text )
{
if ( text==NULL )
return Error( ETextExpected ) ;
double result;
Look=text;
Token=TEnd;
Value=0.0f;
if ( VerifyParenthesysBalance()==false )
return Error(EExpectedParenBalance);
GetNextToken();
Expr( result );
cout << result << endl;
return true;
}
//////////////////////////////////
string ErrorString;
CExprParser()
{
Value=0.0f;
Look=NULL;
};
~CExprParser()
{
};
};
//////////////////////////////////////////////////////////////////////////
int _tmain( )
{
string text=" 2 + 3 - 2*( 3+2 )*4 ";
cout << "Parsing : " << text << endl << endl;
CExprParser parser;
parser.Parse( &text[0] );
cout << parser.ErrorString << endl;
cin.get();
return 0;
}
[/code]
#2
Posted 16 October 2009 - 08:51 AM
Btw, it's parantheses, not paranthesys
-
Currently working on: the 3D engine for Tomb Raider.
#3
Posted 16 October 2009 - 09:56 AM
#4
Posted 16 October 2009 - 09:33 PM
#5
Posted 16 October 2009 - 09:58 PM
#6
Posted 17 October 2009 - 09:47 PM
Quote
(='.'=) This is Bunny. Copy and paste bunny into
(")_(") your signature to help him gain world domination.
bunny also wants to fight spam: Click Here Bots!
#7
Posted 18 October 2009 - 08:45 AM
#8
Posted 18 October 2009 - 03:24 PM
v71 said:
Do a good job there = flamed
(='.'=) This is Bunny. Copy and paste bunny into
(")_(") your signature to help him gain world domination.
bunny also wants to fight spam: Click Here Bots!
#9
Posted 19 October 2009 - 02:23 AM
#10
Posted 19 October 2009 - 02:59 AM
For benchmarking purposes, here are my results:
Core 2 Duo 1.8GHz, optimized release build VS 2008
100000 Tests.
Parsing : 2 + 3 - 2 * (3 + 2) * 4 = -35
Test 1: 250 milliseconds. Result = -35
Test 2: 266 milliseconds. Result = -35
Test 3: 265 milliseconds. Result = -35
Test 4: 250 milliseconds. Result = -35
Test 5: 250 milliseconds. Result = -35
Parsing : ((1 + (2 / 3)) * 0.5) + 0.1666666 = 1
Test 1: 312 milliseconds. Result = 1
Test 2: 313 milliseconds. Result = 1
Test 3: 297 milliseconds. Result = 1
Test 4: 297 milliseconds. Result = 1
Test 5: 313 milliseconds. Result = 1
#11
Posted 19 October 2009 - 07:37 AM
#12
Posted 19 October 2009 - 10:18 AM
#13
Posted 19 October 2009 - 10:38 AM
Quote
If you really want to see a nightmare for readability, try adding a manual follow-set based error recovery to your expression parser. I had to do this for an university assignment once. It was not nice.
Good work with the parser, though :)
Guess this is a piece of code one can reuse over and over again.
Cheers,
- Wernaeh
#14
Posted 22 October 2009 - 03:01 PM
starstutter said:
Do a good job there = flamed
Uhm, if I look at http://www.gamedev.n...trictTo=Replies, then three years later http://www.gamedev.n...trictTo=Replies under ban evasion, then I wonder what good stuff that would be in that case.
Anyways, Vincenzo, so far that's good work.
Let me analyze your code syntactically/cosmetically only for now:
#include "stdafx.h"MSVC only.
using namespace std;Dirty, not good practice.
LenghtTypo: "Length".
#define MAXIDSIZE 256Hardcoded maximum size. Not good practice, error prone.
bool EatSymbol( void )Putting "void" there is C practice, but is uncommon C++ practice.
if ( GetNextToken()==false )Explitly comparing against boolean literals is uncommon and redundant. Just write "if (!GetNextToken())", which many programmers will also find more readable.
NULLNot standard C++ but MSVC++. Use the literal "0" instead.
CExprParser()
{
Value=0.0f;
Look=NULL;
}; You might want to initialize instead of assign to your members: "CExprParser() : Value(0.0f), Look(0)".bool Parse( char *text )If you don't plan to modify the passed string, then use "const char *text". See also http://www.parashift...orrectness.html about const correct code.
int _tmain( )In standard C++, we say "int main ()", "_tmain()" is MSVC++ again.
parser.Parse( &text[0] );Undefined behaviour.
Quote
const_reference operator[](size_type pos) const; reference operator[](size_type pos);1 Returns: If pos < size(), returns data()[pos]. Otherwise, if pos == size(), the const version returns charT(). Otherwise, the behavior is undefined.
Better use text.c_str():
Quote
const charT* c_str() const;1 Returns: A pointer to the initial element of an array of length size() + 1 whose first size() elements equal the corresponding elements of the string controlled by *this and whose last element is a null character specified by charT().
The internal representation of std::string that you are dissecting does not have a \0 (null character).
cin.get();What if your program is run in non-interactive mode?
edit: I forgot about your list of #includes:
//#include "stdafx.h" //! why? #include <string> #include <iostream> //#include <fstream> //! why? //#include <cstdarg> //! why? #include <cassert> //! assert.h is for C #include <cstring> //! was missing, needed for memset() #include <cstdlib> //! was missing, needed for strtod()
edit: Another issue:
c/=b;Here you should catch a division by zero before it happens, otherwise your program yields undefined behaviour (cf. ISO/IEC 14882:2003(E), Section 5 Expressions:
Quote
~/www/ | picogen.org | metatrace
- --- / .-.. . - / -- -.-- / ... .. --. -. .- - ..- .-. . / .- .--. .--. . .- .-. / --. . . -.- -.--
#15
Posted 22 October 2009 - 03:51 PM
phresnel said:
Thanks for the great introduction to the C++ standard, freely available to all interested, but I hope you are aware of the difference between prototyping something fast and getting a stable, clear code to the market for cash that adheres to the specs and would justify the countless hours of reading the standard. Moreover there are tools for that... I actually can't believe you waste as much time writing here than he probably needed to develop the code, which is not that bad at all. Never used includes? Who cares...
#16
Posted 22 October 2009 - 04:14 PM
Mihail121 said:
I posted a more-than-1-line-critique for a buggy program, and tried to give some hints to make it less bug-full and well-formed. I think it actually helps the OP, because he stated that he thinks that
Quote
Quote
Anyways: What you say is "learn unportable c++ that yields undefined behaviour and trust 3rd parties to fix that for you" :worthy:
Quote
See also starstutters second post which might be interesting.
~/www/ | picogen.org | metatrace
- --- / .-.. . - / -- -.-- / ... .. --. -. .- - ..- .-. . / .- .--. .--. . .- .-. / --. . . -.- -.--
#17
Posted 22 October 2009 - 04:19 PM
phresnel said:
Not flamming you for anything, it's evident you are proficient with the specification, it's completely fine to provide help, it's fine to reveal bugs, it's just that my personal belief that the point of the post is to show a quick parser and nothing more. But I'm just being ignorant, most honestly your post was fully ok, sorry. Let's not start an off-topic war.
P.S.
Quote
No, that's not what I've said, contact me on ICQ\Skype and I'll explain it in German :)
#18
Posted 22 October 2009 - 04:55 PM
Mihail121 said:
If my first post was a tad too offensive in some point, then I am sorry, that was not my intention. But yes, I am guilty of prosecuting not to write unportable code, and probably go into too much detail sometimes.
On the other hand, I also highly welcome other peoples critique on me or my stuff, e.g. once I release picogen 0.3, I'd be happy if people point out bugs or otherwise bad/dumb code that I did not recognize. Because once written, the probability is pretty high that I'll never fix some subtleties that really needed to be.
Quote
~/www/ | picogen.org | metatrace
- --- / .-.. . - / -- -.-- / ... .. --. -. .- - ..- .-. . / .- .--. .--. . .- .-. / --. . . -.- -.--
#19
Posted 22 October 2009 - 04:59 PM
why posting a 3 years old rants from gamedev ? how did you know my name ?
why is using namespace std , dirty ?????
#20
Posted 22 October 2009 - 06:37 PM
phresnel said:
#include "stdafx.h"MSVC only.
Quote
using namespace std;Dirty, not good practice.
Quote
NULLNot standard C++ but MSVC++. Use the literal "0" instead.
Quote
Quote
#include <cassert> //! assert.h is for C
-
Currently working on: the 3D engine for Tomb Raider.
1 user(s) are reading this topic
0 members, 1 guests, 0 anonymous users












