Jump to content


fast math expression parser


28 replies to this topic

#1 v71

    Valued Member

  • Members
  • PipPipPipPip
  • 357 posts

Posted 15 October 2009 - 02:00 PM

This is a code i wrote when i had inspiration from a recent topic about designing languages.
Its a math parser, i am working for adding support for math function and maybe in the future
more advanced like integration and differentiation but this goes outside my main game development interest :P
I would love to know how and if you are using in your pojects, there shouldn't be any bugs ,
i spent much time working on error detection but something may always slip from hands.

[code=cpp]


#include "stdafx.h"
#include <string>
#include <iostream>
#include <fstream>
#include <cstdarg>
#include <assert.h>

using namespace std;

/////////////////////////////////////////////////////////////////////////

#define MAXIDSIZE 256

enum EToken
{
TEnd,
TValue,
TPlus,
TMinus,
TMul,
TDiv,
TMod,
TExp,
TLparen,
TRparen,
TAssign,
TIdent
};

enum EError
{
ETextExpected,
EUnknownSymbol,
EIncompleteExpression,
EExpectedIdentifierOrNumeric,
EExpectedParenBalance,
EExpectedIdentifier,
EExpcetedNumeric,
EExpectedLParen,
EExpectedRParen,
EIdentifierTooLong
};

//////////////////////////////////////////////////////////////////////////

class CExprParser
{
char *Look;
double Value;
EToken Token;
char Identifier[MAXIDSIZE];

bool EatSymbol( void )
{

int Lenght=0;

memset( Identifier,0,sizeof(char)*MAXIDSIZE );

if ( isalpha( *Look ) || *Look=='_' )
{

Token=TIdent;

do
{
Identifier[ Lenght++ ]=*Look;

if ( Lenght>=MAXIDSIZE)
return Error( EIdentifierTooLong );

Look++;
}
while ( isalnum( *Look ) || *Look =='_' );

}
else
{
return false;
}

return true;
};

bool GetNextToken( void )
{

if ( *Look==NULL )
{
Token=TEnd;
return true; }

// Skip over white space.

while ( isspace( *Look ) )
Look++;

switch ( *Look )
{
// check for delimiter

case '+' : *Look++ ; Token= TPlus; break;
case '-' : *Look++ ; Token= TMinus; break;
case '*' : *Look++ ; Token= TMul; break;
case '/' : *Look++ ; Token= TDiv; break;
case '=' : *Look++ ; Token= TAssign; break;
case '(' : *Look++ ; Token= TLparen; break;
case ')' : *Look++ ; Token= TRparen; break;
case '%' : *Look++ ; Token= TMod; break;
case '^' : *Look++ ; Token= TExp; break;

// check for numeric

case '0' :
case '1' :
case '2' :
case '3' :
case '4' :
case '5' :
case '6' :
case '7' :
case '8' :
case '9' :
case '.' :
//////////////////////////////////////////
// eat numeric value

char *p;
Value=strtod( Look,&p );
Look=p;
Token=TValue;

break;

// end of text

case '\0' : Token=TEnd; break;

// check for symbolic

default :

if ( !EatSymbol() )
return Error( EUnknownSymbol );

Token=TIdent ;

break;
}

return true;
}

bool Error( int err )
{
switch ( err )
{
case ETextExpected: ErrorString="Error: No Text\n"; break;
case EIncompleteExpression: ErrorString="Error: Syntax error\n"; break;
case EUnknownSymbol: ErrorString="Error: Unknown identifier\n" ; break;
case EExpectedIdentifierOrNumeric: ErrorString="Expected Identifier or numeric\n"; break;
case EExpectedIdentifier: ErrorString="Expected Identifier\n"; break;
case EExpcetedNumeric: ErrorString="Expected numeric\n"; break;
case EExpectedLParen: ErrorString="Error: expected '('"; break;
case EExpectedRParen: ErrorString="Error: expected ')'"; break;
case EExpectedParenBalance: ErrorString="Error: Parenthesys unbalanced\n"; break;
case EIdentifierTooLong : ErrorString="Error: Identifier too long\n"; break;
};
return false;
}

bool VerifyParenthesysBalance( void )
{
int count=0;

char *ptr;

ptr=Look;

do
{
if ( *ptr=='(' ) count++;
else
if ( *ptr==')' ) count--;

ptr++;
}
while ( *ptr!=0 ) ;

return ( count==0 ) ;
}

bool Factor ( double &c )
{
if ( Token==TValue )
{
c=Value;
}
else if ( Token==TMinus )
{
if ( GetNextToken()==false )
return false;

if ( Token==TValue )
c=-Value;

else if ( Token==TLparen )
{
double b;

if ( GetNextToken()==false )
return false;

Expr( b );

c=-b;

if ( Token!=TRparen && Token!=TEnd )
return Error( EExpectedRParen );

}
else
{
return Error( EIncompleteExpression );
}
}
else if ( Token==TPlus )
{
if ( GetNextToken()==false )
return false;

if ( Token==TValue )
c=Value;

else if ( Token==TLparen )
{
double b;

if ( GetNextToken()==false )
return false;

Expr( b );

c=b;

if ( Token!=TRparen && Token!=TEnd )
return Error( EExpectedRParen );

}
else
{
return Error( EIncompleteExpression );
}

}
else if ( Token==TLparen )
{
double b;

if ( GetNextToken()==false )
return false;

Expr( b );

c=b;

if ( Token!=TRparen && Token!=TEnd )
return Error( EExpectedRParen );

}
else if ( Token==TRparen )
{
// () condition, no value returned
c=0.0f;
return true;
}
else if ( Token==TEnd )
{
return Error( EIncompleteExpression );
}

else
{
return Error( EUnknownSymbol );
}

return true;
}

bool Term ( double &c )
{
double b;

if ( Factor( c )==false )
return false;

if ( GetNextToken()==false )
return false;

while ( Token==TMul || Token==TDiv )
{

if ( Token==TMul )
{
if ( GetNextToken()==false )
return false;

if ( Factor ( b )==false )
return false;

c*=b;
}
else if ( Token==TDiv )
{
if ( GetNextToken()==false )
return false;

if ( Factor ( b )==false )
return false;

c/=b;

}

if ( GetNextToken()==false )
return false;
}

return true;
}

bool Expr( double &c )
{

double b;

if ( Term ( c )==false )
return false;

while ( Token==TPlus || Token==TMinus )
{

if ( Token == TPlus )
{
if ( GetNextToken()==false )
return false;

if ( Term( b )==false )
return false;

c+=b;
}
else if ( Token == TMinus )
{
if ( GetNextToken()==false )
return false;

if ( Term( b )==false )
return false;

c-=b;
}

if ( Token!=TPlus &&
Token!=TMinus &&
Token!=TRparen &&
Token!=TEnd )
{
return Error( EIncompleteExpression );
}
}

return true;
}

public :

bool Parse( char *text )
{
if ( text==NULL )
return Error( ETextExpected ) ;

double result;

Look=text;

Token=TEnd;

Value=0.0f;

if ( VerifyParenthesysBalance()==false )
return Error(EExpectedParenBalance);

GetNextToken();

Expr( result );

cout << result << endl;

return true;

}

//////////////////////////////////

string ErrorString;

CExprParser()
{
Value=0.0f;
Look=NULL;
};

~CExprParser()
{
};

};


//////////////////////////////////////////////////////////////////////////

int _tmain( )
{

string text=" 2 + 3 - 2*( 3+2 )*4 ";

cout << "Parsing : " << text << endl << endl;

CExprParser parser;

parser.Parse( &text[0] );

cout << parser.ErrorString << endl;

cin.get();

return 0;
}

[/code]

#2 .oisyn

    DevMaster Staff

  • Moderators
  • 1842 posts

Posted 16 October 2009 - 08:51 AM

Nice, a recursive descent parser. It could be simpler though, by using Dijkstra's Shunting Yard algorithm.

Btw, it's parantheses, not paranthesys :)
C++ addict
-
Currently working on: the 3D engine for Tomb Raider.

#3 v71

    Valued Member

  • Members
  • PipPipPipPip
  • 357 posts

Posted 16 October 2009 - 09:56 AM

Sorry english is not my native language

#4 imerso

    Senior Member

  • Members
  • PipPipPipPip
  • 433 posts
  • LocationBrasil

Posted 16 October 2009 - 09:33 PM

Oh interesting, I wrote an expression parser only a month ago. I implemented the Reverse Polish Notation algorithm, though. It was used on a proprietary bytecode language for a specific system I'm involved (I got to develop the custom language...).

#5 v71

    Valued Member

  • Members
  • PipPipPipPip
  • 357 posts

Posted 16 October 2009 - 09:58 PM

...i was expecting harsh comments...

#6 starstutter

    Senior Member

  • Members
  • PipPipPipPip
  • 1039 posts

Posted 17 October 2009 - 09:47 PM

oh wow, this could actually be very useful for something like an in-game console. Nice work.

Quote

...i was expecting harsh comments...
Are you disappointed? :P
(\__/)
(='.'=)
This is Bunny. Copy and paste bunny into
(")_(") your signature to help him gain world domination.
bunny also wants to fight spam: Click Here Bots!

#7 v71

    Valued Member

  • Members
  • PipPipPipPip
  • 357 posts

Posted 18 October 2009 - 08:45 AM

i forget that i am not anymore in gamedev comunity :P

#8 starstutter

    Senior Member

  • Members
  • PipPipPipPip
  • 1039 posts

Posted 18 October 2009 - 03:24 PM

v71 said:

i forget that i am not anymore in gamedev comunity :P
yep, that's what I thought XD
Do a good job there = flamed
(\__/)
(='.'=)
This is Bunny. Copy and paste bunny into
(")_(") your signature to help him gain world domination.
bunny also wants to fight spam: Click Here Bots!

#9 rouncer

    Senior Member

  • Members
  • PipPipPipPip
  • 2758 posts

Posted 19 October 2009 - 02:23 AM

This is also good if you want to use interpreted ai in your game, with real arithmetic calculations interpreted on the fly...
you used to be able to fit a game on a disk, then you used to be able to fit a game on a cd, then you used to be able to fit a game on a dvd, now you can barely fit one on your harddrive.

#10 TheNut

    Senior Member

  • Moderators
  • 1718 posts
  • LocationCyberspace

Posted 19 October 2009 - 02:59 AM

Cool stuff, I thought about doing something like this a while back (though more like using someone else's library ;)

For benchmarking purposes, here are my results:

Core 2 Duo 1.8GHz, optimized release build VS 2008

100000 Tests.

Parsing : 2 + 3 - 2 * (3 + 2) * 4 = -35
Test 1: 250 milliseconds. Result = -35
Test 2: 266 milliseconds. Result = -35
Test 3: 265 milliseconds. Result = -35
Test 4: 250 milliseconds. Result = -35
Test 5: 250 milliseconds. Result = -35

Parsing : ((1 + (2 / 3)) * 0.5) + 0.1666666 = 1
Test 1: 312 milliseconds. Result = 1
Test 2: 313 milliseconds. Result = 1
Test 3: 297 milliseconds. Result = 1
Test 4: 297 milliseconds. Result = 1
Test 5: 313 milliseconds. Result = 1
http://www.nutty.ca - Being a nut has its advantages.

#11 v71

    Valued Member

  • Members
  • PipPipPipPip
  • 357 posts

Posted 19 October 2009 - 07:37 AM

It could be speeded up a little putting the term function inside the expr function, but it would be a nightmare for readability.

#12 TheNut

    Senior Member

  • Moderators
  • 1718 posts
  • LocationCyberspace

Posted 19 October 2009 - 10:18 AM

Readability and performance usually don't see eye to eye ;) But I think these results are not bad considering the flexibility here, which is more important.
http://www.nutty.ca - Being a nut has its advantages.

#13 Wernaeh

    Senior Member

  • Members
  • PipPipPipPip
  • 368 posts

Posted 19 October 2009 - 10:38 AM

Quote

but it would be a nightmare for readability

If you really want to see a nightmare for readability, try adding a manual follow-set based error recovery to your expression parser. I had to do this for an university assignment once. It was not nice.

Good work with the parser, though :)
Guess this is a piece of code one can reuse over and over again.
Cheers,
- Wernaeh
Some call me mathematician, some just call me computer guy. Yet, I prefer the term professional weirdo :)

#14 phresnel

    Member

  • Members
  • PipPip
  • 47 posts

Posted 22 October 2009 - 03:01 PM

starstutter said:

yep, that's what I thought XD
Do a good job there = flamed

Uhm, if I look at http://www.gamedev.n...trictTo=Replies, then three years later http://www.gamedev.n...trictTo=Replies under ban evasion, then I wonder what good stuff that would be in that case.

Anyways, Vincenzo, so far that's good work.

Let me analyze your code syntactically/cosmetically only for now:

#include "stdafx.h"
MSVC only.


using namespace std;
Dirty, not good practice.


Lenght
Typo: "Length".


#define		MAXIDSIZE		256
Hardcoded maximum size. Not good practice, error prone.


bool EatSymbol( void )
Putting "void" there is C practice, but is uncommon C++ practice.


if ( GetNextToken()==false )
Explitly comparing against boolean literals is uncommon and redundant. Just write "if (!GetNextToken())", which many programmers will also find more readable.


NULL
Not standard C++ but MSVC++. Use the literal "0" instead.


CExprParser()
	{
		Value=0.0f;
		Look=NULL;	
	};
You might want to initialize instead of assign to your members: "CExprParser() : Value(0.0f), Look(0)".

bool Parse( char *text )
If you don't plan to modify the passed string, then use "const char *text". See also http://www.parashift...orrectness.html about const correct code.

int _tmain( )
In standard C++, we say "int main ()", "_tmain()" is MSVC++ again.


parser.Parse( &text[0] );
Undefined behaviour.

Quote

21.3.4 basic_string element access [lib.string.access]
const_reference operator[](size_type pos) const;
reference operator[](size_type pos);
1 Returns: If pos < size(), returns data()[pos]. Otherwise, if pos == size(), the const version returns charT(). Otherwise, the behavior is undefined.


Better use text.c_str():

Quote

21.3.6 basic_string string operations [lib.string.ops]
const charT* c_str() const;
1 Returns: A pointer to the initial element of an array of length size() + 1 whose first size() elements equal the corresponding elements of the string controlled by *this and whose last element is a null character specified by charT().

The internal representation of std::string that you are dissecting does not have a \0 (null character).


cin.get();
What if your program is run in non-interactive mode?


edit: I forgot about your list of #includes:
//#include "stdafx.h" //! why?
#include <string>
#include <iostream>
//#include <fstream> //! why?
//#include <cstdarg> //! why?
#include <cassert> //! assert.h is for C
#include <cstring> //! was missing, needed for memset()
#include <cstdlib> //! was missing, needed for strtod()


edit: Another issue:
c/=b;
Here you should catch a division by zero before it happens, otherwise your program yields undefined behaviour (cf. ISO/IEC 14882:2003(E), Section 5 Expressions:

Quote

If during the evaluation of an expression, the result is not mathematically defined or not in the range of representable values for its type, the behavior is undefined
).

~/www/ | picogen.org | metatrace
- --- / .-.. . - / -- -.-- / ... .. --. -. .- - ..- .-. . / .- .--. .--. . .- .-. / --. . . -.- -.--


#15 Mihail121

    Senior Member

  • Members
  • PipPipPipPip
  • 1059 posts

Posted 22 October 2009 - 03:51 PM

phresnel said:

...

Thanks for the great introduction to the C++ standard, freely available to all interested, but I hope you are aware of the difference between prototyping something fast and getting a stable, clear code to the market for cash that adheres to the specs and would justify the countless hours of reading the standard. Moreover there are tools for that... I actually can't believe you waste as much time writing here than he probably needed to develop the code, which is not that bad at all. Never used includes? Who cares...

#16 phresnel

    Member

  • Members
  • PipPip
  • 47 posts

Posted 22 October 2009 - 04:14 PM

Mihail121 said:

Thanks for the great introduction to the C++ standard, but I hope you are aware of the difference between prototyping something fast and releasing a stable, clear code that adheres to the specification and follows design guidelines.

I posted a more-than-1-line-critique for a buggy program, and tried to give some hints to make it less bug-full and well-formed. I think it actually helps the OP, because he stated that he thinks that

Quote

there shouldn't be any bugs
.


Quote

Moreover there are tools for that...
Tools for what? I wrote more than one point.

Anyways: What you say is "learn unportable c++ that yields undefined behaviour and trust 3rd parties to fix that for you" :worthy:


Quote

I actually can't believe you waste as much time writing here than he probably needed to develop the code, which is not that bad at all. Not used includes? Who cares...
Not so much, just 25 Minutes (=roughly 0.00006 % percent of an average 73 year human life), which I hope will help v71 to learn C++ better and improve his program. What is wrong with investing some spare time to write a proper post?

See also starstutters second post which might be interesting.

~/www/ | picogen.org | metatrace
- --- / .-.. . - / -- -.-- / ... .. --. -. .- - ..- .-. . / .- .--. .--. . .- .-. / --. . . -.- -.--


#17 Mihail121

    Senior Member

  • Members
  • PipPipPipPip
  • 1059 posts

Posted 22 October 2009 - 04:19 PM

phresnel said:

....

Not flamming you for anything, it's evident you are proficient with the specification, it's completely fine to provide help, it's fine to reveal bugs, it's just that my personal belief that the point of the post is to show a quick parser and nothing more. But I'm just being ignorant, most honestly your post was fully ok, sorry. Let's not start an off-topic war.

P.S.

Quote

What you say is "learn unportable c++ that yields undefined behaviour and trust 3rd parties to fix that for you"

No, that's not what I've said, contact me on ICQ\Skype and I'll explain it in German :)

#18 phresnel

    Member

  • Members
  • PipPip
  • 47 posts

Posted 22 October 2009 - 04:55 PM

Mihail121 said:

Not flamming you for anything, it's evident you are proficient with the specification, it's completely fine to provide help, it's fine to reveal bugs, it's just that my personal belief that the point of the post is to show a quick parser and nothing more. But I'm just being ignorant, most honestly your post was fully ok, sorry.

If my first post was a tad too offensive in some point, then I am sorry, that was not my intention. But yes, I am guilty of prosecuting not to write unportable code, and probably go into too much detail sometimes.

On the other hand, I also highly welcome other peoples critique on me or my stuff, e.g. once I release picogen 0.3, I'd be happy if people point out bugs or otherwise bad/dumb code that I did not recognize. Because once written, the probability is pretty high that I'll never fix some subtleties that really needed to be.


Quote

Let's not start an off-topic war.
Amen :D

~/www/ | picogen.org | metatrace
- --- / .-.. . - / -- -.-- / ... .. --. -. .- - ..- .-. . / .- .--. .--. . .- .-. / --. . . -.- -.--


#19 v71

    Valued Member

  • Members
  • PipPipPipPip
  • 357 posts

Posted 22 October 2009 - 04:59 PM

Thanks for your dissection, i will work to fix those 'bugs'
why posting a 3 years old rants from gamedev ? how did you know my name ?

why is using namespace std , dirty ?????

#20 .oisyn

    DevMaster Staff

  • Moderators
  • 1842 posts

Posted 22 October 2009 - 06:37 PM

phresnel said:

#include "stdafx.h"
MSVC only.
Nonsense. Naming a header 'stdafx.h' is by no means MSVC only. There is no standard stdafx.h header in MSVC++. It's a user-defined header, usually containing precompiled stuff. Probably just a result of copypasting his code out of his project. There's nothing wrong with it, and there's nothing unportable about it.

Quote

using namespace std;
Dirty, not good practice.
In a header, yes. In a sourcefile everybody should decide for himself. All names in de std namespace are defined by the standard. Other symbols in the std namespace provided by the implementation should follow reserved names rules (start with a underscore followed by a capital letter, or contain double underscores), so accidental name clashes shouldn't happen.

Quote

NULL
Not standard C++ but MSVC++. Use the literal "0" instead.
Again, nonsense. According to ISO/IEC 14882:2003, paragraph C.2.2.3/1:

Quote

The macro NULL, defined in any of <clocale>, <cstddef>, <cstdio>, <cstdlib>, <cstring>, <ctime>, or <cwchar>, is an implementation-defined C++ null pointer constant in this International Standard (18.1).

Quote

#include <cassert> //! assert.h is for C
Oh, then what is the C++ counterpart of assert()?
C++ addict
-
Currently working on: the 3D engine for Tomb Raider.





1 user(s) are reading this topic

0 members, 1 guests, 0 anonymous users