2019-08-28 21:24:54 +03:00
#!/usr/bin/python3
# Slang Tokens
from utils import *
class Keyword ( str ) : pass
class ExprKeyword ( Keyword ) : pass
2020-03-18 07:00:12 +03:00
class DefKeyword ( Keyword ) : pass
class DefNamedKeyword ( Keyword ) : pass
class DefArgsKeyword ( Keyword ) : pass
class ClassdefKeyword ( DefKeyword ) : pass # TODO validate!
class ClassdefNamedKeyword ( ClassdefKeyword , DefNamedKeyword ) : pass
class ClassdefArgsKeyword ( ClassdefKeyword , DefArgsKeyword ) : pass
2019-08-28 21:24:54 +03:00
class Modifier ( Keyword ) : pass
class ReservedKeyword ( Keyword ) : pass
class Operator ( str ) : pass
2024-01-11 02:55:07 +03:00
#def __eq__(self, x):
# if (isinstance(x, Operator)): return (type(x) is type(self) and x == self)
# return super().__eq__(x)
2019-08-28 21:24:54 +03:00
class UnaryOperator ( Operator ) : pass
class BinaryOperator ( Operator ) : pass
2020-03-18 07:00:12 +03:00
class BothOperator ( UnaryOperator , BinaryOperator ) : pass
2019-08-28 21:24:54 +03:00
keywords = (
Keyword ( ' if ' ) ,
Keyword ( ' for ' ) ,
Keyword ( ' in ' ) ,
Keyword ( ' while ' ) ,
Keyword ( ' else ' ) ,
2020-03-18 07:00:12 +03:00
Keyword ( ' class ' ) ,
2019-08-28 21:24:54 +03:00
ExprKeyword ( ' return ' ) ,
ExprKeyword ( ' break ' ) ,
ExprKeyword ( ' continue ' ) ,
ExprKeyword ( ' import ' ) ,
2020-03-18 07:00:12 +03:00
ExprKeyword ( ' delete ' ) ,
ExprKeyword ( ' assert ' ) ,
ExprKeyword ( ' breakpoint ' ) ,
DefKeyword ( ' main ' ) ,
DefKeyword ( ' exit ' ) ,
ClassdefKeyword ( ' init ' ) ,
ClassdefArgsKeyword ( ' constr ' ) ,
ClassdefNamedKeyword ( ' property ' ) ,
ClassdefKeyword ( ' repr ' ) ,
ClassdefKeyword ( ' eq ' ) ,
2019-08-28 21:24:54 +03:00
Modifier ( ' const ' ) ,
2020-03-18 07:00:12 +03:00
Modifier ( ' static ' ) ,
2019-08-28 21:24:54 +03:00
Modifier ( ' volatile ' ) ,
ReservedKeyword ( ' def ' ) ,
2020-03-18 07:00:12 +03:00
ReservedKeyword ( ' try ' ) ,
ReservedKeyword ( ' catch ' ) ,
ReservedKeyword ( ' except ' ) ,
ReservedKeyword ( ' finally ' ) ,
ReservedKeyword ( ' raise ' ) ,
ReservedKeyword ( ' with ' ) ,
ReservedKeyword ( ' yield ' ) ,
2024-01-11 02:55:07 +03:00
ReservedKeyword ( ' include ' ) ,
ReservedKeyword ( ' using ' ) ,
ReservedKeyword ( ' default ' ) ,
2019-08-28 21:24:54 +03:00
)
2020-03-18 07:00:12 +03:00
operators = ( * ( tuple ( map ( * i ) ) for i in ( # ordered by priority
2024-01-11 02:55:07 +03:00
( UnaryOperator , ( * ' !+-~ ' , ' ++ ' , ' -- ' , ' ** ' ) ) ,
2019-08-28 21:24:54 +03:00
( BinaryOperator , ( ' ** ' , ) ) ,
( BinaryOperator , ( ' // ' , * ' */ % ' ) ) ,
( BinaryOperator , ' +- ' ) ,
( BinaryOperator , ( ' << ' , ' >> ' ) ) ,
( BinaryOperator , ' & ' ) ,
( BinaryOperator , ' ^ ' ) ,
( BinaryOperator , ' | ' ) ,
2020-03-18 07:00:12 +03:00
( BinaryOperator , ( ' < ' , ' <= ' , ' > ' , ' >= ' , ' == ' , ' != ' , ' is ' , ' is not ' , ' in ' , ' not in ' , ' isof ' ) ) ,
2019-08-28 21:24:54 +03:00
( UnaryOperator , ( ' not ' , ) ) ,
2020-03-18 07:00:12 +03:00
( BinaryOperator , ( ' && ' , ' and ' , ' but ' ) ) ,
2019-08-28 21:24:54 +03:00
( BinaryOperator , ( ' ^^ ' , ' xor ' ) ) ,
( BinaryOperator , ( ' || ' , ' or ' ) ) ,
( BinaryOperator , ( ' to ' , ) ) ,
) ) , )
2024-01-11 02:55:07 +03:00
unaryops = ( ' ++ ' , ' -- ' , ' ** ' )
bothoperators = ( * ' +- ' , * unaryops )
2019-08-28 21:24:54 +03:00
attrops = ( ' -> ' , ' @. ' , * ' @.: ' )
2020-03-18 07:00:12 +03:00
logical_operators = ( ' is ' , ' is not ' , ' in ' , ' not in ' , ' not ' , ' and ' , ' but ' , ' xor ' , ' or ' , ' isof ' )
2019-08-28 21:24:54 +03:00
whitespace = ' \t \r \v \f '
2020-03-18 07:00:12 +03:00
specials = ( ' .. ' , ' := ' , * attrops , * r ' # \ ,;?=()[] {} ' )
2019-08-28 21:24:54 +03:00
def find_identifier ( s ) :
if ( not s or not s [ 0 ] . isidentifier ( ) ) : return
i = 1
for i in range ( 1 , len ( s ) ) :
if ( not s [ i ] . isalnum ( ) and s [ i ] != ' _ ' ) : break
else : i + = 1
if ( s [ : i ] . isidentifier ( ) ) : return i
2024-01-11 02:55:07 +03:00
return ( 0 , i )
2019-08-28 21:24:54 +03:00
def find_keyword ( s ) :
if ( not s ) : return
for i in keywords :
if ( s . startswith ( i ) ) :
l = len ( i )
2020-03-18 07:00:12 +03:00
if ( not s [ l : l + 1 ] . isidentifier ( ) ) : return ( l , i )
2019-08-28 21:24:54 +03:00
def find_literal ( s ) :
if ( not s ) : return
if ( s [ 0 ] in ' " \' ' ) :
esc = bool ( )
for i in range ( 1 , len ( s ) ) :
if ( esc ) : esc = False ; continue
if ( s [ i ] == ' \\ ' ) : esc = True ; continue
2024-01-11 02:55:07 +03:00
if ( s [ i ] == ' \n ' ) : break
2019-08-28 21:24:54 +03:00
if ( s [ i ] == s [ 0 ] ) : return i + 1
2024-01-11 02:55:07 +03:00
return ( 0 , i )
2019-08-28 21:24:54 +03:00
if ( s [ 0 ] . isdigit ( ) or s [ 0 ] == ' . ' ) :
i = int ( )
digits = ' 0123456789abcdef '
radix = 10
digit = True
2020-03-18 07:00:12 +03:00
dp = ( s [ 0 ] == ' . ' )
2019-08-28 21:24:54 +03:00
for i in range ( 1 , len ( s ) ) :
if ( i == 1 and s [ 0 ] == ' 0 ' ) :
if ( s [ 1 ] not in ' box ' ) :
2024-01-11 02:55:07 +03:00
if ( s [ 1 ] . isalnum ( ) ) : break
2019-08-28 21:24:54 +03:00
return 1
else :
radix = ( 2 , 8 , 16 ) [ ' box ' . index ( s [ 1 ] ) ]
digit = False
continue
if ( s [ i ] . casefold ( ) not in digits [ : radix ] ) :
if ( s [ i ] == ' _ ' ) : continue
if ( s [ i ] == ' . ' and not dp ) : dp = True ; continue
2024-01-11 02:55:07 +03:00
if ( not digit or s [ i ] . isalnum ( ) ) : break
2019-08-28 21:24:54 +03:00
return i
digit = True
2024-01-11 02:55:07 +03:00
else :
if ( s [ i ] . casefold ( ) in digits [ : radix ] or s [ i ] == ' . ' and dp ) : return i + 1
c = i
while ( i < len ( s ) and ( s [ i ] . isalnum ( ) or s [ i ] == ' . ' ) ) : i + = 1
return ( - i + 1 , c )
2019-08-28 21:24:54 +03:00
def find_operator ( s ) :
if ( not s ) : return
for i in sorted ( itertools . chain ( * operators ) , key = len , reverse = True ) :
if ( s . startswith ( i ) ) :
l = len ( i )
2024-01-11 02:55:07 +03:00
if ( not ( i [ - 1 ] . isalpha ( ) and s [ l : l + 1 ] . isidentifier ( ) ) ) : return ( l , BothOperator ( i ) if ( i in bothoperators ) else i )
2019-08-28 21:24:54 +03:00
def find_special ( s ) :
if ( not s ) : return
if ( s [ 0 ] == ' . ' and s [ 1 : 2 ] . isdigit ( ) ) : return
if ( s [ : 2 ] == ' #| ' ) :
2024-01-11 02:55:07 +03:00
o = 2
l = 1
while ( s [ o : ] and l > 0 ) :
#if (s[o:o+3] in ('#|#', '|#|')): o += 2; break
if ( s [ o : o + 2 ] == ' #| ' ) : l + = 1 ; o + = 2
elif ( s [ o : o + 2 ] == ' |# ' ) : l - = 1 ; o + = 2
else : o + = 1
if ( l > 0 ) : return ( 0 , o )
return o
2019-08-28 21:24:54 +03:00
if ( s [ 0 ] == ' # ' ) :
2024-01-11 02:55:07 +03:00
s = s . partition ( ' \n ' ) [ 0 ]
return len ( s ) if ( s [ 1 : ] and not s [ 1 : ] . isspace ( ) ) else ( 0 , 1 )
2020-03-18 07:00:12 +03:00
if ( s [ 0 ] == ' \\ ' ) : return 1
2019-08-28 21:24:54 +03:00
for i in sorted ( specials , key = len , reverse = True ) :
if ( s [ : len ( i ) ] == i ) :
if ( i == ' = ' and s [ : len ( i ) + 1 ] == ' == ' ) : break
return len ( i )
def operator_precedence ( op ) :
for ii , i in enumerate ( operators ) :
2024-01-11 02:55:07 +03:00
if ( any ( op == j and type ( op ) is type ( j ) for j in i ) ) : return ii
else : raise WTFException ( op )
2019-08-28 21:24:54 +03:00
2024-01-11 02:55:07 +03:00
class Token ( Slots ) :
types = ( ' SPECIAL ' , ' OPERATOR ' , ' LITERAL ' , ' KEYWORD ' , ' IDENTIFIER ' ) # order in tuple is resolution order
type : . . .
token : . . .
lineno : . . .
offset : . . .
2019-08-28 21:24:54 +03:00
def __init__ ( self , type , token , * , lineno , offset ) :
self . type , self . token , self . lineno , self . offset = type , token , lineno , offset
def __repr__ ( self ) :
return f " <Token { self . types [ self . type ] } « { repr ( self . token ) [ 1 : - 1 ] } » at line { self . lineno } , offset { self . offset } > "
def __eq__ ( self , x ) :
2024-01-11 02:55:07 +03:00
return ( self . token == x )
2019-08-28 21:24:54 +03:00
@property
def typename ( self ) :
return self . types [ self . type ]
@property
def length ( self ) :
return len ( self . token )
2024-01-11 02:55:07 +03:00
class SlSyntaxException ( Exception , Slots ) : pass
2019-08-28 21:24:54 +03:00
class SlSyntaxNoToken ( SlSyntaxException ) : pass
class SlSyntaxEmpty ( SlSyntaxNoToken ) : pass
class SlSyntaxError ( SlSyntaxException ) :
2024-01-11 02:55:07 +03:00
desc : . . .
srclines : . . .
lineno : . . .
offset : . . .
length : . . .
char : . . .
usage : None
2019-08-28 21:24:54 +03:00
2024-01-11 02:55:07 +03:00
def __init__ ( self , desc = ' Syntax error ' , srclines = ( ) , * , lineno , offset , length , char = 0 , scope = None ) :
self . desc , self . srclines , self . lineno , self . offset , self . length , self . char = ( f ' \033 [2m(in { scope } ) \033 [0m ' if ( scope is not None ) else ' ' ) + desc , srclines , lineno , offset , length , char
2019-08-28 21:24:54 +03:00
def __str__ ( self ) :
2024-01-11 02:55:07 +03:00
l , line = lstripcount ( self . srclines [ self . lineno - 1 ] . partition ( ' \n ' ) [ 0 ] ) if ( self . srclines ) else ( 0 , ' ' )
2020-03-18 07:00:12 +03:00
offset = ( self . offset - l ) if ( self . offset > = 0 ) else ( len ( line ) + self . offset + 1 )
2024-01-11 02:55:07 +03:00
return f " { self . desc } { self . at } " + ( ' : \n ' + \
' ' * 2 + ' \033 [1m ' + line [ : offset ] + ' \033 [91m ' * ( self . offset > = 0 ) + line [ offset : ] + ' \033 [0m \n ' + \
' ' * ( 2 + offset ) + ' \033 [95m ' + ' ~ ' * self . char + ' ^ ' + ' ~ ' * ( self . length - 1 - self . char ) + ' \033 [0m ' if ( line ) else ' ' ) + \
( f " \n \n \033 [1;95mCaused by: \033 [0m \n { self . __cause__ if ( isinstance ( self . __cause__ , ( SlSyntaxError , SlValidationError , SlCompilationError ) ) ) else ' ' + str ( ) . join ( traceback . format_exception ( type ( self . __cause__ ) , self . __cause__ , self . __cause__ . __traceback__ ) ) } " if ( self . __cause__ is not None ) else ' ' )
2019-08-28 21:24:54 +03:00
@property
def at ( self ) :
2024-01-11 02:55:07 +03:00
return f " at line { self . lineno } , offset { self . offset } " if ( self . offset > = 0 ) else f " at the end of line { self . lineno } "
2019-08-28 21:24:54 +03:00
class SlSyntaxExpectedError ( SlSyntaxError ) :
2024-01-11 02:55:07 +03:00
expected : . . .
found : . . .
2019-08-28 21:24:54 +03:00
def __init__ ( self , expected = ' nothing ' , found = ' nothing ' , * , lineno = None , offset = None , length = 0 , scope = None ) :
2020-03-18 07:00:12 +03:00
assert ( expected != found )
2019-08-28 21:24:54 +03:00
if ( not isinstance ( found , str ) ) : lineno , offset , length , found = found . lineno , found . offset , found . length , found . typename if ( hasattr ( found , ' typename ' ) ) else found
2020-03-18 07:00:12 +03:00
assert ( lineno is not None and offset is not None )
2019-08-28 21:24:54 +03:00
super ( ) . __init__ ( f " Expected { expected . lower ( ) } , \n { ' ' * ( len ( scope ) + 6 if ( scope is not None ) else 3 ) } found { found . lower ( ) } " , lineno = lineno , offset = offset , length = length , scope = scope )
self . expected , self . found = expected , found
class SlSyntaxExpectedNothingError ( SlSyntaxExpectedError ) :
2020-03-18 07:00:12 +03:00
def __init__ ( self , found , * * kwargs ) :
2019-08-28 21:24:54 +03:00
super ( ) . __init__ ( found = found , * * kwargs )
2020-03-18 07:00:12 +03:00
class SlSyntaxExpectedMoreTokensError ( SlSyntaxExpectedError ) :
def __init__ ( self , for_ , * , offset = - 1 , * * kwargs ) :
assert ( offset < 0 )
super ( ) . __init__ ( expected = f " More tokens for { for_ } " , offset = offset , * * kwargs )
2019-08-28 21:24:54 +03:00
class SlSyntaxMultiExpectedError ( SlSyntaxExpectedError ) :
2024-01-11 02:55:07 +03:00
sl : . . .
errlist : . . .
2019-08-28 21:24:54 +03:00
2020-03-18 07:00:12 +03:00
def __init__ ( self , expected , found , * , scope = None , errlist = None , * * kwargs ) :
self . errlist = errlist
2019-08-28 21:24:54 +03:00
self . sl = len ( scope ) + 6 if ( scope is not None ) else 0
2020-03-18 07:00:12 +03:00
super ( ) . __init__ (
2024-01-11 02:55:07 +03:00
expected = S ( ' , \n ' + ' ' * ( self . sl + 9 ) ) . join ( Stuple ( ( i . expected + f " at { f ' offset { i . offset } ' if ( i . offset > = 0 ) else ' the end of line ' } " if ( not isinstance ( i , SlSyntaxMultiExpectedError ) ) else i . expected ) + ( f ' (for { i . usage } ) ' if ( i . usage ) else ' ' ) for i in expected ) . strip ( ' nothing ' ) . uniquize ( str . casefold ) , last = ' , \n ' + ' ' * ( self . sl + 6 ) + ' or ' ) or ' nothing ' ,
found = S ( ' , \n ' + ' ' * ( self . sl + 6 ) ) . join ( Stuple ( i . found + f " at { f ' offset { i . offset } ' if ( i . offset > = 0 ) else ' the end of line ' } " if ( not isinstance ( i , SlSyntaxMultiExpectedError ) ) else i . expected for i in found ) . strip ( ' nothing ' ) . uniquize ( str . casefold ) , last = ' , \n ' + ' ' * ( self . sl + 2 ) + ' and ' ) or ' nothing ' ,
#{i.offset+1 if (i.offset < -1) else ''}
2020-03-18 07:00:12 +03:00
scope = scope ,
* * kwargs
)
2019-08-28 21:24:54 +03:00
@property
def at ( self ) :
return f " \n { ' ' * self . sl } at line { self . lineno } "
2020-03-18 07:00:12 +03:00
@classmethod
def from_list ( cls , err , scope = None , * * kwargs ) :
sl = len ( scope ) + 6 if ( scope is not None ) else 0
for i in err :
if ( isinstance ( i , SlSyntaxMultiExpectedError ) ) :
loff = 0
i . expected = ' ' * loff + S ( i . expected ) . indent ( sl + loff ) . lstrip ( )
i . found = ' ' * loff + S ( i . found ) . indent ( sl + loff ) . lstrip ( )
2024-01-11 02:55:07 +03:00
lineno = max ( err , key = operator . attrgetter ( ' lineno ' ) ) . lineno
2020-03-18 07:00:12 +03:00
return cls (
expected = sorted ( err , key = operator . attrgetter ( ' expected ' ) , reverse = True ) ,
found = sorted ( err , key = operator . attrgetter ( ' offset ' ) ) ,
2024-01-11 02:55:07 +03:00
lineno = lineno ,
offset = max ( ( i for i in err if i . lineno == lineno ) , key = lambda x : x . offset if ( x . offset > = 0 ) else inf ) . offset ,
2020-03-18 07:00:12 +03:00
length = min ( i . length for i in err if i . length ) if ( not any ( i . offset < 0 for i in err ) ) else 0 ,
scope = scope ,
errlist = list ( err ) ,
* * kwargs
)
# by Sdore, 2020