181 lines
5.5 KiB
Python
181 lines
5.5 KiB
Python
|
# -*- coding: utf-8 mode: python -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
|
||
|
|
||
"""Lexer for a configuration file

Syntax:
~~~
# comment
object id var=value
  continuation="line starting with a space"
-link otherid

-link objectref1 predicate -link objectref2
~~~
"""
|
||
|
|
||
|
# Names exported by a star-import of this module: only the Lexer class.
__all__ = (
    'Lexer',
)
|
||
|
|
||
|
import re
|
||
|
|
||
|
class EOL(object):
    """End-of-line marker token."""

    def __repr__(self):
        return 'EOL'

    # ``__str__`` is the hook Python actually consults for str(); the
    # previous ``__string__`` attribute was never looked up by the
    # interpreter. It is kept as an alias in case anything references it.
    __str__ = __repr__
    __string__ = __repr__


# The class name is deliberately rebound to its single instance so that the
# marker can be compared by identity (``lexem is EOL``).
EOL = EOL()
|
||
|
class CONTL(object):
    """Continuation-line marker token."""

    def __repr__(self):
        return 'CONTL'

    # ``__str__`` is the hook Python actually consults for str(); the
    # previous ``__string__`` attribute was never looked up by the
    # interpreter. It is kept as an alias in case anything references it.
    __str__ = __repr__
    __string__ = __repr__


# The class name is deliberately rebound to its single instance so that the
# marker can be compared by identity (``lexem is CONTL``).
CONTL = CONTL()
|
||
|
class EOF(object):
    """End-of-file marker token."""

    def __repr__(self):
        return 'EOF'

    # ``__str__`` is the hook Python actually consults for str(); the
    # previous ``__string__`` attribute was never looked up by the
    # interpreter. It is kept as an alias in case anything references it.
    __str__ = __repr__
    __string__ = __repr__


# The class name is deliberately rebound to its single instance so that the
# marker can be compared by identity (``lexem is EOF``).
EOF = EOF()
|
||
|
|
||
|
class Lexer(object):
    r"""Split a configuration file into lexemes.

    Rules implemented by this class:

    * Lexemes are separated by whitespace. Adjacent quoted and unquoted
      fragments (e.g. ``var="some value"``) are joined into one lexeme.
    * ``#`` at the start of a line, or right after whitespace, starts a
      comment that runs to the end of the line.
    * A line that starts with whitespace and is not comment-only is a
      continuation line: a ``CONTL`` marker is emitted before its lexemes.
    * Single-quoted strings are taken literally and may span several lines.
    * Double-quoted strings may span several lines and understand the
      escapes ``\"``, ``\'`` and ``\\``. Unquoted words understand the same
      escapes and stop at whitespace or at an unescaped quote.

    The lexeme list is made of strings plus the ``EOL``, ``CONTL`` and
    ``EOF`` marker objects, and always ends with ``EOF``.
    """

    # Path of the file to read, as given to the constructor.
    file = None
    # Cached result of parse(); stays None until a parse has succeeded.
    lexems = None
    # Open file object; only set while parse() is running.
    _inf = None
    # Number of physical lines read so far.
    _lcount = None
    # Unconsumed remainder of the current line, without its terminator.
    _line = None

    def __init__(self, file, parse=True):
        """Remember *file* (a path) and, unless *parse* is false, parse it."""
        self.file = file
        if parse:
            self.parse()

    def next_line(self):
        """Load the next physical line into the scan buffer.

        Returns the line without its terminator, or None at end of file
        (in which case the scan buffer is left untouched).

        The file is read in binary mode; when that yields ``bytes`` that are
        not ``str`` (Python 3) the line is decoded as UTF-8 so that all the
        string comparisons in this class operate on text. A file that is not
        valid UTF-8 therefore raises UnicodeDecodeError there.
        """
        line = self._inf.readline()
        if not line:
            return None
        if isinstance(line, bytes) and not isinstance(line, str):
            line = line.decode('utf-8')
        if line.endswith("\r\n"):
            line = line[:-2]
        elif line.endswith(("\n", "\r")):
            line = line[:-1]
        self._lcount += 1
        self._line = line
        return line

    def is_empty(self):
        """Tell whether the scan buffer is exhausted."""
        return self._line == ''

    def isa_squote(self):
        """Tell whether the scan buffer starts with a single quote."""
        return self._line[:1] == "'"

    def isa_dquote(self):
        """Tell whether the scan buffer starts with a double quote."""
        return self._line[:1] == '"'

    RE_SPACE = re.compile(r'\s+')
    RE_COMMENT = re.compile(r'#.*')

    def parse_ws(self):
        """Skip leading whitespace and a following comment, if any.

        Returns True when whitespace was skipped and no comment followed,
        which at the start of a physical line means "continuation line".
        """
        contl = False
        mo = self.RE_SPACE.match(self._line)
        if mo is not None:
            self._line = self._line[mo.end(0):]
            contl = True
        mo = self.RE_COMMENT.match(self._line)
        if mo is not None:
            self._line = self._line[mo.end(0):]
            contl = False
        return contl

    def isa_space(self):
        """Tell whether the scan buffer starts with whitespace."""
        return self.RE_SPACE.match(self._line) is not None

    def isa_comment(self):
        """Tell whether the scan buffer starts with a comment."""
        return self.RE_COMMENT.match(self._line) is not None

    @staticmethod
    def _unescape(text):
        r"""Replace the escapes ``\"``, ``\'`` and ``\\`` in *text*."""
        text = text.replace('\\"', '"')
        text = text.replace("\\'", "'")
        text = text.replace('\\\\', '\\')
        return text

    RE_SQUOTE = re.compile(r"'")

    def parse_sstring(self):
        """Consume a single-quoted string and return its literal content.

        The scan buffer must start with the opening quote. The string may
        span several lines, which are joined with a newline character.

        Raises ValueError if the file ends before the closing quote.
        """
        first_line = self._lcount
        parts = []
        self._line = self._line[1:]
        mo = self.RE_SQUOTE.search(self._line)
        while mo is None:
            parts.append(self._line)
            if self.next_line() is None:
                raise ValueError("unterminated quoted string starting at line %i" % first_line)
            parts.append("\n")
            mo = self.RE_SQUOTE.search(self._line)
        parts.append(self._line[:mo.start(0)])
        self._line = self._line[mo.end(0):]
        return ''.join(parts)

    # Closing double quote: a quote preceded by an even number (possibly
    # zero) of backslashes. A plain '"' pattern would end the string on an
    # escaped quote, making the \" escape unusable.
    RE_DQUOTE = re.compile(r'(?<!\\)(?:\\\\)*"')

    def parse_dstring(self):
        r"""Consume a double-quoted string and return its unescaped content.

        The scan buffer must start with the opening quote. The string ends
        at the first quote that is not backslash-escaped; it may span
        several lines, which are joined with a newline character. The
        escapes ``\"``, ``\'`` and ``\\`` are then resolved.

        Raises ValueError if the file ends before the closing quote.
        """
        first_line = self._lcount
        parts = []
        self._line = self._line[1:]
        mo = self.RE_DQUOTE.search(self._line)
        while mo is None:
            parts.append(self._line)
            if self.next_line() is None:
                raise ValueError("unterminated double-quoted string starting at line %i" % first_line)
            parts.append("\n")
            mo = self.RE_DQUOTE.search(self._line)
        # The match may include escaped backslashes before the quote; the
        # quote itself is always the last matched character.
        close = mo.end(0) - 1
        parts.append(self._line[:close])
        self._line = self._line[close + 1:]
        return self._unescape(''.join(parts))

    RE_EOS = re.compile(r'''\s|(?<!\\)['"]''')

    def parse_string(self):
        """Consume an unquoted word and return it unescaped.

        The word stops before the first whitespace character or the first
        quote not preceded by a backslash; the delimiter stays in the scan
        buffer.
        """
        mo = self.RE_EOS.search(self._line)
        if mo is None:
            word, self._line = self._line, ''
        else:
            word = self._line[:mo.start(0)]
            self._line = self._line[mo.start(0):]
        return self._unescape(word)

    def parse(self):
        """Read the file and return its list of lexemes.

        The result is cached: later calls return the same list without
        touching the file again. The cache is only filled once parsing has
        succeeded, so a failed parse is not mistaken for a finished one.

        Raises ValueError on an unterminated quoted string, and whatever
        open() raises if the file cannot be opened.
        """
        if self.lexems is not None:
            return self.lexems

        lexems = []
        self._lcount = 0
        self._line = ''
        self._inf = open(self.file, 'rb')
        try:
            # True while lexemes have been emitted for the current logical
            # line and its EOL marker is still pending.
            in_line = False
            while True:
                # Skip blank lines and comments.
                contl = self.parse_ws()
                at_eof = False
                while self.is_empty():
                    if in_line:
                        lexems.append(EOL)
                        in_line = False
                    if self.next_line() is None:
                        at_eof = True
                        break
                    contl = self.parse_ws()
                if at_eof:
                    break
                # Leading whitespace only means "continuation" at the start
                # of a line, not between two lexemes of the same line.
                if contl and not in_line:
                    lexems.append(CONTL)
                in_line = True
                # Build one lexeme out of adjacent fragments.
                parts = []
                while not (self.is_empty() or self.isa_space()):
                    if self.isa_squote():
                        parts.append(self.parse_sstring())
                    elif self.isa_dquote():
                        parts.append(self.parse_dstring())
                    else:
                        parts.append(self.parse_string())
                lexems.append(''.join(parts))
            lexems.append(EOF)
        finally:
            self._inf.close()
            self._inf = None
        self.lexems = lexems
        return lexems

    def get_predicates(self):
        """Group the lexemes into predicates, one list of strings each.

        A predicate collects the lexemes of one line; the lexemes of a
        continuation line are appended to the predicate that precedes it.
        """
        predicates = []
        predicate = []
        for lexem in self.parse():
            if lexem is EOF:
                break
            elif lexem is EOL:
                predicates.append(predicate)
                predicate = []
            elif lexem is CONTL:
                # Reopen the previous predicate so that it keeps growing.
                if predicates:
                    predicate = predicates.pop()
            else:
                predicate.append(lexem)
        return predicates
|