nutools/lib/nulib/python/deploydb/lexer.py

181 lines
5.5 KiB
Python
Raw Permalink Normal View History

2018-04-26 23:19:17 +04:00
# -*- coding: utf-8 mode: python -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
"""Lexer pour un fichier de configuration
Syntaxe:
~~~
# comment
object id var=value
  continuation="line starting with a space"
-link otherid
-link objectref1 predicate -link objectref2
~~~
"""
# Public API of this module: only the Lexer class is exported by
# ``from ... import *``.
__all__ = (
'Lexer',
)
import re
class EOL(object):
    """End-of-line marker."""

    def __repr__(self):
        return 'EOL'

    # ``__str__`` (not ``__string__``) is the hook used by str(); bind it
    # explicitly instead of relying on the object.__str__ fallback.
    __str__ = __repr__


# The class name is rebound to its single instance so that the marker can be
# compared by identity (``lexem is EOL``).
EOL = EOL()
class CONTL(object):
    """Continuation-line marker."""

    def __repr__(self):
        return 'CONTL'

    # ``__str__`` (not ``__string__``) is the hook used by str(); bind it
    # explicitly instead of relying on the object.__str__ fallback.
    __str__ = __repr__


# The class name is rebound to its single instance so that the marker can be
# compared by identity (``lexem is CONTL``).
CONTL = CONTL()
class EOF(object):
    """End-of-file marker."""

    def __repr__(self):
        return 'EOF'

    # ``__str__`` (not ``__string__``) is the hook used by str(); bind it
    # explicitly instead of relying on the object.__str__ fallback.
    __str__ = __repr__


# The class name is rebound to its single instance so that the marker can be
# compared by identity (``lexem is EOF``).
EOF = EOF()
class Lexer(object):
    """Split a configuration file into lexems.

    The result of :meth:`parse` is a flat list mixing plain strings (the
    words and quoted strings read from the file) with the module-level
    markers ``EOL``, ``CONTL`` and ``EOF``. :meth:`get_predicates` regroups
    that list into one list of strings per logical line.
    """

    # Path of the file to read.
    file = None
    # Cached result of parse(); stays None until a parse succeeds.
    lexems = None
    # Open file object, only set while parse() is running.
    _inf = None
    # Number of lines read so far.
    _lcount = None
    # Not yet consumed part of the current line, without its line terminator.
    _line = None

    RE_SPACE = re.compile(r'\s+')
    RE_COMMENT = re.compile(r'#.*')
    RE_SQUOTE = re.compile(r"'")
    RE_DQUOTE = re.compile(r'"')
    RE_EOS = re.compile(r'''\s|(?<!\\)['"]''')

    def __init__(self, file, parse=True):
        """Remember ``file`` and, unless ``parse`` is false, parse it now."""
        self.file = file
        if parse:
            self.parse()

    def next_line(self):
        """Read the next line into ``self._line`` and return it.

        The line terminator (``\\r\\n``, ``\\n`` or a trailing ``\\r``) is
        removed. Returns None at end of file, in which case ``self._line``
        and the line counter are left untouched.
        """
        line = self._inf.readline()
        if isinstance(line, bytes) and not isinstance(line, str):
            # Python 3 only: a file opened in binary mode yields bytes, which
            # cannot be compared with the str literals used below. On
            # Python 2 bytes is str, so this branch is never taken.
            # NOTE(review): UTF-8 is assumed for the file content -- confirm.
            line = line.decode('utf-8')
        if line == '':
            return None
        if line.endswith("\r\n"):
            line = line[:-2]
        elif line.endswith("\n") or line.endswith("\r"):
            line = line[:-1]
        self._lcount += 1
        self._line = line
        return line

    def is_empty(self):
        """Tell whether the current line is fully consumed."""
        return self._line == ''

    def isa_squote(self):
        """Tell whether the current line starts with a single quote."""
        return self._line[:1] == "'"

    def isa_dquote(self):
        """Tell whether the current line starts with a double quote."""
        return self._line[:1] == '"'

    def isa_space(self):
        """Tell whether the current line starts with whitespace."""
        return self.RE_SPACE.match(self._line) is not None

    def isa_comment(self):
        """Tell whether the current line starts with a ``#`` comment."""
        return self.RE_COMMENT.match(self._line) is not None

    def parse_ws(self):
        """Drop leading whitespace, then a comment, from the current line.

        Returns True when the line is a continuation candidate, i.e. it
        started with whitespace and the whitespace was not followed by a
        comment.
        """
        contl = False
        mo = self.RE_SPACE.match(self._line)
        if mo is not None:
            self._line = self._line[mo.end(0):]
            contl = True
        mo = self.RE_COMMENT.match(self._line)
        if mo is not None:
            self._line = self._line[mo.end(0):]
            contl = False
        return contl

    @staticmethod
    def _unescape(text):
        """Replace ``\\"``, ``\\'`` and ``\\\\`` (in that order) in ``text``."""
        text = text.replace('\\"', '"')
        text = text.replace("\\'", "'")
        return text.replace('\\\\', '\\')

    def _parse_quoted(self, closing, error):
        """Consume a quoted string and return its raw content.

        The opening quote is expected to be the first character of the
        current line. ``closing`` is the compiled pattern of the closing
        quote; the string may span several lines, which are joined with
        ``\\n``. ``error`` is a ``%``-template receiving the line number at
        which the string started.

        Raises ValueError when the file ends before the closing quote.
        """
        start = self._lcount
        parts = []
        self._line = self._line[1:]  # skip the opening quote
        mo = closing.search(self._line)
        while mo is None:
            parts.append(self._line)
            if self.next_line() is None:
                raise ValueError(error % start)
            parts.append("\n")
            mo = closing.search(self._line)
        parts.append(self._line[:mo.start(0)])
        self._line = self._line[mo.end(0):]
        return ''.join(parts)

    def parse_sstring(self):
        """Consume a single-quoted string; its content is taken verbatim.

        Raises ValueError when the string is not terminated.
        """
        return self._parse_quoted(
            self.RE_SQUOTE,
            "unterminated quoted string starting at line %i")

    def parse_dstring(self):
        """Consume a double-quoted string and unescape its content.

        Raises ValueError when the string is not terminated.
        """
        # NOTE(review): RE_DQUOTE stops at the first '"' even when it is
        # preceded by a backslash, so the content never holds a '"' and the
        # '\\"' replacement cannot match here. Left unchanged on purpose.
        raw = self._parse_quoted(
            self.RE_DQUOTE,
            "unterminated double-quoted string starting at line %i")
        return self._unescape(raw)

    def parse_string(self):
        """Consume an unquoted word and return it unescaped.

        The word stops before the first whitespace character or the first
        quote that is not preceded by a backslash; that character is left in
        the current line.
        """
        mo = self.RE_EOS.search(self._line)
        if mo is None:
            lexem = self._line
            self._line = ''
        else:
            lexem = self._line[:mo.start(0)]
            self._line = self._line[mo.start(0):]
        return self._unescape(lexem)

    def parse(self):
        """Read ``self.file`` and return its list of lexems.

        The list is cached in ``self.lexems`` and returned as is by later
        calls. The cache is only set once the whole file has been read, so
        that a failed attempt (unreadable file, unterminated string) is not
        mistaken for a successful parse by the next call.

        Raises ValueError on an unterminated quoted string, and whatever
        ``open`` raises when the file cannot be opened.
        """
        if self.lexems is not None:
            return self.lexems

        lexems = []
        self._inf = open(self.file, 'rb')
        self._lcount = 0
        self._line = ''
        try:
            # SOL is True while lexems have been emitted for the current
            # line and its EOL marker is still pending.
            SOL = False
            while True:
                # Skip blank lines and comments
                contl = self.parse_ws()
                stop = False
                while self.is_empty():
                    if SOL:
                        lexems.append(EOL)
                        SOL = False
                    if self.next_line() is None:
                        stop = True
                        break
                    contl = self.parse_ws()
                if stop:
                    break
                # Whitespace between two lexems of the same line is not a
                # continuation: only mark it at the start of a line.
                if contl and not SOL:
                    lexems.append(CONTL)
                SOL = True
                # Build one lexem out of adjacent quoted and unquoted parts
                lexem = ''
                while not self.is_empty() and not self.isa_space():
                    if self.isa_squote():
                        lexem += self.parse_sstring()
                    elif self.isa_dquote():
                        lexem += self.parse_dstring()
                    else:
                        lexem += self.parse_string()
                lexems.append(lexem)
            lexems.append(EOF)
        finally:
            self._inf.close()
            self._inf = None
        self.lexems = lexems
        return lexems

    def get_predicates(self):
        """Return the lexems grouped as one list of strings per logical line.

        A CONTL marker re-opens the last completed predicate, so that the
        lexems of a continuation line are appended to the preceding line.
        """
        predicates = []
        predicate = []
        for lexem in self.parse():
            if lexem is EOF:
                break
            elif lexem is EOL:
                predicates.append(predicate)
                predicate = []
            elif lexem is CONTL:
                if predicates:
                    predicate = predicates.pop()
            else:
                predicate.append(lexem)
        return predicates