# -*- coding: utf-8 mode: python -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8

"""Lexer for a configuration file.

Syntax:
~~~
# comment
object id var=value
  continuation="line starting with a space"
  -link otherid

-link objectref1 predicate -link objectref2
~~~
"""

__all__ = (
    'Lexer',
)

import re


class EOL(object):
    """End of line."""
    # '__string__' is not a Python protocol method (the real one is
    # '__str__'); the old name is kept only as a backward-compatible alias.
    __repr__ = __str__ = __string__ = lambda self: 'EOL'
EOL = EOL()


class CONTL(object):
    """Continuation line."""
    __repr__ = __str__ = __string__ = lambda self: 'CONTL'
CONTL = CONTL()


class EOF(object):
    """End of file."""
    __repr__ = __str__ = __string__ = lambda self: 'EOF'
EOF = EOF()


class Lexer(object):
    """Split a configuration file into a flat list of lexems.

    The list produced by parse() contains plain strings (the words of the
    file, with quotes removed) interleaved with the module-level markers
    EOL (end of a non-empty line), CONTL (the line that follows starts with
    whitespace) and EOF (always the last item).
    """

    # path of the file to read
    file = None
    # cached result of parse(); None until a parse has succeeded
    lexems = None
    # file object being read; only set while parse() is running
    _inf = None
    # number of lines read so far (used in error messages)
    _lcount = None
    # unconsumed remainder of the current line
    _line = None

    def __init__(self, file, parse=True):
        """Remember the file path and, unless parse is false, parse it now."""
        self.file = file
        if parse:
            self.parse()

    def next_line(self):
        """Read the next line into self._line, without its line terminator.

        Returns the line, or None when the end of the file is reached (in
        which case self._line is left untouched).
        """
        line = self._inf.readline()
        if line == '':
            return None
        # remove exactly one terminator, whatever its flavour
        for eol in ("\r\n", "\n", "\r"):
            if line.endswith(eol):
                line = line[:-len(eol)]
                break
        self._lcount += 1
        self._line = line
        return line

    def is_empty(self):
        """Tell whether the current line is fully consumed."""
        return self._line == ''

    def isa_squote(self):
        """Tell whether the current line starts with a single quote."""
        return self._line[:1] == "'"

    def isa_dquote(self):
        """Tell whether the current line starts with a double quote."""
        return self._line[:1] == '"'

    RE_SPACE = re.compile(r'\s+')
    RE_COMMENT = re.compile(r'#.*')

    def parse_ws(self):
        """Consume leading whitespace, then a comment if one follows.

        Returns True when whitespace was consumed and no comment followed
        it: at the start of a line this denotes a continuation line.
        """
        contl = False
        mo = self.RE_SPACE.match(self._line)
        if mo is not None:
            self._line = self._line[mo.end(0):]
            contl = True
        mo = self.RE_COMMENT.match(self._line)
        if mo is not None:
            self._line = self._line[mo.end(0):]
            contl = False
        return contl

    def isa_space(self):
        """Tell whether the current line starts with whitespace."""
        return self.RE_SPACE.match(self._line) is not None

    def isa_comment(self):
        """Tell whether the current line starts with a '#' comment."""
        return self.RE_COMMENT.match(self._line) is not None

    @staticmethod
    def _unescape(text):
        """Replace the escapes \\" \\' and \\\\ by the character they stand for."""
        text = text.replace('\\"', '"')
        text = text.replace("\\'", "'")
        text = text.replace('\\\\', '\\')
        return text

    RE_SQUOTE = re.compile(r"'")

    def parse_sstring(self):
        """Consume a single-quoted string and return its raw content.

        The current line must start with the opening quote. The string ends
        at the next single quote, possibly several lines further (the lines
        are then joined with "\\n"). No escape sequence is interpreted.

        Raises ValueError if the file ends before the closing quote.
        """
        start_line = self._lcount
        parts = []
        self._line = self._line[1:]
        closing = self.RE_SQUOTE.search(self._line)
        while closing is None:
            parts.append(self._line)
            if self.next_line() is None:
                raise ValueError("unterminated quoted string starting at line %i" % start_line)
            parts.append("\n")
            closing = self.RE_SQUOTE.search(self._line)
        parts.append(self._line[:closing.start(0)])
        self._line = self._line[closing.end(0):]
        return ''.join(parts)

    # Closing double quote: a '"' preceded by an even number of backslashes
    # (possibly none), i.e. a quote that is not itself escaped.
    RE_DQUOTE = re.compile(r'(?<!\\)(?:\\\\)*"')

    def parse_dstring(self):
        """Consume a double-quoted string and return its unescaped content.

        The current line must start with the opening quote. The string ends
        at the next unescaped double quote, possibly several lines further
        (the lines are then joined with "\\n"). The escapes \\" \\' and \\\\
        are replaced in the returned value.

        Raises ValueError if the file ends before the closing quote.
        """
        start_line = self._lcount
        parts = []
        self._line = self._line[1:]
        closing = self.RE_DQUOTE.search(self._line)
        while closing is None:
            parts.append(self._line)
            if self.next_line() is None:
                raise ValueError("unterminated double-quoted string starting at line %i" % start_line)
            parts.append("\n")
            closing = self.RE_DQUOTE.search(self._line)
        # the match ends on the closing quote; the backslashes matched just
        # before it belong to the content
        parts.append(self._line[:closing.end(0) - 1])
        self._line = self._line[closing.end(0):]
        return self._unescape(''.join(parts))

    RE_EOS = re.compile(r'''\s|(?<!\\)['"]''')

    def parse_string(self):
        """Consume an unquoted word and return its unescaped content.

        The word stops before the first whitespace character or the first
        quote that is not preceded by a backslash, or at the end of the line.
        """
        mo = self.RE_EOS.search(self._line)
        if mo is not None:
            lexem = self._line[:mo.start(0)]
            self._line = self._line[mo.start(0):]
        else:
            lexem = self._line
            self._line = ''
        return self._unescape(lexem)

    def parse(self):
        """Read self.file and return the list of its lexems.

        The result is cached in self.lexems, so the file is read only once.
        A parse that fails leaves the cache empty: the error is raised again
        on the next call instead of a partial list being returned.

        Raises ValueError on an unterminated quoted string, and whatever
        open() raises when the file cannot be read.
        """
        if self.lexems is not None:
            return self.lexems

        lexems = []
        # Text mode: readline() must return str for the comparisons and
        # regular expressions used by this class to work on Python 3.
        self._inf = open(self.file, 'r')
        self._lcount = 0
        self._line = ''
        try:
            # True once a lexem has been read on the current logical line
            in_line = False
            while True:
                # Skip blank lines and comments
                contl = self.parse_ws()
                at_eof = False
                while self.is_empty():
                    if in_line:
                        lexems.append(EOL)
                        in_line = False
                    if self.next_line() is None:
                        at_eof = True
                        break
                    contl = self.parse_ws()
                if at_eof:
                    break
                # whitespace in the middle of a line only separates lexems
                if contl and not in_line:
                    lexems.append(CONTL)
                in_line = True
                # Build one lexem out of adjacent quoted and unquoted parts
                lexem = ''
                while not (self.is_empty() or self.isa_space()):
                    if self.isa_squote():
                        lexem += self.parse_sstring()
                    elif self.isa_dquote():
                        lexem += self.parse_dstring()
                    else:
                        lexem += self.parse_string()
                lexems.append(lexem)
        finally:
            self._inf.close()
            self._inf = None
        lexems.append(EOF)
        # publish the result only now that the whole file has been read
        self.lexems = lexems
        return lexems

    def get_predicates(self):
        """Return the lexems grouped by logical line, as a list of lists.

        Each EOL closes the current group. A CONTL reopens the previously
        closed group, so that the words of a continuation line are appended
        to the line it continues.
        """
        predicates = []
        predicate = []
        for lexem in self.parse():
            if lexem is EOF:
                break
            elif lexem is EOL:
                predicates.append(predicate)
                predicate = []
            elif lexem is CONTL:
                if predicates:
                    predicate = predicates.pop()
            else:
                predicate.append(lexem)
        return predicates