# -*- coding: utf-8 mode: python -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8

"""Lexer pour un fichier de configuration

Syntaxe:
~~~
# comment
object id var=value
  continuation="line starting with a space"
  -link otherid

-link objectref1 predicate -link objectref2
~~~
"""

__all__ = (
    'Lexer',
)

import io
import re

class EOL(object):
    """End-of-line token."""

    def __repr__(self):
        return 'EOL'

    # The original bound a non-existent ``__string__`` hook; ``__str__`` is
    # the special method actually used by str() and '%s' formatting.
    __str__ = __repr__


# Rebind the name to the single instance so tokens can be tested with ``is``.
EOL = EOL()
class CONTL(object):
    """Continuation-line token."""

    def __repr__(self):
        return 'CONTL'

    # The original bound a non-existent ``__string__`` hook; ``__str__`` is
    # the special method actually used by str() and '%s' formatting.
    __str__ = __repr__


# Rebind the name to the single instance so tokens can be tested with ``is``.
CONTL = CONTL()
class EOF(object):
    """End-of-file token."""

    def __repr__(self):
        return 'EOF'

    # The original bound a non-existent ``__string__`` hook; ``__str__`` is
    # the special method actually used by str() and '%s' formatting.
    __str__ = __repr__


# Rebind the name to the single instance so tokens can be tested with ``is``.
EOF = EOF()

class Lexer(object):
    """Split a configuration file into a flat list of lexems.

    The result of parse() is a list made of strings (the words of the file,
    with quoting and escaping already resolved) and of the marker objects
    EOL (end of a line that produced lexems), CONTL (the line that follows
    starts with whitespace) and EOF (always the last item).

    Quoting rules:
    - 'single quoted' text is taken verbatim up to the next single quote and
      may span several lines;
    - "double quoted" text may span several lines; the sequences \\", \\'
      and \\\\ stand for a quote, an apostrophe and a backslash;
    - unquoted text ends at whitespace or at an unescaped quote and supports
      the same three escape sequences;
    - adjacent quoted/unquoted pieces are concatenated into a single lexem.
    """

    file = None     # path of the file to read
    lexems = None   # cached result of parse(); None until a parse succeeded
    _inf = None     # open file object, only set while parse() is running
    _lcount = None  # number of lines read so far (used in error messages)
    _line = None    # not yet consumed part of the current line

    def __init__(self, file, parse=True):
        """Remember the path *file* and, unless *parse* is false, parse it."""
        self.file = file
        if parse:
            self.parse()

    def next_line(self):
        """Read the next line into self._line, without its line terminator.

        Returns the line, or None at end of file (self._line is then left
        untouched).
        """
        line = self._inf.readline()
        if not line:
            return None
        if line.endswith("\r\n"):
            line = line[:-2]
        elif line.endswith(("\n", "\r")):
            line = line[:-1]
        self._lcount += 1
        self._line = line
        return line

    def is_empty(self):
        """Tell whether the current line is fully consumed."""
        return self._line == ''

    def isa_squote(self):
        """Tell whether the current line starts with a single quote."""
        return self._line[:1] == "'"

    def isa_dquote(self):
        """Tell whether the current line starts with a double quote."""
        return self._line[:1] == '"'

    RE_SPACE = re.compile(r'\s+')
    RE_COMMENT = re.compile(r'#.*')

    def parse_ws(self):
        """Skip leading whitespace and a comment at the start of the line.

        Returns True when whitespace was skipped and no comment followed it,
        which is how a continuation line is recognised.
        """
        contl = False
        mo = self.RE_SPACE.match(self._line)
        if mo is not None:
            self._line = self._line[mo.end(0):]
            contl = True
        mo = self.RE_COMMENT.match(self._line)
        if mo is not None:
            self._line = self._line[mo.end(0):]
            contl = False
        return contl

    def isa_space(self):
        """Tell whether the current line starts with whitespace."""
        return self.RE_SPACE.match(self._line) is not None

    def isa_comment(self):
        """Tell whether the current line starts with a comment."""
        return self.RE_COMMENT.match(self._line) is not None

    @staticmethod
    def _unescape(text):
        """Replace the sequences \\", \\' and \\\\ by the character they escape."""
        text = text.replace('\\"', '"')
        text = text.replace("\\'", "'")
        return text.replace('\\\\', '\\')

    RE_SQUOTE = re.compile(r"'")

    def parse_sstring(self):
        """Consume a single-quoted string and return its verbatim content.

        The current line must start with the opening quote. Lines are read
        as needed until the closing quote; they are joined with "\\n".

        Raises ValueError if the end of file is reached first.
        """
        first_line = self._lcount
        parts = []
        self._line = self._line[1:]
        mo = self.RE_SQUOTE.search(self._line)
        while mo is None:
            parts.append(self._line)
            if self.next_line() is None:
                raise ValueError("unterminated quoted string starting at line %i" % first_line)
            parts.append("\n")
            mo = self.RE_SQUOTE.search(self._line)
        parts.append(self._line[0:mo.start(0)])
        self._line = self._line[mo.end(0):]
        return ''.join(parts)

    # Matched at the start of the line: group 1 is everything up to the first
    # double quote that is not part of a backslash escape. A plain search for
    # '"' would end the string at an escaped quote, which made the \" escape
    # unusable inside double-quoted strings.
    RE_DQUOTE = re.compile(r'((?:\\.|[^"\\])*)"')

    def parse_dstring(self):
        """Consume a double-quoted string and return its unescaped content.

        The current line must start with the opening quote. Lines are read
        as needed until the closing (unescaped) quote; they are joined with
        "\\n". The escape sequences are then resolved.

        Raises ValueError if the end of file is reached first.
        """
        first_line = self._lcount
        parts = []
        self._line = self._line[1:]
        mo = self.RE_DQUOTE.match(self._line)
        while mo is None:
            parts.append(self._line)
            if self.next_line() is None:
                raise ValueError("unterminated double-quoted string starting at line %i" % first_line)
            parts.append("\n")
            mo = self.RE_DQUOTE.match(self._line)
        parts.append(mo.group(1))
        self._line = self._line[mo.end(0):]
        return self._unescape(''.join(parts))

    # End of an unquoted string: whitespace, or a quote not preceded by a
    # backslash.
    RE_EOS = re.compile(r'''\s|(?<!\\)['"]''')

    def parse_string(self):
        """Consume an unquoted string and return its unescaped content.

        The delimiter that ends the string is left in the current line.
        """
        mo = self.RE_EOS.search(self._line)
        if mo is not None:
            lexem = self._line[0:mo.start(0)]
            self._line = self._line[mo.start(0):]
        else:
            lexem = self._line
            self._line = ''
        return self._unescape(lexem)

    def parse(self):
        """Read self.file and return its list of lexems.

        The list is cached in self.lexems and returned as is by later calls.
        It is only cached once the whole file has been read successfully, so
        that a failed attempt is not mistaken for a valid (empty or partial)
        result.

        The file is read as UTF-8 text; lines are only split on "\\n", the
        "\\r" of a "\\r\\n" terminator being removed by next_line().

        Raises ValueError for an unterminated quoted string, and whatever
        opening or decoding the file raises.
        """
        if self.lexems is not None:
            return self.lexems

        lexems = []
        self._lcount = 0
        self._line = ''
        try:
            with io.open(self.file, 'r', encoding='utf-8', newline='\n') as inf:
                self._inf = inf
                # True once a lexem was emitted for the current line and
                # until the matching EOL is emitted
                in_line = False
                while True:
                    # Skip blank lines and comments
                    contl = self.parse_ws()
                    at_eof = False
                    while self.is_empty():
                        if in_line:
                            lexems.append(EOL)
                            in_line = False
                        if self.next_line() is None:
                            at_eof = True
                            break
                        contl = self.parse_ws()
                    if at_eof:
                        break
                    # Only the first lexem of a line can flag a continuation
                    if contl and not in_line:
                        lexems.append(CONTL)
                    in_line = True
                    # Build one lexem out of adjacent quoted/unquoted pieces
                    parts = []
                    while not (self.is_empty() or self.isa_space()):
                        if self.isa_squote():
                            parts.append(self.parse_sstring())
                        elif self.isa_dquote():
                            parts.append(self.parse_dstring())
                        else:
                            parts.append(self.parse_string())
                    lexems.append(''.join(parts))
        finally:
            self._inf = None
        lexems.append(EOF)
        self.lexems = lexems
        return lexems

    def get_predicates(self):
        """Group the lexems into predicates, one list of strings per line.

        A continuation line is appended to the predicate of the line before
        it, if there is one.
        """
        predicates = []
        predicate = []
        for lexem in self.parse():
            if lexem is EOF:
                break
            elif lexem is EOL:
                predicates.append(predicate)
                predicate = []
            elif lexem is CONTL:
                # Reopen the previous predicate so this line extends it
                if predicates:
                    predicate = predicates.pop()
            else:
                predicate.append(lexem)
        return predicates