131 lines
4.3 KiB
Python
131 lines
4.3 KiB
Python
# -*- coding: utf-8 -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
|
||
|
||
"""Des fonctions pour gérer les entrées/sorties en unicode.
|
||
"""
|
||
|
||
# Names exported by ``from <module> import *``.
__all__ = (
    'UnicodeIO',
    'EditorIO',
    'Utf8IO',
    'Latin1IO',
    'Latin9IO',
    'defaultIO',
    '_u',
    '_s',
    'uinput',
    'uprint',
    '_son',
    '_uon',
    'latin1compat',
)
|
||
|
||
import re
|
||
from sys import stdout, stderr
|
||
from types import StringType, UnicodeType, StringTypes
|
||
|
||
from .base import make_prop, isstr
|
||
from .encoding import LATIN1, LATIN9, UTF8
|
||
from .env import get_input_encoding, get_output_encoding, get_editor_encoding
|
||
|
||
# Symbolic stream names accepted by uprint(out=...), mapped to the stream
# objects imported from sys at module load time.
OUTS = dict(
    stdout=stdout,
    out=stdout,
    stderr=stderr,
    err=stderr,
)
|
||
|
||
class UnicodeIO(object):
    """Convert between byte strings and unicode and do unicode-aware I/O.

    For every conversion, an explicit ``encoding`` argument takes precedence
    over the instance encoding, which in turn takes precedence over the
    encoding returned by ``_get_input_encoding()`` (decoding) or
    ``_get_output_encoding()`` (encoding).
    """

    # NOTE(review): make_prop comes from .base and is not visible here; it
    # presumably returns (default value, property, getter) -- confirm.
    _encoding, encoding, get_encoding = make_prop('_encoding', None, setter=False)
    _errors, errors, get_errors = make_prop('_errors', 'strict', setter=False)

    def __init__(self, encoding=None, errors=None):
        """Override the class-level encoding and/or error policy when given.

        Arguments left to None keep the class-level defaults.
        """
        if encoding is not None:
            self._encoding = encoding
        if errors is not None:
            self._errors = errors

    def _get_input_encoding(self):
        """Return the fallback encoding used when decoding."""
        return get_input_encoding()

    def u(self, u, encoding=None, errors=None):
        """Return u as unicode, decoding it with the given encoding.

        Unicode objects are returned unchanged, byte strings are decoded,
        and any other object is first converted with str() and then decoded.
        ``errors`` defaults to the instance error policy.
        """
        if encoding is None:
            encoding = self._encoding
        if encoding is None:
            encoding = self._get_input_encoding()
        if errors is None:
            errors = self._errors

        # isinstance rather than an exact type() comparison: a subclass of
        # unicode must be passed through untouched, not round-tripped through
        # str(), which fails as soon as the text contains non-ASCII characters.
        if isinstance(u, UnicodeType):
            return u
        if not isinstance(u, StringType):
            u = str(u)
        return unicode(u, encoding, errors)

    def _get_output_encoding(self):
        """Return the fallback encoding used when encoding."""
        return get_output_encoding()

    def s(self, s, encoding=None):
        """Return s as a byte string in the given encoding.

        Unicode objects are encoded (with the codec's default error
        handling), byte strings are returned unchanged, and any other object
        is converted with str().
        """
        if encoding is None:
            encoding = self._encoding
        if encoding is None:
            encoding = self._get_output_encoding()

        # isinstance so that subclasses of unicode are really encoded instead
        # of falling through to str(), which would use the ASCII codec.
        if isinstance(s, UnicodeType):
            return s.encode(encoding)
        if not isinstance(s, StringType):
            s = str(s)
        return s

    def uinput(self, errors='ignore', encoding=None):
        """Read a line with raw_input() and return it decoded by u().

        Note that the default error policy here is 'ignore', not the
        instance policy.
        """
        return self.u(raw_input(), encoding, errors)

    def uprint(self, s, nl=True, flush=None, encoding=None, out=None):
        """Write s, converted by s(), to an output stream.

        nl       -- append a newline after the text.
        flush    -- flush the stream afterwards; defaults to the value of nl.
        encoding -- forwarded to s().
        out      -- a stream object, or a name looked up case-insensitively
                    in OUTS; defaults to stdout.
        """
        if flush is None:
            flush = nl
        if out is None:
            out = stdout
        elif isstr(out):
            # Symbolic stream name; an unknown name raises KeyError.
            out = OUTS[out.lower()]

        out.write(self.s(s, encoding))
        if nl:
            out.write("\n")
        if flush:
            out.flush()
|
||
|
||
class EditorIO(UnicodeIO):
    """UnicodeIO whose fallback encoding comes from get_editor_encoding().

    The same editor encoding is used as the fallback for both decoding and
    encoding.
    """

    def _get_input_encoding(self):
        """Return the editor encoding as the fallback for decoding."""
        editor_encoding = get_editor_encoding()
        return editor_encoding

    def _get_output_encoding(self):
        """Return the editor encoding as the fallback for encoding."""
        editor_encoding = get_editor_encoding()
        return editor_encoding
|
||
|
||
class Utf8IO(UnicodeIO):
    """UnicodeIO preconfigured with the UTF8 encoding constant."""

    def __init__(self, errors=None):
        """Fix the encoding to UTF8; errors is forwarded unchanged."""
        UnicodeIO.__init__(self, encoding=UTF8, errors=errors)
|
||
|
||
class Latin1IO(UnicodeIO):
    """UnicodeIO preconfigured with the LATIN1 encoding constant."""

    def __init__(self, errors=None):
        """Fix the encoding to LATIN1; errors is forwarded unchanged."""
        UnicodeIO.__init__(self, encoding=LATIN1, errors=errors)
|
||
|
||
class Latin9IO(UnicodeIO):
    """UnicodeIO preconfigured with the LATIN9 encoding constant."""

    def __init__(self, errors=None):
        """Fix the encoding to LATIN9; errors is forwarded unchanged."""
        UnicodeIO.__init__(self, encoding=LATIN9, errors=errors)
|
||
|
||
# Module-wide UnicodeIO instance that the module-level helper functions
# delegate to.
defaultIO = UnicodeIO()


def set_defaultIO(defio):
    """Replace the module-level defaultIO instance with defio."""
    global defaultIO
    defaultIO = defio
|
||
def _u(u, encoding=None, errors=None):
    """Convert u to unicode by delegating to defaultIO.u()."""
    return defaultIO.u(u, encoding, errors)
|
||
def _s(s, encoding=None):
    """Convert s to a byte string by delegating to defaultIO.s()."""
    return defaultIO.s(s, encoding)
|
||
def uinput(errors='ignore', encoding=None):
    """Read and decode a line of input by delegating to defaultIO.uinput()."""
    return defaultIO.uinput(errors, encoding)
|
||
def uprint(s, nl=True, flush=None, encoding=None, out=None):
    """Write s to an output stream by delegating to defaultIO.uprint()."""
    defaultIO.uprint(s, nl, flush, encoding, out)
|
||
|
||
def _son(s, encoding=None):
    """Like _s(), but pass None through unchanged ("s or None")."""
    if s is None:
        return None
    return _s(s, encoding)
|
||
def _uon(u, encoding=None, errors=None):
    """Like _u(), but pass None through unchanged ("u or None")."""
    if u is None:
        return None
    return _u(u, encoding, errors)
|
||
|
||
# Patterns matching characters that have no latin1 representation, each
# paired by latin1compat() with a latin1-safe replacement.

# Curly single quotes.
RE_SQUOTE = re.compile(u'[‘’]')
# Guillemets and curly double quotes.
RE_DQUOTE = re.compile(u'[«»“”]')
# Special spaces: U+00A0 no-break space, U+2007 figure space, U+202F narrow
# no-break space and U+2060 word joiner.  They are spelled as explicit code
# points: the previous raw byte-escape literal was never turned into UTF-8
# bytes, so the regex engine read each \xHH as a separate character and the
# class matched letters such as U+00C2 and U+00E2 instead.
RE_SPACE = re.compile(u'[\u00a0\u2007\u202f\u2060]')
# Ligatures, lower and upper case.
RE_LOE = re.compile(u'[œ]')
RE_UOE = re.compile(u'[Œ]')
RE_LAE = re.compile(u'[æ]')
RE_UAE = re.compile(u'[Æ]')
|
||
def latin1compat(u):
    """Replace unicode characters in u with latin1-friendly equivalents.

    u is first converted to unicode with _u(); then, in order, the
    characters matched by each RE_* pattern are replaced with a plain quote,
    a plain space, or the spelled-out form of a ligature.  The resulting
    string is returned.
    """
    text = _u(u)
    # Applied in this exact order, one pattern after the other.
    substitutions = (
        (RE_SQUOTE, "'"),
        (RE_DQUOTE, '"'),
        (RE_SPACE, ' '),
        (RE_LOE, 'oe'),
        (RE_UOE, 'OE'),
        (RE_LAE, 'ae'),
        (RE_UAE, 'AE'),
    )
    for pattern, replacement in substitutions:
        text = pattern.sub(replacement, text)
    return text
|