nutools/lib/nulib/python/nulib/uio.py

131 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
"""Des fonctions pour gérer les entrées/sorties en unicode.
"""
# Names exported by "from ... import *".
__all__ = (
    'UnicodeIO',
    'EditorIO', 'Utf8IO', 'Latin1IO', 'Latin9IO',
    'defaultIO', '_u', '_s', 'uinput', 'uprint',
    '_son', '_uon',
    'latin1compat',
)
import re
from sys import stdout, stderr
from types import StringType, UnicodeType, StringTypes
from .base import make_prop, isstr
from .encoding import LATIN1, LATIN9, UTF8
from .env import get_input_encoding, get_output_encoding, get_editor_encoding
# Symbolic stream names mapped to the stream objects imported from sys.
# Each stream is reachable under a long and a short name.
OUTS = dict(
    stdout=stdout, out=stdout,
    stderr=stderr, err=stderr,
)
class UnicodeIO(object):
    """Convert between byte strings and unicode objects for text I/O.

    An encoding passed explicitly to a method wins. Otherwise the instance's
    _encoding is used, and when that is None as well, the value returned by
    _get_input_encoding() (for decoding) or _get_output_encoding() (for
    encoding) is used.
    """

    # NOTE(review): make_prop comes from .base and is not visible here; it
    # presumably returns (default value, read-only property, getter) -- confirm.
    _encoding, encoding, get_encoding = make_prop('_encoding', None, setter=False)
    _errors, errors, get_errors = make_prop('_errors', 'strict', setter=False)

    def __init__(self, encoding=None, errors=None):
        """Store encoding and errors on the instance when they are not None."""
        if encoding is not None:
            self._encoding = encoding
        if errors is not None:
            self._errors = errors

    def _get_input_encoding(self):
        """Return the fallback encoding used by u() to decode."""
        return get_input_encoding()

    def u(self, u, encoding=None, errors=None):
        """Convert u to unicode using the given encoding.

        encoding defaults to the instance encoding, then to
        _get_input_encoding(); errors defaults to the instance error policy.
        A unicode object (or an instance of a unicode subclass) is returned
        unchanged. A byte string is decoded. Any other object is first
        converted with str() and then decoded.
        """
        if encoding is None:
            encoding = self._encoding
        if encoding is None:
            encoding = self._get_input_encoding()
        if errors is None:
            errors = self._errors
        # isinstance rather than an exact type() comparison: a unicode
        # subclass must not be pushed through str(), which would encode it
        # with the default codec and fail on non-ASCII text.
        if isinstance(u, UnicodeType):
            return u
        if not isinstance(u, StringTypes):
            u = str(u)
        return unicode(u, encoding, errors)

    def _get_output_encoding(self):
        """Return the fallback encoding used by s() to encode."""
        return get_output_encoding()

    def s(self, s, encoding=None):
        """Convert s to a byte string in the given encoding.

        encoding defaults to the instance encoding, then to
        _get_output_encoding(). A unicode object is encoded, a byte string is
        returned unchanged, and any other object is converted with str().
        """
        if encoding is None:
            encoding = self._encoding
        if encoding is None:
            encoding = self._get_output_encoding()
        # Same reasoning as in u(): unicode subclasses must be encoded with
        # the requested encoding, not implicitly through str().
        if isinstance(s, UnicodeType):
            return s.encode(encoding)
        if isinstance(s, StringType):
            return s
        return str(s)

    def uinput(self, errors='ignore', encoding=None):
        """Read a line with raw_input() and decode it with u().

        Note that errors defaults to 'ignore' here, not to the instance
        error policy.
        """
        return self.u(raw_input(), encoding, errors)

    def uprint(self, s, nl=True, flush=None, encoding=None, out=None):
        """Write s, encoded with s(), to an output stream.

        nl -- when true, write a newline after s.
        flush -- when true, flush the stream afterwards; None means "same
            value as nl".
        encoding -- forwarded to s().
        out -- None for the stdout object imported by this module, a key of
            OUTS (looked up lowercased; an unknown key raises KeyError), or
            any object providing write() and flush().
        """
        if flush is None:
            flush = nl
        if out is None:
            out = stdout
        elif isstr(out):
            # NOTE(review): isstr comes from .base; presumably true for
            # str/unicode values -- confirm.
            out = OUTS[out.lower()]
        out.write(self.s(s, encoding))
        if nl:
            out.write("\n")
        if flush:
            out.flush()
class EditorIO(UnicodeIO):
    """UnicodeIO whose fallback encoding, in both directions, is the one
    returned by get_editor_encoding().
    """

    def _get_input_encoding(self):
        """Return the editor encoding as the decoding fallback."""
        return get_editor_encoding()

    def _get_output_encoding(self):
        """Return the editor encoding as the encoding fallback."""
        return get_editor_encoding()
class Utf8IO(UnicodeIO):
    """UnicodeIO with the encoding fixed to UTF8."""

    def __init__(self, errors=None):
        UnicodeIO.__init__(self, encoding=UTF8, errors=errors)
class Latin1IO(UnicodeIO):
    """UnicodeIO with the encoding fixed to LATIN1."""

    def __init__(self, errors=None):
        UnicodeIO.__init__(self, encoding=LATIN1, errors=errors)
class Latin9IO(UnicodeIO):
    """UnicodeIO with the encoding fixed to LATIN9."""

    def __init__(self, errors=None):
        UnicodeIO.__init__(self, encoding=LATIN9, errors=errors)
# Module-wide IO object that the _u/_s/uinput/uprint functions delegate to.
defaultIO = UnicodeIO()


def set_defaultIO(defio):
    """Make defio the object the module-level helper functions delegate to."""
    global defaultIO
    defaultIO = defio
def _u(u, encoding=None, errors=None):
    """Convert u to unicode by delegating to defaultIO.u()."""
    return defaultIO.u(u, encoding, errors)
def _s(s, encoding=None):
    """Convert s to a byte string by delegating to defaultIO.s()."""
    return defaultIO.s(s, encoding)
def uinput(errors='ignore', encoding=None):
    """Read and decode a line of input by delegating to defaultIO.uinput()."""
    return defaultIO.uinput(errors, encoding)
def uprint(s, nl=True, flush=None, encoding=None, out=None):
    """Encode and write s by delegating to defaultIO.uprint()."""
    defaultIO.uprint(s, nl, flush, encoding, out)
def _son(s, encoding=None):
    """Return _s(s, encoding), except that None is passed through as None."""
    return None if s is None else _s(s, encoding)
def _uon(u, encoding=None, errors=None):
    """Return _u(u, encoding, errors), except that None is passed through
    as None.
    """
    return None if u is None else _u(u, encoding, errors)
# Patterns used by latin1compat(). Every character is written as an explicit
# \uXXXX escape so that the patterns do not depend on how this file is
# encoded, displayed or copied.

# Typographic single quotes: U+2018, U+2019.
# NOTE(review): the literal characters of this class were not visible in the
# reviewed copy of the file; these two were chosen as the single-quote
# counterparts of RE_DQUOTE -- confirm against the original.
RE_SQUOTE = re.compile(u'[\u2018\u2019]')
# Double quotes: guillemets U+00AB, U+00BB and curly quotes U+201C, U+201D.
RE_DQUOTE = re.compile(u'[\u00ab\u00bb\u201c\u201d]')
# Special spaces: U+00A0, U+2007, U+202F, U+2060. These must be code points,
# not the \xHH bytes of their UTF-8 encoding: inside a character class each
# \xHH escape is a separate character, which made the class match U+00C2 and
# U+00E2 (A/a with circumflex) instead of the intended spaces.
RE_SPACE = re.compile(u'[\u00a0\u2007\u202f\u2060]')
# Ligatures: oe U+0153, OE U+0152, ae U+00E6, AE U+00C6.
RE_LOE = re.compile(u'[\u0153]')
RE_UOE = re.compile(u'[\u0152]')
RE_LAE = re.compile(u'[\u00e6]')
RE_UAE = re.compile(u'[\u00c6]')
def latin1compat(u):
    """Replace some unicode characters of u with equivalents that can be
    encoded in latin1.

    u is first converted with _u(). The substitutions are then applied in
    order: RE_SQUOTE and RE_DQUOTE matches become ASCII quotes, RE_SPACE
    matches become a plain space, and the oe/ae ligatures are spelled out.
    """
    substitutions = (
        (RE_SQUOTE, "'"),
        (RE_DQUOTE, '"'),
        (RE_SPACE, ' '),
        (RE_LOE, 'oe'),
        (RE_UOE, 'OE'),
        (RE_LAE, 'ae'),
        (RE_UAE, 'AE'),
    )
    text = _u(u)
    for pattern, replacement in substitutions:
        text = pattern.sub(replacement, text)
    return text