diff --git a/lib/pyulib/src/ulib/base/uio.py b/lib/pyulib/src/ulib/base/uio.py
index 41c6f0f..d48ead0 100644
--- a/lib/pyulib/src/ulib/base/uio.py
+++ b/lib/pyulib/src/ulib/base/uio.py
@@ -7,8 +7,11 @@ import i_need_py23
 __all__ = ('UnicodeIO', 'EditorIO', 'Utf8IO', 'Latin1IO', 'Latin9IO',
            'defaultIO',
            '_u', '_s', 'uinput', 'uprint',
+           '_son', '_uon',
+           'latin1compat',
            )
 
+import re
 from sys import stdout, stderr
 from types import StringType, UnicodeType, StringTypes
 
@@ -98,3 +101,43 @@ def _u(u, encoding=None, errors=None): return defaultIO.u(u, encoding, errors)
 def _s(s, encoding=None): return defaultIO.s(s, encoding)
 def uinput(errors='ignore', encoding=None): return defaultIO.uinput(errors, encoding)
 def uprint(s, nl=True, flush=None, encoding=None, out=None): defaultIO.uprint(s, nl, flush, encoding, out)
+
+def _son(s, encoding=None):
+    """Return None if s is None, otherwise _s(s, encoding)."""
+    if s is None: return None
+    else: return _s(s, encoding)
+def _uon(u, encoding=None, errors=None):
+    """Return None if u is None, otherwise _u(u, encoding, errors)."""
+    if u is None: return None
+    else: return _u(u, encoding, errors)
+
+# Patterns used by latin1compat(); each one matches the characters that are
+# replaced by a single given ASCII string.
+RE_SQUOTE = re.compile(ur'[‘’]')
+RE_DQUOTE = re.compile(ur'[«»“”]')
+# U+00A0 no-break space, U+2007 figure space, U+202F narrow no-break space and
+# U+2060 word joiner. Use \u escapes in a plain unicode literal: spelling the
+# UTF-8 bytes as \xNN in a raw string leaves the escapes for re to interpret,
+# and re reads each \xNN as a separate code point (U+00C2, U+00E2, ...), so
+# accented letters such as a-circumflex would be turned into spaces.
+RE_SPACE = re.compile(u'[\u00A0\u2007\u202F\u2060]')
+RE_LOE = re.compile(ur'[œ]')
+RE_UOE = re.compile(ur'[Œ]')
+RE_LAE = re.compile(ur'[æ]')
+RE_UAE = re.compile(ur'[Æ]')
+def latin1compat(u):
+    """Replace some unicode characters of the string u with equivalents that
+    can be converted to latin1.
+
+    Curly single quotes become a straight single quote; guillemets and curly
+    double quotes become a straight double quote; the characters matched by
+    RE_SPACE become a plain space; the oe, OE, ae and AE ligatures are
+    expanded to two plain letters. Returns the resulting string.
+    """
+    u = RE_SQUOTE.sub("'", u)
+    u = RE_DQUOTE.sub('"', u)
+    u = RE_SPACE.sub(' ', u)
+    u = RE_LOE.sub('oe', u)
+    u = RE_UOE.sub('OE', u)
+    u = RE_LAE.sub('ae', u)
+    u = RE_UAE.sub('AE', u)
+    return u