nutools/lib/b36sha1.py

#!/usr/bin/env python2
# -*- coding: utf-8 mode: python -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8

u"""%(scriptname)s: afficher le hash SHA-1 d'un fichier exprimé en base 36

USAGE
    %(scriptname)s [options]

OPTIONS
    -s, --skip
        Ignorer les premières lignes modeline + empty. Le dernier saut de ligne
        est toujours ignoré pour compatibilité avec le code PHP de MediaWiki.
    -f, --input INPUT
        Lire INPUT au lieu de STDIN
    --mediawiki MWDIR
        Mode de fonctionnement pour le script mediawiki. Une liste de fichiers
        est lu sur STDIN, et pour chacun de ces fichiers afficher une ligne de
        la forme 'name,path,hash,' où name est le nom du fichier, path son
        chemin relativement à MWDIR, et hash le hash SHA-1 exprimé en base 36."""

import os, sys, re, csv
from os import path

try:
    from hashlib import sha1
except ImportError:
    from sha import new
    sha1 = new

from ulib.base.base import strip_nl
from ulib.base.args import build_options, get_args

DIGIT_CHARS = '0123456789abcdefghijklmnopqrstuvwxyz'
def hex2b36(input):
    """Convert a hexadecimal string to base 36, left-padded to 31 characters.

    Every character of ``input`` (case-insensitive) is looked up in
    DIGIT_CHARS and its position is used as a base-16 digit.  A character
    that is absent from DIGIT_CHARS raises ValueError.  Letters beyond 'f'
    are not rejected: their DIGIT_CHARS position is used as the digit value.

    The result is padded on the left with '0' up to 31 characters; longer
    results are returned unpadded and untruncated.  An empty input yields
    31 zeros.
    """
    # Accumulate the numeric value, most significant digit first.
    value = 0
    for char in input.lower():
        value = value * 16 + DIGIT_CHARS.index(char)

    # Peel off base-36 digits, least significant first, then restore order.
    encoded = []
    while value:
        value, remainder = divmod(value, 36)
        encoded.append(DIGIT_CHARS[remainder])
    encoded.reverse()

    return ''.join(encoded).rjust(31, '0')

RE_MODELINE = re.compile(r'^#')
RE_BLANKLINE = re.compile(r'^$')
def get_digest(inf, skip=False):
    """Return the base-36 SHA-1 digest of the lines read from ``inf``.

    ``inf`` only needs a ``readline()`` method that returns '' at end of
    input.  The terminator of a line is fed to the hash only once a
    following line has been read, so the terminator of the very last line
    is never hashed.

    When ``skip`` is true, the leading run of lines starting with '#' is
    left out of the hash, and so is the run of empty lines that directly
    follows it.

    NOTE(review): strip_nl is assumed to return the line without its
    trailing line terminator -- confirm against ulib.base.base.
    """
    # Parsing state: 'modeline' -> 'blank' -> 'body'.  Without skip we start
    # directly in 'body', so nothing is ever dropped.
    if skip:
        state = 'modeline'
    else:
        state = 'body'

    hasher = sha1()
    pending_nl = None
    for raw in iter(inf.readline, ''):
        # Flush the terminator held back from the previous hashed line.
        if pending_nl is not None:
            hasher.update(pending_nl)
        content = strip_nl(raw)
        pending_nl = raw[len(content):]

        if state == 'modeline':
            if RE_MODELINE.match(content):
                # Dropped line: its terminator must not be hashed either.
                pending_nl = None
                continue
            state = 'blank'
        if state == 'blank':
            if RE_BLANKLINE.match(content):
                pending_nl = None
                continue
            state = 'body'

        hasher.update(content)

    return hex2b36(hasher.hexdigest())

HEADERS = ['title', 'path', 'hash', 'doublon']
def do_mediawiki(inf, mwdir, skip=False, outf=None):
    """Write one CSV row per file listed on ``inf``.

    ``inf`` supplies one file path per line.  After a header row (HEADERS),
    each file produces a row made of:

    * the file's base name without its extension,
    * its path with the ``mwdir`` prefix removed (the path is kept unchanged
      when it does not start with ``mwdir``),
    * the base-36 SHA-1 digest computed by get_digest(..., skip),
    * an empty last column.

    Rows are written to ``outf`` (sys.stdout when None), terminated by '\\n'.
    """
    if not mwdir.endswith('/'):
        mwdir = mwdir + '/'
    if outf is None:
        outf = sys.stdout
    writer = csv.writer(outf, lineterminator='\n')
    writer.writerow(HEADERS)

    for entry in iter(inf.readline, ''):
        filename = strip_nl(entry)

        # Path relative to mwdir when the file lives below it.
        relpath = filename
        if filename.startswith(mwdir):
            relpath = filename[len(mwdir):]
        title = path.splitext(path.basename(relpath))[0]

        handle = open(filename, 'rb')
        try:
            digest = get_digest(handle, skip)
        finally:
            handle.close()

        writer.writerow([title, relpath, digest, ''])

def display_help():
    """Print the module usage text on standard output.

    The module docstring contains a ``%(scriptname)s`` placeholder, which is
    filled with the base name of the running script (sys.argv[0]).

    Only names available in this file are used: the previous implementation
    called ``uprint`` and looked up ``scriptname`` in globals(), neither of
    which is defined or imported here.
    """
    text = __doc__ % {'scriptname': path.basename(sys.argv[0])}
    if not isinstance(text, str):
        # Python 2: the docstring is a unicode object.  Encode it explicitly
        # so accented characters still print when stdout has no encoding
        # (for instance when the output is piped).
        text = text.encode(getattr(sys.stdout, 'encoding', None) or 'utf-8')
    sys.stdout.write(text)
    sys.stdout.write('\n')

def run_b36sha1():
    """Command-line entry point: parse the options and print the result.

    Without --mediawiki, the base-36 SHA-1 digest of the input is printed.
    With --mediawiki, the input is handed to do_mediawiki as a list of
    files.  The input is sys.stdin unless -f/--input names a file other
    than '-'; a file opened here is always closed before returning.
    """
    shortopts, longopts = build_options([
        ('h', 'help', "Afficher l'aide"),
        ('s', 'skip', "Ignorer les premières lignes et le dernier saut de ligne"),
        ('f:', 'input=', "Spécifier un fichier à lire"),
        ('m:', 'mediawiki=', "Support pour le script mediawiki"),
        ])
    parsed, _remaining = get_args(None, shortopts, longopts)

    skip = False
    inputfile = None
    mwdir = None
    for name, value in parsed:
        if name in ('-h', '--help'):
            display_help()
            sys.exit(0)
        elif name in ('-s', '--skip'):
            skip = True
        elif name in ('-f', '--input'):
            inputfile = value
        elif name in ('-m', '--mediawiki'):
            mwdir = value

    # Only a file opened here is ours to close; stdin is left alone.
    use_stdin = inputfile is None or inputfile == '-'
    if use_stdin:
        inf = sys.stdin
    else:
        inf = open(inputfile, 'rb')

    try:
        if mwdir is None:
            print(get_digest(inf, skip))
        else:
            do_mediawiki(inf, mwdir, skip)
    finally:
        if not use_stdin:
            inf.close()

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    run_b36sha1()