diff --git a/awk/base.array.awk b/awk/base.array.awk
new file mode 100644
index 0000000..bd5ac32
--- /dev/null
+++ b/awk/base.array.awk
@@ -0,0 +1,157 @@
+# -*- coding: utf-8 mode: awk -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
+
+function mkindices(values, indices, i, j) { # fill indices with the keys of values (converted with int()), sorted; returns asort()'s result, used by callers as the key count
+    array_new(indices)
+    j = 1
+    for (i in values) {
+        indices[j++] = int(i) # keys are strings in awk; int() makes them sort numerically
+    }
+    return asort(indices)
+}
+function array_new(dest) { # make dest an empty array
+    dest[0] = 0 # force awk to treat dest as an array
+    delete dest
+}
+function array_newsize(dest, size, i) { # empty dest, then create dest[1..int(size)] set to ""
+    dest[0] = 0 # force awk to treat dest as an array
+    delete dest
+    size = int(size)
+    for (i = 1; i <= size; i++) {
+        dest[i] = ""
+    }
+}
+function array_len(values, count, i) { # return the number of elements of values
+    # length(array) is buggy on awk 3.1.5
+    # this version is slower but always works
+    count = 0
+    for (i in values) {
+        count++
+    }
+    return count
+}
+function array_copy(dest, src, count, indices, i) { # empty dest, then copy src[k] into dest[k] for every key k returned by mkindices(src)
+    array_new(dest)
+    count = mkindices(src, indices)
+    for (i = 1; i <= count; i++) {
+        dest[indices[i]] = src[indices[i]]
+    }
+}
+function array_getlastindex(src, count, indices) { # return the highest index of src, or 0 when src is empty
+    count = mkindices(src, indices)
+    if (count == 0) return 0
+    return indices[count] # indices is sorted, so the last entry is the highest
+}
+function array_add(dest, value, lastindex) { # store value just after the highest index of dest
+    lastindex = array_getlastindex(dest)
+    dest[lastindex + 1] = value
+}
+function array_deli(dest, i, l) { # remove element i by shifting the following elements down one slot; does nothing when int(i) is 0
+    i = int(i)
+    if (i == 0) return
+    l = array_len(dest) # assumes dest is indexed 1..n without gaps (the element count is used as last index) -- TODO confirm against callers
+    while (i < l) {
+        dest[i] = dest[i + 1]
+        i++
+    }
+    delete dest[l]
+}
+function array_del(dest, value, ignoreCase, i) { # remove every element of dest equal to value (see key_index for ignoreCase)
+    do {
+        i = key_index(value, dest, ignoreCase)
+        if (i != 0) array_deli(dest, i)
+    } while (i != 0) # loop until key_index no longer finds value
+}
+function array_extend(dest, src, count, lastindex, indices, i) { # append the values of src, in index order, after the highest index of dest
+    lastindex = array_getlastindex(dest)
+    count = mkindices(src, indices)
+    for (i = 1; i <= count; i++) {
+        dest[lastindex + i] = src[indices[i]]
+    }
+}
+function array_fill(dest, i) { # empty dest, then copy the fields $1..$NF of the current record into dest[1..NF]
+    array_new(dest)
+    for (i = 1; i <= NF; i++) {
+        dest[i] = $i
+    }
+}
+function array_getline(src, count, indices, i, j) { # rebuild the current record from src: clear $0, then set $j = src[j] for each index j
+    $0 = ""
+    count = mkindices(src, indices)
+    for (i = 1; i <= count; i++) {
+        j = indices[i]
+        $j = src[j]
+    }
+}
+function array_appendline(src, count, indices, i, nf, j) { # append the values of src as extra fields: src[k] goes to field NF + k
+    count = mkindices(src, indices)
+    nf = NF # remember NF now: assigning to $j below changes it
+    for (i = 1; i <= count; i++) {
+        j = nf + indices[i]
+        $j = src[indices[i]]
+    }
+}
+function in_array(value, values, ignoreCase, i) { # return 1 if an element of values equals value, else 0; both sides are lower-cased when ignoreCase is true
+    if (ignoreCase) {
+        value = tolower(value)
+        for (i in values) {
+            if (tolower(values[i]) == value) return 1
+        }
+    } else {
+        for (i in values) {
+            if (values[i] == value) return 1
+        }
+    }
+    return 0
+}
+function key_index(value, values, ignoreCase, i) { # return int(key) of an element equal to value, or 0 when there is none; with several matches the one returned depends on for-in order
+    if (ignoreCase) {
+        value = tolower(value)
+        for (i in values) {
+            if (tolower(values[i]) == value) return int(i)
+        }
+    } else {
+        for (i in values) {
+            if (values[i] == value) return int(i)
+        }
+    }
+    return 0
+}
+function array2s(values, prefix, sep, suffix, noindices, first, i, s) { # format values as a string in for-in order; each item is "[key]=value", or just the value when noindices is true
+    if (!prefix) prefix = "[" # a false prefix (empty or 0) selects the default
+    if (!sep) sep = ", "
+    if (!suffix) suffix = "]"
+    s = prefix
+    first = 1
+    for (i in values) {
+        if (first) first = 0
+        else s = s sep
+        if (!noindices) s = s "[" i "]="
+        s = s values[i]
+    }
+    s = s suffix
+    return s
+}
+function array2so(values, prefix, sep, suffix, noindices, count, indices, i, s) { # same output format as array2s, but items are emitted in sorted index order
+    if (!prefix) prefix = "["
+    if (!sep) sep = ", "
+    if (!suffix) suffix = "]"
+    s = prefix
+    count = mkindices(values, indices)
+    for (i = 1; i <= count; i++) {
+        if (i > 1) s = s sep
+        if (!noindices) s = s "[" indices[i] "]="
+        s = s values[indices[i]]
+    }
+    s = s suffix
+    return s
+}
+function array_join(values, sep, prefix, suffix, count, indices, i, s) { # return prefix, then the values in sorted index order separated by sep, then suffix; no defaults are applied
+    s = prefix
+    count = mkindices(values, indices)
+    for (i = 1; i <= count; i++) {
+        if (i > 1) s = s sep
+        s = s values[indices[i]]
+    }
+    s = s suffix
+    return s
+}
diff --git a/awk/base.awk b/awk/base.awk
new file mode 100644
index 0000000..ae59617
--- /dev/null
+++ b/awk/base.awk
@@ -0,0 +1,4 @@
+# -*- coding: utf-8 mode: awk -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
+@include "base.core.awk"
+@include "base.array.awk"
+@include "base.date.awk"
diff --git a/awk/base.core.awk b/awk/base.core.awk
new file mode 100644
index 0000000..49a4b58
--- /dev/null
+++ b/awk/base.core.awk
@@ -0,0 +1,141 @@
+# -*- coding: utf-8 mode: awk -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
+
+function num(s) { # return int(s) when s consists only of digits, otherwise s unchanged
+    if (s ~ /^[0-9]+$/) return int(s)
+    else return s
+}
+function ord(s, i) { # return the code (32..126) of the first character of s, or 0 when it is not in the table below
+    s = substr(s, 1, 1)
+    i = index(" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~", s)
+    if (i != 0) i += 32 - 1 # index() is 1-based and the table starts at the space character (code 32)
+    return i
+}
+function hex(i, s) { # return i formatted with "%x", left-padded with one "0" when shorter than 2 characters
+    s = sprintf("%x", i)
+    if (length(s) < 2) s = "0" s
+    return s
+}
+function qhtml(s) { # NOTE(review): the body below looks damaged in this copy (unbalanced quotes and regex; replacement entities and probably a second function header seem to be missing) -- restore from the original file before relying on it
+    gsub(/&/, "\\&", s)
+    gsub(/"/, "\\"", s)
+    gsub(/>/, "\\>", s)
+    gsub(/", s)
+    gsub(/"/, "\"", s)
+    gsub(/&/, "\\&", s)
+    return s
+}
+function qawk(s) { # return s between double quotes after the backslash, double-quote and newline substitutions below; presumably yields an awk string literal -- confirm
+    gsub(/\\/, "\\\\", s)
+    gsub(/"/, "\\\"", s)
+    gsub(/\n/, "\\n", s)
+    return "\"" s "\""
+}
+function qval(s) { # return s between single quotes, each embedded ' rewritten as '\''
+    gsub(/'/, "'\\''", s)
+    return "'" s "'"
+}
+function sqval(s) { # qval(s) preceded by a space
+    return " " qval(s)
+}
+function qvals( i, line) { # return qval() of every field of the current record, joined with single spaces
+    line = ""
+    for (i = 1; i <= NF; i++) {
+        if (i > 1) line = line " "
+        line = line qval($i)
+    }
+    return line
+}
+function sqvals() { # qvals() preceded by a space
+    return " " qvals()
+}
+function qarr(values, prefix, i, count, line) { # return prefix followed by qval(values[1..n]), separated by spaces; n is the element count of values
+    line = prefix
+    count = array_len(values)
+    for (i = 1; i <= count; i++) {
+        if (i > 1 || line != "") line = line " " # no leading space when prefix is empty
+        line = line qval(values[i])
+    }
+    return line
+}
+function qregexp(s) { # return s with a backslash inserted before each of [ \ . ^ $ * + ? ( ) | {
+    gsub(/[[\\.^$*+?()|{]/, "\\\\&", s)
+    return s
+}
+function qsubrepl(s) { # presumably prepares s for use as a sub()/gsub() replacement string (handles \ and &) -- confirm
+    gsub(/\\/, "\\\\", s)
+    gsub(/&/, "\\\\&", s)
+    return s
+}
+function qgrep(s) { # return s with a backslash inserted before each of [ \ . ^ $ *
+    gsub(/[[\\.^$*]/, "\\\\&", s)
+    return s
+}
+function qegrep(s) { # same character set and substitution as qregexp
+    gsub(/[[\\.^$*+?()|{]/, "\\\\&", s)
+    return s
+}
+function qsql(s, suffix) { # return s between single quotes with embedded quotes doubled, followed by " " suffix when suffix is not empty
+    gsub(/'/, "''", s)
+    return "'" s "'" (suffix != ""? " " suffix: "")
+}
+function cqsql(s, suffix) { # qsql(s, suffix) preceded by a comma
+    return "," qsql(s, suffix)
+}
+function unquote_mysqlcsv(s) { # replace the two-character sequences \n, \t, \0 and \\ in s, in that order
+    gsub(/\\n/, "\n", s) # NOTE(review): done before the \\\\ step, so an escaped backslash followed by n, t or 0 is rewritten too -- confirm this is acceptable
+    gsub(/\\t/, "\t", s)
+    gsub(/\\0/, "\0", s)
+    gsub(/\\\\/, "\\", s)
+    return s
+}
+function sval(s) { # return "" when s is empty, otherwise s preceded by a space
+    if (s == "") return s
+    else return " " s
+}
+function cval(s, suffix) { # return "" when s is empty, otherwise "," s followed by " " suffix when suffix is not empty
+    suffix = suffix != ""? " " suffix: ""
+    if (s == "") return s
+    else return "," s suffix
+}
+
+function printto(s, output) { # print s to the destination selected by the prefix of output
+    if (output == "") { # empty: standard output
+        print s
+    } else if (output ~ /^>>/) { # ">>file": append to file
+        sub(/^>>/, "", output)
+        print s >>output
+    } else if (output ~ /^>/) { # ">file": write to file
+        sub(/^>/, "", output)
+        print s >output
+    } else if (output ~ /^\|&/) { # "|&cmd": send to cmd through the |& operator
+        sub(/^\|&/, "", output)
+        print s |&output
+    } else if (output ~ /^\|/) { # "|cmd": pipe to cmd
+        sub(/^\|/, "", output)
+        print s |output
+    } else { # no prefix: output is a file name
+        print s >output
+    }
+}
+function find_line(input, field, value, orig, line) { # return the first record read from input whose $field equals value, or "" when none; NOTE(review): the getline condition below looks truncated in this copy -- restore from the original file
+    orig = $0 # saved because reading replaces the current record
+    line = ""
+    while ((getline 0) {
+        if ($field == value) {
+            line = $0
+            break
+        }
+    }
+    close(input) # so that the next call reads input from its beginning again
+    $0 = orig
+    return line
+}
+function merge_line(input, field, key, line) { # look up in input the record whose $field equals the current $key and, when found, append it to $0 after FS
+    line = find_line(input, field, $key)
+    if (line != "") $0 = $0 FS line
+}
diff --git a/awk/base.date.awk b/awk/base.date.awk
new file mode 100644
index 0000000..48e3eff
--- /dev/null
+++ b/awk/base.date.awk
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 mode: awk -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
+
+function date__parse_fr(date, parts, y, m, d) { # parse d/m/yyyy or d/m/yy found in date; return the mktime() result, or -1 when neither form matches
+    if (match(date, /([0-9][0-9]?)\/([0-9][0-9]?)\/([0-9][0-9][0-9][0-9])/, parts)) { # four-digit year is tried first
+        y = int(parts[3])
+        m = int(parts[2])
+        d = int(parts[1])
+        return mktime(sprintf("%04i %02i %02i 00 00 00 +0400", y, m, d)) # NOTE(review): "+0400" lands in mktime()'s optional trailing field -- confirm the intended meaning
+    } else if (match(date, /([0-9][0-9]?)\/([0-9][0-9]?)\/([0-9][0-9])/, parts)) {
+        basey = int(strftime("%Y")); basey = basey - basey % 100 # first year of the current century; NOTE(review): basey is not in the parameter list, so it is a global
+        y = basey + int(parts[3])
+        m = int(parts[2])
+        d = int(parts[1])
+        return mktime(sprintf("%04i %02i %02i 00 00 00 +0400", y, m, d))
+    }
+    return -1
+}
+function date__parse_mysql(date, parts, y, m, d) { # parse yyyy-mm-dd found in date; return the mktime() result, or -1 when it does not match
+    if (match(date, /([0-9][0-9][0-9][0-9])-([0-9][0-9])-([0-9][0-9])/, parts)) {
+        y = int(parts[1])
+        m = int(parts[2])
+        d = int(parts[3])
+        return mktime(sprintf("%04i %02i %02i 00 00 00 +0400", y, m, d))
+    }
+    return -1
+}
+function date__parse_any(date, serial) { # try date__parse_fr, then date__parse_mysql; return the timestamp or -1
+    serial = date__parse_fr(date)
+    if (serial == -1) serial = date__parse_mysql(date)
+    return serial
+}
+function date_serial(date) { # public wrapper around date__parse_any
+    return date__parse_any(date)
+}
+function date_parse(date, serial) { # return date reformatted as dd/mm/YYYY, or date unchanged when it cannot be parsed
+    serial = date__parse_any(date)
+    if (serial == -1) return date
+    return strftime("%d/%m/%Y", serial)
+}
+function date_monday(date, serial, dow) { # return, as dd/mm/YYYY, the Monday of the week containing date, or date unchanged when it cannot be parsed
+    serial = date__parse_any(date)
+    if (serial == -1) return date
+    dow = strftime("%u", serial) # weekday number, 1 = Monday .. 7 = Sunday
+    serial -= (dow - 1) * 86400 # go back (dow - 1) days of 86400 seconds
+    return strftime("%d/%m/%Y", serial)
+}
+function date_add(date, nbdays, serial) { # return, as dd/mm/YYYY, date shifted by nbdays days of 86400 seconds, or date unchanged when it cannot be parsed
+    serial = date__parse_any(date)
+    if (serial == -1) return date
+    serial += nbdays * 86400
+    return strftime("%d/%m/%Y", serial)
+}
diff --git a/awk/csv.awk b/awk/csv.awk
new file mode 100644
index 0000000..c58e41b
--- /dev/null
+++ b/awk/csv.awk
@@ -0,0 +1,201 @@
+# -*- coding: utf-8 mode: awk -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
+@include "base.core.awk"
+@include "base.array.awk"
+
+function csv__parse_quoted(line, destl, colsep, qchar, echar, pos, tmpl, nextc, resl) { # parse the quoted field at the start of line; return its unquoted value, set destl[0] to the rest of the line and destl[1] to 1 when a separator followed the field; NOTE(review): prevc and quotec are not in the parameter list, so they are globals
+    line = substr(line, 2) # skip the opening quote
+    resl = ""
+    while (1) {
+        pos = index(line, qchar)
+        if (pos == 0) {
+            # unterminated string
+            resl = resl line
+            destl[0] = ""
+            destl[1] = 0
+            return resl
+        }
+        if (echar != "" && pos > 1) {
+            # take into account that the character may be escaped
+            prevc = substr(line, pos - 1, 1)
+            quotec = substr(line, pos, 1)
+            nextc = substr(line, pos + 1, 1)
+            if (prevc == echar) {
+                # escaped qchar
+                tmpl = substr(line, 1, pos - 2) # text before the escape character
+                resl = resl tmpl quotec
+                line = substr(line, pos + 1)
+                continue
+            }
+            tmpl = substr(line, 1, pos - 1)
+            if (nextc == colsep || nextc == "") {
+                # end of field or end of line
+                resl = resl tmpl
+                destl[0] = substr(line, pos + 2) # skip the closing quote and the separator
+                destl[1] = nextc == colsep
+                return resl
+            } else {
+                # syntax error: unescaped quote
+                # ignore the error and keep the quote anyway
+                resl = resl tmpl quotec
+                line = substr(line, pos + 1)
+            }
+        } else {
+            # no escape character applies to this qchar; it may be doubled instead
+            tmpl = substr(line, 1, pos - 1)
+            quotec = substr(line, pos, 1)
+            nextc = substr(line, pos + 1, 1)
+            if (nextc == colsep || nextc == "") {
+                # end of field or end of line
+                resl = resl tmpl
+                destl[0] = substr(line, pos + 2)
+                destl[1] = nextc == colsep
+                return resl
+            } else if (nextc == qchar) {
+                # escaped (doubled) qchar
+                resl = resl tmpl quotec
+                line = substr(line, pos + 2)
+            } else {
+                # syntax error: unescaped quote
+                # ignore the error and keep the quote anyway
+                resl = resl tmpl quotec
+                line = substr(line, pos + 1)
+            }
+        }
+    }
+}
+function csv__parse_unquoted(line, destl, colsep, qchar, echar, pos) { # return the text before the first colsep (the whole line when there is none); destl[0] gets the rest, destl[1] is 1 when a separator was found
+    pos = index(line, colsep)
+    if (pos == 0) {
+        destl[0] = ""
+        destl[1] = 0
+        return line
+    } else {
+        destl[0] = substr(line, pos + 1)
+        destl[1] = 1
+        return substr(line, 1, pos - 1)
+    }
+}
+function csv__array_parse(fields, line, nbfields, colsep, qchar, echar, shouldparse, destl, i) { # split line into fields[1..n]; when nbfields is set, pad with "" up to int(nbfields) fields; return the number of fields; NOTE(review): value is not in the parameter list, so it is a global
+    array_new(fields)
+    array_new(destl)
+    i = 1
+    shouldparse = 0
+    # shouldparse handles the case of an empty field at the end of the line:
+    # after a "," we must always parse once more, even when line==""
+    while (shouldparse || line != "") {
+        if (index(line, qchar) == 1) { # the field starts with the quote character
+            value = csv__parse_quoted(line, destl, colsep, qchar, echar)
+            line = destl[0]
+            shouldparse = destl[1]
+        } else {
+            value = csv__parse_unquoted(line, destl, colsep, qchar, echar)
+            line = destl[0]
+            shouldparse = destl[1]
+        }
+        fields[i] = value
+        i = i + 1
+    }
+    if (nbfields) {
+        nbfields = int(nbfields)
+        i = array_len(fields)
+        while (i < nbfields) {
+            i++
+            fields[i] = ""
+        }
+    }
+    return array_len(fields)
+}
+BEGIN { # defaults applied by array_parsecsv when an argument is empty
+    DEFAULT_COLSEP = ","
+    DEFAULT_QCHAR = "\""
+    DEFAULT_ECHAR = ""
+}
+function array_parsecsv2(fields, line, nbfields, colsep, qchar, echar) { # parse line into fields using the given characters exactly as passed (no defaults applied)
+    return csv__array_parse(fields, line, nbfields, colsep, qchar, echar)
+}
+function array_parsecsv(fields, line, nbfields, colsep, qchar, echar) { # parse line into fields, replacing empty colsep/qchar/echar with the DEFAULT_* values
+    if (colsep == "") colsep = DEFAULT_COLSEP
+    if (qchar == "") qchar = DEFAULT_QCHAR
+    if (echar == "") echar = DEFAULT_ECHAR
+    return csv__array_parse(fields, line, nbfields, colsep, qchar, echar)
+}
+function parsecsv(line, fields) { # parse line with the default characters and load the result into the current record; return NF
+    array_parsecsv(fields, line)
+    array_getline(fields)
+    return NF
+}
+function getlinecsv(file, fields) { # NOTE(review): text is missing in this copy from the getline below up to the middle of another function (the remaining lines call csv__should_quote() and match the array_formatcsv2() called later) -- restore from the original file
+    if (file) {
+        getline 1) line = line colsep
+        if (qchar != "" && index(value, qchar) != 0) { # value contains the quote character: prefix it with echar, or double it when echar is empty
+            if (echar != "") gsub(qchar, quote_subrepl(echar) "&", value); # NOTE(review): quote_subrepl() is not defined in the visible files (base.core.awk defines qsubrepl) -- confirm
+            else gsub(qchar, "&&", value);
+        }
+        if (qchar != "" && (index(value, mvsep) != 0 || index(value, colsep) != 0 || index(value, qchar) != 0 || csv__should_quote(value))) { # quote when value contains mvsep, colsep or qchar, or when csv__should_quote() says so
+            line = line qchar value qchar
+        } else {
+            line = line value
+        }
+    }
+    return line
+}
+function array_formatcsv(fields) { # format fields as one CSV line through array_formatcsv2; the arguments are presumably colsep, mvsep, qchar, echar -- confirm against its definition
+    return array_formatcsv2(fields, ",", ";", "\"", "")
+}
+function array_printcsv(fields, output) { # print the CSV line built from fields to output (see printto)
+    printto(array_formatcsv(fields), output)
+}
+function get_formatcsv( fields) { # return the current record formatted as a CSV line
+    array_fill(fields)
+    return array_formatcsv(fields)
+}
+function formatcsv() { # replace the current record with its CSV formatting
+    $0 = get_formatcsv()
+}
+function printcsv(output, fields) { # print the current record as a CSV line to output
+    array_fill(fields)
+    array_printcsv(fields, output)
+}
+function array_findcsv(fields, input, field, value, nbfields, orig, found, i) { # read CSV lines from input until fields[field] equals value; return 1 with the parsed line in fields, or 0 with fields emptied and padded to nbfields; NOTE(review): the getline condition below looks truncated in this copy -- restore from the original file
+    array_new(orig)
+    array_fill(orig) # save the current record, which is restored after reading
+    array_new(fields)
+    found = 0
+    while ((getline 0) {
+        array_parsecsv(fields, $0, nbfields)
+        if (fields[field] == value) {
+            found = 1
+            break
+        }
+    }
+    close(input)
+    array_getline(orig)
+    if (!found) {
+        delete fields
+        if (nbfields) {
+            nbfields = int(nbfields)
+            i = array_len(fields)
+            while (i < nbfields) {
+                i++
+                fields[i] = ""
+            }
+        }
+    }
+    return found
+}
diff --git a/awk/enc.base64.awk b/awk/enc.base64.awk
new file mode 100644
index 0000000..3ce38e2
--- /dev/null
+++ b/awk/enc.base64.awk
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 mode: awk -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
+
+function base64__and(var, x, l_res, l_i) { # return the bitwise AND of the low 8 bits of var and x, computed arithmetically
+    l_res = 0
+    for (l_i = 0; l_i < 8; l_i++) { # bits are consumed from the lowest upwards
+        if (var%2 == 1 && x%2 == 1) l_res = l_res/2 + 128 # insert a 1 at the top; later halvings bring it down to its final position
+        else l_res /= 2
+        var = int(var/2)
+        x = int(x/2)
+    }
+    return l_res
+}
+# Shift var left x times (var is doubled each time; nothing wraps around)
+function base64__lshift(var, x) {
+    while(x > 0) {
+        var *= 2
+        x--
+    }
+    return var
+}
+# Shift var right x times (integer halving each time; nothing wraps around)
+function base64__rshift(var, x) {
+    while(x > 0) {
+        var = int(var/2)
+        x--
+    }
+    return var
+}
+BEGIN { # alphabet used by b64decode: position - 1 is the 6-bit value of each character
+    BASE64__BYTES = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+}
+function b64decode(src, result, base1, base2, base3, base4) { # decode src four characters at a time and return the decoded string; NOTE(review): byte1..byte4 are not in the parameter list, so they are globals
+    result = ""
+    while (length(src) > 0) {
+        # Take the next four characters; a missing 3rd or 4th one is treated as "=" padding
+        base1 = substr(src, 1, 1)
+        base2 = substr(src, 2, 1)
+        base3 = substr(src, 3, 1); if (base3 == "") base3 = "="
+        base4 = substr(src, 4, 1); if (base4 == "") base4 = "="
+        # Find the 6-bit value of each character in BASE64__BYTES (characters not found count as 0)
+        byte1 = index(BASE64__BYTES, base1) - 1
+        if (byte1 < 0) byte1 = 0
+        byte2 = index(BASE64__BYTES, base2) - 1
+        if (byte2 < 0) byte2 = 0
+        byte3 = index(BASE64__BYTES, base3) - 1
+        if (byte3 < 0) byte3 = 0
+        byte4 = index(BASE64__BYTES, base4) - 1
+        if (byte4 < 0) byte4 = 0
+        # Rebuild up to three output characters; the 2nd and 3rd are skipped on "=" padding
+        result = result sprintf( "%c", base64__lshift(base64__and(byte1, 63), 2) + base64__rshift(base64__and(byte2, 48), 4) )
+        if (base3 != "=") result = result sprintf( "%c", base64__lshift(base64__and(byte2, 15), 4) + base64__rshift(base64__and(byte3, 60), 2) )
+        if (base4 != "=") result = result sprintf( "%c", base64__lshift(base64__and(byte3, 3), 6) + byte4 )
+        # Drop the four characters just processed
+        src = substr(src, 5)
+    }
+    return result
+}