ajout librairies awk

This commit is contained in:
Jephté Clain 2023-10-03 05:45:10 +04:00
parent eba69bea41
commit 5d86f60b11
6 changed files with 612 additions and 0 deletions

157
awk/base.array.awk Normal file
View File

@ -0,0 +1,157 @@
# -*- coding: utf-8 mode: awk -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
function mkindices(values, indices, i, j) {
array_new(indices)
j = 1
for (i in values) {
indices[j++] = int(i)
}
return asort(indices)
}
function array_new(dest) {
dest[0] = 0 # forcer awk à considérer dest comme un tableau
delete dest
}
function array_newsize(dest, size, i) {
dest[0] = 0 # forcer awk à considérer dest comme un tableau
delete dest
size = int(size)
for (i = 1; i <= size; i++) {
dest[i] = ""
}
}
function array_len(values, count, i) {
# length(array) a un bug sur awk 3.1.5
# cette version est plus lente mais fonctionne toujours
count = 0
for (i in values) {
count++
}
return count
}
function array_copy(dest, src, count, indices, i) {
array_new(dest)
count = mkindices(src, indices)
for (i = 1; i <= count; i++) {
dest[indices[i]] = src[indices[i]]
}
}
function array_getlastindex(src, count, indices) {
count = mkindices(src, indices)
if (count == 0) return 0
return indices[count]
}
function array_add(dest, value, lastindex) {
lastindex = array_getlastindex(dest)
dest[lastindex + 1] = value
}
function array_deli(dest, i, l) {
i = int(i)
if (i == 0) return
l = array_len(dest)
while (i < l) {
dest[i] = dest[i + 1]
i++
}
delete dest[l]
}
function array_del(dest, value, ignoreCase, i) {
do {
i = key_index(value, dest, ignoreCase)
if (i != 0) array_deli(dest, i)
} while (i != 0)
}
function array_extend(dest, src, count, lastindex, indices, i) {
lastindex = array_getlastindex(dest)
count = mkindices(src, indices)
for (i = 1; i <= count; i++) {
dest[lastindex + i] = src[indices[i]]
}
}
function array_fill(dest, i) {
array_new(dest)
for (i = 1; i <= NF; i++) {
dest[i] = $i
}
}
function array_getline(src, count, indices, i, j) {
$0 = ""
count = mkindices(src, indices)
for (i = 1; i <= count; i++) {
j = indices[i]
$j = src[j]
}
}
function array_appendline(src, count, indices, i, nf, j) {
count = mkindices(src, indices)
nf = NF
for (i = 1; i <= count; i++) {
j = nf + indices[i]
$j = src[indices[i]]
}
}
function in_array(value, values, ignoreCase, i) {
if (ignoreCase) {
value = tolower(value)
for (i in values) {
if (tolower(values[i]) == value) return 1
}
} else {
for (i in values) {
if (values[i] == value) return 1
}
}
return 0
}
function key_index(value, values, ignoreCase, i) {
if (ignoreCase) {
value = tolower(value)
for (i in values) {
if (tolower(values[i]) == value) return int(i)
}
} else {
for (i in values) {
if (values[i] == value) return int(i)
}
}
return 0
}
function array2s(values, prefix, sep, suffix, noindices, first, i, s) {
if (!prefix) prefix = "["
if (!sep) sep = ", "
if (!suffix) suffix = "]"
s = prefix
first = 1
for (i in values) {
if (first) first = 0
else s = s sep
if (!noindices) s = s "[" i "]="
s = s values[i]
}
s = s suffix
return s
}
function array2so(values, prefix, sep, suffix, noindices, count, indices, i, s) {
if (!prefix) prefix = "["
if (!sep) sep = ", "
if (!suffix) suffix = "]"
s = prefix
count = mkindices(values, indices)
for (i = 1; i <= count; i++) {
if (i > 1) s = s sep
if (!noindices) s = s "[" indices[i] "]="
s = s values[indices[i]]
}
s = s suffix
return s
}
function array_join(values, sep, prefix, suffix, count, indices, i, s) {
s = prefix
count = mkindices(values, indices)
for (i = 1; i <= count; i++) {
if (i > 1) s = s sep
s = s values[indices[i]]
}
s = s suffix
return s
}

4
awk/base.awk Normal file
View File

@ -0,0 +1,4 @@
# -*- coding: utf-8 mode: awk -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
@include "base.core.awk"
@include "base.array.awk"
@include "base.date.awk"

141
awk/base.core.awk Normal file
View File

@ -0,0 +1,141 @@
# -*- coding: utf-8 mode: awk -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
function num(s) {
if (s ~ /^[0-9]+$/) return int(s)
else return s
}
function ord(s, i) {
s = substr(s, 1, 1)
i = index(" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~", s)
if (i != 0) i += 32 - 1
return i
}
function hex(i, s) {
s = sprintf("%x", i)
if (length(s) < 2) s = "0" s
return s
}
function qhtml(s) {
gsub(/&/, "\\&amp;", s)
gsub(/"/, "\\&quot;", s)
gsub(/>/, "\\&gt;", s)
gsub(/</, "\\&lt;", s)
return s
}
function unquote_html(s) {
gsub(/&lt;/, "<", s)
gsub(/&gt;/, ">", s)
gsub(/&quot;/, "\"", s)
gsub(/&amp;/, "\\&", s)
return s
}
function qawk(s) {
gsub(/\\/, "\\\\", s)
gsub(/"/, "\\\"", s)
gsub(/\n/, "\\n", s)
return "\"" s "\""
}
function qval(s) {
gsub(/'/, "'\\''", s)
return "'" s "'"
}
function sqval(s) {
return " " qval(s)
}
function qvals( i, line) {
line = ""
for (i = 1; i <= NF; i++) {
if (i > 1) line = line " "
line = line qval($i)
}
return line
}
function sqvals() {
return " " qvals()
}
function qarr(values, prefix, i, count, line) {
line = prefix
count = array_len(values)
for (i = 1; i <= count; i++) {
if (i > 1 || line != "") line = line " "
line = line qval(values[i])
}
return line
}
function qregexp(s) {
gsub(/[[\\.^$*+?()|{]/, "\\\\&", s)
return s
}
function qsubrepl(s) {
gsub(/\\/, "\\\\", s)
gsub(/&/, "\\\\&", s)
return s
}
function qgrep(s) {
gsub(/[[\\.^$*]/, "\\\\&", s)
return s
}
function qegrep(s) {
gsub(/[[\\.^$*+?()|{]/, "\\\\&", s)
return s
}
function qsql(s, suffix) {
gsub(/'/, "''", s)
return "'" s "'" (suffix != ""? " " suffix: "")
}
function cqsql(s, suffix) {
return "," qsql(s, suffix)
}
function unquote_mysqlcsv(s) {
gsub(/\\n/, "\n", s)
gsub(/\\t/, "\t", s)
gsub(/\\0/, "\0", s)
gsub(/\\\\/, "\\", s)
return s
}
function sval(s) {
if (s == "") return s
else return " " s
}
function cval(s, suffix) {
suffix = suffix != ""? " " suffix: ""
if (s == "") return s
else return "," s suffix
}
function printto(s, output) {
if (output == "") {
print s
} else if (output ~ /^>>/) {
sub(/^>>/, "", output)
print s >>output
} else if (output ~ /^>/) {
sub(/^>/, "", output)
print s >output
} else if (output ~ /^\|&/) {
sub(/^\|&/, "", output)
print s |&output
} else if (output ~ /^\|/) {
sub(/^\|/, "", output)
print s |output
} else {
print s >output
}
}
function find_line(input, field, value, orig, line) {
orig = $0
line = ""
while ((getline <input) > 0) {
if ($field == value) {
line = $0
break
}
}
close(input)
$0 = orig
return line
}
function merge_line(input, field, key, line) {
line = find_line(input, field, $key)
if (line != "") $0 = $0 FS line
}

52
awk/base.date.awk Normal file
View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 mode: awk -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
function date__parse_fr(date, parts, y, m, d) {
if (match(date, /([0-9][0-9]?)\/([0-9][0-9]?)\/([0-9][0-9][0-9][0-9])/, parts)) {
y = int(parts[3])
m = int(parts[2])
d = int(parts[1])
return mktime(sprintf("%04i %02i %02i 00 00 00 +0400", y, m, d))
} else if (match(date, /([0-9][0-9]?)\/([0-9][0-9]?)\/([0-9][0-9])/, parts)) {
basey = int(strftime("%Y")); basey = basey - basey % 100
y = basey + int(parts[3])
m = int(parts[2])
d = int(parts[1])
return mktime(sprintf("%04i %02i %02i 00 00 00 +0400", y, m, d))
}
return -1
}
function date__parse_mysql(date, parts, y, m, d) {
if (match(date, /([0-9][0-9][0-9][0-9])-([0-9][0-9])-([0-9][0-9])/, parts)) {
y = int(parts[1])
m = int(parts[2])
d = int(parts[3])
return mktime(sprintf("%04i %02i %02i 00 00 00 +0400", y, m, d))
}
return -1
}
function date__parse_any(date, serial) {
serial = date__parse_fr(date)
if (serial == -1) serial = date__parse_mysql(date)
return serial
}
function date_serial(date) {
return date__parse_any(date)
}
function date_parse(date, serial) {
serial = date__parse_any(date)
if (serial == -1) return date
return strftime("%d/%m/%Y", serial)
}
function date_monday(date, serial, dow) {
serial = date__parse_any(date)
if (serial == -1) return date
dow = strftime("%u", serial)
serial -= (dow - 1) * 86400
return strftime("%d/%m/%Y", serial)
}
function date_add(date, nbdays, serial) {
serial = date__parse_any(date)
if (serial == -1) return date
serial += nbdays * 86400
return strftime("%d/%m/%Y", serial)
}

201
awk/csv.awk Normal file
View File

@ -0,0 +1,201 @@
# -*- coding: utf-8 mode: awk -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
@include "base.core.awk"
@include "base.array.awk"
function csv__parse_quoted(line, destl, colsep, qchar, echar, pos, tmpl, nextc, resl) {
line = substr(line, 2)
resl = ""
while (1) {
pos = index(line, qchar)
if (pos == 0) {
# chaine mal terminee
resl = resl line
destl[0] = ""
destl[1] = 0
return resl
}
if (echar != "" && pos > 1) {
# tenir compte du fait qu"un caratère peut être mis en échappement
prevc = substr(line, pos - 1, 1)
quotec = substr(line, pos, 1)
nextc = substr(line, pos + 1, 1)
if (prevc == echar) {
# qchar en échappement
tmpl = substr(line, 1, pos - 2)
resl = resl tmpl quotec
line = substr(line, pos + 1)
continue
}
tmpl = substr(line, 1, pos - 1)
if (nextc == colsep || nextc == "") {
# fin de champ ou fin de ligne
resl = resl tmpl
destl[0] = substr(line, pos + 2)
destl[1] = nextc == colsep
return resl
} else {
# erreur de syntaxe: guillemet non mis en échappement
# ignorer cette erreur et prendre le guillemet quand meme
resl = resl tmpl quotec
line = substr(line, pos + 1)
}
} else {
# pas d"échappement pour qchar. il est éventuellement doublé
tmpl = substr(line, 1, pos - 1)
quotec = substr(line, pos, 1)
nextc = substr(line, pos + 1, 1)
if (nextc == colsep || nextc == "") {
# fin de champ ou fin de ligne
resl = resl tmpl
destl[0] = substr(line, pos + 2)
destl[1] = nextc == colsep
return resl
} else if (nextc == qchar) {
# qchar en echappement
resl = resl tmpl quotec
line = substr(line, pos + 2)
} else {
# erreur de syntaxe: guillemet non mis en échappement
# ignorer cette erreur et prendre le guillemet quand meme
resl = resl tmpl quotec
line = substr(line, pos + 1)
}
}
}
}
function csv__parse_unquoted(line, destl, colsep, qchar, echar, pos) {
pos = index(line, colsep)
if (pos == 0) {
destl[0] = ""
destl[1] = 0
return line
} else {
destl[0] = substr(line, pos + 1)
destl[1] = 1
return substr(line, 1, pos - 1)
}
}
function csv__array_parse(fields, line, nbfields, colsep, qchar, echar, shouldparse, destl, i) {
array_new(fields)
array_new(destl)
i = 1
shouldparse = 0
# shouldparse permet de gérer le cas où un champ vide est en fin de ligne.
# en effet, après "," il faut toujours parser, même si line==""
while (shouldparse || line != "") {
if (index(line, qchar) == 1) {
value = csv__parse_quoted(line, destl, colsep, qchar, echar)
line = destl[0]
shouldparse = destl[1]
} else {
value = csv__parse_unquoted(line, destl, colsep, qchar, echar)
line = destl[0]
shouldparse = destl[1]
}
fields[i] = value
i = i + 1
}
if (nbfields) {
nbfields = int(nbfields)
i = array_len(fields)
while (i < nbfields) {
i++
fields[i] = ""
}
}
return array_len(fields)
}
BEGIN {
DEFAULT_COLSEP = ","
DEFAULT_QCHAR = "\""
DEFAULT_ECHAR = ""
}
function array_parsecsv2(fields, line, nbfields, colsep, qchar, echar) {
return csv__array_parse(fields, line, nbfields, colsep, qchar, echar)
}
function array_parsecsv(fields, line, nbfields, colsep, qchar, echar) {
if (colsep == "") colsep = DEFAULT_COLSEP
if (qchar == "") qchar = DEFAULT_QCHAR
if (echar == "") echar = DEFAULT_ECHAR
return csv__array_parse(fields, line, nbfields, colsep, qchar, echar)
}
function parsecsv(line, fields) {
array_parsecsv(fields, line)
array_getline(fields)
return NF
}
function getlinecsv(file, fields) {
if (file) {
getline <file
} else {
getline
}
return parsecsv($0)
}
function csv__should_quote(s) {
if (s ~ /^[[:blank:][:cntrl:][:space:]]/) return 1
if (s ~ /[[:blank:][:cntrl:][:space:]]$/) return 1
return 0
}
function array_formatcsv2(fields, colsep, mvsep, qchar, echar, count, indices, line, i, value) {
line = ""
count = mkindices(fields, indices)
for (i = 1; i <= count; i++) {
value = fields[indices[i]]
if (i > 1) line = line colsep
if (qchar != "" && index(value, qchar) != 0) {
if (echar != "") gsub(qchar, quote_subrepl(echar) "&", value);
else gsub(qchar, "&&", value);
}
if (qchar != "" && (index(value, mvsep) != 0 || index(value, colsep) != 0 || index(value, qchar) != 0 || csv__should_quote(value))) {
line = line qchar value qchar
} else {
line = line value
}
}
return line
}
function array_formatcsv(fields) {
return array_formatcsv2(fields, ",", ";", "\"", "")
}
function array_printcsv(fields, output) {
printto(array_formatcsv(fields), output)
}
function get_formatcsv( fields) {
array_fill(fields)
return array_formatcsv(fields)
}
function formatcsv() {
$0 = get_formatcsv()
}
function printcsv(output, fields) {
array_fill(fields)
array_printcsv(fields, output)
}
function array_findcsv(fields, input, field, value, nbfields, orig, found, i) {
array_new(orig)
array_fill(orig)
array_new(fields)
found = 0
while ((getline <input) > 0) {
array_parsecsv(fields, $0, nbfields)
if (fields[field] == value) {
found = 1
break
}
}
close(input)
array_getline(orig)
if (!found) {
delete fields
if (nbfields) {
nbfields = int(nbfields)
i = array_len(fields)
while (i < nbfields) {
i++
fields[i] = ""
}
}
}
return found
}

57
awk/enc.base64.awk Normal file
View File

@ -0,0 +1,57 @@
# -*- coding: utf-8 mode: awk -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
function base64__and(var, x, l_res, l_i) {
l_res = 0
for (l_i = 0; l_i < 8; l_i++) {
if (var%2 == 1 && x%2 == 1) l_res = l_res/2 + 128
else l_res /= 2
var = int(var/2)
x = int(x/2)
}
return l_res
}
# Rotate bytevalue left x times
function base64__lshift(var, x) {
while(x > 0) {
var *= 2
x--
}
return var
}
# Rotate bytevalue right x times
function base64__rshift(var, x) {
while(x > 0) {
var = int(var/2)
x--
}
return var
}
BEGIN {
BASE64__BYTES = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
}
function b64decode(src, result, base1, base2, base3, base4) {
result = ""
while (length(src) > 0) {
# Specify byte values
base1 = substr(src, 1, 1)
base2 = substr(src, 2, 1)
base3 = substr(src, 3, 1); if (base3 == "") base3 = "="
base4 = substr(src, 4, 1); if (base4 == "") base4 = "="
# Now find numerical position in BASE64 string
byte1 = index(BASE64__BYTES, base1) - 1
if (byte1 < 0) byte1 = 0
byte2 = index(BASE64__BYTES, base2) - 1
if (byte2 < 0) byte2 = 0
byte3 = index(BASE64__BYTES, base3) - 1
if (byte3 < 0) byte3 = 0
byte4 = index(BASE64__BYTES, base4) - 1
if (byte4 < 0) byte4 = 0
# Reconstruct ASCII string
result = result sprintf( "%c", base64__lshift(base64__and(byte1, 63), 2) + base64__rshift(base64__and(byte2, 48), 4) )
if (base3 != "=") result = result sprintf( "%c", base64__lshift(base64__and(byte2, 15), 4) + base64__rshift(base64__and(byte3, 60), 2) )
if (base4 != "=") result = result sprintf( "%c", base64__lshift(base64__and(byte3, 3), 6) + byte4 )
# Decrease incoming string with 4
src = substr(src, 5)
}
return result
}