202 lines
5.6 KiB
Plaintext
202 lines
5.6 KiB
Plaintext
|
# -*- coding: utf-8 mode: awk -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
|
||
|
@include "base.core"
|
||
|
@include "base.array"
|
||
|
|
||
|
# Parse one quoted CSV field sitting at the start of line.
#
# line   -- remaining input; its first character is taken to be the opening
#           quote and is dropped without being checked
# destl  -- output array: destl[0] receives the text left after the field
#           (and after the separator that follows it, if any); destl[1] is 1
#           when the field was followed by colsep, 0 otherwise
# colsep -- column separator; it is compared against a single character
# qchar  -- quote character
# echar  -- escape character; when empty, a quote is escaped by doubling it
#
# Returns the field value with the surrounding quotes removed.
# Every name after echar is a local. prevc and quotec are declared there so
# that calling this function does not overwrite global variables.
function csv__parse_quoted(line, destl, colsep, qchar, echar,    pos, tmpl, nextc, resl, prevc, quotec) {
    line = substr(line, 2)
    resl = ""
    while (1) {
        pos = index(line, qchar)
        if (pos == 0) {
            # unterminated string: take everything that is left as the value
            resl = resl line
            destl[0] = ""
            destl[1] = 0
            return resl
        }
        if (echar != "" && pos > 1) {
            # a quote may be escaped by the character that precedes it
            prevc = substr(line, pos - 1, 1)
            quotec = substr(line, pos, 1)
            nextc = substr(line, pos + 1, 1)
            if (prevc == echar) {
                # escaped quote: drop the escape character, keep the quote
                tmpl = substr(line, 1, pos - 2)
                resl = resl tmpl quotec
                line = substr(line, pos + 1)
                continue
            }
            tmpl = substr(line, 1, pos - 1)
            if (nextc == colsep || nextc == "") {
                # end of field or end of line
                resl = resl tmpl
                destl[0] = substr(line, pos + 2)
                destl[1] = (nextc == colsep)
                return resl
            } else {
                # syntax error: unescaped quote inside the field.
                # tolerate it and keep the quote as a literal character
                resl = resl tmpl quotec
                line = substr(line, pos + 1)
            }
        } else {
            # no escape character applies here (echar is empty, or the quote
            # is the first remaining character): the quote may be doubled
            tmpl = substr(line, 1, pos - 1)
            quotec = substr(line, pos, 1)
            nextc = substr(line, pos + 1, 1)
            if (nextc == colsep || nextc == "") {
                # end of field or end of line
                resl = resl tmpl
                destl[0] = substr(line, pos + 2)
                destl[1] = (nextc == colsep)
                return resl
            } else if (nextc == qchar) {
                # doubled quote: emit one quote and skip both characters
                resl = resl tmpl quotec
                line = substr(line, pos + 2)
            } else {
                # syntax error: unescaped quote inside the field.
                # tolerate it and keep the quote as a literal character
                resl = resl tmpl quotec
                line = substr(line, pos + 1)
            }
        }
    }
}
|
||
|
# Parse one unquoted CSV field sitting at the start of line.
#
# The field runs up to the first occurrence of colsep, or to the end of line
# when there is none. destl[0] receives the text after the separator and
# destl[1] is 1 when a separator was found, 0 otherwise.
# qchar and echar are accepted but not used by this function.
# Returns the field value; seppos is a local.
function csv__parse_unquoted(line, destl, colsep, qchar, echar, seppos) {
    seppos = index(line, colsep)
    if (seppos != 0) {
        # separator present: split around it
        destl[0] = substr(line, seppos + 1)
        destl[1] = 1
        return substr(line, 1, seppos - 1)
    }
    # last field of the line
    destl[0] = ""
    destl[1] = 0
    return line
}
|
||
|
# Split a CSV line into the array fields, starting at index 1.
#
# fields   -- destination array; array_new() is called on it first
# line     -- text to split
# nbfields -- when true, it is truncated with int() and fields is padded with
#             empty strings up to that index (nothing is removed)
# colsep, qchar, echar -- separator, quote and escape characters, handed
#             unchanged to csv__parse_quoted() / csv__parse_unquoted()
#
# Returns array_len(fields).
# Every name after echar is a local. value is declared there so that parsing
# does not overwrite a global variable of the same name.
function csv__array_parse(fields, line, nbfields, colsep, qchar, echar,    shouldparse, destl, i, value) {
    array_new(fields)
    array_new(destl)
    i = 1
    shouldparse = 0
    # shouldparse covers an empty field at the end of the line: after a
    # separator one more field must be parsed even when line == ""
    while (shouldparse || line != "") {
        if (index(line, qchar) == 1) {
            value = csv__parse_quoted(line, destl, colsep, qchar, echar)
        } else {
            value = csv__parse_unquoted(line, destl, colsep, qchar, echar)
        }
        # both parsers report the rest of the line and the separator flag
        line = destl[0]
        shouldparse = destl[1]
        fields[i] = value
        i = i + 1
    }
    if (nbfields) {
        nbfields = int(nbfields)
        i = array_len(fields)
        while (i < nbfields) {
            i++
            fields[i] = ""
        }
    }
    return array_len(fields)
}
|
||
|
# Default CSV dialect. array_parsecsv() substitutes these values for any of
# its colsep / qchar / echar arguments that is empty.
BEGIN {
    DEFAULT_ECHAR = ""      # empty escape character
    DEFAULT_QCHAR = "\""    # fields are quoted with a double quote
    DEFAULT_COLSEP = ","    # columns are separated by a comma
}
|
||
|
# Split line into fields using exactly the separator, quote and escape
# characters given; no default is substituted for an empty argument.
# Returns the value returned by csv__array_parse().
function array_parsecsv2(fields, line, nbfields, colsep, qchar, echar) {
    return csv__array_parse(fields, line, nbfields,
                            colsep, qchar, echar)
}
|
||
|
# Split line into fields, falling back to DEFAULT_COLSEP, DEFAULT_QCHAR and
# DEFAULT_ECHAR for each of colsep, qchar and echar that is empty.
# Returns the value returned by csv__array_parse().
function array_parsecsv(fields, line, nbfields, colsep, qchar, echar) {
    if (colsep == "") {
        colsep = DEFAULT_COLSEP
    }
    if (qchar == "") {
        qchar = DEFAULT_QCHAR
    }
    if (echar == "") {
        echar = DEFAULT_ECHAR
    }
    return csv__array_parse(fields, line, nbfields,
                            colsep, qchar, echar)
}
|
||
|
# Parse line as CSV with the default dialect, hand the resulting array to
# array_getline() and return NF.
# array_getline() is defined elsewhere; presumably it loads the array into
# the current record -- confirm against base.array.
# fields is a scratch array when the caller passes only line.
function parsecsv(line, fields) {
    array_parsecsv(fields, line)
    array_getline(fields)
    return NF
}
|
||
|
# Read the next record and parse it as CSV with parsecsv().
#
# file   -- when true, the record is read with getline from that file;
#           otherwise plain getline is used. A name that evaluates to false
#           (for example "0") therefore selects the plain getline form.
# fields -- not used by this function
#
# Returns the value returned by parsecsv($0), or 0 when getline could not
# deliver a record (end of input or read error). In that case the previous
# record is not parsed a second time.
function getlinecsv(file, fields) {
    if (file) {
        if ((getline <file) <= 0) {
            return 0
        }
    } else {
        if ((getline) <= 0) {
            return 0
        }
    }
    return parsecsv($0)
}
|
||
|
# Tell whether s starts or ends with a blank, control or whitespace
# character. Returns 1 in that case, 0 otherwise.
function csv__should_quote(s) {
    return (s ~ /^[[:blank:][:cntrl:][:space:]]/) || (s ~ /[[:blank:][:cntrl:][:space:]]$/)
}
|
||
|
# Build one CSV line from the values of fields.
#
# fields -- source array; its values are visited in the order of the index
#           list filled by mkindices() (defined elsewhere)
# colsep -- column separator written between values
# mvsep  -- a value containing this string gets quoted
# qchar  -- quote character; when empty, values are never quoted or escaped
# echar  -- escape character placed before an embedded qchar; when empty the
#           embedded qchar is doubled instead
#
# Returns the formatted line. Every name after echar is a local.
function array_formatcsv2(fields, colsep, mvsep, qchar, echar, n, keys, out, k, cell) {
    out = ""
    n = mkindices(fields, keys)
    for (k = 1; k <= n; k++) {
        cell = fields[keys[k]]
        if (k > 1) {
            out = out colsep
        }
        if (qchar == "") {
            # quoting disabled: emit the value untouched
            out = out cell
            continue
        }
        if (index(cell, qchar) != 0) {
            # protect embedded quotes: prefix them with echar, or double them
            if (echar != "") {
                gsub(qchar, quote_subrepl(echar) "&", cell)
            } else {
                gsub(qchar, "&&", cell)
            }
        }
        if (index(cell, mvsep) != 0 || index(cell, colsep) != 0 || index(cell, qchar) != 0 || csv__should_quote(cell)) {
            out = out qchar cell qchar
        } else {
            out = out cell
        }
    }
    return out
}
|
||
|
# Format fields as one CSV line with a fixed dialect: "," between columns,
# ";" as the multi-value separator, double quote for quoting, and no escape
# character.
# Returns the value returned by array_formatcsv2().
function array_formatcsv(fields) {
    return array_formatcsv2(fields,
                            ",", ";", "\"", "")
}
|
||
|
# Format fields with array_formatcsv() and pass the resulting line, together
# with output, to printto().
# printto() is defined elsewhere; presumably output names the destination --
# confirm against base.core.
function array_printcsv(fields, output) {
    printto(array_formatcsv(fields), output)
}
|
||
|
# Return the CSV line produced by array_formatcsv() for the array that
# array_fill() prepares.
# array_fill() is defined elsewhere; presumably it copies the fields of the
# current record into the array -- confirm against base.array.
# fields is a local scratch array; callers pass no argument.
function get_formatcsv(    fields) {
    array_fill(fields)
    return array_formatcsv(fields)
}
|
||
|
# Replace the current record ($0) with the line returned by get_formatcsv().
function formatcsv() {
    $0 = get_formatcsv()
}
|
||
|
# Prepare an array with array_fill() and print it as CSV through
# array_printcsv(), passing output along.
# array_fill() is defined elsewhere; presumably it copies the fields of the
# current record into the array -- confirm against base.array.
# fields is a scratch array when the caller passes only output.
function printcsv(output, fields) {
    array_fill(fields)
    array_printcsv(fields, output)
}
|
||
|
# Search the CSV file input for the first line whose column field equals
# value.
#
# fields   -- receives the parsed columns of the matching line
# input    -- file read with getline; it is closed before returning
# field    -- index of the column compared against value
# value    -- value looked for
# nbfields -- passed to array_parsecsv(); when no line matches and nbfields
#             is true, fields is emptied and then filled with empty strings
#             up to int(nbfields)
#
# Returns 1 when a line matched, 0 otherwise.
# The record that was current on entry is handed to array_fill() before the
# search and to array_getline() afterwards. Both are defined elsewhere;
# presumably this saves and restores $0, which getline overwrites -- confirm
# against base.array.
# Every name after nbfields is a local.
function array_findcsv(fields, input, field, value, nbfields, saved, found, k) {
    array_new(saved)
    array_fill(saved)
    array_new(fields)
    found = 0
    # stop reading as soon as a line matched
    while (!found && (getline <input) > 0) {
        array_parsecsv(fields, $0, nbfields)
        found = (fields[field] == value)
    }
    close(input)
    array_getline(saved)
    if (found) {
        return found
    }
    # no match: do not leave the columns of the last line read in fields
    delete fields
    if (nbfields) {
        nbfields = int(nbfields)
        for (k = array_len(fields); k < nbfields; ) {
            fields[++k] = ""
        }
    }
    return found
}
|