# -*- coding: utf-8 mode: awk -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
@include "base.core"
@include "base.array"

# Parse one quoted field located at the very start of `line`.
#
# line   -- text whose first character is the opening quote (it is skipped)
# destl  -- output array: destl[0] receives the text remaining after the field
#           (and after the separator, if any); destl[1] is 1 when the field was
#           followed by colsep (so another field must still be parsed, even if
#           the remaining text is empty), 0 otherwise
# colsep -- column separator (compared against a single character)
# qchar  -- quote character (a single character is read at the match position)
# echar  -- escape character; when empty, a quote is escaped by doubling it
#
# Returns the unquoted field value. An unterminated quoted string yields all
# the remaining text. The remaining parameters are local variables.
function csv__parse_quoted(line, destl, colsep, qchar, echar,    pos, tmpl, nextc, resl, prevc, quotec) {
    line = substr(line, 2)
    resl = ""
    while (1) {
        pos = index(line, qchar)
        if (pos == 0) {
            # unterminated string: take everything that is left
            resl = resl line
            destl[0] = ""
            destl[1] = 0
            return resl
        }
        if (echar != "" && pos > 1) {
            # an escape character is configured and the quote is not the first
            # character, so it may be preceded by the escape character
            prevc = substr(line, pos - 1, 1)
            quotec = substr(line, pos, 1)
            nextc = substr(line, pos + 1, 1)
            if (prevc == echar) {
                # escaped quote: drop the escape character, keep the quote
                tmpl = substr(line, 1, pos - 2)
                resl = resl tmpl quotec
                line = substr(line, pos + 1)
                continue
            }
            tmpl = substr(line, 1, pos - 1)
            if (nextc == colsep || nextc == "") {
                # end of field or end of line
                resl = resl tmpl
                destl[0] = substr(line, pos + 2)
                destl[1] = nextc == colsep
                return resl
            } else {
                # syntax error: unescaped quote inside the field.
                # ignore the error and keep the quote as a literal character
                resl = resl tmpl quotec
                line = substr(line, pos + 1)
            }
        } else {
            # no escape character applies to this quote; it may be doubled
            tmpl = substr(line, 1, pos - 1)
            quotec = substr(line, pos, 1)
            nextc = substr(line, pos + 1, 1)
            if (nextc == colsep || nextc == "") {
                # end of field or end of line
                resl = resl tmpl
                destl[0] = substr(line, pos + 2)
                destl[1] = nextc == colsep
                return resl
            } else if (nextc == qchar) {
                # doubled quote: emit a single literal quote
                resl = resl tmpl quotec
                line = substr(line, pos + 2)
            } else {
                # syntax error: unescaped quote inside the field.
                # ignore the error and keep the quote as a literal character
                resl = resl tmpl quotec
                line = substr(line, pos + 1)
            }
        }
    }
}

# Parse one unquoted field at the start of `line`.
#
# destl[0] receives the text after the first colsep ("" when there is none);
# destl[1] is 1 when a separator was found, 0 otherwise.
# Returns the text before the separator, or the whole line when there is none.
# qchar and echar are accepted but not used here; pos is a local variable.
function csv__parse_unquoted(line, destl, colsep, qchar, echar,    pos) {
    pos = index(line, colsep)
    if (pos == 0) {
        destl[0] = ""
        destl[1] = 0
        return line
    } else {
        destl[0] = substr(line, pos + 1)
        destl[1] = 1
        return substr(line, 1, pos - 1)
    }
}

# Split `line` into fields[1..n] using the given separator, quote and escape
# characters. When nbfields is set and fewer fields were parsed, fields is
# padded with empty strings up to int(nbfields) entries (it is never truncated).
# Returns array_len(fields). The remaining parameters are local variables.
function csv__array_parse(fields, line, nbfields, colsep, qchar, echar,    shouldparse, destl, i, value) {
    array_new(fields)
    array_new(destl)
    i = 1
    shouldparse = 0
    # shouldparse handles an empty field at the end of the line: after a
    # separator we must always parse once more, even if line == ""
    while (shouldparse || line != "") {
        if (index(line, qchar) == 1) {
            value = csv__parse_quoted(line, destl, colsep, qchar, echar)
            line = destl[0]
            shouldparse = destl[1]
        } else {
            value = csv__parse_unquoted(line, destl, colsep, qchar, echar)
            line = destl[0]
            shouldparse = destl[1]
        }
        fields[i] = value
        i = i + 1
    }
    if (nbfields) {
        nbfields = int(nbfields)
        i = array_len(fields)
        while (i < nbfields) {
            i++
            fields[i] = ""
        }
    }
    return array_len(fields)
}

# Defaults applied by array_parsecsv() when an argument is empty.
BEGIN {
    DEFAULT_COLSEP = ","
    DEFAULT_QCHAR = "\""
    DEFAULT_ECHAR = ""
}

# Parse `line` into fields using exactly the characters given: no defaults are
# substituted for empty arguments.
function array_parsecsv2(fields, line, nbfields, colsep, qchar, echar) {
    return csv__array_parse(fields, line, nbfields, colsep, qchar, echar)
}

# Parse `line` into fields, replacing empty colsep/qchar/echar arguments with
# DEFAULT_COLSEP, DEFAULT_QCHAR and DEFAULT_ECHAR.
function array_parsecsv(fields, line, nbfields, colsep, qchar, echar) {
    if (colsep == "") colsep = DEFAULT_COLSEP
    if (qchar == "") qchar = DEFAULT_QCHAR
    if (echar == "") echar = DEFAULT_ECHAR
    return csv__array_parse(fields, line, nbfields, colsep, qchar, echar)
}

# Parse `line` as CSV with the default characters, hand the result to
# array_getline() and return NF.
# NOTE(review): array_getline() is defined outside this file; it presumably
# rebuilds the current record from the array -- confirm.
function parsecsv(line,    fields) {
    array_parsecsv(fields, line)
    array_getline(fields)
    return NF
}

# Read the next record (from `file` when given) and parse it as CSV.
# NOTE(review): in the text this file was recovered from, everything between
# "getline" in this function and "1) line = line colsep" in array_formatcsv2
# was lost. The rest of this body is a reconstruction -- confirm against the
# upstream source.
function getlinecsv(file,    fields) {
    if (file) {
        getline <file
    } else {
        getline
    }
    return parsecsv($0)
}

# Format fields[1..n] as one CSV line.
#
# colsep separates columns. A value is wrapped in qchar when it contains mvsep,
# colsep or qchar, or when csv__should_quote(value) is true. Quote characters
# inside a value are prefixed with echar when it is set, doubled otherwise.
# NOTE(review): the function header and the start of the loop were lost (see
# getlinecsv); the signature is inferred from the call in array_formatcsv()
# and from the variables used in the surviving body -- confirm.
# NOTE(review): csv__should_quote() is not defined in the recovered text; it
# was presumably defined in the lost span. It is not reconstructed here.
function array_formatcsv2(fields, colsep, mvsep, qchar, echar,    count, i, line, value) {
    line = ""
    count = array_len(fields)
    for (i = 1; i <= count; i++) {
        value = fields[i]
        if (i > 1) line = line colsep
        if (qchar != "" && index(value, qchar) != 0) {
            if (echar != "") gsub(qchar, quote_subrepl(echar) "&", value)
            else gsub(qchar, "&&", value)
        }
        if (qchar != "" && (index(value, mvsep) != 0 || index(value, colsep) != 0 || index(value, qchar) != 0 || csv__should_quote(value))) {
            line = line qchar value qchar
        } else {
            line = line value
        }
    }
    return line
}

# Format fields with "," as column separator, ";" as mvsep, a double quote as
# quote character and no escape character (quotes are doubled).
function array_formatcsv(fields) {
    return array_formatcsv2(fields, ",", ";", "\"", "")
}

# Format fields as CSV and pass the line to printto() together with `output`.
function array_printcsv(fields, output) {
    printto(array_formatcsv(fields), output)
}

# Return the CSV line built from the array produced by array_fill().
# NOTE(review): array_fill() is defined outside this file; it presumably copies
# the current record's fields into the array -- confirm.
function get_formatcsv(    fields) {
    array_fill(fields)
    return array_formatcsv(fields)
}

# Replace $0 with its CSV-formatted form.
function formatcsv() {
    $0 = get_formatcsv()
}

# Print the array produced by array_fill() as a CSV line to `output`.
function printcsv(output,    fields) {
    array_fill(fields)
    array_printcsv(fields, output)
}

# Scan the CSV file `input` for the first record whose column `field` equals
# `value`.
#
# On success, fields holds the parsed record and 1 is returned. Otherwise
# fields is emptied, padded with int(nbfields) empty strings when nbfields is
# set, and 0 is returned. The file is closed in both cases, and the array
# filled by array_fill(orig) before the scan is handed back to array_getline()
# afterwards (presumably to restore the caller's current record -- confirm).
function array_findcsv(fields, input, field, value, nbfields,    orig, found, i) {
    array_new(orig)
    array_fill(orig)
    array_new(fields)
    found = 0
    # getline returns 1 per record read, 0 at end of file and -1 on error;
    # "> 0" stops the loop on both of the latter
    while ((getline <input) > 0) {
        array_parsecsv(fields, $0, nbfields)
        if (fields[field] == value) {
            found = 1
            break
        }
    }
    close(input)
    array_getline(orig)
    if (!found) {
        delete fields
        if (nbfields) {
            nbfields = int(nbfields)
            i = array_len(fields)
            while (i < nbfields) {
                i++
                fields[i] = ""
            }
        }
    }
    return found
}