202 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			Awk
		
	
	
	
	
	
			
		
		
	
	
			202 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			Awk
		
	
	
	
	
	
| # -*- coding: utf-8 mode: awk -*- vim:sw=4:sts=4:et:ai:si:sta:fenc=utf-8
 | |
| @include "base.core"
 | |
| @include "base.array"
 | |
| 
 | |
# Parse one quoted CSV field located at the very start of `line`.
#
# Parameters:
#   line    text whose first character is the opening quote
#   destl   output array: destl[0] receives the text left to parse after the
#           field, destl[1] is 1 when the field was followed by colsep
#           (so another field must be parsed, even if empty), else 0
#   colsep  column separator
#   qchar   quote character
#   echar   escape character; when empty, a quote is escaped by doubling it
#
# Returns the unquoted, unescaped field value.
#
# Everything after the blank run in the parameter list is a local variable.
# prevc and quotec are declared there so that they no longer overwrite
# global variables of the same name.
function csv__parse_quoted(line, destl, colsep, qchar, echar,       pos, tmpl, nextc, resl, prevc, quotec, escmode) {
  # drop the opening quote
  line = substr(line, 2)
  resl = ""
  while (1) {
    pos = index(line, qchar)
    if (pos == 0) {
      # unterminated string: the rest of the line is the value
      destl[0] = ""
      destl[1] = 0
      return resl line
    }
    quotec = substr(line, pos, 1)
    nextc = substr(line, pos + 1, 1)
    # escape-character mode only applies when there is a character before
    # the quote; otherwise the doubled-quote rules are used
    escmode = (echar != "" && pos > 1)
    if (escmode) {
      prevc = substr(line, pos - 1, 1)
      if (prevc == echar) {
        # escaped quote: drop the escape character, keep the quote
        resl = resl substr(line, 1, pos - 2) quotec
        line = substr(line, pos + 1)
        continue
      }
    }
    tmpl = substr(line, 1, pos - 1)
    if (nextc == colsep || nextc == "") {
      # end of field or end of line
      destl[0] = substr(line, pos + 2)
      destl[1] = nextc == colsep
      return resl tmpl
    }
    if (!escmode && nextc == qchar) {
      # doubled quote: keep a single one and skip both
      resl = resl tmpl quotec
      line = substr(line, pos + 2)
      continue
    }
    # syntax error: unescaped quote inside the field.
    # ignore the error and keep the quote anyway
    resl = resl tmpl quotec
    line = substr(line, pos + 1)
  }
}
 | |
# Parse one unquoted CSV field at the start of `line`.
#
# The field runs up to the first colsep, or to the end of the line when no
# separator is left. destl[0] receives the remaining text and destl[1] is 1
# when a separator was consumed (another field follows), else 0.
# qchar and echar are accepted for signature symmetry with
# csv__parse_quoted and are not used here.
# Returns the field value.
function csv__parse_unquoted(line, destl, colsep, qchar, echar,     cut) {
  cut = index(line, colsep)
  if (cut != 0) {
    # split around the separator
    destl[0] = substr(line, cut + 1)
    destl[1] = 1
    return substr(line, 1, cut - 1)
  }
  # last field of the line
  destl[0] = ""
  destl[1] = 0
  return line
}
 | |
# Split a CSV line into the array `fields` (indices starting at 1).
#
# Parameters:
#   fields    output array, reset with array_new() before parsing
#   line      the CSV line to parse
#   nbfields  when non-zero, `fields` is padded with empty strings until it
#             holds at least int(nbfields) entries
#   colsep, qchar, echar  separator, quote and escape characters, used as
#             given (no defaults are applied here)
#
# Returns array_len(fields).
#
# `value` is declared as a local so that it no longer leaks into the global
# namespace.
function csv__array_parse(fields, line, nbfields, colsep, qchar, echar,     shouldparse, destl, i, value) {
  array_new(fields)
  array_new(destl)
  i = 1
  # shouldparse handles an empty field at the end of the line: after a
  # separator one more field must always be parsed, even when line == ""
  shouldparse = 0
  while (shouldparse || line != "") {
    if (index(line, qchar) == 1) {
      value = csv__parse_quoted(line, destl, colsep, qchar, echar)
    } else {
      value = csv__parse_unquoted(line, destl, colsep, qchar, echar)
    }
    # both helpers report the remaining text and the "separator seen" flag
    line = destl[0]
    shouldparse = destl[1]
    fields[i] = value
    i = i + 1
  }
  if (nbfields) {
    nbfields = int(nbfields)
    for (i = array_len(fields) + 1; i <= nbfields; i++) {
      fields[i] = ""
    }
  }
  return array_len(fields)
}
 | |
# Defaults applied by array_parsecsv() for arguments passed as "".
BEGIN {
  DEFAULT_COLSEP = ","     # column separator
  DEFAULT_QCHAR = "\""     # quote character
  DEFAULT_ECHAR = ""       # escape character; empty means none
}
 | |
# Parse `line` into `fields` using colsep, qchar and echar exactly as given:
# unlike array_parsecsv(), empty arguments are not replaced by the DEFAULT_*
# values. Returns the result of csv__array_parse().
function array_parsecsv2(fields, line, nbfields, colsep, qchar, echar) {
  return csv__array_parse(fields, line, nbfields, colsep, qchar, echar)
}
 | |
# Parse `line` into `fields`, replacing each empty (or omitted) colsep, qchar
# and echar argument by DEFAULT_COLSEP, DEFAULT_QCHAR and DEFAULT_ECHAR.
# Returns the result of csv__array_parse().
function array_parsecsv(fields, line, nbfields, colsep, qchar, echar) {
  colsep = (colsep == "") ? DEFAULT_COLSEP : colsep
  qchar = (qchar == "") ? DEFAULT_QCHAR : qchar
  echar = (echar == "") ? DEFAULT_ECHAR : echar
  return csv__array_parse(fields, line, nbfields, colsep, qchar, echar)
}
 | |
# Parse `line` as CSV with the default settings, hand the resulting array to
# array_getline(), and return NF.
# NOTE(review): array_getline() comes from base.array; presumably it loads
# the array into the current record ($1..$NF) -- confirm there.
function parsecsv(line,             fields) {
  array_parsecsv(fields, line)
  array_getline(fields)
  return NF
}
 | |
# Read the next record -- from `file` when it is given, otherwise from the
# main input -- and parse it with parsecsv(). Returns parsecsv()'s result.
# NOTE(review): the status returned by getline is not checked; at end of
# file or on error $0 is left as it was, so the previous record is parsed
# again and the caller cannot detect EOF from the return value.
function getlinecsv(file,          fields) {
  if (!file) {
    getline
  } else {
    getline <file
  }
  return parsecsv($0)
}
 | |
# Return 1 when `s` starts or ends with a blank, control or whitespace
# character (such a value has to be quoted in the output), 0 otherwise.
function csv__should_quote(s) {
  return (s ~ /^[[:blank:][:cntrl:][:space:]]/ || s ~ /[[:blank:][:cntrl:][:space:]]$/) ? 1 : 0
}
 | |
# Format the values of `fields` as one CSV line.
#
# Parameters:
#   fields  array of values; they are emitted in the order given by
#           mkindices(fields, indices)
#   colsep  column separator, written between values
#   mvsep   a value containing this string is quoted
#   qchar   quote character; when empty, nothing is quoted or escaped
#   echar   escape character written before each qchar found in a value;
#           when empty, the qchar is doubled instead
#
# A value is wrapped in qchar when it contains mvsep, colsep or qchar, or
# when csv__should_quote() says so.
#
# Returns the formatted line.
#
# qchar is searched for literally with index()/substr() rather than being
# handed to gsub() as a regular expression, so a quote character that is a
# regex metacharacter is escaped correctly, consistently with the index()
# tests used around it.
function array_formatcsv2(fields, colsep, mvsep, qchar, echar,      count, indices, line, i, value, pos, esc, rest) {
  line = ""
  count = mkindices(fields, indices)
  for (i = 1; i <= count; i++) {
    value = fields[indices[i]]
    if (i > 1) line = line colsep
    if (qchar != "" && index(value, qchar) != 0) {
      # prefix every quote with the escape character, or double it
      esc = (echar != "") ? echar : qchar
      rest = value
      value = ""
      while ((pos = index(rest, qchar)) != 0) {
        value = value substr(rest, 1, pos - 1) esc qchar
        rest = substr(rest, pos + length(qchar))
      }
      value = value rest
    }
    if (qchar != "" && (index(value, mvsep) != 0 || index(value, colsep) != 0 || index(value, qchar) != 0 || csv__should_quote(value))) {
      line = line qchar value qchar
    } else {
      line = line value
    }
  }
  return line
}
 | |
# Format `fields` as a CSV line with the fixed settings: "," as column
# separator, ";" as multi-value separator, a double quote as quote character
# and no escape character (quotes are doubled).
function array_formatcsv(fields) {
  return array_formatcsv2(fields, ",", ";", "\"", "")
}
 | |
# Format `fields` with array_formatcsv() and pass the line to printto()
# together with `output`.
# NOTE(review): printto() is defined elsewhere; presumably `output` selects
# the destination -- confirm there.
function array_printcsv(fields, output) {
  printto(array_formatcsv(fields), output)
}
 | |
# Fill a temporary array with array_fill() and return it formatted as a CSV
# line by array_formatcsv().
# NOTE(review): array_fill() comes from base.array; presumably it copies the
# current record's fields into the array -- confirm there.
function get_formatcsv(                 fields) {
  array_fill(fields)
  return array_formatcsv(fields)
}
 | |
# Replace the current record ($0) with the line returned by get_formatcsv().
function formatcsv() {
  $0 = get_formatcsv()
}
 | |
# Fill a temporary array with array_fill(), then print it as a CSV line via
# array_printcsv(), passing `output` through.
# NOTE(review): array_fill() comes from base.array; presumably it copies the
# current record's fields into the array -- confirm there.
function printcsv(output,           fields) {
  array_fill(fields)
  array_printcsv(fields, output)
}
 | |
# Scan the CSV file `input` for the first line whose column `field` equals
# `value`.
#
# Parameters:
#   fields    output array: the parsed matching line when found; otherwise
#             emptied and, when nbfields is non-zero, padded with
#             int(nbfields) empty strings
#   input     file to read; it is closed before returning
#   field     index of the column to compare
#   value     value to look for
#   nbfields  passed to array_parsecsv(); also used for the padding above
#
# The current record is saved with array_fill() before reading and handed
# back to array_getline() afterwards.
# Returns 1 when a matching line was found, 0 otherwise.
function array_findcsv(fields, input, field, value, nbfields,          orig, found, i) {
  array_new(orig)
  array_fill(orig)
  array_new(fields)
  found = 0
  # stop reading as soon as a line matches
  while (!found && (getline <input) > 0) {
    array_parsecsv(fields, $0, nbfields)
    if (fields[field] == value) found = 1
  }
  close(input)
  array_getline(orig)
  if (found) return found
  # no match: hand back an empty result of the requested width
  delete fields
  if (nbfields) {
    nbfields = int(nbfields)
    for (i = array_len(fields) + 1; i <= nbfields; i++) {
      fields[i] = ""
    }
  }
  return found
}
 |