From caeb06ee4a66c91bbd972cea2fc724932f9b0cdd Mon Sep 17 00:00:00 2001 From: Jephte Clain Date: Tue, 19 May 2015 18:24:41 +0400 Subject: [PATCH] =?UTF-8?q?possibilit=C3=A9=20de=20sp=C3=A9cifier=20les=20?= =?UTF-8?q?types=20des=20champs=20pour=20mysqlloadcsv?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mysqlloadcsv | 137 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 112 insertions(+), 25 deletions(-) diff --git a/mysqlloadcsv b/mysqlloadcsv index d4e5392..765c51a 100755 --- a/mysqlloadcsv +++ b/mysqlloadcsv @@ -95,6 +95,11 @@ OPTIONS -z, --null-is-empty Avec les options -I et -U, considérer que NULL est représenté par la chaine vide. Cette option est équivalente à -Z '' + -t, --types [DEFAULT_TYPE,]FIELD:TYPE,... + Spécifier pour chaque champ mentionné le type de donnée à forcer. Le + type 'auto' signifie que le type est autodétecté. C'est la valeur par + défaut. Les autres types valides sont 'str', 'int' et 'date' + Cette option est ignorée avec l'option -L -A, --analyse Analyser les données et afficher une requête pour créer une table qui pourrait contenir ces données. @@ -103,6 +108,17 @@ OPTIONS APRES cette option" } +function norm_type() { + case "$1" in + string|str|s|varchar) echo varchar;; + integer|int|i) echo integer;; + #datetime|dt) echo datetime;; + date|d) echo date;; + auto|a) echo auto;; + *) echo "$1";; + esac +} + __AWK_MYSQLFUNCS=' function is_null(value) { return value == null_value @@ -127,11 +143,23 @@ function is_date(value) { } #function is_datetime(value) { #} -function format_sqlvalue(value) { - if (is_null(value)) { - value = "NULL" - } else if (is_integer(value)) { - } else if (is_date_dmy(value)) { +function format_string(value) { + gsub(/'\''/, "'\'\''", value) + value = "'\''" value "'\''" + return value +} +function format_integer(value) { + if (is_integer(value)) { + if (value != "0") { + gsub(/^0+/, "", value) + } + } else { + value = format_string(value) + } + return value +} +function format_date(value) { + if (is_date_dmy(value)) { value = "str_to_date('\''" value "'\'', '\''%d/%m/%y'\'')" } else if (is_date_dmY(value)) { value = "str_to_date('\''" value "'\'', '\''%d/%m/%Y'\'')" @@ -140,8 +168,30 @@ function format_sqlvalue(value) { } else if (is_date_Ymd2(value)) { value = "str_to_date('\''" value "'\'', '\''%Y-%m-%d'\'')" } else { - gsub(/'\''/, "'\'\''", value) - value = "'\''" value "'\''" + value = format_string(value) + } + return value +} +function format_sqlvalue(value, type) { + if (type == "varchar" || type == "string" || type == "str" || type == "s") { + value = format_string(value) + } else if (type == "integer" || type == "int" || type == "i") { + value = format_integer(value) + } else if (type == "date" || type == "d") { + value = format_date(value) + #} else if (type == "datetime" || type == "dt") { + # value = format_datetime(value) + } else { + # par defaut, autodétecter + if (is_null(value)) { + value = "NULL" + } else if (is_integer(value)) { + value = format_integer(value) + } else if (is_date(value)) { + value = format_date(value) + } else { + value = format_string(value) + } } return value } @@ -161,6 +211,7 @@ fake= method=load update_key= null_value='\N' +fieldtypes=() parse_opts "${PRETTYOPTS[@]}" \ --help '$exit_with display_help' \ -h:,-H:,--host: host= \ @@ -179,6 +230,7 @@ parse_opts "${PRETTYOPTS[@]}" \ -U:,-k:,--update-data: '$method=update; set@ update_key' \ -Z:,--null-value: null_value= \ -z,--null-is-empty null_value= \ + -t:,--types: fieldtypes \ -A,--analyse '$method=analyse; fake=1' \ @ args -- "$@" && set -- "${args[@]}" || die "$args" @@ -236,6 +288,34 @@ else cfields="$(array_join fields ,)" fi +# calculer les types à utiliser pour chaque champ +types=() +for field in "${fields[@]}"; do + array_add types "" +done +default_type=auto +array_fix_paths fieldtypes , +for ft in "${fieldtypes[@]}"; do + splitpair "$ft" f t + if [ -n "$t" ]; then + let i=0 + for field in "${fields[@]}"; do + if [ "$field" == "$f" ]; then + types[$i]="$(norm_type "$t")" + break + fi + let i=$i+1 + done + else + default_type="$(norm_type "$f")" + fi +done +let i=0 +for type in "${types[@]}"; do + [ -n "$type" ] || types[$i]="$default_type" + let i=$i+1 +done + mysqlargs=( ${host:+-h "$host"} ${port:+-P "$port"} ${user:+-u "$user"} ${database:+-D "$database"} @@ -271,7 +351,7 @@ elif [ "$method" == insert ]; then ac_set_tmpfile inserts [ -n "$truncate" ] && echo "$truncate" >>"$inserts" awkcsv <"$input" >>"$inserts" -s "$skip_lines" -v table="$table" \ - -v null_value="$null_value" -a "$__AWK_MYSQLFUNCS"'{ + -v null_value="$null_value" -v types[@] -a "$__AWK_MYSQLFUNCS"'{ count = length(ORIGHEADERS) fields = "" for (i = 1; i <= count; i++) { @@ -281,7 +361,7 @@ elif [ "$method" == insert ]; then values = "" for (i = 1; i <= count; i++) { if (i > 1) values = values ", " - values = values format_sqlvalue($i) + values = values format_sqlvalue($i, types[i]) } print "insert into `" table "` (" fields ") values (" values ");" } @@ -305,13 +385,13 @@ elif [ "$method" == update ]; then [ -n "$truncate" ] && echo "$truncate" >>"$updates" awkcsv <"$input" >>"$updates" -s "$skip_lines" \ -v table="$table" -v update_key="$update_key" \ - -v null_value="$null_value" -a "$__AWK_MYSQLFUNCS"'{ + -v null_value="$null_value" -v types[@] -a "$__AWK_MYSQLFUNCS"'{ set_values = "" cond = "" count = length(ORIGHEADERS) for (i = 1; i <= count; i++) { field = ORIGHEADERS[i] - value = format_sqlvalue($i) + value = format_sqlvalue($i, types[i]) if (field == update_key) { cond = "`" field "`=" value } else { @@ -337,21 +417,27 @@ END { elif [ "$method" == analyse ]; then ac_set_tmpfile create awkcsv <"$input" >"$create" -s "$skip_lines" -v table="$table" \ - -v null_value="$null_value" -a "$__AWK_MYSQLFUNCS"' + -v null_value="$null_value" -v types[@] -a "$__AWK_MYSQLFUNCS"' function set_integer(i) { - if (COL_TYPES[i] == "") { + if (COL_TYPES[i] == "" || COL_TYPES[i] == "integer") { COL_TYPES[i] = "integer" + return 1 } + return 0 } function set_datetime(i) { - if (COL_TYPES[i] == "") { + if (COL_TYPES[i] == "" || COL_TYPES[i] == "datetime") { COL_TYPES[i] = "datetime" + return 1 } + return 0 } function set_date(i) { - if (COL_TYPES[i] == "") { + if (COL_TYPES[i] == "" || COL_TYPES[i] == "date") { COL_TYPES[i] = "date" + return 1 } + return 0 } function set_varchar(i, col_size) { COL_TYPES[i] = "varchar" @@ -360,24 +446,25 @@ function set_varchar(i, col_size) { } else if (col_size > COL_SIZES[i]) { COL_SIZES[i] = col_size } + return 1 } { if (do_once("init")) { count = length(ORIGHEADERS) array_newsize(COL_TYPES, count) array_newsize(COL_SIZES, count) + for (i = 1; i <= count; i++) { + if (types[i] != "auto") { + COL_TYPES[i] = types[i] + } + } } for (i = 1; i <= count; i++) { - if (is_null($i)) { - } else if (is_integer($i)) { - set_integer(i) -# } else if (is_datetime($i)) { -# set_datetime(i) - } else if (is_date($i)) { - set_date(i) - } else { - set_varchar(i, length($i)) - } + if (is_null($i)) continue + if (is_integer($i) && set_integer(i)) continue +# if (is_datetime($i) && set_datetime(i)) continue + if (is_date($i) && set_date(i)) continue + set_varchar(i, length($i)) } } END {