possibilité de spécifier les types des champs pour mysqlloadcsv

This commit is contained in:
Jephté Clain 2015-05-19 18:24:41 +04:00
parent b2c9f81545
commit caeb06ee4a
1 changed files with 112 additions and 25 deletions

View File

@ -95,6 +95,11 @@ OPTIONS
-z, --null-is-empty -z, --null-is-empty
Avec les options -I et -U, considérer que NULL est représenté par la Avec les options -I et -U, considérer que NULL est représenté par la
chaine vide. Cette option est équivalente à -Z '' chaine vide. Cette option est équivalente à -Z ''
-t, --types [DEFAULT_TYPE,]FIELD:TYPE,...
Spécifier pour chaque champ mentionné le type de donnée à forcer. Le
type 'auto' signifie que le type est autodétecté. C'est la valeur par
défaut. Les autres types valides sont 'str', 'int' et 'date'
Cette option est ignorée avec l'option -L
-A, --analyse -A, --analyse
Analyser les données et afficher une requête pour créer une table qui Analyser les données et afficher une requête pour créer une table qui
pourrait contenir ces données. pourrait contenir ces données.
@ -103,6 +108,17 @@ OPTIONS
APRES cette option" APRES cette option"
} }
# Map a type alias given on the command line to its canonical name.
#   $1: alias (e.g. str, int, d, auto)
# Prints the canonical name on stdout; an alias that is not recognized is
# printed back unchanged.
function norm_type() {
    local alias="$1" canonical
    case "$alias" in
    varchar|string|str|s) canonical=varchar;;
    integer|int|i) canonical=integer;;
    #datetime|dt) canonical=datetime;;
    date|d) canonical=date;;
    auto|a) canonical=auto;;
    *) canonical="$alias";;
    esac
    echo "$canonical"
}
__AWK_MYSQLFUNCS=' __AWK_MYSQLFUNCS='
function is_null(value) { function is_null(value) {
return value == null_value return value == null_value
@ -127,11 +143,23 @@ function is_date(value) {
} }
#function is_datetime(value) { #function is_datetime(value) {
#} #}
function format_sqlvalue(value) { function format_string(value) {
if (is_null(value)) { gsub(/'\''/, "'\'\''", value)
value = "NULL" value = "'\''" value "'\''"
} else if (is_integer(value)) { return value
} else if (is_date_dmy(value)) { }
# Format a value destined for an integer column.
#   value: raw field text
# Returns the text unquoted with its leading zeros removed when is_integer()
# accepts it; any other value is handed to format_string().
function format_integer(value) {
    if (is_integer(value)) {
        # Strip leading zeros. When the value consists only of zeros ("0",
        # "00", "000", ...) the strip leaves nothing, so put a single zero
        # back: an empty token would produce invalid SQL.
        if (sub(/^0+/, "", value) && value == "") {
            value = "0"
        }
    } else {
        value = format_string(value)
    }
    return value
}
function format_date(value) {
if (is_date_dmy(value)) {
value = "str_to_date('\''" value "'\'', '\''%d/%m/%y'\'')" value = "str_to_date('\''" value "'\'', '\''%d/%m/%y'\'')"
} else if (is_date_dmY(value)) { } else if (is_date_dmY(value)) {
value = "str_to_date('\''" value "'\'', '\''%d/%m/%Y'\'')" value = "str_to_date('\''" value "'\'', '\''%d/%m/%Y'\'')"
@ -140,8 +168,30 @@ function format_sqlvalue(value) {
} else if (is_date_Ymd2(value)) { } else if (is_date_Ymd2(value)) {
value = "str_to_date('\''" value "'\'', '\''%Y-%m-%d'\'')" value = "str_to_date('\''" value "'\'', '\''%Y-%m-%d'\'')"
} else { } else {
gsub(/'\''/, "'\'\''", value) value = format_string(value)
value = "'\''" value "'\''" }
return value
}
# Render one CSV value as the SQL text to put in a generated statement.
#   value: raw field text
#   type:  forced type name: varchar/string/str/s, integer/int/i or date/d.
#          Any other value (including "auto" or an empty string) selects
#          autodetection.
# Returns the SQL text for the value.
# With a forced type the matching format_*() function is called directly;
# format_integer() and format_date() themselves fall back to format_string()
# when the value does not have the expected shape.
# NOTE(review): is_null() is only consulted in the autodetect branch, so with a
# forced type the null marker is never turned into NULL -- confirm this is
# the intended behavior.
function format_sqlvalue(value, type) {
if (type == "varchar" || type == "string" || type == "str" || type == "s") {
value = format_string(value)
} else if (type == "integer" || type == "int" || type == "i") {
value = format_integer(value)
} else if (type == "date" || type == "d") {
value = format_date(value)
#} else if (type == "datetime" || type == "dt") {
# value = format_datetime(value)
} else {
# default: autodetect the type from the value itself
# (null marker first, then integer, then date, else quoted string)
if (is_null(value)) {
value = "NULL"
} else if (is_integer(value)) {
value = format_integer(value)
} else if (is_date(value)) {
value = format_date(value)
} else {
value = format_string(value)
}
} }
return value return value
} }
@ -161,6 +211,7 @@ fake=
method=load method=load
update_key= update_key=
null_value='\N' null_value='\N'
fieldtypes=()
parse_opts "${PRETTYOPTS[@]}" \ parse_opts "${PRETTYOPTS[@]}" \
--help '$exit_with display_help' \ --help '$exit_with display_help' \
-h:,-H:,--host: host= \ -h:,-H:,--host: host= \
@ -179,6 +230,7 @@ parse_opts "${PRETTYOPTS[@]}" \
-U:,-k:,--update-data: '$method=update; set@ update_key' \ -U:,-k:,--update-data: '$method=update; set@ update_key' \
-Z:,--null-value: null_value= \ -Z:,--null-value: null_value= \
-z,--null-is-empty null_value= \ -z,--null-is-empty null_value= \
-t:,--types: fieldtypes \
-A,--analyse '$method=analyse; fake=1' \ -A,--analyse '$method=analyse; fake=1' \
@ args -- "$@" && set -- "${args[@]}" || die "$args" @ args -- "$@" && set -- "${args[@]}" || die "$args"
@ -236,6 +288,34 @@ else
cfields="$(array_join fields ,)" cfields="$(array_join fields ,)"
fi fi
# Work out the type to use for each field. types[] is filled in the same
# order as fields[]; every slot starts out empty, meaning "not set explicitly".
types=()
for field in "${fields[@]}"; do
    array_add types ""
done

# Per the -t help text, each item is either FIELD:TYPE (type for that field)
# or a bare DEFAULT_TYPE.
# NOTE(review): array_fix_paths presumably splits the option values on commas
# and splitpair presumably splits on ':' -- confirm against their definitions.
default_type=auto
array_fix_paths fieldtypes ,
for ft in "${fieldtypes[@]}"; do
    splitpair "$ft" f t
    if [ -z "$t" ]; then
        # no second part: the item sets the default type
        default_type="$(norm_type "$f")"
        continue
    fi
    # find the position of the named field; a name that matches no field
    # leaves types[] untouched
    i=0
    for field in "${fields[@]}"; do
        if [ "$field" == "$f" ]; then
            types[$i]="$(norm_type "$t")"
            break
        fi
        i=$((i + 1))
    done
done

# Slots that are still empty receive the default type.
i=0
for type in "${types[@]}"; do
    if [ -z "$type" ]; then
        types[$i]="$default_type"
    fi
    i=$((i + 1))
done
mysqlargs=( mysqlargs=(
${host:+-h "$host"} ${port:+-P "$port"} ${host:+-h "$host"} ${port:+-P "$port"}
${user:+-u "$user"} ${database:+-D "$database"} ${user:+-u "$user"} ${database:+-D "$database"}
@ -271,7 +351,7 @@ elif [ "$method" == insert ]; then
ac_set_tmpfile inserts ac_set_tmpfile inserts
[ -n "$truncate" ] && echo "$truncate" >>"$inserts" [ -n "$truncate" ] && echo "$truncate" >>"$inserts"
awkcsv <"$input" >>"$inserts" -s "$skip_lines" -v table="$table" \ awkcsv <"$input" >>"$inserts" -s "$skip_lines" -v table="$table" \
-v null_value="$null_value" -a "$__AWK_MYSQLFUNCS"'{ -v null_value="$null_value" -v types[@] -a "$__AWK_MYSQLFUNCS"'{
count = length(ORIGHEADERS) count = length(ORIGHEADERS)
fields = "" fields = ""
for (i = 1; i <= count; i++) { for (i = 1; i <= count; i++) {
@ -281,7 +361,7 @@ elif [ "$method" == insert ]; then
values = "" values = ""
for (i = 1; i <= count; i++) { for (i = 1; i <= count; i++) {
if (i > 1) values = values ", " if (i > 1) values = values ", "
values = values format_sqlvalue($i) values = values format_sqlvalue($i, types[i])
} }
print "insert into `" table "` (" fields ") values (" values ");" print "insert into `" table "` (" fields ") values (" values ");"
} }
@ -305,13 +385,13 @@ elif [ "$method" == update ]; then
[ -n "$truncate" ] && echo "$truncate" >>"$updates" [ -n "$truncate" ] && echo "$truncate" >>"$updates"
awkcsv <"$input" >>"$updates" -s "$skip_lines" \ awkcsv <"$input" >>"$updates" -s "$skip_lines" \
-v table="$table" -v update_key="$update_key" \ -v table="$table" -v update_key="$update_key" \
-v null_value="$null_value" -a "$__AWK_MYSQLFUNCS"'{ -v null_value="$null_value" -v types[@] -a "$__AWK_MYSQLFUNCS"'{
set_values = "" set_values = ""
cond = "" cond = ""
count = length(ORIGHEADERS) count = length(ORIGHEADERS)
for (i = 1; i <= count; i++) { for (i = 1; i <= count; i++) {
field = ORIGHEADERS[i] field = ORIGHEADERS[i]
value = format_sqlvalue($i) value = format_sqlvalue($i, types[i])
if (field == update_key) { if (field == update_key) {
cond = "`" field "`=" value cond = "`" field "`=" value
} else { } else {
@ -337,21 +417,27 @@ END {
elif [ "$method" == analyse ]; then elif [ "$method" == analyse ]; then
ac_set_tmpfile create ac_set_tmpfile create
awkcsv <"$input" >"$create" -s "$skip_lines" -v table="$table" \ awkcsv <"$input" >"$create" -s "$skip_lines" -v table="$table" \
-v null_value="$null_value" -a "$__AWK_MYSQLFUNCS"' -v null_value="$null_value" -v types[@] -a "$__AWK_MYSQLFUNCS"'
function set_integer(i) { function set_integer(i) {
if (COL_TYPES[i] == "") { if (COL_TYPES[i] == "" || COL_TYPES[i] == "integer") {
COL_TYPES[i] = "integer" COL_TYPES[i] = "integer"
return 1
} }
return 0
} }
function set_datetime(i) { function set_datetime(i) {
if (COL_TYPES[i] == "") { if (COL_TYPES[i] == "" || COL_TYPES[i] == "datetime") {
COL_TYPES[i] = "datetime" COL_TYPES[i] = "datetime"
return 1
} }
return 0
} }
function set_date(i) { function set_date(i) {
if (COL_TYPES[i] == "") { if (COL_TYPES[i] == "" || COL_TYPES[i] == "date") {
COL_TYPES[i] = "date" COL_TYPES[i] = "date"
return 1
} }
return 0
} }
function set_varchar(i, col_size) { function set_varchar(i, col_size) {
COL_TYPES[i] = "varchar" COL_TYPES[i] = "varchar"
@ -360,26 +446,27 @@ function set_varchar(i, col_size) {
} else if (col_size > COL_SIZES[i]) { } else if (col_size > COL_SIZES[i]) {
COL_SIZES[i] = col_size COL_SIZES[i] = col_size
} }
return 1
} }
{ {
if (do_once("init")) { if (do_once("init")) {
count = length(ORIGHEADERS) count = length(ORIGHEADERS)
array_newsize(COL_TYPES, count) array_newsize(COL_TYPES, count)
array_newsize(COL_SIZES, count) array_newsize(COL_SIZES, count)
for (i = 1; i <= count; i++) {
if (types[i] != "auto") {
COL_TYPES[i] = types[i]
}
}
} }
for (i = 1; i <= count; i++) { for (i = 1; i <= count; i++) {
if (is_null($i)) { if (is_null($i)) continue
} else if (is_integer($i)) { if (is_integer($i) && set_integer(i)) continue
set_integer(i) # if (is_datetime($i) && set_datetime(i)) continue
# } else if (is_datetime($i)) { if (is_date($i) && set_date(i)) continue
# set_datetime(i)
} else if (is_date($i)) {
set_date(i)
} else {
set_varchar(i, length($i)) set_varchar(i, length($i))
} }
} }
}
END { END {
print "drop table if exists `" table "`;" print "drop table if exists `" table "`;"
print "create table `" table "` (" print "create table `" table "` ("