possibilité de spécifier les types des champs pour mysqlloadcsv

This commit is contained in:
Jephté Clain 2015-05-19 18:24:41 +04:00
parent b2c9f81545
commit caeb06ee4a
1 changed files with 112 additions and 25 deletions

View File

@ -95,6 +95,11 @@ OPTIONS
-z, --null-is-empty
Avec les options -I et -U, considérer que NULL est représenté par la
chaine vide. Cette option est équivalente à -Z ''
-t, --types [DEFAULT_TYPE,]FIELD:TYPE,...
Spécifier pour chaque champ mentionné le type de donnée à forcer. Le
type 'auto' signifie que le type est autodétecté. C'est la valeur par
défaut. Les autres types valides sont 'str', 'int' et 'date'
Cette option est ignorée avec l'option -L
-A, --analyse
Analyser les données et afficher une requête pour créer une table qui
pourrait contenir ces données.
@ -103,6 +108,17 @@ OPTIONS
APRES cette option"
}
# Map a user-supplied type name or alias to its canonical form (varchar,
# integer, date or auto). A name that is not a known alias is printed back
# unchanged.
# $1: the type name or alias to normalize
function norm_type() {
    local canonical="$1"
    case "$canonical" in
    s|str|string|varchar) canonical=varchar;;
    i|int|integer) canonical=integer;;
    #dt|datetime) canonical=datetime;;
    d|date) canonical=date;;
    a|auto) canonical=auto;;
    esac
    echo "$canonical"
}
__AWK_MYSQLFUNCS='
function is_null(value) {
return value == null_value
@ -127,11 +143,23 @@ function is_date(value) {
}
#function is_datetime(value) {
#}
function format_sqlvalue(value) {
if (is_null(value)) {
value = "NULL"
} else if (is_integer(value)) {
} else if (is_date_dmy(value)) {
# Return value as a quoted SQL string literal. Every single quote found in the
# value is doubled (& in the replacement stands for the matched quote).
function format_string(value) {
    gsub(/'\''/, "&&", value)
    return "'\''" value "'\''"
}
# Format value as a SQL integer literal.
# Leading zeros are stripped. A value made only of zeros ("0", "00", ...) is
# normalized to 0 rather than being stripped down to an empty string, which
# would produce an invalid SQL statement.
# A value rejected by is_integer() is returned as a quoted string literal.
function format_integer(value) {
    if (!is_integer(value)) {
        return format_string(value)
    }
    sub(/^0+/, "", value)
    # nothing left means the value was all zeros
    if (value == "") {
        value = "0"
    }
    return value
}
function format_date(value) {
if (is_date_dmy(value)) {
value = "str_to_date('\''" value "'\'', '\''%d/%m/%y'\'')"
} else if (is_date_dmY(value)) {
value = "str_to_date('\''" value "'\'', '\''%d/%m/%Y'\'')"
@ -140,8 +168,30 @@ function format_sqlvalue(value) {
} else if (is_date_Ymd2(value)) {
value = "str_to_date('\''" value "'\'', '\''%Y-%m-%d'\'')"
} else {
gsub(/'\''/, "'\'\''", value)
value = "'\''" value "'\''"
value = format_string(value)
}
return value
}
# Format value as a SQL expression, according to type.
# value: the raw field value
# type: varchar, integer or date (or one of their aliases) forces the matching
#   formatter. Any other type, including "auto" and a missing argument, makes
#   the function autodetect the type from the value itself.
# A value recognized by is_null() always yields NULL, whatever the type: a
# forced type must not turn the NULL marker into a quoted string.
function format_sqlvalue(value, type) {
    if (is_null(value)) {
        return "NULL"
    }
    if (type == "varchar" || type == "string" || type == "str" || type == "s") {
        return format_string(value)
    }
    if (type == "integer" || type == "int" || type == "i") {
        return format_integer(value)
    }
    if (type == "date" || type == "d") {
        return format_date(value)
    }
    #if (type == "datetime" || type == "dt") {
    #    return format_datetime(value)
    #}
    # by default, autodetect the type
    if (is_integer(value)) {
        return format_integer(value)
    }
    if (is_date(value)) {
        return format_date(value)
    }
    return format_string(value)
}
@ -161,6 +211,7 @@ fake=
method=load
update_key=
null_value='\N'
fieldtypes=()
parse_opts "${PRETTYOPTS[@]}" \
--help '$exit_with display_help' \
-h:,-H:,--host: host= \
@ -179,6 +230,7 @@ parse_opts "${PRETTYOPTS[@]}" \
-U:,-k:,--update-data: '$method=update; set@ update_key' \
-Z:,--null-value: null_value= \
-z,--null-is-empty null_value= \
-t:,--types: fieldtypes \
-A,--analyse '$method=analyse; fake=1' \
@ args -- "$@" && set -- "${args[@]}" || die "$args"
@ -236,6 +288,34 @@ else
cfields="$(array_join fields ,)"
fi
# Work out the type to use for each field.
# types is built with one slot per entry of fields; an empty slot means that
# no explicit type was requested for that field.
types=()
for field in "${fields[@]}"; do
    array_add types ""
done
default_type=auto
array_fix_paths fieldtypes ,
for ft in "${fieldtypes[@]}"; do
    splitpair "$ft" f t
    if [ -z "$t" ]; then
        # no TYPE part: the item is the default type
        default_type="$(norm_type "$f")"
        continue
    fi
    # FIELD:TYPE item: set the type of the first field with that name, if any
    i=0
    for field in "${fields[@]}"; do
        if [ "$field" == "$f" ]; then
            types[$i]="$(norm_type "$t")"
            break
        fi
        i=$((i + 1))
    done
done
# slots still empty get the default type
i=0
for type in "${types[@]}"; do
    [ -n "$type" ] || types[$i]="$default_type"
    i=$((i + 1))
done
mysqlargs=(
${host:+-h "$host"} ${port:+-P "$port"}
${user:+-u "$user"} ${database:+-D "$database"}
@ -271,7 +351,7 @@ elif [ "$method" == insert ]; then
ac_set_tmpfile inserts
[ -n "$truncate" ] && echo "$truncate" >>"$inserts"
awkcsv <"$input" >>"$inserts" -s "$skip_lines" -v table="$table" \
-v null_value="$null_value" -a "$__AWK_MYSQLFUNCS"'{
-v null_value="$null_value" -v types[@] -a "$__AWK_MYSQLFUNCS"'{
count = length(ORIGHEADERS)
fields = ""
for (i = 1; i <= count; i++) {
@ -281,7 +361,7 @@ elif [ "$method" == insert ]; then
values = ""
for (i = 1; i <= count; i++) {
if (i > 1) values = values ", "
values = values format_sqlvalue($i)
values = values format_sqlvalue($i, types[i])
}
print "insert into `" table "` (" fields ") values (" values ");"
}
@ -305,13 +385,13 @@ elif [ "$method" == update ]; then
[ -n "$truncate" ] && echo "$truncate" >>"$updates"
awkcsv <"$input" >>"$updates" -s "$skip_lines" \
-v table="$table" -v update_key="$update_key" \
-v null_value="$null_value" -a "$__AWK_MYSQLFUNCS"'{
-v null_value="$null_value" -v types[@] -a "$__AWK_MYSQLFUNCS"'{
set_values = ""
cond = ""
count = length(ORIGHEADERS)
for (i = 1; i <= count; i++) {
field = ORIGHEADERS[i]
value = format_sqlvalue($i)
value = format_sqlvalue($i, types[i])
if (field == update_key) {
cond = "`" field "`=" value
} else {
@ -337,21 +417,27 @@ END {
elif [ "$method" == analyse ]; then
ac_set_tmpfile create
awkcsv <"$input" >"$create" -s "$skip_lines" -v table="$table" \
-v null_value="$null_value" -a "$__AWK_MYSQLFUNCS"'
-v null_value="$null_value" -v types[@] -a "$__AWK_MYSQLFUNCS"'
function set_integer(i) {
if (COL_TYPES[i] == "") {
if (COL_TYPES[i] == "" || COL_TYPES[i] == "integer") {
COL_TYPES[i] = "integer"
return 1
}
return 0
}
function set_datetime(i) {
if (COL_TYPES[i] == "") {
if (COL_TYPES[i] == "" || COL_TYPES[i] == "datetime") {
COL_TYPES[i] = "datetime"
return 1
}
return 0
}
function set_date(i) {
if (COL_TYPES[i] == "") {
if (COL_TYPES[i] == "" || COL_TYPES[i] == "date") {
COL_TYPES[i] = "date"
return 1
}
return 0
}
function set_varchar(i, col_size) {
COL_TYPES[i] = "varchar"
@ -360,25 +446,26 @@ function set_varchar(i, col_size) {
} else if (col_size > COL_SIZES[i]) {
COL_SIZES[i] = col_size
}
return 1
}
{
if (do_once("init")) {
count = length(ORIGHEADERS)
array_newsize(COL_TYPES, count)
array_newsize(COL_SIZES, count)
for (i = 1; i <= count; i++) {
if (types[i] != "auto") {
COL_TYPES[i] = types[i]
}
}
}
for (i = 1; i <= count; i++) {
if (is_null($i)) {
} else if (is_integer($i)) {
set_integer(i)
# } else if (is_datetime($i)) {
# set_datetime(i)
} else if (is_date($i)) {
set_date(i)
} else {
if (is_null($i)) continue
if (is_integer($i) && set_integer(i)) continue
# if (is_datetime($i) && set_datetime(i)) continue
if (is_date($i) && set_date(i)) continue
set_varchar(i, length($i))
}
}
}
END {
print "drop table if exists `" table "`;"