386 lines
11 KiB
PHP
386 lines
11 KiB
PHP
|
<?php
|
||
|
namespace nur\mapper\fsv;
|
||
|
|
||
|
use nur\A;
|
||
|
use nur\b\date\Date;
|
||
|
use nur\b\IllegalAccessException;
|
||
|
use nur\b\ValueException;
|
||
|
use nur\data\types\md_utils;
|
||
|
use nur\data\types\Metadata;
|
||
|
use nur\func;
|
||
|
|
||
|
/**
 * Schema of a text record made of fixed-width columns.
 *
 * The schema is a list of column definitions (see COLUMN_SCHEMA). It is used
 * in both directions: parseRow() cuts a line into typed values, and
 * formatLine() assembles a row of values back into a fixed-width line.
 *
 * Subclasses can change the defaults by overriding the INPUT_ENCODING,
 * DATA_ENCODING, OUTPUT_ENCODING and FSV_SCHEMA constants: they are all read
 * through late static binding.
 */
class FsvSchema {
  /**
   * Definition of a single column of the schema. The nested array layout is
   * the one expected by md_utils::ensure_md() (project helper, not shown here).
   */
  const COLUMN_SCHEMA = [
    "name" => ["string", null, "nom du champ", "required" => true],
    "size" => ["int", null, "taille du champ", "required" => true],
    "type" => ["?string", null, "type du champ: string, number, date"],
    "precision" => ["?int", null, "précision pour un champ de type number"],
    "format" => ["?string", null, "format à appliquer avant écriture"],
    # these fields are normally computed automatically by setFsvSchema():
    "position" => ["?int", null, "position du premier caractère du champ (commence à 1)"],
    # index is position - 1 and is used as a substr() offset, hence 0-based
    "index" => ["?int", null, "index du premier caractère du champ (commence à 0)"],
  ];

  /** @var Metadata metadata built from COLUMN_SCHEMA, shared by all instances */
  private static $column_md;

  /**
   * Return the metadata object describing a column definition.
   *
   * NOTE(review): md_utils::ensure_md() presumably builds the object on first
   * call and stores it in self::$column_md -- confirm against md_utils.
   */
  private static function column_md(): Metadata {
    return md_utils::ensure_md(self::$column_md, self::COLUMN_SCHEMA);
  }

  /** @return string|null default encoding of the lines given to parseRow() */
  protected function INPUT_ENCODING(): ?string {
    return static::INPUT_ENCODING;
  }
  const INPUT_ENCODING = "latin1";

  /** @return string|null default encoding of the values handled in memory */
  protected function DATA_ENCODING(): ?string {
    return static::DATA_ENCODING;
  }
  const DATA_ENCODING = "utf-8";

  /** @return string|null default encoding of the lines built by formatLine() */
  protected function OUTPUT_ENCODING(): ?string {
    return static::OUTPUT_ENCODING;
  }
  const OUTPUT_ENCODING = "latin1//TRANSLIT//IGNORE";

  /**
   * @return array|null default schema, used when none is given to the
   * constructor or to setFsvSchema()
   */
  protected function FSV_SCHEMA(): ?array {
    # late static binding, like the encoding accessors above: with self:: a
    # subclass overriding the constant would be silently ignored
    return static::FSV_SCHEMA;
  }
  const FSV_SCHEMA = null;

  /**
   * @param array|null $fsvSchema list of column definitions; null selects the
   * FSV_SCHEMA constant. All encodings start with their default value.
   */
  function __construct(?array $fsvSchema=null) {
    $this->setFsvSchema($fsvSchema);
    $this->setInputEncoding(null);
    $this->setDataEncoding(null);
    $this->setOutputEncoding(null);
  }

  /** @var string|null encoding of the lines given to parseRow() */
  protected $inputEncoding;

  /**
   * Set the input encoding.
   *
   * @param string|null $inputEncoding null selects INPUT_ENCODING()
   * @return self this object, for chaining
   */
  function setInputEncoding(?string $inputEncoding): self {
    if ($inputEncoding === null) $inputEncoding = $this->INPUT_ENCODING();
    $this->inputEncoding = $inputEncoding;
    return $this;
  }

  /** @var string|null encoding of the values handled in memory */
  protected $dataEncoding;

  /**
   * Set the data encoding.
   *
   * @param string|null $dataEncoding null selects DATA_ENCODING()
   * @return self this object, for chaining
   */
  function setDataEncoding(?string $dataEncoding): self {
    if ($dataEncoding === null) $dataEncoding = $this->DATA_ENCODING();
    $this->dataEncoding = $dataEncoding;
    return $this;
  }

  /** @var string|null encoding of the lines built by formatLine() */
  protected $outputEncoding;

  /**
   * Set the output encoding.
   *
   * @param string|null $outputEncoding null selects OUTPUT_ENCODING()
   * @return self this object, for chaining
   */
  function setOutputEncoding(?string $outputEncoding): self {
    if ($outputEncoding === null) $outputEncoding = $this->OUTPUT_ENCODING();
    $this->outputEncoding = $outputEncoding;
    return $this;
  }

  /**
   * Convert every string value of $row from the input encoding to the data
   * encoding. Non-string values are left untouched, and nothing is converted
   * when either encoding is null.
   */
  protected function iconvInput(array $row): array {
    $inputEncoding = $this->inputEncoding;
    $dataEncoding = $this->dataEncoding;
    if ($inputEncoding !== null && $dataEncoding !== null) {
      foreach ($row as &$col) {
        if (is_string($col)) $col = iconv($inputEncoding, $dataEncoding, $col);
      }
      # drop the reference so that a later write to $col cannot alter $row
      unset($col);
    }
    return $row;
  }

  /**
   * Convert $line from the data encoding to the output encoding. The line is
   * returned unchanged when either encoding is null.
   */
  protected function iconvOutput(string $line): string {
    $dataEncoding = $this->dataEncoding;
    $outputEncoding = $this->outputEncoding;
    if ($outputEncoding !== null && $dataEncoding !== null) {
      $line = iconv($dataEncoding, $outputEncoding, $line);
    }
    return $line;
  }

  /** @var bool should empty strings read by parseRow() be replaced? */
  protected $shouldMapEmpty = false;

  /** @var mixed replacement for empty strings when $shouldMapEmpty is true */
  protected $mapEmpty = null;

  /**
   * Ask parseRow() to replace every value equal to "" with $mapEmpty.
   * Calling this method always enables the replacement, even with null.
   */
  function setMapEmpty($mapEmpty=null): void {
    $this->shouldMapEmpty = true;
    $this->mapEmpty = $mapEmpty;
  }

  /**
   * @var bool should headers and data be generated as sequential arrays?
   */
  protected $outputSeq;

  /** @var bool should headers be emitted on output? */
  protected $outputKeys;

  /**
   * Select sequential (true) or name-keyed (false) rows in parseRow(), and
   * re-arm the "headers not emitted yet" flag read by _outputKeys().
   */
  function setOutputSeq(bool $outputSeq=true): void {
    $this->outputSeq = $outputSeq;
    $this->outputKeys = true;
  }

  /** @var array normalised column definitions, set by setFsvSchema() */
  protected $fsvSchema;

  /** @return array the normalised column definitions */
  function getFsvSchema(): array {
    return $this->fsvSchema;
  }

  /** @var array list of column names, in schema order */
  protected $fsvColumns;

  /** @return array list of column names, in schema order */
  function getFsvColumns(): array {
    return $this->fsvColumns;
  }

  /**
   * Normalise and store a schema.
   *
   * Each definition is normalised through the column metadata, given default
   * values, validated, and assigned a position: columns are laid out one
   * after the other, starting at position 1 (index 0).
   *
   * @param array|null $fsvSchema list of column definitions; null selects
   * FSV_SCHEMA(). If that is null too, the object is left unchanged.
   * @return self this object, for chaining
   * @throws ValueException if a column definition is invalid
   */
  function setFsvSchema(?array $fsvSchema): self {
    if ($fsvSchema === null) $fsvSchema = $this->FSV_SCHEMA();
    if ($fsvSchema === null) return $this;
    $column_md = self::column_md();
    $index = 0;
    $position = 1;
    $columns = [];
    foreach ($fsvSchema as $key => &$sfield) {
      if ($key === $index) {
        # sequential key: the definition carries its own name
        $index++;
        $column_md->ensureSchema($sfield);
      } else {
        # associative key: the key is handed over along with the definition
        # (presumably used as the column name -- confirm against Metadata)
        $column_md->ensureSchema($sfield, $key);
      }
      # NOTE(review): A::replace_n() presumably sets the key only when its
      # current value is null, keeping explicit values -- confirm against A
      A::replace_n($sfield, "type", "string");
      A::replace_n($sfield, "precision", 0);
      A::replace_n($sfield, "position", $position);
      A::replace_n($sfield, "index", $position - 1);
      $this->validateSfield($sfield);
      $position += $sfield["size"];
      $columns[] = $sfield["name"];
    }
    unset($sfield);
    $this->fsvSchema = $fsvSchema;
    $this->fsvColumns = $columns;
    return $this;
  }

  /**
   * Check that a normalised column definition is usable.
   *
   * @throws ValueException if the type is unknown, or if a date column has a
   * size other than 6 or 8
   */
  protected function validateSfield($sfield): void {
    $type = $sfield["type"];
    switch ($type) {
    case "string":
    case "number":
      break;
    case "date":
      $size = $sfield["size"];
      #XXX take the format into account
      if ($size != 6 && $size != 8) {
        throw new ValueException("date type require size=6 or size=8");
      }
      break;
    default:
      throw ValueException::invalid_value($type, "type");
    }
  }

  /**
   * Tell whether a schema has been set.
   *
   * @param bool $throw throw instead of returning false
   * @throws ValueException if no schema is set and $throw is true
   */
  function checkFsvSchema(bool $throw=true): bool {
    if ($this->fsvSchema !== null) return true;
    elseif ($throw) throw new ValueException("a schema is required");
    else return false;
  }

  #############################################################################

  /**
   * Tell whether headers should be emitted now: true only in sequential mode
   * and while the flag armed by setOutputSeq() is still set.
   *
   * @param bool $reset clear the flag, so that the next call returns false
   */
  function _outputKeys(bool $reset=true): bool {
    $outputKeys = $this->outputSeq && $this->outputKeys;
    if ($reset) $this->outputKeys = false;
    return $outputKeys;
  }

  /** @return array list of column names, in schema order */
  function _getKeys(): array {
    return $this->fsvColumns;
  }

  /**
   * Cut a fixed-width line into a row of values.
   *
   * Offsets and sizes are counted in bytes, on the line as received (before
   * any encoding conversion). A number or date column yields false when it is
   * blank or lies beyond the end of the line; a string column beyond the end
   * of the line yields false too.
   *
   * @param string $line the line to parse
   * @param bool $iconv convert string values from the input encoding to the
   * data encoding
   * @return array values keyed by column name, or a sequential list when
   * sequential output was selected with setOutputSeq()
   * @throws ValueException if no schema is set
   */
  function parseRow(string $line, bool $iconv=true): array {
    $this->checkFsvSchema();
    $outputSeq = $this->outputSeq;
    $row = [];
    $length = strlen($line);
    foreach ($this->fsvSchema as $sfield) {
      [
        "name" => $name,
        "index" => $index,
        "size" => $size,
        "type" => $type,
      ] = $sfield;
      if ($index >= $length) {
        # the line ends before this column
        $value = false;
      } else {
        $value = substr($line, $index, $size);
        # A column cut short by the end of the line holds fewer than $size
        # characters: test for "only spaces" rather than "exactly $size
        # spaces", so that it is seen as blank like any other missing column.
        $blank = trim($value, " ") === "";
        switch ($type) {
        case "string":
          $value = $this->parseString($value, $sfield);
          break;
        case "number":
          $value = $blank? false: $this->parseNumber($value, $sfield);
          break;
        case "date":
          $value = $blank? false: $this->parseDate($value, $sfield);
          break;
        default:
          # cannot happen: types are checked by validateSfield()
          throw IllegalAccessException::unexpected_state();
        }
        if ($value === "" && $this->shouldMapEmpty) $value = $this->mapEmpty;
      }
      if ($outputSeq) $row[] = $value;
      else $row[$name] = $value;
    }
    if ($iconv) $row = $this->iconvInput($row);
    return $row;
  }

  /** Parse a string column: trailing whitespace is removed. */
  protected function parseString(string $value, array $sfield): string {
    return rtrim($value);
  }

  /**
   * Parse a number column.
   *
   * @return int|float an int when the precision is 0; otherwise the digits
   * divided by 10^precision (the decimal point is implied, not stored)
   */
  protected function parseNumber(string $value, array $sfield) {
    $precision = $sfield["precision"];
    if ($precision == 0) {
      return intval($value);
    }
    return floatval($value) / (10**$precision);
  }

  /**
   * Parse a date column stored as ddmmyyyy (size 8) or ddmmyy (size 6).
   *
   * @return string the date as "dd/mm/yyyy"; for size 6 the year is expanded
   * with Date::fix_any_year()
   */
  protected function parseDate(string $value, array $sfield) {
    $size = $sfield["size"];
    $dd = substr($value, 0, 2);
    $mm = substr($value, 2, 2);
    if ($size == 8) {
      $yyyy = substr($value, 4, 4);
    } elseif ($size == 6) {
      $yy = substr($value, 4, 2);
      $yyyy = Date::fix_any_year($yy);
    } else {
      # cannot happen: sizes are checked by validateSfield()
      throw IllegalAccessException::unexpected_state();
    }
    return "$dd/$mm/$yyyy";
  }

  #############################################################################

  /**
   * Make $value exactly $size characters long: pad with spaces on the right
   * when too short, truncate on the right when too long. Lengths are counted
   * with the mb_* functions, i.e. in characters rather than bytes.
   */
  protected static function ensure_size(string $value, int $size): string {
    $length = mb_strlen($value);
    if ($length < $size) {
      return $value.str_repeat(" ", $size - $length);
    } elseif ($length > $size) {
      return mb_substr($value, 0, $size);
    }
    return $value;
  }

  /**
   * Build (without throwing it) the exception reporting that a formatted
   * value does not have the size of its column.
   */
  protected static function invalid_size(string $value, int $actual_size, array $sfield): ValueException {
    ["name" => $name, "size" => $size, "format" => $format] = $sfield;
    return new ValueException("field=$name with format $format, value=|$value|, expected size=$size, actual size=$actual_size");
  }

  /**
   * Assemble a row of values into a fixed-width line.
   *
   * Values are looked up by column name with A::get(), with "" as default
   * (presumably returned when the key is missing -- confirm against A). A
   * value that is exactly false produces a blank column.
   *
   * @param array $row values keyed by column name
   * @param bool $iconv convert the line from the data encoding to the output
   * encoding
   * @throws ValueException if no schema is set, or if a formatted value does
   * not have the size of its column
   */
  function formatLine(array $row, bool $iconv=true): string {
    $this->checkFsvSchema();
    $line = [];
    foreach ($this->fsvSchema as $sfield) {
      [
        "name" => $name,
        "size" => $size,
        "type" => $type,
      ] = $sfield;
      $value = A::get($row, $name, "");
      if ($value === false) {
        $value = self::ensure_size("", $size);
      } else {
        switch ($type) {
        case "string":
          $value = $this->formatString($value, $sfield);
          break;
        case "number":
          $value = $this->formatNumber($value, $sfield);
          break;
        case "date":
          $value = $this->formatDate($value, $sfield);
          break;
        default:
          # cannot happen: types are checked by validateSfield()
          throw IllegalAccessException::unexpected_state();
        }
      }
      $line[] = $value;
    }
    $line = implode("", $line);
    if ($iconv) $line = $this->iconvOutput($line);
    return $line;
  }

  /**
   * Format a string column.
   *
   * Without format, the value is padded or truncated to the column size.
   * With a format "xxx", the method stringFormat_xxx() of this object is
   * called through func::call() and must return a value of exactly the
   * column size.
   *
   * @throws ValueException if the formatted value has the wrong size
   */
  function formatString($value, $sfield): string {
    ["size" => $size, "format" => $format] = $sfield;
    $value = strval($value);
    if ($format !== null) {
      $func = [$this, "stringFormat_$format"];
      $value = func::call($func, $value, $sfield);
      $actualSize = mb_strlen($value);
      if ($actualSize != $size) {
        throw self::invalid_size($value, $actualSize, $sfield);
      }
    } else {
      $value = self::ensure_size($value, $size);
    }
    return $value;
  }

  /** String format "upper": convert to upper case. */
  function stringFormat_upper(string $value): string {
    return mb_strtoupper($value);
  }

  /** String format "lower": convert to lower case. */
  function stringFormat_lower(string $value): string {
    return mb_strtolower($value);
  }

  /**
   * Format a number column.
   *
   * With a format, it is used as a sprintf() format and the result must have
   * exactly the column size. Otherwise the number is zero-padded on the left;
   * when the precision is not 0, the decimal point is removed after
   * formatting, so the last $precision digits are the decimals.
   *
   * @throws ValueException if the formatted value has the wrong size
   */
  function formatNumber($value, $sfield): string {
    ["size" => $size, "precision" => $precision, "format" => $format] = $sfield;
    if ($format !== null) {
      $value = sprintf($format, $value);
      $actualSize = strlen($value);
      if ($actualSize != $size) {
        throw self::invalid_size($value, $actualSize, $sfield);
      }
    } elseif ($precision == 0) {
      # "{$var}" rather than "${var}": the latter is deprecated since PHP 8.2
      $value = sprintf("%0{$size}u", $value);
    } else {
      # one more character for the decimal point, which is removed afterwards
      $size++;
      $value = sprintf("%0{$size}.{$precision}F", $value);
      $value = str_replace(".", "", $value);
    }
    return $value;
  }

  /**
   * Format a date column.
   *
   * The value is wrapped in a Date object. With a format, it is given to
   * Date::format() and the result must have exactly the column size.
   * Otherwise the format is "dmy" for size 6 and "dmY" for size 8.
   *
   * @throws ValueException if the formatted value has the wrong size
   */
  function formatDate($value, $sfield): string {
    $date = new Date($value);
    ["size" => $size, "format" => $format] = $sfield;
    if ($format !== null) {
      $value = $date->format($format);
      $actualSize = strlen($value);
      if ($actualSize != $size) {
        throw self::invalid_size($value, $actualSize, $sfield);
      }
    } elseif ($size == 6) {
      $value = $date->format("dmy");
    } elseif ($size == 8) {
      $value = $date->format("dmY");
    } else {
      # cannot happen: sizes are checked by validateSfield()
      throw IllegalAccessException::unexpected_state();
    }
    return $value;
  }
}
|