nur-sery/nur_src/io/fsv/FsvSchema.php

405 lines
12 KiB
PHP
Raw Permalink Normal View History

2023-12-03 22:10:18 +04:00
<?php
namespace nur\io\fsv;
use nur\A;
use nur\b\date\Date;
use nur\b\IllegalAccessException;
use nur\b\ValueException;
use nur\data\types\md_utils;
use nur\data\types\Metadata;
use nur\data\types\SDateType;
use nur\func;
use nur\str;
class FsvSchema {
/**
 * Metadata schema describing one column of a fixed-width record.
 *
 * "position" and "index" are normally filled in by setSchema(): position is
 * 1-based, whereas index is the 0-based offset that parseRow() passes to
 * substr(). The description of "index" states this explicitly.
 */
const COLUMN_SCHEMA = [
  "name" => ["string", null, "nom du champ", "required" => true],
  "size" => ["int", null, "taille du champ", "required" => true],
  "type" => ["?string", null, "type du champ: string, number, date"],
  "precision" => ["?int", null, "précision pour un champ de type number"],
  "format" => ["?string", null, "format à appliquer avant écriture"],
  # these fields are normally computed automatically:
  "position" => ["?int", null, "position du premier caractère du champ (commence à 1)"],
  "index" => ["?int", null, "index du premier caractère du champ (commence à 0)"],
];
/** @var Metadata static slot passed to md_utils::ensure_md(); shared by every instance of the class */
private static $column_md;
/**
 * Return the Metadata object associated with COLUMN_SCHEMA.
 *
 * Delegates to md_utils::ensure_md() with the static slot and the schema
 * constant.
 * NOTE(review): presumably ensure_md() builds the Metadata on first use and
 * caches it in self::$column_md through a by-reference parameter — confirm
 * against md_utils.
 */
private static function column_md(): Metadata {
return md_utils::ensure_md(self::$column_md, self::COLUMN_SCHEMA);
}
/** Class-level default for the source encoding used by iconvInput(). */
const INPUT_ENCODING = "latin1";
/**
 * Default input encoding.
 *
 * Resolved with late static binding, so a subclass only has to redefine the
 * INPUT_ENCODING constant (or override this method) to change it.
 */
protected function INPUT_ENCODING(): ?string {
  $default = static::INPUT_ENCODING;
  return $default;
}
/**
 * Class-level default for the in-memory encoding: the target of iconvInput()
 * and the source of iconvOutput().
 */
const DATA_ENCODING = "utf-8";
/**
 * Default data encoding.
 *
 * Resolved with late static binding, so a subclass only has to redefine the
 * DATA_ENCODING constant (or override this method) to change it.
 */
protected function DATA_ENCODING(): ?string {
  $default = static::DATA_ENCODING;
  return $default;
}
/** Class-level default for the target encoding used by iconvOutput(). */
const OUTPUT_ENCODING = "latin1//TRANSLIT//IGNORE";
/**
 * Default output encoding.
 *
 * Resolved with late static binding, so a subclass only has to redefine the
 * OUTPUT_ENCODING constant (or override this method) to change it.
 */
protected function OUTPUT_ENCODING(): ?string {
  $default = static::OUTPUT_ENCODING;
  return $default;
}
/** Class-level default schema; null means that no default schema is defined. */
const SCHEMA = null;
/**
 * Default schema used by setSchema() when it is called with null.
 *
 * Resolved with late static binding, like the encoding accessors of this
 * class, so that a subclass redefining the SCHEMA constant is honoured
 * (self::SCHEMA would always read the null declared here).
 *
 * @return array|null the schema definition, or null when none is declared
 */
protected function SCHEMA(): ?array {
  return static::SCHEMA;
}
/**
 * Build a schema object.
 *
 * Every setter below is given null for the encodings, which makes it fall
 * back to the corresponding class-level default.
 *
 * @param array|null $schema column definitions; null selects the default
 *   returned by SCHEMA()
 */
public function __construct(?array $schema=null) {
  $this->setSchema($schema);

  # null selects INPUT_ENCODING(), DATA_ENCODING() and OUTPUT_ENCODING()
  $this->setInputEncoding(null);
  $this->setDataEncoding(null);
  $this->setOutputEncoding(null);
}
/** @var string|null source encoding for iconvInput(); when null no conversion is done */
protected $inputEncoding;
/**
 * Select the input encoding.
 *
 * @param string|null $inputEncoding null selects the default from INPUT_ENCODING()
 * @return self this instance
 */
public function setInputEncoding(?string $inputEncoding): self {
  $this->inputEncoding = $inputEncoding ?? $this->INPUT_ENCODING();
  return $this;
}
/** @var string|null encoding of the parsed data; when null neither iconvInput() nor iconvOutput() converts */
protected $dataEncoding;
/**
 * Select the data encoding.
 *
 * @param string|null $dataEncoding null selects the default from DATA_ENCODING()
 * @return self this instance
 */
public function setDataEncoding(?string $dataEncoding): self {
  $this->dataEncoding = $dataEncoding ?? $this->DATA_ENCODING();
  return $this;
}
/** @var string|null target encoding for iconvOutput(); when null no conversion is done */
protected $outputEncoding;
/**
 * Select the output encoding.
 *
 * @param string|null $outputEncoding null selects the default from OUTPUT_ENCODING()
 * @return self this instance
 */
public function setOutputEncoding(?string $outputEncoding): self {
  $this->outputEncoding = $outputEncoding ?? $this->OUTPUT_ENCODING();
  return $this;
}
/**
 * Convert every string value of a parsed row from the input encoding to the
 * data encoding.
 *
 * Non-string values are left untouched. Nothing is converted when either
 * encoding is null.
 *
 * @param array $row parsed values
 * @return array the row with its string values converted by iconv()
 */
protected function iconvInput(array $row): array {
  $from = $this->inputEncoding;
  $to = $this->dataEncoding;
  if ($from === null || $to === null) return $row;

  foreach ($row as $key => $col) {
    if (is_string($col)) $row[$key] = iconv($from, $to, $col);
  }
  return $row;
}
/**
 * Convert a formatted line from the data encoding to the output encoding.
 *
 * The line is returned unchanged when either encoding is null.
 *
 * @param string $line line in the data encoding
 * @return string the result of iconv() on the line
 */
protected function iconvOutput(string $line): string {
  $from = $this->dataEncoding;
  $to = $this->outputEncoding;
  if ($to === null || $from === null) return $line;
  return iconv($from, $to, $line);
}
/** @var bool whether parseRow() replaces empty-string values with $mapEmpty */
protected $shouldMapEmpty = false;
/** @var mixed replacement used by parseRow() for empty-string values */
protected $mapEmpty = null;
/**
 * Enable the replacement of empty-string values in parseRow().
 *
 * @param mixed $mapEmpty value substituted for ""
 */
public function setMapEmpty($mapEmpty=null): void {
  $this->mapEmpty = $mapEmpty;
  $this->shouldMapEmpty = true;
}
/**
 * @var bool whether headers and data should be produced as sequential
 * arrays
 */
protected $outputSeq;
/** @var bool whether the headers should be emitted in the output */
protected $outputKeys;
/**
 * Choose between sequential and associative rows.
 *
 * Calling this method always re-arms the header flag consumed by
 * _outputKeys(), whatever the value of $outputSeq.
 *
 * @param bool $outputSeq true to have parseRow() build sequential rows
 */
public function setOutputSeq(bool $outputSeq=true): void {
  $this->outputKeys = true;
  $this->outputSeq = $outputSeq;
}
/** @var array normalized column definitions, as stored by setSchema() */
protected $schema;
/**
 * @return array the normalized schema
 *
 * NOTE(review): the property stays null until setSchema() stores a schema,
 * in which case the declared return type is violated — use checkSchema()
 * first when unsure.
 */
public function getSchema(): array {
  $schema = $this->schema;
  return $schema;
}
/** @var array list of the field names */
protected $columns;
/**
 * @return array the field names, in schema order
 *
 * NOTE(review): the property stays null until setSchema() stores a schema,
 * in which case the declared return type is violated.
 */
public function getColumns(): array {
  $columns = $this->columns;
  return $columns;
}
/**
 * Normalize and store a schema.
 *
 * Each entry is normalized through the COLUMN_SCHEMA metadata, given default
 * values for type, precision, position and index, then validated. Positions
 * are accumulated from the field sizes, starting at 1; the index is the
 * position minus one.
 *
 * @param array|null $schema column definitions; null selects SCHEMA(). When
 *   that is null too, nothing is changed.
 * @return self this instance
 */
public function setSchema(?array $schema): self {
  $schema = $schema ?? $this->SCHEMA();
  if ($schema === null) return $this;

  $md = self::column_md();
  $names = [];
  $nextPosition = 1;
  $expectedKey = 0; # key the next purely sequential entry would have
  foreach ($schema as $key => &$field) {
    if ($key !== $expectedKey) {
      # associative entry: the key is handed to ensureSchema() as well
      $md->ensureSchema($field, $key);
    } else {
      # sequential entry
      $md->ensureSchema($field);
      $expectedKey++;
    }
    # NOTE(review): replace_n() presumably only fills values that are still
    # null — confirm against nur\A
    A::replace_n($field, "type", "string");
    A::replace_n($field, "precision", 0);
    A::replace_n($field, "position", $nextPosition);
    A::replace_n($field, "index", $nextPosition - 1);
    $this->validateSfield($field);
    $nextPosition += $field["size"];
    $names[] = $field["name"];
  }
  unset($field);

  $this->schema = $schema;
  $this->columns = $names;
  return $this;
}
/**
 * Check that a normalized field definition is usable.
 *
 * Accepted types are "string", "number" and "date"; a date field must have a
 * size of 6 or 8.
 *
 * @param array $sfield normalized field definition
 * @throws ValueException for an unknown type or an unsupported date size
 */
protected function validateSfield($sfield): void {
  $type = $sfield["type"];
  switch ($type) {
    case "string":
    case "number":
      return;
    case "date":
      #XXX take the format into account
      $size = $sfield["size"];
      $supported = $size == 6 || $size == 8;
      if (!$supported) {
        throw new ValueException("date type require size=6 or size=8");
      }
      return;
  }
  throw ValueException::invalid_value($type, "type");
}
/**
 * Tell whether a schema has been set.
 *
 * @param bool $throw when true, a missing schema raises an exception instead
 *   of returning false
 * @return bool true when a schema is available
 * @throws ValueException when no schema is set and $throw is true
 */
public function checkSchema(bool $throw=true): bool {
  $hasSchema = $this->schema !== null;
  if (!$hasSchema && $throw) throw new ValueException("a schema is required");
  return $hasSchema;
}
#############################################################################
/**
 * Tell whether the headers are still to be emitted, which is only the case
 * in sequential mode.
 *
 * @param bool $reset when true, the header flag is cleared so that the next
 *   call returns false
 * @return bool true when sequential output is enabled and the header flag is
 *   still set
 */
public function _outputKeys(bool $reset=true): bool {
  $pending = $this->outputSeq && $this->outputKeys;
  if ($reset) {
    $this->outputKeys = false;
  }
  return $pending;
}
/**
 * @return array the field names, i.e. the same list as getColumns()
 */
public function _getKeys(): array {
  $keys = $this->columns;
  return $keys;
}
/**
 * Split a fixed-width line into values according to the schema.
 *
 * Offsets and sizes are counted in bytes (strlen/substr) because the line is
 * sliced before any encoding conversion takes place.
 *
 * - a field starting beyond the end of the line yields false
 * - a number or date field holding only spaces yields false, including when
 *   the field is cut short by the end of the line
 * - an empty-string result is replaced by the value given to setMapEmpty(),
 *   once that method has been called
 *
 * @param string $line raw line
 * @param bool $iconv whether to run the values through iconvInput()
 * @return array values keyed by field name, or sequential after
 *   setOutputSeq(true)
 * @throws ValueException when no schema is set
 */
public function parseRow(string $line, bool $iconv=true): array {
  $this->checkSchema();
  $outputSeq = $this->outputSeq;
  $length = strlen($line);
  $row = [];
  foreach ($this->schema as $sfield) {
    [
      "name" => $name,
      "index" => $index,
      "size" => $size,
      "type" => $type,
    ] = $sfield;
    if ($index >= $length) {
      $value = false;
    } else {
      $value = substr($line, $index, $size);
      # comparing against a full-width blank would miss a blank field that is
      # truncated by the end of the line, hence the test on the actual content
      $isBlank = trim($value, " ") === "";
      switch ($type) {
      case "string":
        $value = $this->parseString($value, $sfield);
        break;
      case "number":
        $value = $isBlank ? false : $this->parseNumber($value, $sfield);
        break;
      case "date":
        $value = $isBlank ? false : $this->parseDate($value, $sfield);
        break;
      default:
        throw IllegalAccessException::unexpected_state();
      }
      if ($value === "" && $this->shouldMapEmpty) $value = $this->mapEmpty;
    }
    if ($outputSeq) $row[] = $value;
    else $row[$name] = $value;
  }
  if ($iconv) $row = $this->iconvInput($row);
  return $row;
}
/**
 * Parse a string field: trailing whitespace is stripped with rtrim().
 *
 * @param string $value raw field content
 * @param array $sfield field definition (not used here)
 * @return string the value without trailing whitespace
 */
protected function parseString(string $value, array $sfield): string {
  $trimmed = rtrim($value);
  return $trimmed;
}
/**
 * Parse a number field.
 *
 * A value made only of digits becomes an int when the precision is 0, and is
 * otherwise divided by 10^precision. Any other value is returned as is.
 *
 * @param string $value raw field content
 * @param array $sfield field definition; "precision" is read
 * @return int|float|string
 */
protected function parseNumber(string $value, array $sfield) {
  # values that are not numbers are left as they are
  if (!preg_match('/^\d+$/', $value)) return $value;

  $precision = $sfield["precision"];
  return $precision == 0
    ? intval($value)
    : doubleval($value) / (10**$precision);
}
/**
 * Parse a date field laid out as ddmmyyyy (size 8) or ddmmyy (size 6).
 *
 * A value made only of digits is returned as "dd/mm/yyyy"; for size 6 the
 * two-digit year goes through Date::fix_any_year(). Any other value is
 * returned as is.
 *
 * @param string $value raw field content
 * @param array $sfield field definition; "size" is read
 * @return string
 * @throws IllegalAccessException when the size is neither 6 nor 8
 */
protected function parseDate(string $value, array $sfield) {
  # values that are not numbers are left as they are
  if (!preg_match('/^\d+$/', $value)) return $value;

  $size = $sfield["size"];
  $day = substr($value, 0, 2);
  $month = substr($value, 2, 2);
  if ($size == 8) {
    $year = substr($value, 4, 4);
  } elseif ($size == 6) {
    $year = Date::fix_any_year(substr($value, 4, 2));
  } else {
    throw IllegalAccessException::unexpected_state();
  }
  return "$day/$month/$year";
}
#############################################################################
/**
 * Force a value to be exactly $size characters long.
 *
 * Lengths are counted in characters (mb_strlen/mb_substr): a shorter value
 * is right-padded with spaces, a longer one is truncated.
 *
 * @param string $value value to adjust
 * @param int $size wanted length, in characters
 * @return string the padded or truncated value
 */
protected static function ensure_size(string $value, int $size): string {
  $length = mb_strlen($value);
  if ($length < $size) {
    # single allocation instead of appending one space per iteration and
    # measuring the string again each time
    return $value . str_repeat(" ", $size - $length);
  }
  if ($length > $size) {
    return mb_substr($value, 0, $size);
  }
  return $value;
}
/**
 * Build the exception reporting that a formatted value does not have the
 * size of its field.
 *
 * @param string $value formatted value
 * @param int $actual_size size of the formatted value
 * @param array $sfield field definition; "name", "size" and "format" are read
 * @return ValueException the exception, for the caller to throw
 */
protected static function invalid_size(string $value, int $actual_size, array $sfield): ValueException {
  $name = $sfield["name"];
  $size = $sfield["size"];
  $format = $sfield["format"];
  $message = "field=$name with format $format, value=|$value|, expected size=$size, actual size=$actual_size";
  return new ValueException($message);
}
/**
 * Build a fixed-width line from a row of values.
 *
 * Fields are emitted in schema order. A value that is false or null produces
 * a field of spaces; a field missing from the row is formatted as "".
 *
 * @param array $row values keyed by field name
 * @param bool $iconv whether to run the line through iconvOutput()
 * @return string the formatted line
 * @throws ValueException when no schema is set
 */
public function formatRow(array $row, bool $iconv=true): string {
  $this->checkSchema();
  $line = "";
  foreach ($this->schema as $sfield) {
    $name = $sfield["name"];
    $size = $sfield["size"];
    $type = $sfield["type"];
    $value = A::get($row, $name, "");
    if ($value === false || $value === null) {
      $line .= self::ensure_size("", $size);
      continue;
    }
    switch ($type) {
    case "string":
      $line .= $this->formatString($value, $sfield);
      break;
    case "number":
      $line .= $this->formatNumber($value, $sfield);
      break;
    case "date":
      $line .= $this->formatDate($value, $sfield);
      break;
    default:
      throw IllegalAccessException::unexpected_state();
    }
  }
  return $iconv ? $this->iconvOutput($line) : $line;
}
/**
 * Format a string field.
 *
 * When the field has a format, the method stringFormat_<format> of this
 * object is called with the value and the field definition. The result is
 * then padded or truncated to the size of the field.
 *
 * @param mixed $value value to format, converted with strval()
 * @param array $sfield field definition; "size" and "format" are read
 * @return string the field content
 */
public function formatString($value, $sfield): string {
  ["size" => $size, "format" => $format] = $sfield;
  $text = strval($value);
  if ($format !== null) {
    $formatter = [$this, "stringFormat_$format"];
    $text = func::call($formatter, $text, $sfield);
  }
  return self::ensure_size($text, $size);
}
/**
 * String format "upper": convert the value to upper case with
 * mb_strtoupper().
 */
public function stringFormat_upper(string $value): string {
  $upper = mb_strtoupper($value);
  return $upper;
}
/**
 * String format "lower": convert the value to lower case with
 * mb_strtolower().
 */
public function stringFormat_lower(string $value): string {
  $lower = mb_strtolower($value);
  return $lower;
}
/**
 * Format a number field.
 *
 * - a string that is not made only of digits is zero-padded with
 *   str::pad0(), except the empty string which is kept as is
 * - with a format, the value goes through sprintf() and the result must have
 *   exactly the size of the field
 * - with a precision of 0, the value is written as a zero-padded unsigned
 *   integer
 * - otherwise the value is written with $precision decimals and the decimal
 *   point is removed
 *
 * The result is finally padded or truncated to the size of the field.
 *
 * @param mixed $value value to format
 * @param array $sfield field definition; "size", "precision" and "format"
 *   are read
 * @return string the field content, $size characters long
 * @throws ValueException when the explicit format yields a wrong size
 */
public function formatNumber($value, $sfield): string {
  ["size" => $size, "precision" => $precision, "format" => $format] = $sfield;
  if (is_string($value) && !preg_match('/^\d+$/', $value)) {
    # not a number: pad with zeros
    # an empty string is left as is
    if ($value !== "") $value = str::pad0($value, $size);
  } elseif ($format !== null) {
    $value = sprintf($format, $value);
    $actualSize = strlen($value);
    if ($actualSize != $size) {
      throw self::invalid_size($value, $actualSize, $sfield);
    }
  } elseif ($precision == 0) {
    $value = sprintf("%0{$size}u", $value);
  } else {
    # one extra column for the decimal point, which is removed afterwards.
    # $size itself must not change: it is the width enforced below
    $width = $size + 1;
    $value = sprintf("%0{$width}.{$precision}F", $value);
    $value = str_replace(".", "", $value);
  }
  return self::ensure_size($value, $size);
}
/**
 * Format a date field.
 *
 * - when SDateType::to_date() returns null, the field is filled with spaces
 * - when it throws a ValueException, the value is zero-padded with
 *   str::pad0() and adjusted to the size of the field
 * - otherwise a Date is built from the value and written with the format of
 *   the field, or as "dmy" (size 6) or "dmY" (size 8) when there is none
 *
 * NOTE(review): the result of to_date() is only used for the null check; the
 * Date that gets formatted is built again from the original value.
 *
 * @param mixed $value value to format
 * @param array $sfield field definition; "size" and "format" are read
 * @return string the field content
 * @throws ValueException when the explicit format yields a wrong size
 * @throws IllegalAccessException when there is no format and the size is
 *   neither 6 nor 8
 */
public function formatDate($value, $sfield): string {
  $size = $sfield["size"];
  $format = $sfield["format"];
  try {
    $parsed = SDateType::to_date($value);
  } catch (ValueException $e) {
    # not a date: pad with zeros
    return self::ensure_size(str::pad0($value, $size), $size);
  }
  # empty string
  if ($parsed === null) return self::ensure_size("", $size);

  $date = new Date($value);
  if ($format !== null) {
    $formatted = $date->format($format);
    $actualSize = strlen($formatted);
    if ($actualSize != $size) {
      throw self::invalid_size($formatted, $actualSize, $sfield);
    }
    return $formatted;
  }
  if ($size == 6) return $date->format("dmy");
  if ($size == 8) return $date->format("dmY");
  throw IllegalAccessException::unexpected_state();
}
}