<?php
namespace nur\mapper\fsv;

use nur\A;
use nur\b\date\Date;
use nur\b\IllegalAccessException;
use nur\b\ValueException;
use nur\data\types\md_utils;
use nur\data\types\Metadata;
use nur\func;

/**
 * Class FsvSchema: layout of a fixed-width record (a list of fields, each one
 * having a name, a size and a type), with the helpers needed to parse a line
 * into a row ({@see parseRow()}) and to format a row into a line
 * ({@see formatLine()}).
 *
 * Subclasses may override the INPUT_ENCODING, DATA_ENCODING, OUTPUT_ENCODING
 * and FSV_SCHEMA constants to provide defaults.
 */
class FsvSchema {
  /** schema of a single field definition, handed to md_utils::ensure_md() */
  const COLUMN_SCHEMA = [
    "name" => ["string", null, "nom du champ", "required" => true],
    "size" => ["int", null, "taille du champ", "required" => true],
    "type" => ["?string", null, "type du champ: string, number, date"],
    "precision" => ["?int", null, "précision pour un champ de type number"],
    "format" => ["?string", null, "format à appliquer avant écriture"],
    # these fields are normally computed automatically:
    "position" => ["?int", null, "position du premier caractère du champ (commence à 1)"],
    # the index is computed as position - 1 and used as a substr() offset, hence 0-based
    "index" => ["?int", null, "index du premier caractère du champ (commence à 0)"],
  ];

  /** @var Metadata */
  private static $column_md;

  /** @return Metadata the metadata built from COLUMN_SCHEMA, stored in a static property */
  private static function column_md(): Metadata {
    return md_utils::ensure_md(self::$column_md, self::COLUMN_SCHEMA);
  }

  /** @return string|null default encoding of parsed lines */
  protected function INPUT_ENCODING(): ?string {
    return static::INPUT_ENCODING;
  }

  const INPUT_ENCODING = "latin1";

  /** @return string|null default encoding of the values held in rows */
  protected function DATA_ENCODING(): ?string {
    return static::DATA_ENCODING;
  }

  const DATA_ENCODING = "utf-8";

  /** @return string|null default encoding of formatted lines */
  protected function OUTPUT_ENCODING(): ?string {
    return static::OUTPUT_ENCODING;
  }

  const OUTPUT_ENCODING = "latin1//TRANSLIT//IGNORE";

  /**
   * @return array|null default schema. Late static binding is used, like for
   * the encoding accessors, so that a subclass overriding the FSV_SCHEMA
   * constant is taken into account.
   */
  protected function FSV_SCHEMA(): ?array {
    return static::FSV_SCHEMA;
  }

  const FSV_SCHEMA = null;

  /**
   * @param array|null $fsvSchema field definitions; null selects the default
   * returned by FSV_SCHEMA()
   */
  public function __construct(?array $fsvSchema=null) {
    $this->setFsvSchema($fsvSchema);
    # null selects the class defaults
    $this->setInputEncoding(null);
    $this->setDataEncoding(null);
    $this->setOutputEncoding(null);
  }

  /** @var string|null */
  protected $inputEncoding;

  /** set the encoding of parsed lines; null selects INPUT_ENCODING() */
  public function setInputEncoding(?string $inputEncoding): self {
    if ($inputEncoding === null) $inputEncoding = $this->INPUT_ENCODING();
    $this->inputEncoding = $inputEncoding;
    return $this;
  }

  /** @var string|null */
  protected $dataEncoding;

  /** set the encoding of row values; null selects DATA_ENCODING() */
  public function setDataEncoding(?string $dataEncoding): self {
    if ($dataEncoding === null) $dataEncoding = $this->DATA_ENCODING();
    $this->dataEncoding = $dataEncoding;
    return $this;
  }

  /** @var string|null */
  protected $outputEncoding;

  /** set the encoding of formatted lines; null selects OUTPUT_ENCODING() */
  public function setOutputEncoding(?string $outputEncoding): self {
    if ($outputEncoding === null) $outputEncoding = $this->OUTPUT_ENCODING();
    $this->outputEncoding = $outputEncoding;
    return $this;
  }

  /**
   * Convert every string value of $row from the input encoding to the data
   * encoding. Nothing is converted when either encoding is null.
   */
  protected function iconvInput(array $row): array {
    $inputEncoding = $this->inputEncoding;
    $dataEncoding = $this->dataEncoding;
    if ($inputEncoding !== null && $dataEncoding !== null) {
      foreach ($row as &$col) {
        if (is_string($col)) $col = iconv($inputEncoding, $dataEncoding, $col);
      }
      unset($col);
    }
    return $row;
  }

  /**
   * Convert $line from the data encoding to the output encoding. Nothing is
   * converted when either encoding is null.
   */
  protected function iconvOutput(string $line): string {
    $dataEncoding = $this->dataEncoding;
    $outputEncoding = $this->outputEncoding;
    if ($outputEncoding !== null && $dataEncoding !== null) {
      $line = iconv($dataEncoding, $outputEncoding, $line);
    }
    return $line;
  }

  /** @var bool whether parsed empty strings are replaced by $mapEmpty */
  protected $shouldMapEmpty = false;

  /** @var mixed replacement for parsed empty strings */
  protected $mapEmpty = null;

  /** Ask parseRow() to replace every empty string value by $mapEmpty */
  public function setMapEmpty($mapEmpty=null): void {
    $this->shouldMapEmpty = true;
    $this->mapEmpty = $mapEmpty;
  }

  /**
   * @var bool should headers and data be generated as sequential arrays?
   */
  protected $outputSeq;

  /** @var bool should headers be displayed in the output? */
  protected $outputKeys;

  /** Select sequential output; this also re-arms the output of the headers */
  public function setOutputSeq(bool $outputSeq=true): void {
    $this->outputSeq = $outputSeq;
    $this->outputKeys = true;
  }

  /** @var array */
  protected $fsvSchema;

  public function getFsvSchema(): array {
    return $this->fsvSchema;
  }

  /** @var array list of the field names */
  protected $fsvColumns;

  public function getFsvColumns(): array {
    return $this->fsvColumns;
  }

  /**
   * Normalize and store the schema.
   *
   * Each field definition is passed to the column metadata, then given its
   * default type, precision, position and index before being validated.
   * Positions are accumulated from the field sizes, starting at 1.
   *
   * @param array|null $fsvSchema field definitions; null selects FSV_SCHEMA().
   * When that is null too, nothing is changed.
   * @throws ValueException if a field definition is rejected by validateSfield()
   */
  public function setFsvSchema(?array $fsvSchema): self {
    if ($fsvSchema === null) $fsvSchema = $this->FSV_SCHEMA();
    if ($fsvSchema === null) return $this;

    $column_md = self::column_md();
    $index = 0;
    $position = 1;
    $columns = [];
    foreach ($fsvSchema as $key => &$sfield) {
      if ($key === $index) {
        # sequential
        $index++;
        $column_md->ensureSchema($sfield);
      } else {
        # associative
        $column_md->ensureSchema($sfield, $key);
      }
      # NOTE(review): A::replace_n() presumably only sets the value when the
      # current one is null -- confirm against nur\A
      A::replace_n($sfield, "type", "string");
      A::replace_n($sfield, "precision", 0);
      A::replace_n($sfield, "position", $position);
      A::replace_n($sfield, "index", $position - 1);
      $this->validateSfield($sfield);
      $position += $sfield["size"];
      $columns[] = $sfield["name"];
    }
    unset($sfield);

    $this->fsvSchema = $fsvSchema;
    $this->fsvColumns = $columns;
    return $this;
  }

  /**
   * Check that the type of a field is supported, and that a date field has a
   * supported size.
   *
   * @throws ValueException
   */
  protected function validateSfield($sfield): void {
    $type = $sfield["type"];
    switch ($type) {
    case "string":
    case "number":
      break;
    case "date":
      $size = $sfield["size"];
      #XXX take the format into account
      if ($size != 6 && $size != 8) {
        throw new ValueException("date type require size=6 or size=8");
      }
      break;
    default:
      throw ValueException::invalid_value($type, "type");
    }
  }

  /**
   * @return bool true if a schema is defined
   * @throws ValueException if no schema is defined and $throw is true
   */
  public function checkFsvSchema(bool $throw=true): bool {
    if ($this->fsvSchema !== null) return true;
    elseif ($throw) throw new ValueException("a schema is required");
    else return false;
  }

  #############################################################################

  /**
   * @param bool $reset whether to disarm the headers flag after reading it
   * @return bool true if the output is sequential and the headers flag is
   * still armed
   */
  public function _outputKeys(bool $reset=true): bool {
    $outputKeys = $this->outputSeq && $this->outputKeys;
    if ($reset) $this->outputKeys = false;
    return $outputKeys;
  }

  public function _getKeys(): array {
    return $this->fsvColumns;
  }

  /**
   * Split a fixed-width line into a row, according to the schema.
   *
   * The row is keyed by field name, or sequential if setOutputSeq() was
   * called with true. A field starting beyond the end of the line, or a
   * number or date field containing only spaces, gets the value false.
   * Offsets and sizes are counted in bytes: the line is cut before any
   * encoding conversion takes place.
   *
   * @param bool $iconv whether to convert string values from the input
   * encoding to the data encoding
   * @throws ValueException if no schema is defined
   */
  public function parseRow(string $line, bool $iconv=true): array {
    $this->checkFsvSchema();
    $outputSeq = $this->outputSeq;
    $row = [];
    $length = strlen($line);
    foreach ($this->fsvSchema as $sfield) {
      [
        "name" => $name,
        "index" => $index,
        "size" => $size,
        "type" => $type,
      ] = $sfield;
      if ($index >= $length) {
        # the line is too short to contain this field
        $value = false;
      } else {
        $value = substr($line, $index, $size);
        # a value made only of spaces is blank. The comparison is not done
        # against exactly $size spaces, because substr() returns fewer
        # characters when the line is truncated in the middle of the field.
        $isBlank = trim($value, " ") === "";
        switch ($type) {
        case "string":
          $value = $this->parseString($value, $sfield);
          break;
        case "number":
          $value = $isBlank? false: $this->parseNumber($value, $sfield);
          break;
        case "date":
          $value = $isBlank? false: $this->parseDate($value, $sfield);
          break;
        default:
          throw IllegalAccessException::unexpected_state();
        }
        if ($value === "" && $this->shouldMapEmpty) $value = $this->mapEmpty;
      }
      if ($outputSeq) $row[] = $value;
      else $row[$name] = $value;
    }
    if ($iconv) $row = $this->iconvInput($row);
    return $row;
  }

  /** @return string the value without its trailing whitespace */
  protected function parseString(string $value, array $sfield): string {
    return rtrim($value);
  }

  /**
   * @return int|float an integer when the precision of the field is 0, else
   * the value divided by 10^precision (the decimal separator is implied)
   */
  protected function parseNumber(string $value, array $sfield) {
    $precision = $sfield["precision"];
    if ($precision == 0) {
      $value = intval($value);
    } else {
      $value = doubleval($value) / (10**$precision);
    }
    return $value;
  }

  /**
   * Parse a date laid out as ddmmyyyy (size 8) or ddmmyy (size 6).
   *
   * @return string the date formatted as dd/mm/yyyy
   */
  protected function parseDate(string $value, array $sfield) {
    $size = $sfield["size"];
    $dd = substr($value, 0, 2);
    $mm = substr($value, 2, 2);
    if ($size == 8) {
      $yyyy = substr($value, 4, 4);
    } elseif ($size == 6) {
      $yy = substr($value, 4, 2);
      # NOTE(review): presumably expands a 2-digit year to 4 digits -- confirm
      # against nur\b\date\Date
      $yyyy = Date::fix_any_year($yy);
    } else {
      throw IllegalAccessException::unexpected_state();
    }
    return "$dd/$mm/$yyyy";
  }

  #############################################################################

  /**
   * Pad $value with spaces on the right, or truncate it, so that it is
   * exactly $size characters long (as counted by mb_strlen()).
   */
  protected static function ensure_size(string $value, int $size): string {
    $length = mb_strlen($value);
    if ($length < $size) {
      $value .= str_repeat(" ", $size - $length);
    } elseif ($length > $size) {
      $value = mb_substr($value, 0, $size);
    }
    return $value;
  }

  /** Build the exception for a formatted value that does not fit its field */
  protected static function invalid_size(string $value, int $actual_size, array $sfield): ValueException {
    ["name" => $name, "size" => $size, "format" => $format] = $sfield;
    return new ValueException("field=$name with format $format, value=|$value|, expected size=$size, actual size=$actual_size");
  }

  /**
   * Format a row into a fixed-width line, according to the schema.
   *
   * Values are looked up by field name, with "" as the default. The value
   * false is written as a field full of spaces.
   *
   * @param bool $iconv whether to convert the line from the data encoding to
   * the output encoding
   * @throws ValueException if no schema is defined, or if a value formatted
   * with an explicit format does not have the size of its field
   */
  public function formatLine(array $row, bool $iconv=true): string {
    $this->checkFsvSchema();
    $line = [];
    foreach ($this->fsvSchema as $sfield) {
      [
        "name" => $name,
        "size" => $size,
        "type" => $type,
      ] = $sfield;
      $value = A::get($row, $name, "");
      if ($value === false) {
        $value = self::ensure_size("", $size);
      } else {
        switch ($type) {
        case "string":
          $value = $this->formatString($value, $sfield);
          break;
        case "number":
          $value = $this->formatNumber($value, $sfield);
          break;
        case "date":
          $value = $this->formatDate($value, $sfield);
          break;
        default:
          throw IllegalAccessException::unexpected_state();
        }
      }
      $line[] = $value;
    }
    $line = implode("", $line);
    if ($iconv) $line = $this->iconvOutput($line);
    return $line;
  }

  /**
   * Format a string field. With a format, the method stringFormat_FORMAT is
   * called and its result must already have the size of the field; without
   * one, the value is padded or truncated to the size of the field.
   *
   * @throws ValueException if the formatted value does not have the expected size
   */
  public function formatString($value, $sfield): string {
    ["size" => $size, "format" => $format] = $sfield;
    $value = strval($value);
    if ($format !== null) {
      $func = [$this, "stringFormat_$format"];
      $value = func::call($func, $value, $sfield);
      $actualSize = mb_strlen($value);
      if ($actualSize != $size) {
        throw self::invalid_size($value, $actualSize, $sfield);
      }
    } else {
      $value = self::ensure_size($value, $size);
    }
    return $value;
  }

  public function stringFormat_upper(string $value): string {
    return mb_strtoupper($value);
  }

  public function stringFormat_lower(string $value): string {
    return mb_strtolower($value);
  }

  /**
   * Format a number field. With a format, it is used as a sprintf() format
   * and the result must have the size of the field. Without one, the value
   * is zero-padded on the left; when the precision is not 0, the decimals are
   * written without decimal separator.
   *
   * @throws ValueException if the value formatted with an explicit format
   * does not have the expected size
   */
  public function formatNumber($value, $sfield): string {
    ["size" => $size, "precision" => $precision, "format" => $format] = $sfield;
    if ($format !== null) {
      $value = sprintf($format, $value);
      $actualSize = strlen($value);
      if ($actualSize != $size) {
        throw self::invalid_size($value, $actualSize, $sfield);
      }
    } elseif ($precision == 0) {
      $value = sprintf("%0{$size}u", $value);
    } else {
      # one more character for the decimal separator, which is removed afterwards
      $size++;
      $value = sprintf("%0{$size}.{$precision}F", $value);
      $value = str_replace(".", "", $value);
    }
    return $value;
  }

  /**
   * Format a date field. With a format, it is handed to Date::format() and
   * the result must have the size of the field. Without one, the date is
   * written as ddmmyy (size 6) or ddmmyyyy (size 8).
   *
   * @throws ValueException if the value formatted with an explicit format
   * does not have the expected size
   */
  public function formatDate($value, $sfield): string {
    $date = new Date($value);
    ["size" => $size, "format" => $format] = $sfield;
    if ($format !== null) {
      $value = $date->format($format);
      $actualSize = strlen($value);
      if ($actualSize != $size) {
        throw self::invalid_size($value, $actualSize, $sfield);
      }
    } elseif ($size == 6) {
      $value = $date->format("dmy");
    } elseif ($size == 8) {
      $value = $date->format("dmY");
    } else {
      throw IllegalAccessException::unexpected_state();
    }
    return $value;
  }
}