nur-sery/src/ext/spreadsheet/wsutils.php

81 lines
2.6 KiB
PHP
Raw Normal View History

2024-06-10 18:34:19 +04:00
<?php
namespace nur\sery\ext\spreadsheet;
2024-06-11 17:33:13 +04:00
use nur\sery\ValueException;
2024-06-10 18:34:19 +04:00
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
2024-06-11 17:33:13 +04:00
use PhpOffice\PhpSpreadsheet\Spreadsheet;
2024-06-10 18:34:19 +04:00
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
/**
 * Static helpers around PhpSpreadsheet worksheets: worksheet lookup and
 * computation of the area of a worksheet that actually contains data.
 */
class wsutils {
  /**
   * Resolve a worksheet of $ss from a loose specification.
   *
   * - null or empty string: the active sheet is returned
   * - numeric string: interpreted as a 1-based sheet index
   * - any other string: interpreted as a sheet title
   *
   * @param string|null $wsname sheet index (1-based) or sheet title
   * @param Spreadsheet $ss workbook in which the sheet is looked up
   * @param bool $create when true, a sheet looked up by title that does not
   * exist is created with that title instead of raising an error
   * @return Worksheet|null the resolved worksheet
   * @throws ValueException if the index is out of range or not an integer, or
   * if no sheet has the requested title and $create is false
   */
  static function get_ws(?string $wsname, Spreadsheet $ss, bool $create=false): ?Worksheet {
    if ($wsname === null || $wsname === "") {
      return $ss->getActiveSheet();
    }
    if (is_numeric($wsname)) {
      $sheetCount = $ss->getSheetCount();
      if ($wsname < 1 || $wsname > $sheetCount) {
        throw ValueException::invalid_value($wsname, "sheet index");
      }
      # is_numeric() also accepts fractional values such as "1.5": reject them
      # instead of letting getSheet() silently truncate the index
      $index = (int)$wsname;
      if ($index != $wsname) {
        throw ValueException::invalid_value($wsname, "sheet index");
      }
      # getSheet() expects a 0-based index
      return $ss->getSheet($index - 1);
    }
    $ws = $ss->getSheetByName($wsname);
    if ($ws === null) {
      if (!$create) throw ValueException::invalid_value($wsname, "sheet name");
      $ws = $ss->createSheet()->setTitle($wsname);
    }
    return $ws;
  }

  /**
   * Return the highest column and row reported by the worksheet itself.
   *
   * @return array [$highestCol, $highestRow], where $highestCol is the numeric
   * index corresponding to the column letter reported by the worksheet
   */
  static function get_highest_coords(Worksheet $ws): array {
    $highestColumnA = $ws->getHighestColumn();
    $highestCol = Coordinate::columnIndexFromString($highestColumnA);
    $highestRow = $ws->getHighestRow();
    return [$highestCol, $highestRow];
  }

  /**
   * @var int number of consecutive columns/rows after which the search is
   * stopped if only empty cells have been found.
   *
   * This is required because some files coming from Excel that I received
   * have up to 10000 empty columns and/or 1048576 empty rows. A "naive"
   * algorithm wastes a huge amount of time searching through the void, giving
   * the impression that the process has hung.
   */
  const MAX_EMPTY_THRESHOLD = 150;

  /**
   * Compute the highest column and row that actually hold a non-null value.
   *
   * The worksheet is scanned row by row within the bounds given by
   * get_highest_coords(). The scan of a row is abandoned after
   * MAX_EMPTY_THRESHOLD consecutive empty cells, and the whole scan is
   * abandoned after MAX_EMPTY_THRESHOLD consecutive empty rows.
   *
   * @return array [$maxCol, $maxRow]; both are 1 when no value is found
   */
  static function compute_max_coords(Worksheet $ws): array {
    [$highestCol, $highestRow] = self::get_highest_coords($ws);

    $maxCol = 1;
    $maxRow = 1;
    $maxEmptyRows = self::MAX_EMPTY_THRESHOLD;
    for ($row = 1; $row <= $highestRow; $row++) {
      $emptyRow = true;
      $maxEmptyCols = self::MAX_EMPTY_THRESHOLD;
      for ($col = 1; $col <= $highestCol; $col++) {
        # cellExistsByColumnAndRow() / getCellByColumnAndRow() were removed in
        # PhpSpreadsheet 2.0: use a string coordinate, which works with every
        # version
        $coord = Coordinate::stringFromColumnIndex($col).$row;
        $value = null;
        # test for existence first: getCell() would create the missing cell
        if ($ws->cellExists($coord)) {
          $value = $ws->getCell($coord)->getValue();
        }
        if ($value === null) {
          $maxEmptyCols--;
          if ($maxEmptyCols == 0) break;
        } else {
          # a value was found: restart the count of consecutive empty cells
          $maxEmptyCols = self::MAX_EMPTY_THRESHOLD;
          if ($row > $maxRow) $maxRow = $row;
          if ($col > $maxCol) $maxCol = $col;
          $emptyRow = false;
        }
      }
      if ($emptyRow) {
        $maxEmptyRows--;
        if ($maxEmptyRows == 0) break;
      } else {
        # the row holds data: restart the count of consecutive empty rows
        $maxEmptyRows = self::MAX_EMPTY_THRESHOLD;
      }
    }
    return [$maxCol, $maxRow];
  }
}