nur-sery/src/ext/spreadsheet/wsutils.php

60 lines
1.9 KiB
PHP
Raw Normal View History

2024-06-10 18:34:19 +04:00
<?php
namespace nur\sery\ext\spreadsheet;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
/**
 * Helpers to work out the extent of a PhpSpreadsheet worksheet.
 */
class wsutils {
  /**
   * Return the highest column and row as reported by the worksheet itself.
   *
   * The column letter returned by getHighestColumn() is converted to a
   * numeric index with Coordinate::columnIndexFromString().
   *
   * @return array a two-element list [$highestCol, $highestRow]
   */
  static function get_highest_coords(Worksheet $ws): array {
    $highestColumnA = $ws->getHighestColumn();
    $highestCol = Coordinate::columnIndexFromString($highestColumnA);
    $highestRow = $ws->getHighestRow();
    return [$highestCol, $highestRow];
  }

  /**
   * @var int number of consecutive columns/rows after which the search is
   * abandoned if only empty cells have been found.
   *
   * This is needed because some files coming from Excel that the author
   * received have up to 10000 empty columns and/or 1048576 empty rows. A
   * "naive" algorithm wastes a huge amount of time scanning the void, giving
   * the impression that the process has hung.
   */
  const MAX_EMPTY_THRESHOLD = 150;

  /**
   * Compute the coordinates of the last cell that actually holds a value.
   *
   * Cells are scanned row by row, column by column, within the bounds given
   * by get_highest_coords(). A cell counts as empty when it does not exist or
   * when its value is null. Scanning of a row stops after
   * MAX_EMPTY_THRESHOLD consecutive empty cells, and the whole scan stops
   * after MAX_EMPTY_THRESHOLD consecutive rows in which no value was found.
   *
   * NOTE(review): the *ByColumnAndRow() accessors appear to be deprecated in
   * recent PhpSpreadsheet releases -- confirm against the installed version
   * before upgrading the library.
   *
   * @return array a two-element list [$maxCol, $maxRow]; both start at 1, so
   * a worksheet without any value yields [1, 1]
   */
  static function compute_max_coords(Worksheet $ws): array {
    [$highestCol, $highestRow] = self::get_highest_coords($ws);

    $maxCol = 1;
    $maxRow = 1;
    // remaining budget of consecutive empty rows before giving up
    $maxEmptyRows = self::MAX_EMPTY_THRESHOLD;
    for ($row = 1; $row <= $highestRow; $row++) {
      $emptyRow = true;
      // remaining budget of consecutive empty cells in the current row
      $maxEmptyCols = self::MAX_EMPTY_THRESHOLD;
      for ($col = 1; $col <= $highestCol; $col++) {
        $value = null;
        // check for existence first so that the scan only reads cells the
        // worksheet already has
        if ($ws->cellExistsByColumnAndRow($col, $row)) {
          $value = $ws->getCellByColumnAndRow($col, $row)->getValue();
        }
        if ($value === null) {
          $maxEmptyCols--;
          if ($maxEmptyCols === 0) break;
        } else {
          // a value was found: reset the budget and extend the known extent
          $maxEmptyCols = self::MAX_EMPTY_THRESHOLD;
          if ($row > $maxRow) $maxRow = $row;
          if ($col > $maxCol) $maxCol = $col;
          $emptyRow = false;
        }
      }
      if ($emptyRow) {
        $maxEmptyRows--;
        if ($maxEmptyRows === 0) break;
      } else {
        $maxEmptyRows = self::MAX_EMPTY_THRESHOLD;
      }
    }
    return [$maxCol, $maxRow];
  }
}