Compare commits

..

1 Commits
php82 ... php74

Author SHA1 Message Date
6e58b99069 maj deps 2025-02-18 12:17:16 +04:00
250 changed files with 9570 additions and 7748 deletions

View File

@ -4,5 +4,5 @@
RUNPHP=
# Si RUNPHP n'est pas défini, les variables suivantes peuvent être définies
DIST=d12
#REGISTRY=pubdocker.univ-reunion.fr
DIST=d11
#REGISTRY=pubdocker.univ-reunion.fr/dist

View File

@ -19,32 +19,28 @@
}
},
"replace": {
"openspout/openspout": "v4.27.0"
"openspout/openspout": "v3.7.4"
},
"require": {
"nulib/php": "^8.2-dev",
"nulib/php": "^7.4-dev",
"ext-dom": "*",
"ext-fileinfo": "*",
"ext-filter": "*",
"ext-libxml": "*",
"ext-xmlreader": "*",
"ext-zip": "*",
"php": "^8.2"
"php": "^7.4"
},
"require-dev": {
"nulib/tests": "^8.2",
"friendsofphp/php-cs-fixer": "^3.64.0",
"infection/infection": "^0.29.6",
"phpbench/phpbench": "^1.3.1",
"phpstan/phpstan": "^1.12.4",
"phpstan/phpstan-phpunit": "^1.4.0",
"phpstan/phpstan-strict-rules": "^1.6.1",
"nulib/tests": "^7.4",
"friendsofphp/php-cs-fixer": "^3.4",
"phpstan/phpstan": "^1.4",
"phpstan/phpstan-phpunit": "^1.0",
"ext-zlib": "*"
},
"autoload": {
"psr-4": {
"nulib\\": "src",
"OpenSpout\\": "upstream-4.x/src"
"OpenSpout\\": "upstream-3.x/src"
}
},
"autoload-dev": {

View File

@ -1,91 +0,0 @@
diff --git a/src/Reader/XLSX/Helper/CellValueFormatter.php b/src/Reader/XLSX/Helper/CellValueFormatter.php
index 776de0a..bc7a5c4 100644
--- a/src/Reader/XLSX/Helper/CellValueFormatter.php
+++ b/src/Reader/XLSX/Helper/CellValueFormatter.php
@@ -284,9 +284,13 @@ final class CellValueFormatter
\assert(false !== $dateObj);
if ($this->shouldFormatDates) {
- $styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId);
- $phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
+ //$styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId);
+ //$phpDateFormat = DateFormatHelper::toPHPDateFormat($styleNumberFormatCode);
+ // Toujours utiliser le format français complet
+ $phpDateFormat = "d/m/Y H:i:s";
$cellValue = $dateObj->format($phpDateFormat);
+ // Enlever la composante heure si elle n'existe pas
+ $cellValue = preg_replace('/ 00:00:00$/', "", $cellValue);
} else {
$cellValue = $dateObj;
}
diff --git a/src/Reader/XLSX/Options.php b/src/Reader/XLSX/Options.php
index 636d2a4..e8609b5 100644
--- a/src/Reader/XLSX/Options.php
+++ b/src/Reader/XLSX/Options.php
@@ -10,7 +10,7 @@ final class Options
{
use TempFolderOptionTrait;
- public bool $SHOULD_FORMAT_DATES = false;
+ public bool $SHOULD_FORMAT_DATES = true;
public bool $SHOULD_PRESERVE_EMPTY_ROWS = false;
public bool $SHOULD_USE_1904_DATES = false;
public bool $SHOULD_LOAD_MERGE_CELLS = false;
diff --git a/src/Writer/AbstractWriter.php b/src/Writer/AbstractWriter.php
index eef34fb..a22e90f 100644
--- a/src/Writer/AbstractWriter.php
+++ b/src/Writer/AbstractWriter.php
@@ -13,6 +13,12 @@ abstract class AbstractWriter implements WriterInterface
/** @var resource Pointer to the file/stream we will write to */
protected $filePointer;
+ /**
+ * @var bool faut-il garder ouvert le flux quand {@link self::close()} est
+ * appelé?
+ */
+ protected bool $dontCloseFilePointer = false;
+
/** @var string document creator */
protected string $creator = 'OpenSpout';
@@ -20,7 +26,7 @@ abstract class AbstractWriter implements WriterInterface
protected static string $headerContentType;
/** @var string Path to the output file */
- private string $outputFilePath;
+ private ?string $outputFilePath;
/** @var bool Indicates whether the writer has been opened or not */
private bool $isWriterOpened = false;
@@ -51,6 +57,20 @@ abstract class AbstractWriter implements WriterInterface
$this->isWriterOpened = true;
}
+ final public function writeToStream($filePointer): void
+ {
+ if (!is_resource($filePointer)) {
+ throw new IOException("filePointer is not a resource");
+ }
+ $this->outputFilePath = null;
+
+ $this->filePointer = $filePointer;
+ $this->dontCloseFilePointer = true;
+
+ $this->openWriter();
+ $this->isWriterOpened = true;
+ }
+
/**
* @codeCoverageIgnore
*
@@ -140,7 +160,9 @@ abstract class AbstractWriter implements WriterInterface
$this->closeWriter();
- fclose($this->filePointer);
+ if (!$this->dontCloseFilePointer) {
+ fclose($this->filePointer);
+ }
$this->isWriterOpened = false;
}

View File

@ -14,22 +14,14 @@ use nulib\ref\ext\spout\ref_builder_ods;
use nulib\ref\ext\spout\ref_builder_xlsx;
use nulib\str;
use nulib\web\http;
use OpenSpout\Common\Entity\Cell;
use OpenSpout\Common\Entity\Style\Border;
use OpenSpout\Common\Entity\Style\BorderPart;
use OpenSpout\Common\Entity\Cell\DateTimeCell;
use OpenSpout\Common\Entity\Cell\EmptyCell;
use OpenSpout\Common\Entity\Cell\NumericCell;
use OpenSpout\Common\Entity\Cell\StringCell;
use OpenSpout\Common\Entity\Row;
use OpenSpout\Common\Entity\Style\Color;
use OpenSpout\Common\Entity\Style\Style;
use OpenSpout\Writer\AbstractWriterMultiSheets;
use OpenSpout\Writer\Common\AbstractOptions;
use OpenSpout\Writer\ODS\Options as ODSOptions;
use OpenSpout\Writer\ODS\Writer as ODSWriter;
use OpenSpout\Common\Helper\CellTypeHelper;
use OpenSpout\Writer\Common\Creator\WriterEntityFactory;
use OpenSpout\Writer\WriterMultiSheetsAbstract;
use OpenSpout\Writer\XLSX\Entity\SheetView;
use OpenSpout\Writer\XLSX\Options as XLSXOptions;
use OpenSpout\Writer\XLSX\Writer as XLSXWriter;
class SpoutBuilder extends AbstractBuilder {
use TAbstractBuilder;
@ -50,16 +42,16 @@ class SpoutBuilder extends AbstractBuilder {
return $object;
}
protected static function add_border_part(?array &$parts, string $name, ?array $params): void {
protected static function add_border_part(?Border &$border, string $name, ?array $params): void {
if ($params === null) return;
if ($border === null) $border = new Border();
$part = new BorderPart($name);
if (($color = $params["color"] ?? null) !== null) {
$color = cl::get(ref_builder::COLORS, $color, $color);
} else {
$color = Color::BLACK;
$part->setColor(cl::get(ref_builder::COLORS, $color, $color));
}
if (($width = $params["width"] ?? null) === null) $width = Border::WIDTH_MEDIUM;
if (($style = $params["style"] ?? null) === null) $style = Border::STYLE_SOLID;
$parts[] = new BorderPart($name, $color, $width, $style);
if (($width = $params["width"] ?? null) !== null) $part->setWidth($width);
if (($style = $params["style"] ?? null) !== null) $part->setStyle($style);
$border->addPart($part);
}
protected static function set_defaults(?array &$params, string $key, array $defaults): void {
@ -90,9 +82,8 @@ class SpoutBuilder extends AbstractBuilder {
$style->setBackgroundColor(cl::get(ref_builder::COLORS, $color, $color));
}
if (($align = $cell["align"] ?? null) !== null) $style->setCellAlignment($align);
if (($align = $cell["valign"] ?? null) !== null) $style->setCellVerticalAlignment($align);
//if (($align = $cell["valign"] ?? null) !== null) $style->setCellVerticalAlignment($align);
if (($wrap = $cell["wrap"] ?? null) !== null) $style->setShouldWrapText($wrap);
if (($rotation = $cell["rotation"] ?? null) !== null) $style->setTextRotation($rotation);
if (($format = $cell["format"] ?? null) !== null) $style->setFormat($format);
if (($border = $cell["border"] ?? null) !== null) {
if (is_string($border)) {
@ -123,14 +114,18 @@ class SpoutBuilder extends AbstractBuilder {
if ($colorAll !== null) $part["color"] = $colorAll;
}; unset($part);
}
$parts = null;
self::add_border_part($parts, "top", $border["top"] ?? null);
self::add_border_part($parts, "right", $border["right"] ?? null);
self::add_border_part($parts, "bottom", $border["bottom"] ?? null);
self::add_border_part($parts, "left", $border["left"] ?? null);
if ($parts !== null) $style->setBorder(new Border(...$parts));
$top = $border["top"] ?? null;
$right = $border["right"] ?? null;
$bottom = $border["bottom"] ?? null;
$left = $border["left"] ?? null;
$border = null;
self::add_border_part($border, "top", $top);
self::add_border_part($border, "right", $right);
self::add_border_part($border, "bottom", $bottom);
self::add_border_part($border, "left", $left);
if ($border !== null) $style->setBorder($border);
}
if (($autofit = $cell["autofit"] ?? null) !== null) $style->setShouldShrinkToFit($autofit);
return $style;
}
@ -178,31 +173,26 @@ class SpoutBuilder extends AbstractBuilder {
case "ods":
case self::SS_TYPE_ODS:
$ssType = self::SS_TYPE_ODS;
$ssOptions = new ODSOptions();
$ssWriterClass = ODSWriter::class;
$refParams = ref_builder_ods::PARAMS_SPOUT;
$ssWriter = WriterEntityFactory::createODSWriter();
self::apply_params($ssWriter, $spoutParams, ref_builder_ods::PARAMS_SPOUT);
break;
case "xlsx":
case self::SS_TYPE_XLSX:
default:
$ssType = self::SS_TYPE_XLSX;
$ssOptions = new XLSXOptions();
$ssWriterClass = XLSXWriter::class;
$refParams = ref_builder_xlsx::PARAMS_SPOUT;
$ssWriter = WriterEntityFactory::createXLSXWriter();
self::apply_params($ssWriter, $spoutParams, ref_builder_xlsx::PARAMS_SPOUT);
break;
}
$defaultColumnWidth = $spoutParams["default_column_width"] ?? null;
if ($defaultColumnWidth !== null) $ssOptions->DEFAULT_COLUMN_WIDTH = $defaultColumnWidth;
if ($defaultColumnWidth !== null) $ssWriter->setDefaultColumnWidth($defaultColumnWidth);
$defaultRowHeight = $spoutParams["default_row_height"] ?? null;
if ($defaultRowHeight !== null) $ssOptions->DEFAULT_ROW_HEIGHT = $defaultRowHeight;
if ($defaultRowHeight !== null) $ssWriter->setDefaultRowHeight($defaultRowHeight);
$defaultRowStyle = $spoutParams["default_row_style"] ?? null;
if ($defaultRowStyle !== null) $ssOptions->DEFAULT_ROW_STYLE = $defaultRowStyle;
self::apply_params($ssOptions, $spoutParams, $refParams);
$ssWriter = new $ssWriterClass($ssOptions);
if ($defaultRowStyle !== null) $ssWriter->setDefaultRowStyle($defaultRowStyle);
$ssWriter->writeToStream($this->getResource());
$this->ssType = $ssType;
$this->ssOptions = $ssOptions;
$this->ssWriter = $ssWriter;
$this->spoutParams = $spoutParams;
$this->typeNumeric = boolval($params["type_numeric"] ?? static::TYPE_NUMERIC);
@ -223,9 +213,7 @@ class SpoutBuilder extends AbstractBuilder {
/** @var int type de fichier généré */
protected int $ssType;
protected AbstractOptions $ssOptions;
protected AbstractWriterMultiSheets $ssWriter;
protected WriterMultiSheetsAbstract $ssWriter;
protected ?array $spoutParams;
@ -350,8 +338,7 @@ class SpoutBuilder extends AbstractBuilder {
* Les lignes sont indexées sur 1
*/
function mergeCells(int $topLeftCol, int $topLeftRow, int $bottomRightCol, int $bottomRightRow): void {
$sheet = $this->ssWriter->getCurrentSheet();
$this->ssOptions->mergeCells($topLeftCol, $topLeftRow, $bottomRightCol, $bottomRightRow, $sheet->getIndex());
$this->ssWriter->mergeCells([$topLeftCol, $topLeftRow], [$bottomRightCol, $bottomRightRow]);
}
protected function isNumeric($value): bool {
@ -369,6 +356,10 @@ class SpoutBuilder extends AbstractBuilder {
$style ??= new Style();
$style->setFormat(self::DATETIME_FORMAT);
return true;
} elseif (CellTypeHelper::isDateTimeOrDateInterval($value)) {
$style ??= new Style();
$style->setFormat(self::DATE_FORMAT);
return true;
}
if (!is_string($value) || !$this->typeDate) return false;
if (DateTime::isa_datetime($value, true)) {
@ -410,14 +401,16 @@ class SpoutBuilder extends AbstractBuilder {
$style = $colStyles[$key] ?? null;
self::ensure_style($style);
if ($col === null || $col === "") {
$cell = new EmptyCell(null, $style);
$type = Cell::TYPE_EMPTY;
} elseif ($this->isNumeric($col)) {
$cell = new NumericCell($col, $style);
$type = Cell::TYPE_NUMERIC;
} elseif ($this->isDate($col, $style)) {
$cell = new DateTimeCell($col, $style);
$type = Cell::TYPE_DATE;
} else {
$cell = new StringCell($col, $style);
$type = Cell::TYPE_STRING;
}
$cell = WriterEntityFactory::createCell($col, $style);
$cell->setType($type);
$cells[] = $cell;
}
@ -428,7 +421,7 @@ class SpoutBuilder extends AbstractBuilder {
}
$rowStyle ??= $oddStyle;
self::ensure_style($rowStyle);
$row = new Row($cells, $rowStyle);
$row = WriterEntityFactory::createRow($cells, $rowStyle);
self::apply_params($row, $rowParams, ref_builder::ROW_PARAMS);
$mergeCells = $rowParams["merge_cells"] ?? null;

View File

@ -82,7 +82,6 @@ class ref_builder {
"align" => "string",
"valign" => "string",
"wrap" => "bool",
"rotation" => "int",
"format" => "string",
"border" => [
"top" => ["color" => "string", "width" => "string", "style" => "string"],
@ -90,6 +89,5 @@ class ref_builder {
"bottom" => ["color" => "string", "width" => "string", "style" => "string"],
"left" => ["color" => "string", "width" => "string", "style" => "string"],
],
"autofit" => "bool",
];
}

View File

@ -1,43 +1,16 @@
<?php
namespace nulib\ref\ext\spout;
use OpenSpout\Writer\XLSX\Options\HeaderFooter;
use OpenSpout\Writer\XLSX\Options\PageMargin;
use OpenSpout\Writer\XLSX\Options\PageSetup;
class ref_builder_xlsx extends ref_builder {
const PARAMS_SPOUT = [
...parent::PARAMS_SPOUT,
"->setPageSetup" => [
PageSetup::class,
"page_orientation" => "string",
"page_size" => "string",
"fit_to_height" => "bool",
"fit_to_width" => "bool",
"page_order" => "string",
],
"->setPageMargin" => [
PageMargin::class,
"top" => "float",
"right" => "float",
"bottom" => "float",
"left" => "float",
"header" => "float",
"footer" => "float",
],
"->setHeaderFooter" => [
HeaderFooter::class,
"odd_header" => "string",
"odd_footer" => "string",
"even_header" => "string",
"even_footer" => "string",
"different_odd_even" => "bool",
],
];
const PARAMS_SHEET = [
...parent::PARAMS_SHEET,
"view" => self::PARAMS_SHEET_VIEW,
# copie de parent::SHEET
"->setName" => ["string"],
"->setIsVisible" => ["bool"],
"header_style" => self::STYLE,
"odd_style" => self::STYLE,
"even_style" => self::STYLE,
"different_odd_even" => "bool",
];
const PARAMS_SHEET_VIEW = [

54
upstream-3.x/README.md Normal file
View File

@ -0,0 +1,54 @@
# OpenSpout
[![Latest Stable Version](https://poser.pugx.org/openspout/openspout/v/stable)](https://packagist.org/packages/openspout/openspout)
[![Build Status](https://github.com/openspout/openspout/actions/workflows/ci.yml/badge.svg)](https://github.com/openspout/openspout/actions/workflows/ci.yml)
[![Code Coverage](https://codecov.io/gh/openspout/openspout/coverage.svg?branch=main)](https://codecov.io/gh/openspout/openspout?branch=main)
[![Total Downloads](https://poser.pugx.org/openspout/openspout/downloads)](https://packagist.org/packages/openspout/openspout)
OpenSpout is a community driven fork of `box/spout`, a PHP library to read and write spreadsheet files (CSV, XLSX and ODS), in a fast and scalable way.
Unlike other file readers or writers, it is capable of processing very large files, while keeping the memory usage really low (less than 3MB).
## Documentation
Documentation can be found at [https://openspout.readthedocs.io/en/latest/](https://openspout.readthedocs.io/en/latest/).
## Requirements
* PHP version 7.3 or higher
* PHP extension `php_zip` enabled
* PHP extension `php_xmlreader` enabled
## Upgrade from `box/spout`
1. Replace `box/spout` with `openspout/openspout` in your `composer.json`
2. Replace `Box\Spout` with `OpenSpout` in your code
## Upgrade guide
Version 3 introduced new functionality but also some breaking changes. If you want to upgrade your Spout codebase from version 2 please consult the [Upgrade guide](UPGRADE-3.0.md).
## Running tests
The `main` branch includes unit, functional and performance tests.
If you just want to check that everything is working as expected, executing the unit and functional tests is enough.
* `phpunit` - runs unit and functional tests
* `phpunit --group perf-tests` - only runs the performance tests
For information, the performance tests take about 10 minutes to run (processing 1 million rows files is not a quick thing).
> Performance tests status: [![Build Status](https://travis-ci.org/box/spout.svg?branch=perf-tests)](https://travis-ci.org/box/spout)
## Copyright and License
This is a fork of Box's Spout library: https://github.com/box/spout
Code until and directly descending from commit [`cc42c1d`](https://github.com/openspout/openspout/commit/cc42c1d29fc5d29f07caeace99bd29dbb6d7c2f8)
is copyright of _Box, Inc._ and licensed under the Apache License, Version 2.0:
https://github.com/openspout/openspout/blob/cc42c1d29fc5d29f07caeace99bd29dbb6d7c2f8/LICENSE
Code created, edited and released after the commit mentioned above
is copyright of _openspout_ Github organization and licensed under MIT License.
https://github.com/openspout/openspout/blob/main/LICENSE

View File

@ -1,103 +1,33 @@
# Upgrade guide
Upgrading from 2.x to 3.0
=========================
## Upgrading from 3.x to 4.0
Beginning with v4, only actively supported [PHP versions](https://www.php.net/supported-versions.php) will be supported.
Removing support for EOLed PHP versions as well as adding support for new PHP versions will be included in MINOR releases.
### Most notable changes
1. OpenSpout is now fully typed
2. Classes and interfaces not consumed by the user are now marked as `@internal`
3. Classes used by the user are all `final`
### Reader & Writer objects
Both readers and writers have to be naturally instantiated with `new` keyword, passing the eventual needed `Options`
class as the first argument:
```php
use OpenSpout\Reader\CSV\Reader;
use OpenSpout\Reader\CSV\Options;
$options = new Options();
$options->FIELD_DELIMITER = '|';
$options->FIELD_ENCLOSURE = '@';
$reader = new Reader($options);
```
### Cell types on writes
Cell types are now handled with separate classes:
```php
use OpenSpout\Common\Entity\Cell;
use OpenSpout\Common\Entity\Row;
$row = new Row([
new Cell\BooleanCell(true),
new Cell\DateIntervalCell(new DateInterval('P1D')),
new Cell\DateTimeCell(new DateTimeImmutable('now')),
new Cell\EmptyCell(null),
new Cell\FormulaCell('=SUM(A1:A2)'),
new Cell\NumericCell(3),
new Cell\StringCell('foo'),
]);
```
Auto-typing is still available though:
```php
use OpenSpout\Common\Entity\Cell;
use OpenSpout\Common\Entity\Row;
$cell = Cell::fromValue(true); // Instance of Cell\BooleanCell
$row = Row::fromValues([
true,
new DateInterval('P1D'),
new DateTimeImmutable('now'),
null,
'=SUM(A1:A2)',
3,
'foo',
]);
```
## Upgrading from 2.x to 3.0
OpenSpout 3.0 introduced several backwards-incompatible changes. The upgrade from OpenSpout 2.x to 3.0 must therefore
be done with caution.
Spout 3.0 introduced several backwards-incompatible changes. The upgrade from Spout 2.x to 3.0 must therefore be done with caution.
This guide is meant to ease this process.
### Most notable changes
Most notable changes
--------------------
In 2.x, styles were applied per row; it was therefore impossible to apply different styles to cells in the same row.
With the 3.0 version, this is now possible: each cell can have its own style.
OpenSpout 3.0 tries to enforce better typing. For instance, instead of using/returning generic arrays, OpenSpout now
makes use of specific `Row` and `Cell` objects that can encapsulate more data such as type, style, value.
Spout 3.0 tries to enforce better typing. For instance, instead of using/returning generic arrays, Spout now makes use of specific `Row` and `Cell` objects that can encapsulate more data such as type, style, value.
Finally, **_OpenSpout 3.2 only supports PHP 7.2 and above_**, as other PHP versions are no longer supported by the
community.
### Reader changes
Finally, **_Spout 3.2 only supports PHP 7.2 and above_**, as other PHP versions are no longer supported by the community.
Reader changes
--------------
Creating a reader should now be done through the Reader `ReaderEntityFactory`, instead of using the `ReaderFactory`.
Also, the `ReaderFactory::create($type)` method was removed and replaced by methods for each reader:
```php
use OpenSpout\Reader\Common\Creator\ReaderEntityFactory; // namespace is no longer "OpenSpout\Reader"
...
$reader = ReaderEntityFactory::createXLSXReader(); // replaces ReaderFactory::create(Type::XLSX)
$reader = ReaderEntityFactory::createCSVReader(); // replaces ReaderFactory::create(Type::CSV)
$reader = ReaderEntityFactory::createODSReader(); // replaces ReaderFactory::create(Type::ODS)
```
When iterating over the spreadsheet rows, OpenSpout now returns `Row` objects, instead of an array containing row
values. Accessing the row values should now be done this way:
When iterating over the spreadsheet rows, Spout now returns `Row` objects, instead of an array containing row values. Accessing the row values should now be done this way:
```php
...
foreach ($reader->getSheetIterator() as $sheet) {
foreach ($sheet->getRowIterator() as $row) { // $row is a "Row" object, not an array
$rowAsArray = $row->toArray(); // this is the 2.x equivalent
@ -108,22 +38,20 @@ foreach ($reader->getSheetIterator() as $sheet) {
}
```
### Writer changes
Writer creation follows the same change as the reader. It should now be done through the Writer `WriterEntityFactory`,
instead of using the `WriterFactory`.
Writer changes
--------------
Writer creation follows the same change as the reader. It should now be done through the Writer `WriterEntityFactory`, instead of using the `WriterFactory`.
Also, the `WriterFactory::create($type)` method was removed and replaced by methods for each writer:
```php
use OpenSpout\Writer\Common\Creator\WriterEntityFactory; // namespace is no longer "OpenSpout\Writer"
...
$writer = WriterEntityFactory::createXLSXWriter(); // replaces WriterFactory::create(Type::XLSX)
$writer = WriterEntityFactory::createCSVWriter(); // replaces WriterFactory::create(Type::CSV)
$writer = WriterEntityFactory::createODSWriter(); // replaces WriterFactory::create(Type::ODS)
```
Adding rows is also done differently: instead of passing an array, the writer now takes in a `Row` object (or an
array of `Row`). Creating such objects can easily be done this way:
Adding rows is also done differently: instead of passing an array, the writer now takes in a `Row` object (or an array of `Row`). Creating such objects can easily be done this way:
```php
// Adding a row from an array of values (2.x equivalent)
$cellValues = ['foo', 12345];
@ -137,8 +65,8 @@ $row2 = WriterEntityFactory::createRow([$cell1, $cell2]);
$writer->addRows([$row1, $row2]);
```
### Namespace changes for styles
Namespace changes for styles
-----------------
The namespaces for styles have changed. Styles are still created by using a `builder` class.
For the builder, please update your import statements to use the following namespaces:
@ -155,8 +83,7 @@ If your are using these classes directly via an import statement in your code, p
OpenSpout\Common\Entity\Style\Color
OpenSpout\Common\Entity\Style\Style
### Handling of empty rows
Handling of empty rows
----------------------
In 2.x, empty rows were not added to the spreadsheet.
In 3.0, `addRow` now always writes a row to the spreadsheet: when the row does not contain any cells, an empty row
is created in the sheet.
In 3.0, `addRow` now always writes a row to the spreadsheet: when the row does not contain any cells, an empty row is created in the sheet.

View File

@ -28,9 +28,8 @@
],
"homepage": "https://github.com/openspout/openspout",
"require": {
"php": "~8.2.0 || ~8.3.0 || ~8.4.0",
"php": "~7.3.0 || ~7.4.0 || ~8.0.0 || ~8.1.0",
"ext-dom": "*",
"ext-fileinfo": "*",
"ext-filter": "*",
"ext-libxml": "*",
"ext-xmlreader": "*",
@ -38,17 +37,14 @@
},
"require-dev": {
"ext-zlib": "*",
"friendsofphp/php-cs-fixer": "^3.65.0",
"infection/infection": "^0.29.8",
"phpbench/phpbench": "^1.3.1",
"phpstan/phpstan": "^2.0.2",
"phpstan/phpstan-phpunit": "^2.0.1",
"phpstan/phpstan-strict-rules": "^2",
"phpunit/phpunit": "^11.4.3"
"friendsofphp/php-cs-fixer": "^3.4",
"phpstan/phpstan": "^1.4",
"phpstan/phpstan-phpunit": "^1.0",
"phpunit/phpunit": "^9.5"
},
"suggest": {
"ext-iconv": "To handle non UTF-8 CSV files (if \"php-mbstring\" is not already installed or is too limited)",
"ext-mbstring": "To handle non UTF-8 CSV files (if \"iconv\" is not already installed)"
"ext-iconv": "To handle non UTF-8 CSV files (if \"php-intl\" is not already installed or is too limited)",
"ext-intl": "To handle non UTF-8 CSV files (if \"iconv\" is not already installed)"
},
"autoload": {
"psr-4": {
@ -56,16 +52,13 @@
}
},
"autoload-dev": {
"psr-4": {
"OpenSpout\\Benchmarks\\": "benchmarks/"
},
"classmap": [
"tests/"
]
},
"config": {
"allow-plugins": {
"infection/extension-installer": true
"platform": {
"php": "7.3"
}
},
"extra": {

View File

@ -0,0 +1,147 @@
<?php
namespace OpenSpout\Autoloader;
/**
* @see https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-4-autoloader-examples.md#class-example
*/
class Psr4Autoloader
{
    /**
     * Base directories registered so far, grouped by namespace prefix.
     *
     * Each key is a prefix normalized to end with a backslash; each value is
     * the ordered list of directories (normalized to end with "/") that are
     * searched for classes living under that prefix.
     *
     * @var array
     */
    protected $prefixes = [];

    /**
     * Plugs {@see loadClass()} into the SPL autoloader stack.
     */
    public function register()
    {
        spl_autoload_register([$this, 'loadClass']);
    }

    /**
     * Associates a base directory with a namespace prefix.
     *
     * @param string $prefix  the namespace prefix
     * @param string $baseDir directory holding the class files of that
     *                        namespace
     * @param bool   $prepend when true the directory is put at the front of
     *                        the list, so it is searched first; otherwise it
     *                        is appended and searched last
     */
    public function addNamespace($prefix, $baseDir, $prepend = false)
    {
        // Normalized forms: "Vendor\Package\" and "/some/dir/".
        $prefix = trim($prefix, '\\').'\\';
        $baseDir = rtrim($baseDir, \DIRECTORY_SEPARATOR).'/';

        // First directory for this prefix: start an empty list.
        if (!isset($this->prefixes[$prefix])) {
            $this->prefixes[$prefix] = [];
        }

        if (!$prepend) {
            $this->prefixes[$prefix][] = $baseDir;

            return;
        }

        array_unshift($this->prefixes[$prefix], $baseDir);
    }

    /**
     * Tries to load the file that defines the given class.
     *
     * The fully-qualified name is split at each namespace separator, from the
     * rightmost one to the leftmost one, and every (prefix, relative class)
     * pair is tried until a mapped file is found.
     *
     * @param string $class the fully-qualified class name
     *
     * @return mixed the name of the file that was loaded, or boolean false
     *               when no registered prefix leads to an existing file
     */
    public function loadClass($class)
    {
        for (
            $cursor = $class;
            false !== ($cut = strrpos($cursor, '\\'));
            // Drop the trailing separator(s) so that the next strrpos()
            // finds the separator one level up.
            $cursor = rtrim($namespacePart, '\\')
        ) {
            // Prefix keeps its trailing separator; the remainder is the
            // class name relative to that prefix.
            $namespacePart = substr($class, 0, $cut + 1);
            $classPart = substr($class, $cut + 1);

            $loaded = $this->loadMappedFile($namespacePart, $classPart);
            if (false !== $loaded) {
                return $loaded;
            }
        }

        // No prefix matched, or none of the candidate files exist.
        return false;
    }

    /**
     * Looks for the class file in every base directory of one prefix.
     *
     * @param string $prefix        the namespace prefix
     * @param string $relativeClass the class name relative to the prefix
     *
     * @return mixed the name of the file that was loaded, or boolean false
     *               if nothing could be loaded
     */
    protected function loadMappedFile($prefix, $relativeClass)
    {
        $candidateDirs = $this->prefixes[$prefix] ?? null;
        if (null === $candidateDirs) {
            // Nothing registered for this prefix.
            return false;
        }

        // Namespace separators become directory separators, plus ".php".
        $relativePath = str_replace('\\', '/', $relativeClass).'.php';

        foreach ($candidateDirs as $directory) {
            $candidate = $directory.$relativePath;
            if ($this->requireFile($candidate)) {
                return $candidate;
            }
        }

        return false;
    }

    /**
     * Requires a file if it is present on the file system.
     *
     * @param string $file the file to require
     *
     * @return bool true when the file exists (and was required), false
     *              otherwise
     */
    protected function requireFile($file)
    {
        if (!file_exists($file)) {
            return false;
        }

        require $file;

        return true;
    }
}

View File

@ -0,0 +1,15 @@
<?php
namespace OpenSpout\Autoloader;
// Load the autoloader class from this file's own directory. A bare relative
// name would be looked up through include_path first, so a same-named file
// elsewhere could be picked up instead.
require_once __DIR__.'/Psr4Autoloader.php';

/**
 * @var string
 * Full path to the parent of this file's directory, which is what we want
 * the "OpenSpout" namespace to map to
 */
$srcBaseDirectory = \dirname(__DIR__);

// Register the loader on the SPL stack, then map the namespace to the sources.
$loader = new Psr4Autoloader();
$loader->register();
$loader->addNamespace('OpenSpout', $srcBaseDirectory);

View File

@ -0,0 +1,48 @@
<?php
namespace OpenSpout\Common\Creator;
use OpenSpout\Common\Helper\EncodingHelper;
use OpenSpout\Common\Helper\FileSystemHelper;
use OpenSpout\Common\Helper\GlobalFunctionsHelper;
use OpenSpout\Common\Helper\StringHelper;
/**
* Factory to create helpers.
*/
class HelperFactory
{
    /**
     * Builds a fresh GlobalFunctionsHelper.
     *
     * @return GlobalFunctionsHelper
     */
    public function createGlobalFunctionsHelper()
    {
        $globalFunctionsHelper = new GlobalFunctionsHelper();

        return $globalFunctionsHelper;
    }

    /**
     * Builds a FileSystemHelper for the given base folder.
     *
     * @param string $baseFolderPath The path of the base folder where all the I/O can occur
     *
     * @return FileSystemHelper
     */
    public function createFileSystemHelper($baseFolderPath)
    {
        $fileSystemHelper = new FileSystemHelper($baseFolderPath);

        return $fileSystemHelper;
    }

    /**
     * Builds an EncodingHelper on top of the given GlobalFunctionsHelper.
     *
     * @return EncodingHelper
     */
    public function createEncodingHelper(GlobalFunctionsHelper $globalFunctionsHelper)
    {
        $encodingHelper = new EncodingHelper($globalFunctionsHelper);

        return $encodingHelper;
    }

    /**
     * Builds a fresh StringHelper.
     *
     * @return StringHelper
     */
    public function createStringHelper()
    {
        $stringHelper = new StringHelper();

        return $stringHelper;
    }
}

View File

@ -0,0 +1,227 @@
<?php
namespace OpenSpout\Common\Entity;
use OpenSpout\Common\Entity\Style\Style;
use OpenSpout\Common\Helper\CellTypeHelper;
/**
 * A spreadsheet cell: a value, the type detected for (or forced on) that
 * value, and the style attached to it.
 */
class Cell
{
    /**
     * Numeric cell type (whole numbers, fractional numbers, dates).
     */
    public const TYPE_NUMERIC = 0;

    /**
     * String (text) cell type.
     */
    public const TYPE_STRING = 1;

    /**
     * Formula cell type
     * Not used at the moment.
     */
    public const TYPE_FORMULA = 2;

    /**
     * Empty cell type.
     */
    public const TYPE_EMPTY = 3;

    /**
     * Boolean cell type.
     */
    public const TYPE_BOOLEAN = 4;

    /**
     * Date cell type.
     */
    public const TYPE_DATE = 5;

    /**
     * Error cell type.
     */
    public const TYPE_ERROR = 6;

    /**
     * The value of this cell.
     *
     * @var null|mixed
     */
    protected $value;

    /**
     * The cell type (one of the TYPE_* constants).
     *
     * @var null|int
     */
    protected $type;

    /**
     * The cell style.
     *
     * @var Style
     */
    protected $style;

    /**
     * Stores the value (detecting its type) and the style.
     *
     * The style parameter is declared explicitly nullable: relying on the
     * "= null" default to make the type nullable is deprecated as of PHP 8.4.
     *
     * @param null|mixed $value
     * @param null|Style $style style to attach; a new Style is created when
     *                          none is given
     */
    public function __construct($value, ?Style $style = null)
    {
        $this->setValue($value);
        $this->setStyle($style);
    }

    /**
     * @return string the value returned by getValue(), cast to string
     */
    public function __toString()
    {
        return (string) $this->getValue();
    }

    /**
     * Replaces the value and re-detects the cell type from it.
     *
     * Any type previously forced with setType() is overwritten.
     *
     * @param null|mixed $value
     */
    public function setValue($value)
    {
        $this->value = $value;
        $this->type = $this->detectType($value);
    }

    /**
     * @return null|mixed the value, or null when the cell is of error type
     */
    public function getValue()
    {
        return !$this->isError() ? $this->value : null;
    }

    /**
     * @return mixed the stored value, whatever the cell type is
     */
    public function getValueEvenIfError()
    {
        return $this->value;
    }

    /**
     * @param null|Style $style style to attach; any falsy value is replaced
     *                          by a new Style
     */
    public function setStyle($style)
    {
        $this->style = $style ?: new Style();
    }

    /**
     * @return Style
     */
    public function getStyle()
    {
        return $this->style;
    }

    /**
     * @return null|int
     */
    public function getType()
    {
        return $this->type;
    }

    /**
     * Forces the cell type without touching the value.
     *
     * @param int $type
     */
    public function setType($type)
    {
        $this->type = $type;
    }

    /**
     * @return bool
     */
    public function isBoolean()
    {
        return self::TYPE_BOOLEAN === $this->type;
    }

    /**
     * @return bool
     */
    public function isEmpty()
    {
        return self::TYPE_EMPTY === $this->type;
    }

    /**
     * @return bool
     */
    public function isNumeric()
    {
        return self::TYPE_NUMERIC === $this->type;
    }

    /**
     * @return bool
     */
    public function isString()
    {
        return self::TYPE_STRING === $this->type;
    }

    /**
     * @return bool
     */
    public function isDate()
    {
        return self::TYPE_DATE === $this->type;
    }

    /**
     * @return bool
     */
    public function isFormula()
    {
        return self::TYPE_FORMULA === $this->type;
    }

    /**
     * @return bool
     */
    public function isError()
    {
        return self::TYPE_ERROR === $this->type;
    }

    /**
     * Get the current value type.
     *
     * The checks run in a fixed order and the first match wins; a value that
     * matches none of them is reported as TYPE_ERROR.
     *
     * @param null|mixed $value
     *
     * @return int
     */
    protected function detectType($value)
    {
        if (CellTypeHelper::isBoolean($value)) {
            return self::TYPE_BOOLEAN;
        }
        if (CellTypeHelper::isEmpty($value)) {
            return self::TYPE_EMPTY;
        }
        if (CellTypeHelper::isNumeric($value)) {
            return self::TYPE_NUMERIC;
        }
        if (CellTypeHelper::isDateTimeOrDateInterval($value)) {
            return self::TYPE_DATE;
        }
        if (CellTypeHelper::isFormula($value)) {
            return self::TYPE_FORMULA;
        }
        if (CellTypeHelper::isNonEmptyString($value)) {
            return self::TYPE_STRING;
        }

        return self::TYPE_ERROR;
    }
}

View File

@ -0,0 +1,166 @@
<?php
namespace OpenSpout\Common\Entity;
use OpenSpout\Common\Entity\Style\Style;
/**
 * A row of a sheet: a list of cells plus a row-level style and height.
 */
class Row
{
    /** @var Cell[] Cells of this row, keyed by their index */
    protected $cells = [];

    /** @var Style Style applied to the row */
    protected $style;

    /** @var string Height of the row (default is 15) */
    protected $height = '15';

    /**
     * @param Cell[]     $cells Cells to put in the row
     * @param null|Style $style Row style; a falsy value yields a fresh Style
     */
    public function __construct(array $cells, $style)
    {
        $this->setCells($cells)->setStyle($style);
    }

    /**
     * @return Cell[] The cells of the row
     */
    public function getCells()
    {
        return $this->cells;
    }

    /**
     * Replaces all cells of the row; the given cells are appended one by one,
     * so the keys of the input array are not kept.
     *
     * @param Cell[] $cells
     *
     * @return Row
     */
    public function setCells(array $cells)
    {
        $this->cells = [];

        foreach ($cells as $newCell) {
            $this->addCell($newCell);
        }

        return $this;
    }

    /**
     * Puts a cell at the given index, overwriting any cell already stored there.
     *
     * @param int $cellIndex
     *
     * @return Row
     */
    public function setCellAtIndex(Cell $cell, $cellIndex)
    {
        $this->cells[$cellIndex] = $cell;

        return $this;
    }

    /**
     * @param int $cellIndex
     *
     * @return null|Cell The cell stored at that index, or null when there is none
     */
    public function getCellAtIndex($cellIndex)
    {
        return isset($this->cells[$cellIndex]) ? $this->cells[$cellIndex] : null;
    }

    /**
     * Appends a cell at the end of the row.
     *
     * @return Row
     */
    public function addCell(Cell $cell)
    {
        $this->cells[] = $cell;

        return $this;
    }

    /**
     * Returns the number of cell positions in the row.
     *
     * "setCellAtIndex" can leave holes in the cell list, so the result is the
     * highest index plus one rather than the number of stored cells.
     *
     * @return int
     */
    public function getNumCells()
    {
        return empty($this->cells) ? 0 : max(array_keys($this->cells)) + 1;
    }

    /**
     * @return Style
     */
    public function getStyle()
    {
        return $this->style;
    }

    /**
     * @param null|Style $style Row style; a falsy value is replaced by a fresh Style
     *
     * @return Row
     */
    public function setStyle($style)
    {
        if (!$style) {
            $style = new Style();
        }
        $this->style = $style;

        return $this;
    }

    /**
     * @return array The value of every stored cell, under the same keys as the cells
     */
    public function toArray()
    {
        return array_map(static fn (Cell $cell) => $cell->getValue(), $this->cells);
    }

    /**
     * Sets the row height.
     *
     * @param string $height
     *
     * @return Row
     */
    public function setHeight($height)
    {
        $this->height = $height;

        return $this;
    }

    /**
     * @return string The row height
     */
    public function getHeight()
    {
        return $this->height;
    }
}

View File

@ -0,0 +1,80 @@
<?php
namespace OpenSpout\Common\Entity\Style;
/**
 * A cell border, made of at most one BorderPart per side.
 */
class Border
{
    // Sides of a border
    public const LEFT = 'left';
    public const RIGHT = 'right';
    public const TOP = 'top';
    public const BOTTOM = 'bottom';

    // Line styles
    public const STYLE_NONE = 'none';
    public const STYLE_SOLID = 'solid';
    public const STYLE_DASHED = 'dashed';
    public const STYLE_DOTTED = 'dotted';
    public const STYLE_DOUBLE = 'double';

    // Line widths
    public const WIDTH_THIN = 'thin';
    public const WIDTH_MEDIUM = 'medium';
    public const WIDTH_THICK = 'thick';

    /** @var array BorderPart objects of this border, indexed by part name */
    private $parts = [];

    /**
     * @param array $borderParts BorderPart objects to start with
     */
    public function __construct(array $borderParts = [])
    {
        $this->setParts($borderParts);
    }

    /**
     * @param string $name The name of the border part
     *
     * @return null|BorderPart The part registered under that name, or null when there is none
     */
    public function getPart($name)
    {
        if (!$this->hasPart($name)) {
            return null;
        }

        return $this->parts[$name];
    }

    /**
     * @param string $name The name of the border part
     *
     * @return bool Whether a part is registered under that name
     */
    public function hasPart($name)
    {
        return isset($this->parts[$name]);
    }

    /**
     * @return array All registered parts, indexed by name
     */
    public function getParts()
    {
        return $this->parts;
    }

    /**
     * Replaces every registered part with the given ones.
     *
     * @param array $parts BorderPart objects
     */
    public function setParts($parts)
    {
        $this->parts = [];

        foreach ($parts as $borderPart) {
            $this->addPart($borderPart);
        }
    }

    /**
     * Registers a part under its own name; a part already registered
     * under the same name is overwritten.
     *
     * @return Border
     */
    public function addPart(BorderPart $borderPart)
    {
        $partName = $borderPart->getName();
        $this->parts[$partName] = $borderPart;

        return $this;
    }
}

View File

@ -0,0 +1,181 @@
<?php
namespace OpenSpout\Common\Entity\Style;
use OpenSpout\Writer\Exception\Border\InvalidNameException;
use OpenSpout\Writer\Exception\Border\InvalidStyleException;
use OpenSpout\Writer\Exception\Border\InvalidWidthException;
/**
 * One side of a cell border: a name (side), a color, a width and a line style.
 */
class BorderPart
{
    /** @var string Line style of this part, one of self::$allowedStyles */
    protected $style;

    /** @var string Name (side) of this part, one of self::$allowedNames */
    protected $name;

    /** @var string Color of this part */
    protected $color;

    /** @var string Width of this part, one of self::$allowedWidths */
    protected $width;

    /** @var array Line styles accepted by setStyle() */
    protected static $allowedStyles = ['none', 'solid', 'dashed', 'dotted', 'double'];

    /** @var array Names accepted by setName() */
    protected static $allowedNames = ['left', 'right', 'top', 'bottom'];

    /** @var array Widths accepted by setWidth() */
    protected static $allowedWidths = ['thin', 'medium', 'thick'];

    /**
     * Name, width and style are validated by their setters, in that order.
     *
     * @param string $name  @see BorderPart::$allowedNames
     * @param string $color A RGB color code
     * @param string $width @see BorderPart::$allowedWidths
     * @param string $style @see BorderPart::$allowedStyles
     *
     * @throws InvalidNameException
     * @throws InvalidStyleException
     * @throws InvalidWidthException
     */
    public function __construct($name, $color = Color::BLACK, $width = Border::WIDTH_MEDIUM, $style = Border::STYLE_SOLID)
    {
        $this->setName($name);
        $this->setColor($color);
        $this->setWidth($width);
        $this->setStyle($style);
    }

    /**
     * @return string
     */
    public function getName()
    {
        return $this->name;
    }

    /**
     * @param string $name Side this part applies to @see BorderPart::$allowedNames
     *
     * @throws InvalidNameException When the name is not one of the allowed names
     */
    public function setName($name)
    {
        $isKnownName = \in_array($name, self::$allowedNames, true);
        if (!$isKnownName) {
            throw new InvalidNameException($name);
        }

        $this->name = $name;
    }

    /**
     * @return string
     */
    public function getStyle()
    {
        return $this->style;
    }

    /**
     * @param string $style Line style of this part @see BorderPart::$allowedStyles
     *
     * @throws InvalidStyleException When the style is not one of the allowed styles
     */
    public function setStyle($style)
    {
        $isKnownStyle = \in_array($style, self::$allowedStyles, true);
        if (!$isKnownStyle) {
            throw new InvalidStyleException($style);
        }

        $this->style = $style;
    }

    /**
     * @return string
     */
    public function getColor()
    {
        return $this->color;
    }

    /**
     * Stores the color as given; no validation is performed here.
     *
     * @param string $color The color of the border part @see Color::rgb()
     */
    public function setColor($color)
    {
        $this->color = $color;
    }

    /**
     * @return string
     */
    public function getWidth()
    {
        return $this->width;
    }

    /**
     * @param string $width Width of this part @see BorderPart::$allowedWidths
     *
     * @throws InvalidWidthException When the width is not one of the allowed widths
     */
    public function setWidth($width)
    {
        $isKnownWidth = \in_array($width, self::$allowedWidths, true);
        if (!$isKnownWidth) {
            throw new InvalidWidthException($width);
        }

        $this->width = $width;
    }

    /**
     * @return array The accepted line styles
     */
    public static function getAllowedStyles()
    {
        return self::$allowedStyles;
    }

    /**
     * @return array The accepted part names
     */
    public static function getAllowedNames()
    {
        return self::$allowedNames;
    }

    /**
     * @return array The accepted widths
     */
    public static function getAllowedWidths()
    {
        return self::$allowedWidths;
    }
}

View File

@ -1,20 +1,18 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Common\Entity\Style;
/**
* This class provides constants to work with text alignment.
*/
final class CellAlignment
abstract class CellAlignment
{
public const LEFT = 'left';
public const RIGHT = 'right';
public const CENTER = 'center';
public const JUSTIFY = 'justify';
private const VALID_ALIGNMENTS = [
private static $VALID_ALIGNMENTS = [
self::LEFT => 1,
self::RIGHT => 1,
self::CENTER => 1,
@ -22,10 +20,12 @@ final class CellAlignment
];
/**
* @param string $cellAlignment
*
* @return bool Whether the given cell alignment is valid
*/
public static function isValid(string $cellAlignment): bool
public static function isValid($cellAlignment)
{
return isset(self::VALID_ALIGNMENTS[$cellAlignment]);
return isset(self::$VALID_ALIGNMENTS[$cellAlignment]);
}
}

View File

@ -1,7 +1,5 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Common\Entity\Style;
use OpenSpout\Common\Exception\InvalidColorException;
@ -9,11 +7,9 @@ use OpenSpout\Common\Exception\InvalidColorException;
/**
* This class provides constants and functions to work with colors.
*/
final class Color
abstract class Color
{
/**
* Standard colors - based on Office Online.
*/
/** Standard colors - based on Office Online */
public const BLACK = '000000';
public const WHITE = 'FFFFFF';
public const RED = 'FF0000';
@ -36,7 +32,7 @@ final class Color
*
* @return string RGB color
*/
public static function rgb(int $red, int $green, int $blue): string
public static function rgb($red, $green, $blue)
{
self::throwIfInvalidColorComponentValue($red);
self::throwIfInvalidColorComponentValue($green);
@ -57,7 +53,7 @@ final class Color
*
* @return string ARGB color
*/
public static function toARGB(string $rgbColor): string
public static function toARGB($rgbColor)
{
return 'FF'.$rgbColor;
}
@ -65,11 +61,13 @@ final class Color
/**
* Throws an exception if the color component value is outside of bounds (0 - 255).
*
* @throws InvalidColorException
* @param int $colorComponent
*
* @throws \OpenSpout\Common\Exception\InvalidColorException
*/
private static function throwIfInvalidColorComponentValue(int $colorComponent): void
protected static function throwIfInvalidColorComponentValue($colorComponent)
{
if ($colorComponent < 0 || $colorComponent > 255) {
if (!\is_int($colorComponent) || $colorComponent < 0 || $colorComponent > 255) {
throw new InvalidColorException("The RGB components must be between 0 and 255. Received: {$colorComponent}");
}
}
@ -81,7 +79,7 @@ final class Color
*
* @return string Corresponding hexadecimal value, with a leading 0 if needed. E.g "0f", "2d"
*/
private static function convertColorComponentToHex(int $colorComponent): string
protected static function convertColorComponentToHex($colorComponent)
{
return str_pad(dechex($colorComponent), 2, '0', STR_PAD_LEFT);
}

View File

@ -1,160 +1,159 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Common\Entity\Style;
use OpenSpout\Common\Exception\InvalidArgumentException;
/**
* Represents a style to be applied to a cell.
*/
final class Style
class Style
{
/**
* Default values.
*/
/** Default values */
public const DEFAULT_FONT_SIZE = 11;
public const DEFAULT_FONT_COLOR = Color::BLACK;
public const DEFAULT_FONT_NAME = 'Arial';
/** @var int Style ID */
private int $id = -1;
/** @var null|int Style ID */
private $id;
/** @var bool Whether the font should be bold */
private bool $fontBold = false;
private $fontBold = false;
/** @var bool Whether the bold property was set */
private bool $hasSetFontBold = false;
private $hasSetFontBold = false;
/** @var bool Whether the font should be italic */
private bool $fontItalic = false;
private $fontItalic = false;
/** @var bool Whether the italic property was set */
private bool $hasSetFontItalic = false;
private $hasSetFontItalic = false;
/** @var bool Whether the font should be underlined */
private bool $fontUnderline = false;
private $fontUnderline = false;
/** @var bool Whether the underline property was set */
private bool $hasSetFontUnderline = false;
private $hasSetFontUnderline = false;
/** @var bool Whether the font should be struck through */
private bool $fontStrikethrough = false;
private $fontStrikethrough = false;
/** @var bool Whether the strikethrough property was set */
private bool $hasSetFontStrikethrough = false;
private $hasSetFontStrikethrough = false;
/** @var int Font size */
private int $fontSize = self::DEFAULT_FONT_SIZE;
private $fontSize = self::DEFAULT_FONT_SIZE;
/** @var bool Whether the font size property was set */
private bool $hasSetFontSize = false;
private $hasSetFontSize = false;
/** @var string Font color */
private string $fontColor = self::DEFAULT_FONT_COLOR;
private $fontColor = self::DEFAULT_FONT_COLOR;
/** @var bool Whether the font color property was set */
private bool $hasSetFontColor = false;
private $hasSetFontColor = false;
/** @var string Font name */
private string $fontName = self::DEFAULT_FONT_NAME;
private $fontName = self::DEFAULT_FONT_NAME;
/** @var bool Whether the font name property was set */
private bool $hasSetFontName = false;
private $hasSetFontName = false;
/** @var bool Whether specific font properties should be applied */
private bool $shouldApplyFont = false;
private $shouldApplyFont = false;
/** @var bool Whether specific cell alignment should be applied */
private bool $shouldApplyCellAlignment = false;
private $shouldApplyCellAlignment = false;
/** @var string Cell alignment */
private string $cellAlignment;
private $cellAlignment;
/** @var bool Whether the cell alignment property was set */
private bool $hasSetCellAlignment = false;
/** @var bool Whether specific cell vertical alignment should be applied */
private bool $shouldApplyCellVerticalAlignment = false;
/** @var string Cell vertical alignment */
private string $cellVerticalAlignment;
/** @var bool Whether the cell vertical alignment property was set */
private bool $hasSetCellVerticalAlignment = false;
private $hasSetCellAlignment = false;
/** @var bool Whether the text should wrap in the cell (useful for long or multi-lines text) */
private bool $shouldWrapText = false;
private $shouldWrapText = false;
/** @var bool Whether the wrap text property was set */
private bool $hasSetWrapText = false;
/** @var int Text rotation */
private int $textRotation = 0;
/** @var bool Whether the text rotation property was set */
private bool $hasSetTextRotation = false;
private $hasSetWrapText = false;
/** @var bool Whether the cell should shrink to fit to content */
private bool $shouldShrinkToFit = false;
private $shouldShrinkToFit = false;
/** @var bool Whether the shouldShrinkToFit text property was set */
private bool $hasSetShrinkToFit = false;
private $hasSetShrinkToFit = false;
private ?Border $border = null;
/** @var null|Border */
private $border;
/** @var bool Whether border properties should be applied */
private $shouldApplyBorder = false;
/** @var null|string Background color */
private ?string $backgroundColor = null;
private $backgroundColor;
/** @var bool */
private $hasSetBackgroundColor = false;
/** @var null|string Format */
private ?string $format = null;
private $format;
private bool $isRegistered = false;
/** @var bool */
private $hasSetFormat = false;
private bool $isEmpty = true;
/** @var bool */
private $isRegistered = false;
public function __sleep(): array
/** @var bool */
private $isEmpty = true;
/**
* @return null|int
*/
public function getId()
{
$vars = get_object_vars($this);
unset($vars['id'], $vars['isRegistered']);
return array_keys($vars);
}
public function getId(): int
{
\assert(0 <= $this->id);
return $this->id;
}
public function setId(int $id): self
/**
* @param int $id
*
* @return Style
*/
public function setId($id)
{
$this->id = $id;
return $this;
}
public function getBorder(): ?Border
/**
* @return null|Border
*/
public function getBorder()
{
return $this->border;
}
public function setBorder(Border $border): self
/**
* @return Style
*/
public function setBorder(Border $border)
{
$this->shouldApplyBorder = true;
$this->border = $border;
$this->isEmpty = false;
return $this;
}
public function isFontBold(): bool
/**
* @return bool
*/
public function shouldApplyBorder()
{
return $this->shouldApplyBorder;
}
/**
* @return bool
*/
public function isFontBold()
{
return $this->fontBold;
}
public function setFontBold(): self
/**
* @return Style
*/
public function setFontBold()
{
$this->fontBold = true;
$this->hasSetFontBold = true;
@ -164,17 +163,26 @@ final class Style
return $this;
}
public function hasSetFontBold(): bool
/**
* @return bool
*/
public function hasSetFontBold()
{
return $this->hasSetFontBold;
}
public function isFontItalic(): bool
/**
* @return bool
*/
public function isFontItalic()
{
return $this->fontItalic;
}
public function setFontItalic(): self
/**
* @return Style
*/
public function setFontItalic()
{
$this->fontItalic = true;
$this->hasSetFontItalic = true;
@ -184,17 +192,26 @@ final class Style
return $this;
}
public function hasSetFontItalic(): bool
/**
* @return bool
*/
public function hasSetFontItalic()
{
return $this->hasSetFontItalic;
}
public function isFontUnderline(): bool
/**
* @return bool
*/
public function isFontUnderline()
{
return $this->fontUnderline;
}
public function setFontUnderline(): self
/**
* @return Style
*/
public function setFontUnderline()
{
$this->fontUnderline = true;
$this->hasSetFontUnderline = true;
@ -204,17 +221,26 @@ final class Style
return $this;
}
public function hasSetFontUnderline(): bool
/**
* @return bool
*/
public function hasSetFontUnderline()
{
return $this->hasSetFontUnderline;
}
public function isFontStrikethrough(): bool
/**
* @return bool
*/
public function isFontStrikethrough()
{
return $this->fontStrikethrough;
}
public function setFontStrikethrough(): self
/**
* @return Style
*/
public function setFontStrikethrough()
{
$this->fontStrikethrough = true;
$this->hasSetFontStrikethrough = true;
@ -224,20 +250,28 @@ final class Style
return $this;
}
public function hasSetFontStrikethrough(): bool
/**
* @return bool
*/
public function hasSetFontStrikethrough()
{
return $this->hasSetFontStrikethrough;
}
public function getFontSize(): int
/**
* @return int
*/
public function getFontSize()
{
return $this->fontSize;
}
/**
* @param int $fontSize Font size, in pixels
*
* @return Style
*/
public function setFontSize(int $fontSize): self
public function setFontSize($fontSize)
{
$this->fontSize = $fontSize;
$this->hasSetFontSize = true;
@ -247,12 +281,18 @@ final class Style
return $this;
}
public function hasSetFontSize(): bool
/**
* @return bool
*/
public function hasSetFontSize()
{
return $this->hasSetFontSize;
}
public function getFontColor(): string
/**
* @return string
*/
public function getFontColor()
{
return $this->fontColor;
}
@ -261,8 +301,10 @@ final class Style
* Sets the font color.
*
* @param string $fontColor ARGB color (@see Color)
*
* @return Style
*/
public function setFontColor(string $fontColor): self
public function setFontColor($fontColor)
{
$this->fontColor = $fontColor;
$this->hasSetFontColor = true;
@ -272,20 +314,28 @@ final class Style
return $this;
}
public function hasSetFontColor(): bool
/**
* @return bool
*/
public function hasSetFontColor()
{
return $this->hasSetFontColor;
}
public function getFontName(): string
/**
* @return string
*/
public function getFontName()
{
return $this->fontName;
}
/**
* @param string $fontName Name of the font to use
*
* @return Style
*/
public function setFontName(string $fontName): self
public function setFontName($fontName)
{
$this->fontName = $fontName;
$this->hasSetFontName = true;
@ -295,30 +345,29 @@ final class Style
return $this;
}
public function hasSetFontName(): bool
/**
* @return bool
*/
public function hasSetFontName()
{
return $this->hasSetFontName;
}
public function getCellAlignment(): string
/**
* @return string
*/
public function getCellAlignment()
{
return $this->cellAlignment;
}
public function getCellVerticalAlignment(): string
{
return $this->cellVerticalAlignment;
}
/**
* @param string $cellAlignment The cell alignment
*
* @return Style
*/
public function setCellAlignment(string $cellAlignment): self
public function setCellAlignment($cellAlignment)
{
if (!CellAlignment::isValid($cellAlignment)) {
throw new InvalidArgumentException('Invalid cell alignment value');
}
$this->cellAlignment = $cellAlignment;
$this->hasSetCellAlignment = true;
$this->shouldApplyCellAlignment = true;
@ -328,54 +377,35 @@ final class Style
}
/**
* @param string $cellVerticalAlignment The cell vertical alignment
* @return bool
*/
public function setCellVerticalAlignment(string $cellVerticalAlignment): self
{
if (!CellVerticalAlignment::isValid($cellVerticalAlignment)) {
throw new InvalidArgumentException('Invalid cell vertical alignment value');
}
$this->cellVerticalAlignment = $cellVerticalAlignment;
$this->hasSetCellVerticalAlignment = true;
$this->shouldApplyCellVerticalAlignment = true;
$this->isEmpty = false;
return $this;
}
public function hasSetCellAlignment(): bool
public function hasSetCellAlignment()
{
return $this->hasSetCellAlignment;
}
public function hasSetCellVerticalAlignment(): bool
{
return $this->hasSetCellVerticalAlignment;
}
/**
* @return bool Whether specific cell alignment should be applied
*/
public function shouldApplyCellAlignment(): bool
public function shouldApplyCellAlignment()
{
return $this->shouldApplyCellAlignment;
}
public function shouldApplyCellVerticalAlignment(): bool
{
return $this->shouldApplyCellVerticalAlignment;
}
public function shouldWrapText(): bool
/**
* @return bool
*/
public function shouldWrapText()
{
return $this->shouldWrapText;
}
/**
* @param bool $shouldWrap Should the text be wrapped
*
* @return Style
*/
public function setShouldWrapText(bool $shouldWrap = true): self
public function setShouldWrapText($shouldWrap = true)
{
$this->shouldWrapText = $shouldWrap;
$this->hasSetWrapText = true;
@ -384,37 +414,18 @@ final class Style
return $this;
}
public function hasSetWrapText(): bool
/**
* @return bool
*/
public function hasSetWrapText()
{
return $this->hasSetWrapText;
}
public function textRotation(): int
{
return $this->textRotation;
}
/**
* @param int $rotation Rotate text
*/
public function setTextRotation(int $rotation): self
{
$this->textRotation = $rotation;
$this->hasSetTextRotation = true;
$this->isEmpty = false;
return $this;
}
public function hasSetTextRotation(): bool
{
return $this->hasSetTextRotation;
}
/**
* @return bool Whether specific font properties should be applied
*/
public function shouldApplyFont(): bool
public function shouldApplyFont()
{
return $this->shouldApplyFont;
}
@ -423,36 +434,66 @@ final class Style
* Sets the background color.
*
* @param string $color ARGB color (@see Color)
*
* @return Style
*/
public function setBackgroundColor(string $color): self
public function setBackgroundColor($color)
{
$this->hasSetBackgroundColor = true;
$this->backgroundColor = $color;
$this->isEmpty = false;
return $this;
}
public function getBackgroundColor(): ?string
/**
* @return null|string
*/
public function getBackgroundColor()
{
return $this->backgroundColor;
}
/**
* Sets format.
* @return bool Whether the background color should be applied
*/
public function setFormat(string $format): self
public function shouldApplyBackgroundColor()
{
return $this->hasSetBackgroundColor;
}
/**
* Sets format.
*
* @param string $format
*
* @return Style
*/
public function setFormat($format)
{
$this->hasSetFormat = true;
$this->format = $format;
$this->isEmpty = false;
return $this;
}
public function getFormat(): ?string
/**
* @return null|string
*/
public function getFormat()
{
return $this->format;
}
/**
* @return bool Whether format should be applied
*/
public function shouldApplyFormat()
{
return $this->hasSetFormat;
}
public function isRegistered(): bool
{
return $this->isRegistered;
@ -464,6 +505,12 @@ final class Style
$this->isRegistered = true;
}
public function unmarkAsRegistered(): void
{
$this->setId(0);
$this->isRegistered = false;
}
public function isEmpty(): bool
{
return $this->isEmpty;
@ -471,8 +518,12 @@ final class Style
/**
* Sets should shrink to fit.
*
* @param bool $shrinkToFit
*
* @return Style
*/
public function setShouldShrinkToFit(bool $shrinkToFit = true): self
public function setShouldShrinkToFit($shrinkToFit = true)
{
$this->hasSetShrinkToFit = true;
$this->shouldShrinkToFit = $shrinkToFit;
@ -483,12 +534,15 @@ final class Style
/**
* @return bool Whether the cell content should shrink to fit
*/
public function shouldShrinkToFit(): bool
public function shouldShrinkToFit()
{
return $this->shouldShrinkToFit;
}
public function hasSetShrinkToFit(): bool
/**
* @return bool
*/
public function hasSetShrinkToFit()
{
return $this->hasSetShrinkToFit;
}

View File

@ -0,0 +1,7 @@
<?php
namespace OpenSpout\Common\Exception;
/**
 * Signals that a string could not be converted from one encoding to another.
 *
 * EncodingHelper documents it as thrown when a conversion is not supported
 * or when the conversion fails.
 */
class EncodingConversionException extends SpoutException
{
}

View File

@ -0,0 +1,7 @@
<?php
namespace OpenSpout\Common\Exception;
/**
 * Library exception for input/output problems.
 *
 * NOTE(review): the exact throw sites are not visible from this file;
 * presumably raised when a file or stream cannot be read or written — confirm.
 */
class IOException extends SpoutException
{
}

View File

@ -0,0 +1,7 @@
<?php
namespace OpenSpout\Common\Exception;
/**
 * Library exception for invalid arguments.
 *
 * Not to be confused with PHP's global \InvalidArgumentException: this class
 * extends SpoutException, so catching the global one will not catch it.
 */
class InvalidArgumentException extends SpoutException
{
}

View File

@ -0,0 +1,7 @@
<?php
namespace OpenSpout\Common\Exception;
/**
 * Signals an invalid color definition.
 *
 * Thrown by the Color helper when an RGB component is not an integer
 * between 0 and 255.
 */
class InvalidColorException extends SpoutException
{
}

View File

@ -0,0 +1,7 @@
<?php
namespace OpenSpout\Common\Exception;
/**
 * Abstract base class of the exceptions declared in this namespace.
 *
 * It cannot be instantiated itself; catching it catches every exception
 * class that extends it.
 */
abstract class SpoutException extends \Exception
{
}

View File

@ -0,0 +1,7 @@
<?php
namespace OpenSpout\Common\Exception;
/**
 * Library exception for unsupported types.
 *
 * NOTE(review): the throw sites are not visible from this file; presumably
 * raised when a reader/writer type is not supported — confirm.
 */
class UnsupportedTypeException extends SpoutException
{
}

View File

@ -0,0 +1,82 @@
<?php
namespace OpenSpout\Common\Helper;
/**
 * Static predicates used to classify the value of a cell.
 */
class CellTypeHelper
{
    /**
     * @param null|mixed $value
     *
     * @return bool Whether the value is exactly null or the empty string
     */
    public static function isEmpty($value)
    {
        return \in_array($value, [null, ''], true);
    }

    /**
     * @param mixed $value
     *
     * @return bool Whether the value is a string containing at least one character
     */
    public static function isNonEmptyString($value)
    {
        return \is_string($value) && '' !== $value;
    }

    /**
     * Tells whether the value is a number, i.e. an integer or a float.
     * Numeric strings such as "12" do not qualify.
     *
     * @param mixed $value
     *
     * @return bool Whether the given value is numeric
     */
    public static function isNumeric($value)
    {
        return \is_int($value) || \is_float($value);
    }

    /**
     * Tells whether the value is a real boolean.
     * "true"/"false" and 0/1 are not booleans.
     *
     * @param mixed $value
     *
     * @return bool Whether the given value is boolean
     */
    public static function isBoolean($value)
    {
        return \is_bool($value);
    }

    /**
     * Tells whether the value is a date/time object or a date interval.
     *
     * @param mixed $value
     *
     * @return bool Whether the value implements DateTimeInterface or is a DateInterval
     */
    public static function isDateTimeOrDateInterval($value)
    {
        if ($value instanceof \DateTimeInterface) {
            return true;
        }

        return $value instanceof \DateInterval;
    }

    /**
     * @param mixed $value
     *
     * @return bool Whether the value is a string whose first character is "="
     */
    public static function isFormula($value)
    {
        return \is_string($value) && '' !== $value && '=' === $value[0];
    }
}

View File

@ -1,46 +1,40 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Common\Helper;
use Error;
use OpenSpout\Common\Exception\EncodingConversionException;
/**
* @internal
* This class provides helper functions to work with encodings.
*/
final readonly class EncodingHelper
class EncodingHelper
{
/**
* Definition of the encodings that can have a BOM.
*/
/** Definition of the encodings that can have a BOM */
public const ENCODING_UTF8 = 'UTF-8';
public const ENCODING_UTF16_LE = 'UTF-16LE';
public const ENCODING_UTF16_BE = 'UTF-16BE';
public const ENCODING_UTF32_LE = 'UTF-32LE';
public const ENCODING_UTF32_BE = 'UTF-32BE';
/**
* Definition of the BOMs for the different encodings.
*/
/** Definition of the BOMs for the different encodings */
public const BOM_UTF8 = "\xEF\xBB\xBF";
public const BOM_UTF16_LE = "\xFF\xFE";
public const BOM_UTF16_BE = "\xFE\xFF";
public const BOM_UTF32_LE = "\xFF\xFE\x00\x00";
public const BOM_UTF32_BE = "\x00\x00\xFE\xFF";
/** @var array<string, string> Map representing the encodings supporting BOMs (key) and their associated BOM (value) */
private array $supportedEncodingsWithBom;
/** @var \OpenSpout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;
private bool $canUseIconv;
/** @var array Map representing the encodings supporting BOMs (key) and their associated BOM (value) */
protected $supportedEncodingsWithBom;
private bool $canUseMbString;
public function __construct(bool $canUseIconv, bool $canUseMbString)
/**
* @param \OpenSpout\Common\Helper\GlobalFunctionsHelper $globalFunctionsHelper
*/
public function __construct($globalFunctionsHelper)
{
$this->canUseIconv = $canUseIconv;
$this->canUseMbString = $canUseMbString;
$this->globalFunctionsHelper = $globalFunctionsHelper;
$this->supportedEncodingsWithBom = [
self::ENCODING_UTF8 => self::BOM_UTF8,
@ -51,14 +45,6 @@ final readonly class EncodingHelper
];
}
public static function factory(): self
{
return new self(
\function_exists('iconv'),
\function_exists('mb_convert_encoding'),
);
}
/**
* Returns the number of bytes to use as offset in order to skip the BOM.
*
@ -67,7 +53,7 @@ final readonly class EncodingHelper
*
* @return int Bytes offset to apply to skip the BOM (0 means no BOM)
*/
public function getBytesOffsetToSkipBOM($filePointer, string $encoding): int
public function getBytesOffsetToSkipBOM($filePointer, $encoding)
{
$byteOffsetToSkipBom = 0;
@ -87,11 +73,11 @@ final readonly class EncodingHelper
* @param string $string Non UTF-8 string to be converted
* @param string $sourceEncoding The encoding used to encode the source string
*
* @return string The converted, UTF-8 string
* @throws \OpenSpout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
*
* @throws EncodingConversionException If conversion is not supported or if the conversion failed
* @return string The converted, UTF-8 string
*/
public function attemptConversionToUTF8(?string $string, string $sourceEncoding): ?string
public function attemptConversionToUTF8($string, $sourceEncoding)
{
return $this->attemptConversion($string, $sourceEncoding, self::ENCODING_UTF8);
}
@ -102,11 +88,11 @@ final readonly class EncodingHelper
* @param string $string UTF-8 string to be converted
* @param string $targetEncoding The encoding the string should be re-encoded into
*
* @return string The converted string, encoded with the given encoding
* @throws \OpenSpout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
*
* @throws EncodingConversionException If conversion is not supported or if the conversion failed
* @return string The converted string, encoded with the given encoding
*/
public function attemptConversionFromUTF8(?string $string, string $targetEncoding): ?string
public function attemptConversionFromUTF8($string, $targetEncoding)
{
return $this->attemptConversion($string, self::ENCODING_UTF8, $targetEncoding);
}
@ -119,17 +105,17 @@ final readonly class EncodingHelper
*
* @return bool TRUE if the file has a BOM, FALSE otherwise
*/
private function hasBOM($filePointer, string $encoding): bool
protected function hasBOM($filePointer, $encoding)
{
$hasBOM = false;
rewind($filePointer);
$this->globalFunctionsHelper->rewind($filePointer);
if (\array_key_exists($encoding, $this->supportedEncodingsWithBom)) {
$potentialBom = $this->supportedEncodingsWithBom[$encoding];
$numBytesInBom = \strlen($potentialBom);
$hasBOM = (fgets($filePointer, $numBytesInBom + 1) === $potentialBom);
$hasBOM = ($this->globalFunctionsHelper->fgets($filePointer, $numBytesInBom + 1) === $potentialBom);
}
return $hasBOM;
@ -143,47 +129,25 @@ final readonly class EncodingHelper
* @param string $sourceEncoding The encoding used to encode the source string
* @param string $targetEncoding The encoding the string should be re-encoded into
*
* @return string The converted string, encoded with the given encoding
* @throws \OpenSpout\Common\Exception\EncodingConversionException If conversion is not supported or if the conversion failed
*
* @throws EncodingConversionException If conversion is not supported or if the conversion failed
* @return string The converted string, encoded with the given encoding
*/
private function attemptConversion(?string $string, string $sourceEncoding, string $targetEncoding): ?string
protected function attemptConversion($string, $sourceEncoding, $targetEncoding)
{
// if source and target encodings are the same, it's a no-op
if (null === $string || $sourceEncoding === $targetEncoding) {
if ($sourceEncoding === $targetEncoding) {
return $string;
}
$convertedString = null;
if ($this->canUseIconv) {
set_error_handler(static function (): bool {
return true;
});
$convertedString = iconv($sourceEncoding, $targetEncoding, $string);
restore_error_handler();
} elseif ($this->canUseMbString) {
$errorMessage = null;
set_error_handler(static function ($nr, $message) use (&$errorMessage): bool {
$errorMessage = $message; // @codeCoverageIgnore
return true; // @codeCoverageIgnore
});
try {
$convertedString = mb_convert_encoding($string, $targetEncoding, $sourceEncoding);
} catch (Error $error) {
$errorMessage = $error->getMessage();
}
restore_error_handler();
if (null !== $errorMessage) {
$convertedString = false;
}
if ($this->canUseIconv()) {
$convertedString = $this->globalFunctionsHelper->iconv($string, $sourceEncoding, $targetEncoding);
} elseif ($this->canUseMbString()) {
$convertedString = $this->globalFunctionsHelper->mb_convert_encoding($string, $sourceEncoding, $targetEncoding);
} else {
throw new EncodingConversionException("The conversion from {$sourceEncoding} to {$targetEncoding} is not supported. Please install \"iconv\" or \"mbstring\".");
throw new EncodingConversionException("The conversion from {$sourceEncoding} to {$targetEncoding} is not supported. Please install \"iconv\" or \"PHP Intl\".");
}
if (false === $convertedString) {
@ -192,4 +156,25 @@ final readonly class EncodingHelper
return $convertedString;
}
/**
* Returns whether "iconv" can be used.
*
* The check is delegated to the globalFunctionsHelper collaborator, which is asked whether
* a function named "iconv" exists.
*
* @return bool TRUE if "iconv" is available and can be used, FALSE otherwise
*/
protected function canUseIconv()
{
return $this->globalFunctionsHelper->function_exists('iconv');
}
/**
* Returns whether the multibyte string conversion function can be used.
*
* The check is delegated to the globalFunctionsHelper collaborator, which is asked whether
* a function named "mb_convert_encoding" exists. That function is provided by PHP's
* "mbstring" extension (not by the Intl extension).
*
* @return bool TRUE if "mb_convert_encoding" is available and can be used, FALSE otherwise
*/
protected function canUseMbString()
{
return $this->globalFunctionsHelper->function_exists('mb_convert_encoding');
}
}

View File

@ -0,0 +1,37 @@
<?php
namespace OpenSpout\Common\Helper\Escaper;
/**
* Provides functions to escape and unescape data for CSV files.
*/
// Escaper for CSV content: both directions are identity operations, the string is
// returned exactly as it was received.
class CSV implements EscaperInterface
{
/**
* Escapes the given string to make it compatible with CSV.
* No transformation is applied: the input is returned unchanged.
*
* @codeCoverageIgnore
*
* @param string $string The string to escape
*
* @return string The same string, untouched
*/
public function escape($string)
{
return $string;
}
/**
* Unescapes the given string coming from a CSV document.
* No transformation is applied: the input is returned unchanged.
*
* @codeCoverageIgnore
*
* @param string $string The string to unescape
*
* @return string The same string, untouched
*/
public function unescape($string)
{
return $string;
}
}

View File

@ -1,11 +1,9 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Common\Helper\Escaper;
/**
* @internal
* Interface EscaperInterface.
*/
interface EscaperInterface
{
@ -16,7 +14,7 @@ interface EscaperInterface
*
* @return string The escaped string
*/
public function escape(string $string): string;
public function escape($string);
/**
* Unescapes the given string to make it compatible with PHP.
@ -25,5 +23,5 @@ interface EscaperInterface
*
* @return string The unescaped string
*/
public function unescape(string $string): string;
public function unescape($string);
}

View File

@ -0,0 +1,63 @@
<?php
namespace OpenSpout\Common\Helper\Escaper;
/**
* Provides functions to escape and unescape data for ODS files.
*/
class ODS implements EscaperInterface
{
    /**
     * Escapes the given string to make it compatible with ODS (XML content).
     *
     * XML special characters and both quote styles are converted to entities. Characters that
     * are not allowed in the document (control characters other than "\t", "\n" and "\r")
     * are replaced by the Unicode replacement character U+FFFD.
     *
     * @param string $string The string to escape
     *
     * @return string The escaped string
     */
    public function escape($string)
    {
        // @NOTE: Using ENT_QUOTES as XML entities ('<', '>', '&') as well as
        //        single/double quotes (for XML attributes) need to be encoded.
        if (\defined('ENT_DISALLOWED')) {
            /**
             * 'ENT_DISALLOWED' ensures that invalid characters in the given document type are replaced.
             * Otherwise control characters like a vertical tab "\v" will make the XML document unreadable by the XML processor.
             *
             * @see https://github.com/box/spout/issues/329
             */
            return htmlspecialchars($string, ENT_QUOTES | ENT_DISALLOWED, 'UTF-8');
        }

        // We are on hhvm or any other engine that does not support ENT_DISALLOWED.
        $escapedString = htmlspecialchars($string, ENT_QUOTES, 'UTF-8');

        // control characters values are from 0 to 1F (hex values) in the ASCII table
        // some characters should not be escaped though: "\t", "\r" and "\n".
        $regexPattern = '[\x00-\x08'.
                        // skipping "\t" (0x9) and "\n" (0xA)
                        '\x0B-\x0C'.
                        // skipping "\r" (0xD)
                        '\x0E-\x1F]';

        // The replacement must be the U+FFFD character itself. It is spelled as an escape
        // sequence so the source stays pure ASCII and cannot be damaged by a re-encoding;
        // a literal containing '<' here would inject raw markup after the escaping step.
        return preg_replace("/{$regexPattern}/", "\u{FFFD}", $escapedString);
    }

    /**
     * Unescapes the given string read from an ODS document.
     *
     * @param string $string The string to unescape
     *
     * @return string The unescaped string (returned unchanged, see the warning below)
     */
    public function unescape($string)
    {
        // ==============
        // =   WARNING  =
        // ==============
        // It is assumed that the given string has already had its XML entities decoded.
        // This is true if the string is coming from a DOMNode (as DOMNode already decode XML entities on creation).
        // Therefore there is no need to call "htmlspecialchars_decode()".
        return $string;
    }
}

View File

@ -1,25 +1,23 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Common\Helper\Escaper;
/**
* @internal
* Provides functions to escape and unescape data for XLSX files.
*/
final class XLSX implements EscaperInterface
class XLSX implements EscaperInterface
{
/** @var bool Whether the escaper has already been initialized */
private bool $isAlreadyInitialized = false;
private $isAlreadyInitialized = false;
/** @var string Regex pattern to detect control characters that need to be escaped */
private string $escapableControlCharactersPattern;
private $escapableControlCharactersPattern;
/** @var string[] Map containing control characters to be escaped (key) and their escaped value (value) */
private array $controlCharactersEscapingMap;
private $controlCharactersEscapingMap;
/** @var string[] Map containing control characters to be escaped (value) and their escaped value (key) */
private array $controlCharactersEscapingReverseMap;
private $controlCharactersEscapingReverseMap;
/**
* Escapes the given string to make it compatible with XLSX.
@ -28,12 +26,11 @@ final class XLSX implements EscaperInterface
*
* @return string The escaped string
*/
public function escape(string $string): string
public function escape($string)
{
$this->initIfNeeded();
$escapedString = $this->escapeControlCharacters($string);
// @NOTE: Using ENT_QUOTES as XML entities ('<', '>', '&') as well as
// single/double quotes (for XML attributes) need to be encoded.
return htmlspecialchars($escapedString, ENT_QUOTES, 'UTF-8');
@ -46,7 +43,7 @@ final class XLSX implements EscaperInterface
*
* @return string The unescaped string
*/
public function unescape(string $string): string
public function unescape($string)
{
$this->initIfNeeded();
@ -62,7 +59,7 @@ final class XLSX implements EscaperInterface
/**
* Initializes the control characters if not already done.
*/
private function initIfNeeded(): void
protected function initIfNeeded()
{
if (!$this->isAlreadyInitialized) {
$this->escapableControlCharactersPattern = $this->getEscapableControlCharactersPattern();
@ -76,7 +73,7 @@ final class XLSX implements EscaperInterface
/**
* @return string Regex pattern containing all escapable control characters
*/
private function getEscapableControlCharactersPattern(): string
protected function getEscapableControlCharactersPattern()
{
// control characters values are from 0 to 1F (hex values) in the ASCII table
// some characters should not be escaped though: "\t", "\r" and "\n".
@ -98,16 +95,16 @@ final class XLSX implements EscaperInterface
*
* @return string[]
*/
private function getControlCharactersEscapingMap(): array
protected function getControlCharactersEscapingMap()
{
$controlCharactersEscapingMap = [];
// control characters values are from 0 to 1F (hex values) in the ASCII table
for ($charValue = 0x00; $charValue <= 0x1F; ++$charValue) {
$character = \chr($charValue);
if (1 === preg_match("/{$this->escapableControlCharactersPattern}/", $character)) {
if (preg_match("/{$this->escapableControlCharactersPattern}/", $character)) {
$charHexValue = dechex($charValue);
$escapedChar = '_x'.\sprintf('%04s', strtoupper($charHexValue)).'_';
$escapedChar = '_x'.sprintf('%04s', strtoupper($charHexValue)).'_';
$controlCharactersEscapingMap[$escapedChar] = $character;
}
}
@ -127,13 +124,15 @@ final class XLSX implements EscaperInterface
* @see https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
*
* @param string $string String to escape
*
* @return string
*/
private function escapeControlCharacters(string $string): string
protected function escapeControlCharacters($string)
{
$escapedString = $this->escapeEscapeCharacter($string);
// if no control characters
if (1 !== preg_match("/{$this->escapableControlCharactersPattern}/", $escapedString)) {
if (!preg_match("/{$this->escapableControlCharactersPattern}/", $escapedString)) {
return $escapedString;
}
@ -149,7 +148,7 @@ final class XLSX implements EscaperInterface
*
* @return string The escaped string
*/
private function escapeEscapeCharacter(string $string): string
protected function escapeEscapeCharacter($string)
{
return preg_replace('/_(x[\dA-F]{4})_/', '_x005F_$1_', $string);
}
@ -166,8 +165,10 @@ final class XLSX implements EscaperInterface
* @see https://github.com/jmcnamara/XlsxWriter/blob/f1e610f29/xlsxwriter/sharedstrings.py#L89
*
* @param string $string String to unescape
*
* @return string
*/
private function unescapeControlCharacters(string $string): string
protected function unescapeControlCharacters($string)
{
$unescapedString = $string;
@ -186,7 +187,7 @@ final class XLSX implements EscaperInterface
*
* @return string The unescaped string
*/
private function unescapeEscapeCharacter(string $string): string
protected function unescapeEscapeCharacter($string)
{
return preg_replace('/_x005F(_x[\dA-F]{4}_)/', '$1', $string);
}

View File

@ -1,34 +1,24 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Common\Helper;
use OpenSpout\Common\Exception\IOException;
use RecursiveDirectoryIterator;
use RecursiveIteratorIterator;
/**
* @internal
* This class provides helper functions to help with the file system operations
* like files/folders creation & deletion.
*/
final readonly class FileSystemHelper implements FileSystemHelperInterface
class FileSystemHelper implements FileSystemHelperInterface
{
/** @var string Real path of the base folder where all the I/O can occur */
private string $baseFolderRealPath;
protected $baseFolderRealPath;
/**
* @param string $baseFolderPath The path of the base folder where all the I/O can occur
*/
public function __construct(string $baseFolderPath)
{
$realpath = realpath($baseFolderPath);
\assert(false !== $realpath);
$this->baseFolderRealPath = $realpath;
}
public function getBaseFolderRealPath(): string
{
return $this->baseFolderRealPath;
$this->baseFolderRealPath = realpath($baseFolderPath);
}
/**
@ -37,27 +27,19 @@ final readonly class FileSystemHelper implements FileSystemHelperInterface
* @param string $parentFolderPath The parent folder path under which the folder is going to be created
* @param string $folderName The name of the folder to create
*
* @return string Path of the created folder
* @throws \OpenSpout\Common\Exception\IOException If unable to create the folder or if the folder path is not inside of the base folder
*
* @throws IOException If unable to create the folder or if the folder path is not inside of the base folder
* @return string Path of the created folder
*/
public function createFolder(string $parentFolderPath, string $folderName): string
public function createFolder($parentFolderPath, $folderName)
{
$this->throwIfOperationNotInBaseFolder($parentFolderPath);
$folderPath = $parentFolderPath.\DIRECTORY_SEPARATOR.$folderName;
$folderPath = $parentFolderPath.'/'.$folderName;
$errorMessage = '';
set_error_handler(static function ($nr, $message) use (&$errorMessage): bool {
$errorMessage = $message;
return true;
});
$wasCreationSuccessful = mkdir($folderPath, 0777, true);
restore_error_handler();
if (!$wasCreationSuccessful) {
throw new IOException("Unable to create folder: {$folderPath} - {$errorMessage}");
throw new IOException("Unable to create folder: {$folderPath}");
}
return $folderPath;
@ -71,27 +53,19 @@ final readonly class FileSystemHelper implements FileSystemHelperInterface
* @param string $fileName The name of the file to create
* @param string $fileContents The contents of the file to create
*
* @return string Path of the created file
* @throws \OpenSpout\Common\Exception\IOException If unable to create the file or if the file path is not inside of the base folder
*
* @throws IOException If unable to create the file or if the file path is not inside of the base folder
* @return string Path of the created file
*/
public function createFileWithContents(string $parentFolderPath, string $fileName, string $fileContents): string
public function createFileWithContents($parentFolderPath, $fileName, $fileContents)
{
$this->throwIfOperationNotInBaseFolder($parentFolderPath);
$filePath = $parentFolderPath.\DIRECTORY_SEPARATOR.$fileName;
$filePath = $parentFolderPath.'/'.$fileName;
$errorMessage = '';
set_error_handler(static function ($nr, $message) use (&$errorMessage): bool {
$errorMessage = $message;
return true;
});
$wasCreationSuccessful = file_put_contents($filePath, $fileContents);
restore_error_handler();
if (false === $wasCreationSuccessful) {
throw new IOException("Unable to create file: {$filePath} - {$errorMessage}");
throw new IOException("Unable to create file: {$filePath}");
}
return $filePath;
@ -102,9 +76,9 @@ final readonly class FileSystemHelper implements FileSystemHelperInterface
*
* @param string $filePath Path of the file to delete
*
* @throws IOException If the file path is not inside of the base folder
* @throws \OpenSpout\Common\Exception\IOException If the file path is not inside of the base folder
*/
public function deleteFile(string $filePath): void
public function deleteFile($filePath)
{
$this->throwIfOperationNotInBaseFolder($filePath);
@ -118,15 +92,15 @@ final readonly class FileSystemHelper implements FileSystemHelperInterface
*
* @param string $folderPath Path of the folder to delete
*
* @throws IOException If the folder path is not inside of the base folder
* @throws \OpenSpout\Common\Exception\IOException If the folder path is not inside of the base folder
*/
public function deleteFolderRecursively(string $folderPath): void
public function deleteFolderRecursively($folderPath)
{
$this->throwIfOperationNotInBaseFolder($folderPath);
$itemIterator = new RecursiveIteratorIterator(
new RecursiveDirectoryIterator($folderPath, RecursiveDirectoryIterator::SKIP_DOTS),
RecursiveIteratorIterator::CHILD_FIRST
$itemIterator = new \RecursiveIteratorIterator(
new \RecursiveDirectoryIterator($folderPath, \RecursiveDirectoryIterator::SKIP_DOTS),
\RecursiveIteratorIterator::CHILD_FIRST
);
foreach ($itemIterator as $item) {
@ -147,16 +121,16 @@ final readonly class FileSystemHelper implements FileSystemHelperInterface
*
* @param string $operationFolderPath The path of the folder where the I/O operation should occur
*
* @throws IOException If the folder where the I/O operation should occur
* @throws \OpenSpout\Common\Exception\IOException If the folder where the I/O operation should occur
* is not inside the base folder or the base folder does not exist
*/
private function throwIfOperationNotInBaseFolder(string $operationFolderPath): void
protected function throwIfOperationNotInBaseFolder(string $operationFolderPath)
{
$operationFolderRealPath = realpath($operationFolderPath);
if (false === $operationFolderRealPath) {
throw new IOException("Folder not found: {$operationFolderRealPath}");
if (!$this->baseFolderRealPath) {
throw new IOException("The base folder path is invalid: {$this->baseFolderRealPath}");
}
$isInBaseFolder = str_starts_with($operationFolderRealPath, $this->baseFolderRealPath);
$isInBaseFolder = (0 === strpos($operationFolderRealPath, $this->baseFolderRealPath));
if (!$isInBaseFolder) {
throw new IOException("Cannot perform I/O operation outside of the base folder: {$this->baseFolderRealPath}");
}

View File

@ -1,13 +1,10 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Common\Helper;
use OpenSpout\Common\Exception\IOException;
/**
* @internal
* This interface describes helper functions to help with the file system operations
* like files/folders creation & deletion.
*/
interface FileSystemHelperInterface
{
@ -17,11 +14,11 @@ interface FileSystemHelperInterface
* @param string $parentFolderPath The parent folder path under which the folder is going to be created
* @param string $folderName The name of the folder to create
*
* @return string Path of the created folder
* @throws \OpenSpout\Common\Exception\IOException If unable to create the folder or if the folder path is not inside of the base folder
*
* @throws IOException If unable to create the folder or if the folder path is not inside of the base folder
* @return string Path of the created folder
*/
public function createFolder(string $parentFolderPath, string $folderName): string;
public function createFolder($parentFolderPath, $folderName);
/**
* Creates a file with the given name and content in the given folder.
@ -31,27 +28,27 @@ interface FileSystemHelperInterface
* @param string $fileName The name of the file to create
* @param string $fileContents The contents of the file to create
*
* @return string Path of the created file
* @throws \OpenSpout\Common\Exception\IOException If unable to create the file or if the file path is not inside of the base folder
*
* @throws IOException If unable to create the file or if the file path is not inside of the base folder
* @return string Path of the created file
*/
public function createFileWithContents(string $parentFolderPath, string $fileName, string $fileContents): string;
public function createFileWithContents($parentFolderPath, $fileName, $fileContents);
/**
* Delete the file at the given path.
*
* @param string $filePath Path of the file to delete
*
* @throws IOException If the file path is not inside of the base folder
* @throws \OpenSpout\Common\Exception\IOException If the file path is not inside of the base folder
*/
public function deleteFile(string $filePath): void;
public function deleteFile($filePath);
/**
* Delete the folder at the given path as well as all its contents.
*
* @param string $folderPath Path of the folder to delete
*
* @throws IOException If the folder path is not inside of the base folder
* @throws \OpenSpout\Common\Exception\IOException If the folder path is not inside of the base folder
*/
public function deleteFolderRecursively(string $folderPath): void;
public function deleteFolderRecursively($folderPath);
}

View File

@ -0,0 +1,371 @@
<?php
namespace OpenSpout\Common\Helper;
/**
* This class wraps global functions to facilitate testing.
*
* @codeCoverageIgnore
*/
class GlobalFunctionsHelper
{
    /**
     * Wrapper around global function fopen().
     *
     * @see fopen()
     *
     * @param string $fileName
     * @param string $mode
     *
     * @return bool|resource
     */
    public function fopen($fileName, $mode)
    {
        return fopen($fileName, $mode);
    }

    /**
     * Wrapper around global function fgets().
     *
     * @see fgets()
     *
     * @param resource $handle
     * @param null|int $length Maximum number of bytes to read, or NULL to read a whole line
     *
     * @return false|string
     */
    public function fgets($handle, $length = null)
    {
        // Before PHP 8 an explicit NULL length is coerced to 0 and rejected
        // ("Length parameter must be greater than 0"), so the argument is only
        // forwarded when the caller actually provided one.
        if (null === $length) {
            return fgets($handle);
        }

        return fgets($handle, $length);
    }

    /**
     * Wrapper around global function fputs().
     *
     * @see fputs()
     *
     * @param resource $handle
     * @param string   $string
     *
     * @return false|int
     */
    public function fputs($handle, $string)
    {
        // fputs() is an alias of fwrite()
        return fwrite($handle, $string);
    }

    /**
     * Wrapper around global function fflush().
     *
     * @see fflush()
     *
     * @param resource $handle
     *
     * @return bool
     */
    public function fflush($handle)
    {
        return fflush($handle);
    }

    /**
     * Wrapper around global function fseek().
     *
     * @see fseek()
     *
     * @param resource $handle
     * @param int      $offset
     *
     * @return int
     */
    public function fseek($handle, $offset)
    {
        return fseek($handle, $offset);
    }

    /**
     * Wrapper around global function fgetcsv().
     *
     * @see fgetcsv()
     *
     * @param resource    $handle
     * @param null|int    $length    Maximum line length, NULL for no limit
     * @param null|string $delimiter Field delimiter, NULL for the default ","
     * @param null|string $enclosure Field enclosure, NULL for the default '"'
     *
     * @return array|false
     */
    public function fgetcsv($handle, $length = null, $delimiter = null, $enclosure = null)
    {
        /**
         * PHP uses '\' as the default escape character. This is not RFC-4180 compliant...
         * To fix that, simply disable the escape character.
         *
         * @see https://bugs.php.net/bug.php?id=43225
         * @see http://tools.ietf.org/html/rfc4180
         */
        $escapeCharacter = \PHP_VERSION_ID >= 70400 ? '' : "\0";

        // NULL placeholders are mapped to the native defaults: forwarding NULL would be coerced
        // to an empty delimiter/enclosure, which fgetcsv() refuses. A length of 0 means "no limit".
        return fgetcsv($handle, $length ?? 0, $delimiter ?? ',', $enclosure ?? '"', $escapeCharacter);
    }

    /**
     * Wrapper around global function fputcsv().
     *
     * @see fputcsv()
     *
     * @param resource    $handle
     * @param array       $fields    Values to write as one CSV line
     * @param null|string $delimiter Field delimiter, NULL for the default ","
     * @param null|string $enclosure Field enclosure, NULL for the default '"'
     *
     * @return false|int
     */
    public function fputcsv($handle, array $fields, $delimiter = null, $enclosure = null)
    {
        /**
         * PHP uses '\' as the default escape character. This is not RFC-4180 compliant...
         * To fix that, simply disable the escape character.
         *
         * @see https://bugs.php.net/bug.php?id=43225
         * @see http://tools.ietf.org/html/rfc4180
         */
        $escapeCharacter = \PHP_VERSION_ID >= 70400 ? '' : "\0";

        // Same NULL handling as in fgetcsv(): fall back to the native defaults.
        return fputcsv($handle, $fields, $delimiter ?? ',', $enclosure ?? '"', $escapeCharacter);
    }

    /**
     * Wrapper around global function fwrite().
     *
     * @see fwrite()
     *
     * @param resource $handle
     * @param string   $string
     *
     * @return false|int
     */
    public function fwrite($handle, $string)
    {
        return fwrite($handle, $string);
    }

    /**
     * Wrapper around global function fclose().
     *
     * @see fclose()
     *
     * @param resource $handle
     *
     * @return bool
     */
    public function fclose($handle)
    {
        return fclose($handle);
    }

    /**
     * Wrapper around global function rewind().
     *
     * @see rewind()
     *
     * @param resource $handle
     *
     * @return bool
     */
    public function rewind($handle)
    {
        return rewind($handle);
    }

    /**
     * Wrapper around global function file_exists().
     *
     * @see file_exists()
     *
     * @param string $fileName
     *
     * @return bool
     */
    public function file_exists($fileName)
    {
        return file_exists($fileName);
    }

    /**
     * Wrapper around global function file_get_contents().
     * The path is first converted to a real path (see convertToUseRealPath()).
     *
     * @see file_get_contents()
     *
     * @param string $filePath
     *
     * @return false|string
     */
    public function file_get_contents($filePath)
    {
        $realFilePath = $this->convertToUseRealPath($filePath);

        return file_get_contents($realFilePath);
    }

    /**
     * Wrapper around global function feof().
     *
     * @see feof()
     *
     * @param resource $handle
     *
     * @return bool
     */
    public function feof($handle)
    {
        return feof($handle);
    }

    /**
     * Wrapper around global function is_readable().
     *
     * @see is_readable()
     *
     * @param string $fileName
     *
     * @return bool
     */
    public function is_readable($fileName)
    {
        return is_readable($fileName);
    }

    /**
     * Wrapper around global function basename().
     *
     * @see basename()
     *
     * @param string $path
     * @param string $suffix
     *
     * @return string
     */
    public function basename($path, $suffix = '')
    {
        return basename($path, $suffix);
    }

    /**
     * Wrapper around global function header().
     *
     * @see header()
     *
     * @param string $string
     */
    public function header($string)
    {
        header($string);
    }

    /**
     * Wrapper around global function ob_end_clean().
     * The buffer is only discarded when it currently holds some content.
     *
     * @see ob_end_clean()
     */
    public function ob_end_clean()
    {
        if (ob_get_length() > 0) {
            ob_end_clean();
        }
    }

    /**
     * Wrapper around global function iconv().
     * Note that the argument order differs from the native function: the string comes first.
     *
     * @see iconv()
     *
     * @param string $string         The string to be converted
     * @param string $sourceEncoding The encoding of the source string
     * @param string $targetEncoding The encoding the source string should be converted to
     *
     * @return bool|string the converted string or FALSE on failure
     */
    public function iconv($string, $sourceEncoding, $targetEncoding)
    {
        return iconv($sourceEncoding, $targetEncoding, $string);
    }

    /**
     * Wrapper around global function mb_convert_encoding().
     * Note that the argument order differs from the native function: source encoding comes before target.
     *
     * @see mb_convert_encoding()
     *
     * @param string $string         The string to be converted
     * @param string $sourceEncoding The encoding of the source string
     * @param string $targetEncoding The encoding the source string should be converted to
     *
     * @return bool|string the converted string or FALSE on failure
     */
    public function mb_convert_encoding($string, $sourceEncoding, $targetEncoding)
    {
        return mb_convert_encoding($string, $targetEncoding, $sourceEncoding);
    }

    /**
     * Wrapper around global function stream_get_wrappers().
     *
     * @see stream_get_wrappers()
     *
     * @return array
     */
    public function stream_get_wrappers()
    {
        return stream_get_wrappers();
    }

    /**
     * Wrapper around global function function_exists().
     *
     * @see function_exists()
     *
     * @param string $functionName
     *
     * @return bool
     */
    public function function_exists($functionName)
    {
        return \function_exists($functionName);
    }

    /**
     * Updates the given file path to use a real path.
     * This is to avoid issues on some Windows setup.
     *
     * For "zip://archive#entry" paths only the archive part is resolved. When the path
     * cannot be resolved (realpath() fails or the zip path is malformed), the given path is
     * returned untouched so the subsequent I/O call reports the real, meaningful path
     * instead of operating on an empty one.
     *
     * @param string $filePath File path
     *
     * @return string The file path using a real path
     */
    protected function convertToUseRealPath($filePath)
    {
        if ($this->isZipStream($filePath)) {
            if (!preg_match('/zip:\/\/(.*)#(.*)/', $filePath, $matches)) {
                return $filePath;
            }

            $documentRealPath = realpath($matches[1]);
            if (false === $documentRealPath) {
                return $filePath;
            }

            return 'zip://'.$documentRealPath.'#'.$matches[2];
        }

        $realFilePath = realpath($filePath);

        return false !== $realFilePath ? $realFilePath : $filePath;
    }

    /**
     * Returns whether the given path is a zip stream.
     *
     * @param string $path Path pointing to a document
     *
     * @return bool TRUE if path is a zip stream, FALSE otherwise
     */
    protected function isZipStream($path)
    {
        return 0 === strpos($path, 'zip://');
    }
}

View File

@ -0,0 +1,108 @@
<?php
namespace OpenSpout\Common\Helper;
/**
* This class provides helper functions to work with strings and multibyte strings.
*
* @codeCoverageIgnore
*/
class StringHelper
{
    /** @var bool TRUE when the "mbstring" extension is loaded */
    protected $hasMbstringSupport;

    /** @var bool TRUE when the running PHP version is lower than 8.0.0 */
    private $isRunningPhp7OrOlder;

    /** @var array Result of localeconv() captured at construction time, used for number formatting */
    private $localeInfo;

    public function __construct()
    {
        $this->localeInfo = localeconv();
        $this->isRunningPhp7OrOlder = version_compare(PHP_VERSION, '8.0.0', '<');
        $this->hasMbstringSupport = \extension_loaded('mbstring');
    }

    /**
     * Computes the length of a string, through the multi-byte function when it is available.
     *
     * @see strlen
     * @see mb_strlen
     *
     * @param string $string
     *
     * @return int
     */
    public function getStringLength($string)
    {
        if ($this->hasMbstringSupport) {
            return mb_strlen($string);
        }

        return \strlen($string);
    }

    /**
     * Finds where the given character/substring first appears in the given string,
     * through the multi-byte function when it is available.
     *
     * @see strpos
     * @see mb_strpos
     *
     * @param string $char   Needle
     * @param string $string Haystack
     *
     * @return int Zero-based position of the first occurrence, or -1 if the needle is absent
     */
    public function getCharFirstOccurrencePosition($char, $string)
    {
        if ($this->hasMbstringSupport) {
            $foundAt = mb_strpos($string, $char);
        } else {
            $foundAt = strpos($string, $char);
        }

        if (false === $foundAt) {
            return -1;
        }

        return $foundAt;
    }

    /**
     * Finds where the given character/substring last appears in the given string,
     * through the multi-byte function when it is available.
     *
     * @see strrpos
     * @see mb_strrpos
     *
     * @param string $char   Needle
     * @param string $string Haystack
     *
     * @return int Zero-based position of the last occurrence, or -1 if the needle is absent
     */
    public function getCharLastOccurrencePosition($char, $string)
    {
        if ($this->hasMbstringSupport) {
            $foundAt = mb_strrpos($string, $char);
        } else {
            $foundAt = strrpos($string, $char);
        }

        if (false === $foundAt) {
            return -1;
        }

        return $foundAt;
    }

    /**
     * Formats a numeric value (int or float) in a way that's compatible with the expected spreadsheet format.
     *
     * Casting a float to string is locale dependent in PHP < 8: thousands separators and decimal
     * points vary from locale to locale (en_US: 12.34 vs pl_PL: 12,34), whereas the stored value
     * must have no thousands separator and a "." as decimal point. On those PHP versions floats
     * are therefore converted to a normalized string; every other value is handed back as is.
     *
     * @see https://wiki.php.net/rfc/locale_independent_float_to_string for the changed behavior in PHP8.
     *
     * @param float|int $numericValue
     *
     * @return float|int|string
     */
    public function formatNumericValue($numericValue)
    {
        $needsLocaleNormalization = $this->isRunningPhp7OrOlder && \is_float($numericValue);
        if (!$needsLocaleNormalization) {
            return $numericValue;
        }

        $localizedSeparators = [$this->localeInfo['thousands_sep'], $this->localeInfo['decimal_point']];
        $neutralSeparators = ['', '.'];

        return str_replace($localizedSeparators, $neutralSeparators, (string) $numericValue);
    }
}

View File

@ -0,0 +1,82 @@
<?php
namespace OpenSpout\Common\Manager;
abstract class OptionsManagerAbstract implements OptionsManagerInterface
{
    public const PREFIX_OPTION = 'OPTION_';

    /** @var string[] Names of the options this manager accepts */
    private $supportedOptions = [];

    /** @var array Current values, indexed by option name [OPTION_NAME => OPTION_VALUE] */
    private $options = [];

    /**
     * Loads the list of supported options from the child class, then applies its defaults.
     */
    public function __construct()
    {
        $this->supportedOptions = $this->getSupportedOptions();
        $this->setDefaultOptions();
    }

    /**
     * Stores the value of an option. Unsupported option names are silently ignored.
     *
     * @param string $optionName
     * @param mixed  $optionValue
     */
    public function setOption($optionName, $optionValue)
    {
        if (!\in_array($optionName, $this->supportedOptions, true)) {
            return;
        }

        $this->options[$optionName] = $optionValue;
    }

    /**
     * Appends a value to an option holding a list of values.
     * Used only for mergeCells() for now.
     *
     * A missing (or null) option starts as an empty list; an existing non-array value becomes
     * the first element of the list. Unsupported option names are silently ignored.
     *
     * @param mixed $optionName
     * @param mixed $optionValue
     */
    public function addOption($optionName, $optionValue)
    {
        if (!\in_array($optionName, $this->supportedOptions, true)) {
            return;
        }

        $values = $this->options[$optionName] ?? null;
        if (null === $values) {
            $values = [];
        } elseif (!\is_array($values)) {
            $values = [$values];
        }

        $values[] = $optionValue;
        $this->options[$optionName] = $values;
    }

    /**
     * @param string $optionName
     *
     * @return null|mixed The set option or NULL if no option with given name found
     */
    public function getOption($optionName)
    {
        return $this->options[$optionName] ?? null;
    }

    /**
     * @return array List of supported options
     */
    abstract protected function getSupportedOptions();

    /**
     * Sets the default options.
     * To be overridden by child classes.
     */
    abstract protected function setDefaultOptions();
}

View File

@ -0,0 +1,31 @@
<?php
namespace OpenSpout\Common\Manager;
/**
* Interface OptionsManagerInterface.
*/
// Contract for objects storing named options: set a value, read a value,
// or append a value to an option.
interface OptionsManagerInterface
{
/**
* Sets the value of the given option.
*
* @param string $optionName
* @param mixed $optionValue
*/
public function setOption($optionName, $optionValue);
/**
* Reads the value of the given option.
*
* @param string $optionName
*
* @return null|mixed The set option or NULL if no option with given name found
*/
public function getOption($optionName);
/**
* Add an option to the internal list of options
* Used only for mergeCells() for now.
*
* @param mixed $optionName
* @param mixed $optionValue
*/
public function addOption($optionName, $optionValue);
}

View File

@ -0,0 +1,13 @@
<?php
namespace OpenSpout\Common;
/**
* This class references the supported types.
*/
// Declared abstract: the class only carries constants and cannot be instantiated.
abstract class Type
{
// Identifier of the CSV type
public const CSV = 'csv';
// Identifier of the XLSX type
public const XLSX = 'xlsx';
// Identifier of the ODS type
public const ODS = 'ods';
}

View File

@ -0,0 +1,98 @@
<?php
namespace OpenSpout\Reader\CSV\Creator;
use OpenSpout\Common\Creator\HelperFactory;
use OpenSpout\Common\Entity\Cell;
use OpenSpout\Common\Entity\Row;
use OpenSpout\Common\Helper\GlobalFunctionsHelper;
use OpenSpout\Common\Manager\OptionsManagerInterface;
use OpenSpout\Reader\Common\Creator\InternalEntityFactoryInterface;
use OpenSpout\Reader\CSV\RowIterator;
use OpenSpout\Reader\CSV\Sheet;
use OpenSpout\Reader\CSV\SheetIterator;
/**
* Factory to create entities.
*/
class InternalEntityFactory implements InternalEntityFactoryInterface
{
    /** @var HelperFactory Factory used to obtain the encoding helper */
    private $helperFactory;

    public function __construct(HelperFactory $helperFactory)
    {
        $this->helperFactory = $helperFactory;
    }

    /**
     * Builds the iterator over the single "sheet" of a CSV file: a row iterator is created,
     * wrapped into a sheet, which is in turn wrapped into the sheet iterator.
     *
     * @param resource                $filePointer           Pointer to the CSV file to read
     * @param OptionsManagerInterface $optionsManager
     * @param GlobalFunctionsHelper   $globalFunctionsHelper
     *
     * @return SheetIterator
     */
    public function createSheetIterator($filePointer, $optionsManager, $globalFunctionsHelper)
    {
        return new SheetIterator(
            $this->createSheet(
                $this->createRowIterator($filePointer, $optionsManager, $globalFunctionsHelper)
            )
        );
    }

    /**
     * Builds a row out of the given cells; NULL is passed as the second Row argument.
     *
     * @param Cell[] $cells
     *
     * @return Row
     */
    public function createRow(array $cells = [])
    {
        $row = new Row($cells, null);

        return $row;
    }

    /**
     * Wraps a value into a cell.
     *
     * @param mixed $cellValue
     *
     * @return Cell
     */
    public function createCell($cellValue)
    {
        $cell = new Cell($cellValue);

        return $cell;
    }

    /**
     * Builds a row from raw values: each value is turned into a cell (array keys are kept).
     *
     * @return Row
     */
    public function createRowFromArray(array $cellValues = [])
    {
        $cells = [];
        foreach ($cellValues as $index => $value) {
            $cells[$index] = $this->createCell($value);
        }

        return $this->createRow($cells);
    }

    /**
     * Wraps a row iterator into a sheet.
     *
     * @param RowIterator $rowIterator
     *
     * @return Sheet
     */
    private function createSheet($rowIterator)
    {
        $sheet = new Sheet($rowIterator);

        return $sheet;
    }

    /**
     * Builds the row iterator, handing it an encoding helper obtained from the helper factory
     * as well as this factory itself.
     *
     * @param resource                $filePointer           Pointer to the CSV file to read
     * @param OptionsManagerInterface $optionsManager
     * @param GlobalFunctionsHelper   $globalFunctionsHelper
     *
     * @return RowIterator
     */
    private function createRowIterator($filePointer, $optionsManager, $globalFunctionsHelper)
    {
        return new RowIterator(
            $filePointer,
            $optionsManager,
            $this->helperFactory->createEncodingHelper($globalFunctionsHelper),
            $this,
            $globalFunctionsHelper
        );
    }
}

View File

@ -0,0 +1,39 @@
<?php
namespace OpenSpout\Reader\CSV\Manager;
use OpenSpout\Common\Helper\EncodingHelper;
use OpenSpout\Common\Manager\OptionsManagerAbstract;
use OpenSpout\Reader\Common\Entity\Options;
/**
 * Options manager of the CSV reader: lists the options it accepts and their initial values.
 */
class OptionsManager extends OptionsManagerAbstract
{
    /**
     * {@inheritdoc}
     */
    protected function getSupportedOptions()
    {
        $supportedOptions = [
            Options::SHOULD_FORMAT_DATES,
            Options::SHOULD_PRESERVE_EMPTY_ROWS,
            Options::FIELD_DELIMITER,
            Options::FIELD_ENCLOSURE,
            Options::ENCODING,
        ];

        return $supportedOptions;
    }

    /**
     * {@inheritdoc}
     */
    protected function setDefaultOptions()
    {
        // Applied in declaration order, one setOption() call per entry.
        $defaults = [
            Options::SHOULD_FORMAT_DATES => false,
            Options::SHOULD_PRESERVE_EMPTY_ROWS => false,
            Options::FIELD_DELIMITER => ',',
            Options::FIELD_ENCLOSURE => '"',
            Options::ENCODING => EncodingHelper::ENCODING_UTF8,
        ];

        foreach ($defaults as $optionName => $defaultValue) {
            $this->setOption($optionName, $defaultValue);
        }
    }
}

View File

@ -0,0 +1,149 @@
<?php
namespace OpenSpout\Reader\CSV;
use OpenSpout\Common\Exception\IOException;
use OpenSpout\Common\Helper\GlobalFunctionsHelper;
use OpenSpout\Common\Manager\OptionsManagerInterface;
use OpenSpout\Reader\Common\Creator\InternalEntityFactoryInterface;
use OpenSpout\Reader\Common\Entity\Options;
use OpenSpout\Reader\CSV\Creator\InternalEntityFactory;
use OpenSpout\Reader\ReaderAbstract;
/**
 * This class provides support to read data from a CSV file.
 * The file is exposed as a single "sheet" through a SheetIterator.
 */
class Reader extends ReaderAbstract
{
    /** @var resource Pointer to the file to be read */
    protected $filePointer;

    /** @var SheetIterator To iterate over the CSV unique "sheet" */
    protected $sheetIterator;

    /** @var null|false|string Value of the "auto_detect_line_endings" INI setting captured by openReader(); null until captured */
    protected $originalAutoDetectLineEndings;

    /** @var bool Whether the code is running with PHP >= 8.1 */
    private $isRunningAtLeastPhp81;

    public function __construct(
        OptionsManagerInterface $optionsManager,
        GlobalFunctionsHelper $globalFunctionsHelper,
        InternalEntityFactoryInterface $entityFactory
    ) {
        parent::__construct($optionsManager, $globalFunctionsHelper, $entityFactory);
        $this->isRunningAtLeastPhp81 = version_compare(PHP_VERSION, '8.1.0') >= 0;
    }

    /**
     * Sets the field delimiter for the CSV.
     * Needs to be called before opening the reader.
     *
     * @param string $fieldDelimiter Character that delimits fields
     *
     * @return Reader
     */
    public function setFieldDelimiter($fieldDelimiter)
    {
        $this->optionsManager->setOption(Options::FIELD_DELIMITER, $fieldDelimiter);

        return $this;
    }

    /**
     * Sets the field enclosure for the CSV.
     * Needs to be called before opening the reader.
     *
     * @param string $fieldEnclosure Character that encloses fields
     *
     * @return Reader
     */
    public function setFieldEnclosure($fieldEnclosure)
    {
        $this->optionsManager->setOption(Options::FIELD_ENCLOSURE, $fieldEnclosure);

        return $this;
    }

    /**
     * Sets the encoding of the CSV file to be read.
     * Needs to be called before opening the reader.
     *
     * @param string $encoding Encoding of the CSV file to be read
     *
     * @return Reader
     */
    public function setEncoding($encoding)
    {
        $this->optionsManager->setOption(Options::ENCODING, $encoding);

        return $this;
    }

    /**
     * Returns whether stream wrappers are supported.
     *
     * @return bool Always TRUE for the CSV reader
     */
    protected function doesSupportStreamWrapper()
    {
        return true;
    }

    /**
     * Opens the file at the given path to make it ready to be read.
     * If setEncoding() was not called, it assumes that the file is encoded in UTF-8.
     *
     * @param string $filePath Path of the CSV file to be read
     *
     * @throws \OpenSpout\Common\Exception\IOException If the file cannot be opened
     */
    protected function openReader($filePath)
    {
        // "auto_detect_line_endings" is deprecated in PHP 8.1
        // On older versions, remember the current value so closeReader() can put it back.
        if (!$this->isRunningAtLeastPhp81) {
            $this->originalAutoDetectLineEndings = ini_get('auto_detect_line_endings');
            ini_set('auto_detect_line_endings', '1');
        }

        $this->filePointer = $this->globalFunctionsHelper->fopen($filePath, 'r');
        if (!$this->filePointer) {
            throw new IOException("Could not open file {$filePath} for reading.");
        }

        /** @var InternalEntityFactory $entityFactory */
        $entityFactory = $this->entityFactory;

        $this->sheetIterator = $entityFactory->createSheetIterator(
            $this->filePointer,
            $this->optionsManager,
            $this->globalFunctionsHelper
        );
    }

    /**
     * Returns an iterator to iterate over sheets.
     *
     * @return SheetIterator To iterate over sheets
     */
    protected function getConcreteSheetIterator()
    {
        return $this->sheetIterator;
    }

    /**
     * Closes the reader. To be used after reading the file.
     * Closes the file pointer if it is still an open resource and restores the
     * "auto_detect_line_endings" INI setting that openReader() changed.
     */
    protected function closeReader()
    {
        if (\is_resource($this->filePointer)) {
            $this->globalFunctionsHelper->fclose($this->filePointer);
        }

        // "auto_detect_line_endings" is deprecated in PHP 8.1
        // Only write the setting back when openReader() actually captured a value: passing an
        // uncaptured null (or a false from ini_get) would be coerced to '' and turn the setting off.
        if (!$this->isRunningAtLeastPhp81 && \is_string($this->originalAutoDetectLineEndings)) {
            ini_set('auto_detect_line_endings', $this->originalAutoDetectLineEndings);
        }
    }
}

View File

@ -1,53 +1,76 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\CSV;
use OpenSpout\Common\Entity\Cell;
use OpenSpout\Common\Entity\Row;
use OpenSpout\Common\Exception\EncodingConversionException;
use OpenSpout\Common\Helper\EncodingHelper;
use OpenSpout\Common\Helper\GlobalFunctionsHelper;
use OpenSpout\Common\Manager\OptionsManagerInterface;
use OpenSpout\Reader\Common\Entity\Options;
use OpenSpout\Reader\CSV\Creator\InternalEntityFactory;
use OpenSpout\Reader\RowIteratorInterface;
/**
* Iterate over CSV rows.
*/
final class RowIterator implements RowIteratorInterface
class RowIterator implements RowIteratorInterface
{
/**
* Value passed to fgetcsv. 0 means "unlimited" (slightly slower but accommodates for very long lines).
* Value passed to fgetcsv. 0 means "unlimited" (slightly slower but accommodates very long lines).
*/
public const MAX_READ_BYTES_PER_LINE = 0;
/** @var null|resource Pointer to the CSV file to read */
private $filePointer;
protected $filePointer;
/** @var int Number of read rows */
private int $numReadRows = 0;
protected $numReadRows = 0;
/** @var null|Row Buffer used to store the current row, while checking if there are more rows to read */
private ?Row $rowBuffer = null;
protected $rowBuffer;
/** @var bool Indicates whether all rows have been read */
private bool $hasReachedEndOfFile = false;
protected $hasReachedEndOfFile = false;
private readonly Options $options;
/** @var string Defines the character used to delimit fields (one character only) */
protected $fieldDelimiter;
/** @var EncodingHelper Helper to work with different encodings */
private readonly EncodingHelper $encodingHelper;
/** @var string Defines the character used to enclose fields (one character only) */
protected $fieldEnclosure;
/** @var string Encoding of the CSV file to be read */
protected $encoding;
/** @var bool Whether empty rows should be returned or skipped */
protected $shouldPreserveEmptyRows;
/** @var \OpenSpout\Common\Helper\EncodingHelper Helper to work with different encodings */
protected $encodingHelper;
/** @var \OpenSpout\Reader\CSV\Creator\InternalEntityFactory Factory to create entities */
protected $entityFactory;
/** @var \OpenSpout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;
/**
* @param resource $filePointer Pointer to the CSV file to read
*/
public function __construct(
$filePointer,
Options $options,
EncodingHelper $encodingHelper
OptionsManagerInterface $optionsManager,
EncodingHelper $encodingHelper,
InternalEntityFactory $entityFactory,
GlobalFunctionsHelper $globalFunctionsHelper
) {
$this->filePointer = $filePointer;
$this->options = $options;
$this->fieldDelimiter = $optionsManager->getOption(Options::FIELD_DELIMITER);
$this->fieldEnclosure = $optionsManager->getOption(Options::FIELD_ENCLOSURE);
$this->encoding = $optionsManager->getOption(Options::ENCODING);
$this->shouldPreserveEmptyRows = $optionsManager->getOption(Options::SHOULD_PRESERVE_EMPTY_ROWS);
$this->encodingHelper = $encodingHelper;
$this->entityFactory = $entityFactory;
$this->globalFunctionsHelper = $globalFunctionsHelper;
}
/**
@ -55,6 +78,7 @@ final class RowIterator implements RowIteratorInterface
*
* @see http://php.net/manual/en/iterator.rewind.php
*/
#[\ReturnTypeWillChange]
public function rewind(): void
{
$this->rewindAndSkipBom();
@ -70,9 +94,10 @@ final class RowIterator implements RowIteratorInterface
*
* @see http://php.net/manual/en/iterator.valid.php
*/
#[\ReturnTypeWillChange]
public function valid(): bool
{
return null !== $this->filePointer && !$this->hasReachedEndOfFile;
return $this->filePointer && !$this->hasReachedEndOfFile;
}
/**
@ -80,11 +105,12 @@ final class RowIterator implements RowIteratorInterface
*
* @see http://php.net/manual/en/iterator.next.php
*
* @throws EncodingConversionException If unable to convert data to UTF-8
* @throws \OpenSpout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
*/
#[\ReturnTypeWillChange]
public function next(): void
{
$this->hasReachedEndOfFile = feof($this->filePointer);
$this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
if (!$this->hasReachedEndOfFile) {
$this->readDataForNextRow();
@ -96,6 +122,7 @@ final class RowIterator implements RowIteratorInterface
*
* @see http://php.net/manual/en/iterator.current.php
*/
#[\ReturnTypeWillChange]
public function current(): ?Row
{
return $this->rowBuffer;
@ -106,27 +133,37 @@ final class RowIterator implements RowIteratorInterface
*
* @see http://php.net/manual/en/iterator.key.php
*/
#[\ReturnTypeWillChange]
public function key(): int
{
return $this->numReadRows;
}
/**
* This rewinds and skips the BOM if inserted at the beginning of the file
* by moving the file pointer after it, so that it is not read.
* Cleans up what was created to iterate over the object.
*/
private function rewindAndSkipBom(): void
#[\ReturnTypeWillChange]
public function end(): void
{
$byteOffsetToSkipBom = $this->encodingHelper->getBytesOffsetToSkipBOM($this->filePointer, $this->options->ENCODING);
// sets the cursor after the BOM (0 means no BOM, so rewind it)
fseek($this->filePointer, $byteOffsetToSkipBom);
// do nothing
}
/**
* @throws EncodingConversionException If unable to convert data to UTF-8
* This rewinds and skips the BOM if inserted at the beginning of the file
* by moving the file pointer after it, so that it is not read.
*/
private function readDataForNextRow(): void
protected function rewindAndSkipBom()
{
$byteOffsetToSkipBom = $this->encodingHelper->getBytesOffsetToSkipBOM($this->filePointer, $this->encoding);
// sets the cursor after the BOM (0 means no BOM, so rewind it)
$this->globalFunctionsHelper->fseek($this->filePointer, $byteOffsetToSkipBom);
}
/**
* @throws \OpenSpout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
*/
protected function readDataForNextRow()
{
do {
$rowData = $this->getNextUTF8EncodedRow();
@ -134,10 +171,8 @@ final class RowIterator implements RowIteratorInterface
if (false !== $rowData) {
// array_map will replace NULL values by empty strings
$rowDataBufferAsArray = array_map('\strval', $rowData);
$this->rowBuffer = new Row(array_map(static function ($cellValue) {
return Cell::fromValue($cellValue);
}, $rowDataBufferAsArray), null);
$rowDataBufferAsArray = array_map(function ($value) { return (string) $value; }, $rowData);
$this->rowBuffer = $this->entityFactory->createRowFromArray($rowDataBufferAsArray);
++$this->numReadRows;
} else {
// If we reach this point, it means end of file was reached.
@ -147,19 +182,20 @@ final class RowIterator implements RowIteratorInterface
}
/**
* @param array<int, null|string>|bool $currentRowData
* @param array|bool $currentRowData
*
* @return bool Whether the data for the current row can be returned or if we need to keep reading
*/
private function shouldReadNextRow($currentRowData): bool
protected function shouldReadNextRow($currentRowData)
{
$hasSuccessfullyFetchedRowData = (false !== $currentRowData);
$hasNowReachedEndOfFile = feof($this->filePointer);
$hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer);
$isEmptyLine = $this->isEmptyLine($currentRowData);
return
(!$hasSuccessfullyFetchedRowData && !$hasNowReachedEndOfFile)
|| (!$this->options->SHOULD_PRESERVE_EMPTY_ROWS && $isEmptyLine);
|| (!$this->shouldPreserveEmptyRows && $isEmptyLine)
;
}
/**
@ -167,25 +203,19 @@ final class RowIterator implements RowIteratorInterface
* As fgetcsv() does not manage correctly encoding for non UTF-8 data,
* we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes).
*
* @return array<int, null|string>|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
* @throws \OpenSpout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8
*
* @throws EncodingConversionException If unable to convert data to UTF-8
* @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read
*/
private function getNextUTF8EncodedRow(): array|false
protected function getNextUTF8EncodedRow()
{
$encodedRowData = fgetcsv(
$this->filePointer,
self::MAX_READ_BYTES_PER_LINE,
$this->options->FIELD_DELIMITER,
$this->options->FIELD_ENCLOSURE,
''
);
$encodedRowData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure);
if (false === $encodedRowData) {
return false;
}
foreach ($encodedRowData as $cellIndex => $cellValue) {
switch ($this->options->ENCODING) {
switch ($this->encoding) {
case EncodingHelper::ENCODING_UTF16_LE:
case EncodingHelper::ENCODING_UTF32_LE:
// remove whitespace from the beginning of a string as fgetcsv() add extra whitespace when it try to explode non UTF-8 data
@ -201,18 +231,18 @@ final class RowIterator implements RowIteratorInterface
break;
}
$encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->options->ENCODING);
$encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding);
}
return $encodedRowData;
}
/**
* @param array<int, null|string>|bool $lineData Array containing the cells value for the line
* @param array|bool $lineData Array containing the cells value for the line
*
* @return bool Whether the given line is empty
*/
private function isEmptyLine($lineData): bool
protected function isEmptyLine($lineData)
{
return \is_array($lineData) && 1 === \count($lineData) && null === $lineData[0];
}

View File

@ -1,18 +1,13 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\CSV;
use OpenSpout\Reader\SheetInterface;
/**
* @implements SheetInterface<RowIterator>
*/
final readonly class Sheet implements SheetInterface
class Sheet implements SheetInterface
{
/** @var RowIterator To iterate over the CSV's rows */
private RowIterator $rowIterator;
/** @var \OpenSpout\Reader\CSV\RowIterator To iterate over the CSV's rows */
protected $rowIterator;
/**
* @param RowIterator $rowIterator Corresponding row iterator
@ -22,7 +17,10 @@ final readonly class Sheet implements SheetInterface
$this->rowIterator = $rowIterator;
}
public function getRowIterator(): RowIterator
/**
* @return \OpenSpout\Reader\CSV\RowIterator
*/
public function getRowIterator()
{
return $this->rowIterator;
}
@ -30,7 +28,7 @@ final readonly class Sheet implements SheetInterface
/**
* @return int Index of the sheet
*/
public function getIndex(): int
public function getIndex()
{
return 0;
}
@ -38,7 +36,7 @@ final readonly class Sheet implements SheetInterface
/**
* @return string Name of the sheet - empty string since CSV does not support that
*/
public function getName(): string
public function getName()
{
return '';
}
@ -46,7 +44,15 @@ final readonly class Sheet implements SheetInterface
/**
* @return bool Always TRUE as there is only one sheet
*/
public function isActive(): bool
public function isActive()
{
return true;
}
/**
* Reports whether the CSV "sheet" is visible.
*
* @return bool Always TRUE as the only sheet is always visible
*/
public function isVisible()
{
return true;
}

View File

@ -1,26 +1,24 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\CSV;
use OpenSpout\Reader\SheetIteratorInterface;
/**
* @implements SheetIteratorInterface<Sheet>
* Iterate over CSV unique "sheet".
*/
final class SheetIterator implements SheetIteratorInterface
class SheetIterator implements SheetIteratorInterface
{
/** @var Sheet The CSV unique "sheet" */
private readonly Sheet $sheet;
protected $sheet;
/** @var bool Whether the unique "sheet" has already been read */
private bool $hasReadUniqueSheet = false;
protected $hasReadUniqueSheet = false;
/**
* @param Sheet $sheet Corresponding unique sheet
*/
public function __construct(Sheet $sheet)
public function __construct($sheet)
{
$this->sheet = $sheet;
}
@ -30,6 +28,7 @@ final class SheetIterator implements SheetIteratorInterface
*
* @see http://php.net/manual/en/iterator.rewind.php
*/
#[\ReturnTypeWillChange]
public function rewind(): void
{
$this->hasReadUniqueSheet = false;
@ -40,6 +39,7 @@ final class SheetIterator implements SheetIteratorInterface
*
* @see http://php.net/manual/en/iterator.valid.php
*/
#[\ReturnTypeWillChange]
public function valid(): bool
{
return !$this->hasReadUniqueSheet;
@ -50,6 +50,7 @@ final class SheetIterator implements SheetIteratorInterface
*
* @see http://php.net/manual/en/iterator.next.php
*/
#[\ReturnTypeWillChange]
public function next(): void
{
$this->hasReadUniqueSheet = true;
@ -60,6 +61,7 @@ final class SheetIterator implements SheetIteratorInterface
*
* @see http://php.net/manual/en/iterator.current.php
*/
#[\ReturnTypeWillChange]
public function current(): Sheet
{
return $this->sheet;
@ -70,8 +72,18 @@ final class SheetIterator implements SheetIteratorInterface
*
* @see http://php.net/manual/en/iterator.key.php
*/
#[\ReturnTypeWillChange]
public function key(): int
{
return 1;
}
/**
* Cleans up what was created to iterate over the object.
* Intentionally a no-op in this iterator: the body performs no work.
*/
#[\ReturnTypeWillChange]
public function end(): void
{
// do nothing
}
}

View File

@ -0,0 +1,26 @@
<?php
namespace OpenSpout\Reader\Common\Creator;
use OpenSpout\Common\Entity\Cell;
use OpenSpout\Common\Entity\Row;
/**
* Contract for the per-format internal entity factories: each one must be able
* to create rows and cells.
*/
interface InternalEntityFactoryInterface
{
/**
* Creates a row holding the given cells.
*
* @param Cell[] $cells Cells of the row; defaults to no cells
*
* @return Row
*/
public function createRow(array $cells = []);
/**
* Creates a cell holding the given value.
*
* @param mixed $cellValue
*
* @return Cell
*/
public function createCell($cellValue);
}

View File

@ -0,0 +1,72 @@
<?php
namespace OpenSpout\Reader\Common\Creator;
use OpenSpout\Common\Exception\UnsupportedTypeException;
use OpenSpout\Common\Type;
use OpenSpout\Reader\ReaderInterface;
/**
 * Public factory for readers; every method delegates to ReaderFactory.
 */
class ReaderEntityFactory
{
    /**
     * Creates a reader by file extension.
     *
     * @param string $path The path to the spreadsheet file. Supported extensions are .csv, .ods and .xlsx
     *
     * @throws \OpenSpout\Common\Exception\UnsupportedTypeException
     *
     * @return ReaderInterface
     */
    public static function createReaderFromFile(string $path)
    {
        return ReaderFactory::createFromFile($path);
    }

    /**
     * This creates an instance of a CSV reader.
     *
     * @return \OpenSpout\Reader\CSV\Reader
     */
    public static function createCSVReader()
    {
        return self::createReaderOrNull(Type::CSV);
    }

    /**
     * This creates an instance of a XLSX reader.
     *
     * @return \OpenSpout\Reader\XLSX\Reader
     */
    public static function createXLSXReader()
    {
        return self::createReaderOrNull(Type::XLSX);
    }

    /**
     * This creates an instance of a ODS reader.
     *
     * @return \OpenSpout\Reader\ODS\Reader
     */
    public static function createODSReader()
    {
        return self::createReaderOrNull(Type::ODS);
    }

    /**
     * Shared body of the typed creators: asks ReaderFactory for the reader and
     * turns an UnsupportedTypeException into a NULL result.
     *
     * @param string $readerType One of the Type constants
     *
     * @return null|ReaderInterface
     */
    private static function createReaderOrNull($readerType)
    {
        try {
            $reader = ReaderFactory::createFromType($readerType);
        } catch (UnsupportedTypeException $e) {
            // should never happen
            $reader = null;
        }

        return $reader;
    }
}

View File

@ -0,0 +1,109 @@
<?php
namespace OpenSpout\Reader\Common\Creator;
use OpenSpout\Common\Creator\HelperFactory;
use OpenSpout\Common\Exception\UnsupportedTypeException;
use OpenSpout\Common\Type;
use OpenSpout\Reader\CSV\Creator\InternalEntityFactory as CSVInternalEntityFactory;
use OpenSpout\Reader\CSV\Manager\OptionsManager as CSVOptionsManager;
use OpenSpout\Reader\CSV\Reader as CSVReader;
use OpenSpout\Reader\ODS\Creator\HelperFactory as ODSHelperFactory;
use OpenSpout\Reader\ODS\Creator\InternalEntityFactory as ODSInternalEntityFactory;
use OpenSpout\Reader\ODS\Creator\ManagerFactory as ODSManagerFactory;
use OpenSpout\Reader\ODS\Manager\OptionsManager as ODSOptionsManager;
use OpenSpout\Reader\ODS\Reader as ODSReader;
use OpenSpout\Reader\ReaderInterface;
use OpenSpout\Reader\XLSX\Creator\HelperFactory as XLSXHelperFactory;
use OpenSpout\Reader\XLSX\Creator\InternalEntityFactory as XLSXInternalEntityFactory;
use OpenSpout\Reader\XLSX\Creator\ManagerFactory as XLSXManagerFactory;
use OpenSpout\Reader\XLSX\Manager\OptionsManager as XLSXOptionsManager;
use OpenSpout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyFactory;
use OpenSpout\Reader\XLSX\Reader as XLSXReader;
/**
 * This factory is used to create readers, based on the type of the file to be read.
 * It supports CSV, XLSX and ODS formats.
 */
class ReaderFactory
{
    /**
     * Creates a reader by file extension.
     * The extension is lower-cased before being used as the reader type.
     *
     * @param string $path The path to the spreadsheet file. Supported extensions are .csv,.ods and .xlsx
     *
     * @throws \OpenSpout\Common\Exception\UnsupportedTypeException
     *
     * @return ReaderInterface
     */
    public static function createFromFile(string $path)
    {
        $extension = strtolower(pathinfo($path, PATHINFO_EXTENSION));

        return self::createFromType($extension);
    }

    /**
     * This creates an instance of the appropriate reader, given the type of the file to be read.
     *
     * @param string $readerType Type of the reader to instantiate (one of the Type constants)
     *
     * @throws \OpenSpout\Common\Exception\UnsupportedTypeException If the type matches none of the supported types
     *
     * @return ReaderInterface
     */
    public static function createFromType($readerType)
    {
        // Strict comparisons on purpose: a `switch` compares loosely, so a non-string value such
        // as `true` (or `0` before PHP 8) would match Type::CSV and silently yield a CSV reader.
        if (Type::CSV === $readerType) {
            return self::createCSVReader();
        }
        if (Type::XLSX === $readerType) {
            return self::createXLSXReader();
        }
        if (Type::ODS === $readerType) {
            return self::createODSReader();
        }

        throw new UnsupportedTypeException('No readers supporting the given type: '.$readerType);
    }

    /**
     * Assembles a CSV reader with its options manager, entity factory and global functions helper.
     *
     * @return CSVReader
     */
    private static function createCSVReader()
    {
        $optionsManager = new CSVOptionsManager();
        $helperFactory = new HelperFactory();
        $entityFactory = new CSVInternalEntityFactory($helperFactory);
        $globalFunctionsHelper = $helperFactory->createGlobalFunctionsHelper();

        return new CSVReader($optionsManager, $globalFunctionsHelper, $entityFactory);
    }

    /**
     * Assembles an XLSX reader; unlike the other readers it also receives the manager factory.
     *
     * @return XLSXReader
     */
    private static function createXLSXReader()
    {
        $optionsManager = new XLSXOptionsManager();
        $helperFactory = new XLSXHelperFactory();
        $managerFactory = new XLSXManagerFactory($helperFactory, new CachingStrategyFactory());
        $entityFactory = new XLSXInternalEntityFactory($managerFactory, $helperFactory);
        $globalFunctionsHelper = $helperFactory->createGlobalFunctionsHelper();

        return new XLSXReader($optionsManager, $globalFunctionsHelper, $entityFactory, $managerFactory);
    }

    /**
     * Assembles an ODS reader with its options manager, entity factory and global functions helper.
     *
     * @return ODSReader
     */
    private static function createODSReader()
    {
        $optionsManager = new ODSOptionsManager();
        $helperFactory = new ODSHelperFactory();
        $managerFactory = new ODSManagerFactory();
        $entityFactory = new ODSInternalEntityFactory($helperFactory, $managerFactory);
        $globalFunctionsHelper = $helperFactory->createGlobalFunctionsHelper();

        return new ODSReader($optionsManager, $globalFunctionsHelper, $entityFactory);
    }
}

View File

@ -0,0 +1,22 @@
<?php
namespace OpenSpout\Reader\Common\Entity;
/**
* Readers' options holder.
* Each constant is the name of an option, used as the key passed to the
* options managers' setOption() / getOption().
*/
abstract class Options
{
// Common options
// Whether date/time values should be formatted as strings rather than returned as PHP objects.
public const SHOULD_FORMAT_DATES = 'shouldFormatDates';
// Whether empty rows should be returned or skipped.
public const SHOULD_PRESERVE_EMPTY_ROWS = 'shouldPreserveEmptyRows';
// CSV specific options
// Character used to delimit fields.
public const FIELD_DELIMITER = 'fieldDelimiter';
// Character used to enclose fields.
public const FIELD_ENCLOSURE = 'fieldEnclosure';
// Encoding of the CSV file to be read.
public const ENCODING = 'encoding';
// XLSX specific options
// NOTE(review): presumably the folder used for temporary files while reading XLSX — confirm against the XLSX reader.
public const TEMP_FOLDER = 'tempFolder';
// NOTE(review): presumably selects the 1904-based date system for XLSX dates — confirm against the XLSX reader.
public const SHOULD_USE_1904_DATES = 'shouldUse1904Dates';
}

View File

@ -1,26 +1,51 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\Common\Manager;
use OpenSpout\Common\Entity\Cell;
use OpenSpout\Common\Entity\Row;
use OpenSpout\Reader\Common\Creator\InternalEntityFactoryInterface;
class RowManager
{
/** @var InternalEntityFactoryInterface Factory to create entities */
private $entityFactory;
/**
* @internal
* @param InternalEntityFactoryInterface $entityFactory Factory to create entities
*/
final class RowManager
public function __construct(InternalEntityFactoryInterface $entityFactory)
{
$this->entityFactory = $entityFactory;
}
/**
 * Tells whether every cell of the given row is empty.
 * A row without any cell is reported as empty too.
 *
 * @return bool
 */
public function isEmpty(Row $row)
{
    foreach ($row->getCells() as $currentCell) {
        if ($currentCell->isEmpty()) {
            continue;
        }

        // first non-empty cell settles the answer
        return false;
    }

    return true;
}
/**
* Fills the missing indexes of a row with empty cells.
*
* @return Row
*/
public function fillMissingIndexesWithEmptyCells(Row $row): void
public function fillMissingIndexesWithEmptyCells(Row $row)
{
$numCells = $row->getNumCells();
if (0 === $numCells) {
return;
return $row;
}
$rowCells = $row->getCells();
@ -37,7 +62,7 @@ final class RowManager
for ($cellIndex = 0; $cellIndex < $maxCellIndex; ++$cellIndex) {
if (!isset($rowCells[$cellIndex])) {
$row->setCellAtIndex(Cell::fromValue(''), $cellIndex);
$row->setCellAtIndex($this->entityFactory->createCell(''), $cellIndex);
$needsSorting = true;
}
}
@ -47,5 +72,7 @@ final class RowManager
ksort($rowCells);
$row->setCells($rowCells);
}
return $row;
}
}

View File

@ -1,17 +1,13 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\Common;
use OpenSpout\Reader\Exception\XMLProcessingException;
use OpenSpout\Reader\Wrapper\XMLReader;
use ReflectionMethod;
/**
* @internal
* Helps process XML files.
*/
final class XMLProcessor
class XMLProcessor
{
// Node types
public const NODE_TYPE_START = XMLReader::ELEMENT;
@ -25,16 +21,16 @@ final class XMLProcessor
public const PROCESSING_CONTINUE = 1;
public const PROCESSING_STOP = 2;
/** @var XMLReader The XMLReader object that will help read sheet's XML data */
private readonly XMLReader $xmlReader;
/** @var \OpenSpout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
protected $xmlReader;
/** @var array<string, array{reflectionMethod: ReflectionMethod, reflectionObject: object}> Registered callbacks */
private array $callbacks = [];
/** @var array Registered callbacks */
private $callbacks = [];
/**
* @param XMLReader $xmlReader XMLReader object
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object
*/
public function __construct(XMLReader $xmlReader)
public function __construct($xmlReader)
{
$this->xmlReader = $xmlReader;
}
@ -43,8 +39,10 @@ final class XMLProcessor
* @param string $nodeName A callback may be triggered when a node with this name is read
* @param int $nodeType Type of the node [NODE_TYPE_START || NODE_TYPE_END]
* @param callable $callback Callback to execute when the read node has the given name and type
*
* @return XMLProcessor
*/
public function registerCallback(string $nodeName, int $nodeType, $callback): self
public function registerCallback($nodeName, $nodeType, $callback)
{
$callbackKey = $this->getCallbackKey($nodeName, $nodeType);
$this->callbacks[$callbackKey] = $this->getInvokableCallbackData($callback);
@ -56,9 +54,9 @@ final class XMLProcessor
* Resumes the reading of the XML file where it was left off.
* Stops whenever a callback indicates that reading should stop or at the end of the file.
*
* @throws XMLProcessingException
* @throws \OpenSpout\Reader\Exception\XMLProcessingException
*/
public function readUntilStopped(): void
public function readUntilStopped()
{
while ($this->xmlReader->read()) {
$nodeType = $this->xmlReader->nodeType;
@ -84,7 +82,7 @@ final class XMLProcessor
*
* @return string Key used to store the associated callback
*/
private function getCallbackKey(string $nodeName, int $nodeType): string
private function getCallbackKey($nodeName, $nodeType)
{
return "{$nodeName}{$nodeType}";
}
@ -97,13 +95,13 @@ final class XMLProcessor
*
* @param callable $callback Array reference to a callback: [OBJECT, METHOD_NAME]
*
* @return array{reflectionMethod: ReflectionMethod, reflectionObject: object} Associative array containing the elements needed to invoke the callback using Reflection
* @return array Associative array containing the elements needed to invoke the callback using Reflection
*/
private function getInvokableCallbackData($callback): array
private function getInvokableCallbackData($callback)
{
$callbackObject = $callback[0];
$callbackMethodName = $callback[1];
$reflectionMethod = new ReflectionMethod($callbackObject, $callbackMethodName);
$reflectionMethod = new \ReflectionMethod(\get_class($callbackObject), $callbackMethodName);
$reflectionMethod->setAccessible(true);
return [
@ -117,9 +115,9 @@ final class XMLProcessor
* @param string $nodeNameWithoutPrefix Name of the same node, un-prefixed
* @param int $nodeType Type of the node [NODE_TYPE_START || NODE_TYPE_END]
*
* @return null|array{reflectionMethod: ReflectionMethod, reflectionObject: object} Callback data to be used for execution when a node of the given name/type is read or NULL if none found
* @return null|array Callback data to be used for execution when a node of the given name/type is read or NULL if none found
*/
private function getRegisteredCallbackData(string $nodeNamePossiblyWithPrefix, string $nodeNameWithoutPrefix, int $nodeType): ?array
private function getRegisteredCallbackData($nodeNamePossiblyWithPrefix, $nodeNameWithoutPrefix, $nodeType)
{
// With prefixed nodes, we should match if (by order of preference):
// 1. the callback was registered with the prefixed node name (e.g. "x:worksheet")
@ -138,12 +136,12 @@ final class XMLProcessor
}
/**
* @param array{reflectionMethod: ReflectionMethod, reflectionObject: object} $callbackData Associative array containing data to invoke the callback using Reflection
* @param XMLReader[] $args Arguments to pass to the callback
* @param array $callbackData Associative array containing data to invoke the callback using Reflection
* @param array $args Arguments to pass to the callback
*
* @return int Callback response
*/
private function invokeCallback(array $callbackData, array $args): int
private function invokeCallback($callbackData, $args)
{
$reflectionMethod = $callbackData[self::CALLBACK_REFLECTION_METHOD];
$callbackObject = $callbackData[self::CALLBACK_REFLECTION_OBJECT];

View File

@ -0,0 +1,30 @@
<?php
namespace OpenSpout\Reader\Exception;
use Throwable;
/**
 * Reader exception that carries the offending value in addition to the usual
 * message, code and previous exception.
 */
class InvalidValueException extends ReaderException
{
    /** @var mixed The value given to the constructor, returned as-is by getInvalidValue() */
    private $invalidValue;

    /**
     * @param mixed          $invalidValue Value to expose through getInvalidValue()
     * @param string         $message
     * @param int            $code
     * @param null|Throwable $previous     Explicitly nullable: implicit nullability via "= null" is deprecated in PHP 8.4
     */
    public function __construct($invalidValue, $message = '', $code = 0, ?Throwable $previous = null)
    {
        $this->invalidValue = $invalidValue;
        parent::__construct($message, $code, $previous);
    }

    /**
     * @return mixed The value passed as first constructor argument
     */
    public function getInvalidValue()
    {
        return $this->invalidValue;
    }
}

View File

@ -0,0 +1,7 @@
<?php
namespace OpenSpout\Reader\Exception;
/**
* Reader exception type for iterators that cannot be rewound.
* NOTE(review): meaning inferred from the class name only — confirm against the throw sites.
*/
class IteratorNotRewindableException extends ReaderException
{
}

View File

@ -0,0 +1,7 @@
<?php
namespace OpenSpout\Reader\Exception;
/**
* Reader exception type for files in which no sheet could be found.
* NOTE(review): meaning inferred from the class name only — confirm against the throw sites.
*/
class NoSheetsFoundException extends ReaderException
{
}

View File

@ -0,0 +1,9 @@
<?php
namespace OpenSpout\Reader\Exception;
use OpenSpout\Common\Exception\SpoutException;
/**
* Abstract parent of the exceptions declared in the OpenSpout\Reader\Exception namespace.
* Adds nothing to SpoutException; it only provides a common type to catch.
*/
abstract class ReaderException extends SpoutException
{
}

View File

@ -0,0 +1,7 @@
<?php
namespace OpenSpout\Reader\Exception;
/**
* Reader exception type for operations attempted on a reader that has not been opened.
* NOTE(review): meaning inferred from the class name only — confirm against the throw sites.
*/
class ReaderNotOpenedException extends ReaderException
{
}

View File

@ -0,0 +1,7 @@
<?php
namespace OpenSpout\Reader\Exception;
/**
* Reader exception type for a shared string that could not be found.
* NOTE(review): meaning inferred from the class name only — confirm against the throw sites.
*/
class SharedStringNotFoundException extends ReaderException
{
}

View File

@ -0,0 +1,7 @@
<?php
namespace OpenSpout\Reader\Exception;
/**
 * Raised when XML content cannot be processed.
 *
 * Callers in this package catch it around XML reads.
 */
class XMLProcessingException extends ReaderException
{
}

View File

@ -0,0 +1,14 @@
<?php
namespace OpenSpout\Reader;
/**
 * Iterator contract used by the readers: a standard PHP \Iterator extended
 * with an explicit cleanup hook.
 */
interface IteratorInterface extends \Iterator
{
    /**
     * Cleans up what was created to iterate over the object.
     *
     * Implementations use this to release whatever they hold open for the
     * iteration (the ODS row iterator closes its XML reader here).
     */
    public function end();
}

View File

@ -0,0 +1,43 @@
<?php
namespace OpenSpout\Reader\ODS\Creator;
use OpenSpout\Reader\ODS\Helper\CellValueFormatter;
use OpenSpout\Reader\ODS\Helper\SettingsHelper;
/**
 * Builds the helper objects used by the ODS reader.
 */
class HelperFactory extends \OpenSpout\Common\Creator\HelperFactory
{
    /**
     * Creates the helper that turns cell nodes into PHP values.
     *
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
     *
     * @return CellValueFormatter
     */
    public function createCellValueFormatter($shouldFormatDates)
    {
        return new CellValueFormatter($shouldFormatDates, $this->createStringsEscaper());
    }

    /**
     * Creates the helper that reads data from the "settings.xml" file.
     *
     * @param InternalEntityFactory $entityFactory Factory handed over to the settings helper
     *
     * @return SettingsHelper
     */
    public function createSettingsHelper($entityFactory)
    {
        $settingsHelper = new SettingsHelper($entityFactory);

        return $settingsHelper;
    }

    /**
     * Creates the escaper used to unescape XML data.
     *
     * @return \OpenSpout\Common\Helper\Escaper\ODS
     */
    public function createStringsEscaper()
    {
        // @noinspection PhpUnnecessaryFullyQualifiedNameInspection
        $odsEscaper = new \OpenSpout\Common\Helper\Escaper\ODS();

        return $odsEscaper;
    }
}

View File

@ -0,0 +1,124 @@
<?php
namespace OpenSpout\Reader\ODS\Creator;
use OpenSpout\Common\Entity\Cell;
use OpenSpout\Common\Entity\Row;
use OpenSpout\Reader\Common\Creator\InternalEntityFactoryInterface;
use OpenSpout\Reader\Common\Entity\Options;
use OpenSpout\Reader\Common\XMLProcessor;
use OpenSpout\Reader\ODS\RowIterator;
use OpenSpout\Reader\ODS\Sheet;
use OpenSpout\Reader\ODS\SheetIterator;
use OpenSpout\Reader\Wrapper\XMLReader;
/**
 * Builds the entities (iterators, sheets, rows, cells, low-level readers)
 * used while reading an ODS file.
 */
class InternalEntityFactory implements InternalEntityFactoryInterface
{
    /** @var HelperFactory Source of escapers, formatters and settings helpers */
    private $helperFactory;

    /** @var ManagerFactory Source of row managers */
    private $managerFactory;

    public function __construct(HelperFactory $helperFactory, ManagerFactory $managerFactory)
    {
        $this->managerFactory = $managerFactory;
        $this->helperFactory = $helperFactory;
    }

    /**
     * Creates the iterator that walks over the sheets of a file.
     *
     * @param string                                            $filePath       Path of the file to be read
     * @param \OpenSpout\Common\Manager\OptionsManagerInterface $optionsManager Reader's options manager
     *
     * @return SheetIterator
     */
    public function createSheetIterator($filePath, $optionsManager)
    {
        // Arguments are evaluated left to right, so the escaper is still
        // created before the settings helper.
        return new SheetIterator(
            $filePath,
            $optionsManager,
            $this->helperFactory->createStringsEscaper(),
            $this->helperFactory->createSettingsHelper($this),
            $this
        );
    }

    /**
     * Creates a sheet together with the row iterator that reads its content.
     *
     * @param XMLReader                                         $xmlReader      XML Reader
     * @param int                                               $sheetIndex     Index of the sheet, based on order in the workbook (zero-based)
     * @param string                                            $sheetName      Name of the sheet
     * @param bool                                              $isSheetActive  Whether the sheet was defined as active
     * @param bool                                              $isSheetVisible Whether the sheet is visible
     * @param \OpenSpout\Common\Manager\OptionsManagerInterface $optionsManager Reader's options manager
     *
     * @return Sheet
     */
    public function createSheet($xmlReader, $sheetIndex, $sheetName, $isSheetActive, $isSheetVisible, $optionsManager)
    {
        return new Sheet(
            $this->createRowIterator($xmlReader, $optionsManager),
            $sheetIndex,
            $sheetName,
            $isSheetActive,
            $isSheetVisible
        );
    }

    /**
     * Creates a row holding the given cells.
     *
     * @param Cell[] $cells Cells to put in the row (none by default)
     *
     * @return Row
     */
    public function createRow(array $cells = [])
    {
        $row = new Row($cells, null);

        return $row;
    }

    /**
     * Creates a cell wrapping the given value.
     *
     * @param mixed $cellValue
     *
     * @return Cell
     */
    public function createCell($cellValue)
    {
        $cell = new Cell($cellValue);

        return $cell;
    }

    /**
     * @return XMLReader A new XML reader wrapper
     */
    public function createXMLReader()
    {
        $reader = new XMLReader();

        return $reader;
    }

    /**
     * @return \ZipArchive A new, not yet opened, zip archive object
     */
    public function createZipArchive()
    {
        $archive = new \ZipArchive();

        return $archive;
    }

    /**
     * Creates the iterator that reads the rows of one sheet.
     *
     * @param XMLReader                                         $xmlReader      XML Reader
     * @param \OpenSpout\Common\Manager\OptionsManagerInterface $optionsManager Reader's options manager
     *
     * @return RowIterator
     */
    private function createRowIterator($xmlReader, $optionsManager)
    {
        // Collaborators are built in the same order as before: formatter
        // (driven by the date-formatting option), XML processor, row manager.
        $formatter = $this->helperFactory->createCellValueFormatter(
            $optionsManager->getOption(Options::SHOULD_FORMAT_DATES)
        );
        $processor = $this->createXMLProcessor($xmlReader);
        $manager = $this->managerFactory->createRowManager($this);

        return new RowIterator($xmlReader, $optionsManager, $formatter, $processor, $manager, $this);
    }

    /**
     * Creates the XML processor bound to the given reader.
     *
     * @param XMLReader $xmlReader
     *
     * @return XMLProcessor
     */
    private function createXMLProcessor($xmlReader)
    {
        $processor = new XMLProcessor($xmlReader);

        return $processor;
    }
}

View File

@ -0,0 +1,21 @@
<?php
namespace OpenSpout\Reader\ODS\Creator;
use OpenSpout\Reader\Common\Manager\RowManager;
/**
 * Builds the manager objects used by the ODS reader.
 */
class ManagerFactory
{
    /**
     * Creates the manager in charge of rows.
     *
     * @param InternalEntityFactory $entityFactory Factory to create entities
     *
     * @return RowManager
     */
    public function createRowManager($entityFactory)
    {
        $rowManager = new RowManager($entityFactory);

        return $rowManager;
    }
}

View File

@ -1,26 +1,15 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\ODS\Helper;
use DateInterval;
use DateTimeImmutable;
use DOMElement;
use DOMNode;
use DOMText;
use Exception;
use OpenSpout\Common\Helper\Escaper\ODS;
use OpenSpout\Reader\Exception\InvalidValueException;
/**
* @internal
* This class provides helper functions to format cell values.
*/
final readonly class CellValueFormatter
class CellValueFormatter
{
/**
* Definition of all possible cell types.
*/
/** Definition of all possible cell types */
public const CELL_TYPE_STRING = 'string';
public const CELL_TYPE_FLOAT = 'float';
public const CELL_TYPE_BOOLEAN = 'boolean';
@ -30,9 +19,7 @@ final readonly class CellValueFormatter
public const CELL_TYPE_PERCENTAGE = 'percentage';
public const CELL_TYPE_VOID = 'void';
/**
* Definition of XML nodes names used to parse data.
*/
/** Definition of XML nodes names used to parse data */
public const XML_NODE_P = 'p';
public const XML_NODE_TEXT_A = 'text:a';
public const XML_NODE_TEXT_SPAN = 'text:span';
@ -40,9 +27,7 @@ final readonly class CellValueFormatter
public const XML_NODE_TEXT_TAB = 'text:tab';
public const XML_NODE_TEXT_LINE_BREAK = 'text:line-break';
/**
* Definition of XML attributes used to parse data.
*/
/** Definition of XML attributes used to parse data */
public const XML_ATTRIBUTE_TYPE = 'office:value-type';
public const XML_ATTRIBUTE_VALUE = 'office:value';
public const XML_ATTRIBUTE_BOOLEAN_VALUE = 'office:boolean-value';
@ -51,26 +36,24 @@ final readonly class CellValueFormatter
public const XML_ATTRIBUTE_CURRENCY = 'office:currency';
public const XML_ATTRIBUTE_C = 'text:c';
/**
* List of XML nodes representing whitespaces and their corresponding value.
*/
private const WHITESPACE_XML_NODES = [
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
protected $shouldFormatDates;
/** @var \OpenSpout\Common\Helper\Escaper\ODS Used to unescape XML data */
protected $escaper;
/** @var array List of XML nodes representing whitespaces and their corresponding value */
private static $WHITESPACE_XML_NODES = [
self::XML_NODE_TEXT_S => ' ',
self::XML_NODE_TEXT_TAB => "\t",
self::XML_NODE_TEXT_LINE_BREAK => "\n",
];
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
private bool $shouldFormatDates;
/** @var ODS Used to unescape XML data */
private ODS $escaper;
/**
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param ODS $escaper Used to unescape XML data
* @param \OpenSpout\Common\Helper\Escaper\ODS $escaper Used to unescape XML data
*/
public function __construct(bool $shouldFormatDates, ODS $escaper)
public function __construct($shouldFormatDates, $escaper)
{
$this->shouldFormatDates = $shouldFormatDates;
$this->escaper = $escaper;
@ -81,32 +64,52 @@ final readonly class CellValueFormatter
*
* @see http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#refTable13
*
* @return bool|DateInterval|DateTimeImmutable|float|int|string The value associated with the cell, empty string if cell's type is void/undefined
* @param \DOMElement $node
*
* @throws InvalidValueException If the node value is not valid
*
* @return bool|\DateInterval|\DateTime|float|int|string The value associated with the cell, empty string if cell's type is void/undefined
*/
public function extractAndFormatNodeValue(DOMElement $node): bool|DateInterval|DateTimeImmutable|float|int|string
public function extractAndFormatNodeValue($node)
{
$cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE);
return match ($cellType) {
self::CELL_TYPE_STRING => $this->formatStringCellValue($node),
self::CELL_TYPE_FLOAT => $this->formatFloatCellValue($node),
self::CELL_TYPE_BOOLEAN => $this->formatBooleanCellValue($node),
self::CELL_TYPE_DATE => $this->formatDateCellValue($node),
self::CELL_TYPE_TIME => $this->formatTimeCellValue($node),
self::CELL_TYPE_CURRENCY => $this->formatCurrencyCellValue($node),
self::CELL_TYPE_PERCENTAGE => $this->formatPercentageCellValue($node),
default => '',
};
switch ($cellType) {
case self::CELL_TYPE_STRING:
return $this->formatStringCellValue($node);
case self::CELL_TYPE_FLOAT:
return $this->formatFloatCellValue($node);
case self::CELL_TYPE_BOOLEAN:
return $this->formatBooleanCellValue($node);
case self::CELL_TYPE_DATE:
return $this->formatDateCellValue($node);
case self::CELL_TYPE_TIME:
return $this->formatTimeCellValue($node);
case self::CELL_TYPE_CURRENCY:
return $this->formatCurrencyCellValue($node);
case self::CELL_TYPE_PERCENTAGE:
return $this->formatPercentageCellValue($node);
case self::CELL_TYPE_VOID:
default:
return '';
}
}
/**
* Returns the cell String value.
*
* @param \DOMElement $node
*
* @return string The value associated with the cell
*/
private function formatStringCellValue(DOMElement $node): string
protected function formatStringCellValue($node)
{
$pNodeValues = [];
$pNodes = $node->getElementsByTagName(self::XML_NODE_P);
@ -123,9 +126,11 @@ final readonly class CellValueFormatter
/**
* Returns the cell Numeric value from the given node.
*
* @param \DOMElement $node
*
* @return float|int The value associated with the cell
*/
private function formatFloatCellValue(DOMElement $node): float|int
protected function formatFloatCellValue($node)
{
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);
@ -138,19 +143,27 @@ final readonly class CellValueFormatter
/**
* Returns the cell Boolean value from the given node.
*
* @param \DOMElement $node
*
* @return bool The value associated with the cell
*/
private function formatBooleanCellValue(DOMElement $node): bool
protected function formatBooleanCellValue($node)
{
return (bool) $node->getAttribute(self::XML_ATTRIBUTE_BOOLEAN_VALUE);
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_BOOLEAN_VALUE);
return (bool) $nodeValue;
}
/**
* Returns the cell Date value from the given node.
*
* @param \DOMElement $node
*
* @throws InvalidValueException If the value is not a valid date
*
* @return \DateTime|string The value associated with the cell
*/
private function formatDateCellValue(DOMElement $node): DateTimeImmutable|string
protected function formatDateCellValue($node)
{
// The XML node looks like this:
// <table:table-cell calcext:value-type="date" office:date-value="2016-05-19T16:39:00" office:value-type="date">
@ -166,9 +179,9 @@ final readonly class CellValueFormatter
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE);
try {
$cellValue = new DateTimeImmutable($nodeValue);
} catch (Exception $previous) {
throw new InvalidValueException($nodeValue, '', 0, $previous);
$cellValue = new \DateTime($nodeValue);
} catch (\Exception $e) {
throw new InvalidValueException($nodeValue);
}
}
@ -178,11 +191,13 @@ final readonly class CellValueFormatter
/**
* Returns the cell Time value from the given node.
*
* @return DateInterval|string The value associated with the cell
* @param \DOMElement $node
*
* @throws InvalidValueException If the value is not a valid time
*
* @return \DateInterval|string The value associated with the cell
*/
private function formatTimeCellValue(DOMElement $node): DateInterval|string
protected function formatTimeCellValue($node)
{
// The XML node looks like this:
// <table:table-cell calcext:value-type="time" office:time-value="PT13H24M00S" office:value-type="time">
@ -198,9 +213,9 @@ final readonly class CellValueFormatter
$nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE);
try {
$cellValue = new DateInterval($nodeValue);
} catch (Exception $previous) {
throw new InvalidValueException($nodeValue, '', 0, $previous);
$cellValue = new \DateInterval($nodeValue);
} catch (\Exception $e) {
throw new InvalidValueException($nodeValue);
}
}
@ -210,9 +225,11 @@ final readonly class CellValueFormatter
/**
* Returns the cell Currency value from the given node.
*
* @param \DOMElement $node
*
* @return string The value associated with the cell (e.g. "100 USD" or "9.99 EUR")
*/
private function formatCurrencyCellValue(DOMElement $node): string
protected function formatCurrencyCellValue($node)
{
$value = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);
$currency = $node->getAttribute(self::XML_ATTRIBUTE_CURRENCY);
@ -223,22 +240,29 @@ final readonly class CellValueFormatter
/**
* Returns the cell Percentage value from the given node.
*
* @param \DOMElement $node
*
* @return float|int The value associated with the cell
*/
private function formatPercentageCellValue(DOMElement $node): float|int
protected function formatPercentageCellValue($node)
{
// percentages are formatted like floats
return $this->formatFloatCellValue($node);
}
private function extractTextValueFromNode(DOMNode $pNode): string
/**
* @param \DOMNode $pNode
*
* @return string
*/
private function extractTextValueFromNode($pNode)
{
$textValue = '';
foreach ($pNode->childNodes as $childNode) {
if ($childNode instanceof DOMText) {
if ($childNode instanceof \DOMText) {
$textValue .= $childNode->nodeValue;
} elseif ($this->isWhitespaceNode($childNode->nodeName) && $childNode instanceof DOMElement) {
} elseif ($this->isWhitespaceNode($childNode->nodeName)) {
$textValue .= $this->transformWhitespaceNode($childNode);
} elseif (self::XML_NODE_TEXT_A === $childNode->nodeName || self::XML_NODE_TEXT_SPAN === $childNode->nodeName) {
$textValue .= $this->extractTextValueFromNode($childNode);
@ -253,10 +277,14 @@ final readonly class CellValueFormatter
* - <text:s />
* - <text:tab />
* - <text:line-break />.
*
* @param string $nodeName
*
* @return bool
*/
private function isWhitespaceNode(string $nodeName): bool
private function isWhitespaceNode($nodeName)
{
return isset(self::WHITESPACE_XML_NODES[$nodeName]);
return isset(self::$WHITESPACE_XML_NODES[$nodeName]);
}
/**
@ -269,15 +297,15 @@ final readonly class CellValueFormatter
*
* @see https://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#__RefHeading__1415200_253892949
*
* @param DOMElement $node The XML node representing a whitespace
* @param \DOMElement $node The XML node representing a whitespace
*
* @return string The corresponding whitespace value
*/
private function transformWhitespaceNode(DOMElement $node): string
private function transformWhitespaceNode($node)
{
$countAttribute = $node->getAttribute(self::XML_ATTRIBUTE_C); // only defined for "<text:s>"
$numWhitespaces = '' !== $countAttribute ? (int) $countAttribute : 1;
$numWhitespaces = (!empty($countAttribute)) ? (int) $countAttribute : 1;
return str_repeat(self::WHITESPACE_XML_NODES[$node->nodeName], $numWhitespaces);
return str_repeat(self::$WHITESPACE_XML_NODES[$node->nodeName], $numWhitespaces);
}
}

View File

@ -1,34 +1,41 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\ODS\Helper;
use OpenSpout\Reader\Exception\XMLProcessingException;
use OpenSpout\Reader\Wrapper\XMLReader;
use OpenSpout\Reader\ODS\Creator\InternalEntityFactory;
/**
* @internal
* This class provides helper functions to extract data from the "settings.xml" file.
*/
final class SettingsHelper
class SettingsHelper
{
public const SETTINGS_XML_FILE_PATH = 'settings.xml';
/**
* Definition of XML nodes name and attribute used to parse settings data.
*/
/** Definition of XML nodes name and attribute used to parse settings data */
public const XML_NODE_CONFIG_ITEM = 'config:config-item';
public const XML_ATTRIBUTE_CONFIG_NAME = 'config:name';
public const XML_ATTRIBUTE_VALUE_ACTIVE_TABLE = 'ActiveTable';
/** @var InternalEntityFactory Factory to create entities */
private $entityFactory;
/**
* @param InternalEntityFactory $entityFactory Factory to create entities
*/
public function __construct($entityFactory)
{
$this->entityFactory = $entityFactory;
}
/**
* @param string $filePath Path of the file to be read
*
* @return null|string Name of the sheet that was defined as active or NULL if none found
*/
public function getActiveSheetName(string $filePath): ?string
public function getActiveSheetName($filePath)
{
$xmlReader = new XMLReader();
$xmlReader = $this->entityFactory->createXMLReader();
if (false === $xmlReader->openFileInZip($filePath, self::SETTINGS_XML_FILE_PATH)) {
return null;
}
@ -43,7 +50,7 @@ final class SettingsHelper
break;
}
}
} catch (XMLProcessingException) { // @codeCoverageIgnore
} catch (XMLProcessingException $exception) {
// do nothing
}

View File

@ -0,0 +1,32 @@
<?php
namespace OpenSpout\Reader\ODS\Manager;
use OpenSpout\Common\Manager\OptionsManagerAbstract;
use OpenSpout\Reader\Common\Entity\Options;
/**
 * Options manager for the ODS reader: declares which options the reader
 * accepts and the value each one starts with.
 */
class OptionsManager extends OptionsManagerAbstract
{
    /**
     * {@inheritdoc}
     */
    protected function getSupportedOptions()
    {
        $supported = [];
        $supported[] = Options::SHOULD_FORMAT_DATES;
        $supported[] = Options::SHOULD_PRESERVE_EMPTY_ROWS;

        return $supported;
    }

    /**
     * {@inheritdoc}
     */
    protected function setDefaultOptions()
    {
        // [option name, default value] pairs, applied in declaration order.
        // Pairs are used instead of an associative array so option names are
        // passed to setOption() untouched by array-key casting.
        $defaults = [
            [Options::SHOULD_FORMAT_DATES, false],
            [Options::SHOULD_PRESERVE_EMPTY_ROWS, false],
        ];

        foreach ($defaults as [$optionName, $defaultValue]) {
            $this->setOption($optionName, $defaultValue);
        }
    }
}

View File

@ -0,0 +1,73 @@
<?php
namespace OpenSpout\Reader\ODS;
use OpenSpout\Common\Exception\IOException;
use OpenSpout\Reader\ODS\Creator\InternalEntityFactory;
use OpenSpout\Reader\ReaderAbstract;
/**
 * Reader for ODS files.
 */
class Reader extends ReaderAbstract
{
    /** @var \ZipArchive Archive object used to open the file */
    protected $zip;

    /** @var SheetIterator Iterator over the sheets of the opened file */
    protected $sheetIterator;

    /**
     * Tells whether stream wrappers are supported; this reader does not support them.
     *
     * @return bool
     */
    protected function doesSupportStreamWrapper()
    {
        return false;
    }

    /**
     * Opens the file at the given path and prepares the sheet iterator.
     *
     * @param string $filePath Path of the file to be read
     *
     * @throws \OpenSpout\Common\Exception\IOException            If the file at the given path or its content cannot be read
     * @throws \OpenSpout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
     */
    protected function openReader($filePath)
    {
        /** @var InternalEntityFactory $factory */
        $factory = $this->entityFactory;

        $this->zip = $factory->createZipArchive();

        // ZipArchive::open() returns true on success and an error code
        // otherwise, hence the strict comparison.
        if (true !== $this->zip->open($filePath)) {
            throw new IOException("Could not open {$filePath} for reading.");
        }

        $this->sheetIterator = $factory->createSheetIterator($filePath, $this->optionsManager);
    }

    /**
     * Gives access to the iterator created when the reader was opened.
     *
     * @return SheetIterator To iterate over sheets
     */
    protected function getConcreteSheetIterator()
    {
        return $this->sheetIterator;
    }

    /**
     * Closes the reader. To be used after reading the file.
     */
    protected function closeReader()
    {
        // Nothing to release when no archive object was ever created.
        if (null === $this->zip) {
            return;
        }

        $this->zip->close();
    }
}

View File

@ -1,81 +1,103 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\ODS;
use DOMElement;
use OpenSpout\Common\Entity\Cell;
use OpenSpout\Common\Entity\Row;
use OpenSpout\Common\Exception\IOException;
use OpenSpout\Common\Manager\OptionsManagerInterface;
use OpenSpout\Reader\Common\Entity\Options;
use OpenSpout\Reader\Common\Manager\RowManager;
use OpenSpout\Reader\Common\XMLProcessor;
use OpenSpout\Reader\Exception\InvalidValueException;
use OpenSpout\Reader\Exception\IteratorNotRewindableException;
use OpenSpout\Reader\Exception\SharedStringNotFoundException;
use OpenSpout\Reader\Exception\XMLProcessingException;
use OpenSpout\Reader\IteratorInterface;
use OpenSpout\Reader\ODS\Creator\InternalEntityFactory;
use OpenSpout\Reader\ODS\Helper\CellValueFormatter;
use OpenSpout\Reader\RowIteratorInterface;
use OpenSpout\Reader\Wrapper\XMLReader;
final class RowIterator implements RowIteratorInterface
class RowIterator implements IteratorInterface
{
/**
* Definition of XML nodes names used to parse data.
*/
/** Definition of XML nodes names used to parse data */
public const XML_NODE_TABLE = 'table:table';
public const XML_NODE_ROW = 'table:table-row';
public const XML_NODE_CELL = 'table:table-cell';
public const MAX_COLUMNS_EXCEL = 16384;
/**
* Definition of XML attribute used to parse data.
*/
/** Definition of XML attribute used to parse data */
public const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated';
public const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated';
private readonly Options $options;
/** @var \OpenSpout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
protected $xmlReader;
/** @var XMLProcessor Helper Object to process XML nodes */
private readonly XMLProcessor $xmlProcessor;
/** @var \OpenSpout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
protected $xmlProcessor;
/** @var CellValueFormatter Helper to format cell values */
private readonly CellValueFormatter $cellValueFormatter;
/** @var bool Whether empty rows should be returned or skipped */
protected $shouldPreserveEmptyRows;
/** @var Helper\CellValueFormatter Helper to format cell values */
protected $cellValueFormatter;
/** @var RowManager Manages rows */
protected $rowManager;
/** @var InternalEntityFactory Factory to create entities */
protected $entityFactory;
/** @var bool Whether the iterator has already been rewound once */
private bool $hasAlreadyBeenRewound = false;
protected $hasAlreadyBeenRewound = false;
/** @var Row The currently processed row */
private Row $currentlyProcessedRow;
protected $currentlyProcessedRow;
/** @var null|Row Buffer used to store the current row, while checking if there are more rows to read */
private ?Row $rowBuffer = null;
protected $rowBuffer;
/** @var bool Indicates whether all rows have been read */
private bool $hasReachedEndOfFile = false;
protected $hasReachedEndOfFile = false;
/** @var int Last row index processed (one-based) */
private int $lastRowIndexProcessed = 0;
protected $lastRowIndexProcessed = 0;
/** @var int Row index to be processed next (one-based) */
private int $nextRowIndexToBeProcessed = 1;
protected $nextRowIndexToBeProcessed = 1;
/** @var null|Cell Last processed cell (because when reading cell at column N+1, cell N is processed) */
private ?Cell $lastProcessedCell = null;
protected $lastProcessedCell;
/** @var int Number of times the last processed row should be repeated */
private int $numRowsRepeated = 1;
protected $numRowsRepeated = 1;
/** @var int Number of times the last cell value should be copied to the cells on its right */
private int $numColumnsRepeated = 1;
protected $numColumnsRepeated = 1;
/** @var bool Whether at least one cell has been read for the row currently being processed */
private bool $hasAlreadyReadOneCellInCurrentRow = false;
protected $hasAlreadyReadOneCellInCurrentRow = false;
/**
* @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element
* @param OptionsManagerInterface $optionsManager Reader's options manager
* @param CellValueFormatter $cellValueFormatter Helper to format cell values
* @param XMLProcessor $xmlProcessor Helper to process XML files
* @param RowManager $rowManager Manages rows
* @param InternalEntityFactory $entityFactory Factory to create entities
*/
public function __construct(
Options $options,
XMLReader $xmlReader,
OptionsManagerInterface $optionsManager,
CellValueFormatter $cellValueFormatter,
XMLProcessor $xmlProcessor
XMLProcessor $xmlProcessor,
RowManager $rowManager,
InternalEntityFactory $entityFactory
) {
$this->xmlReader = $xmlReader;
$this->shouldPreserveEmptyRows = $optionsManager->getOption(Options::SHOULD_PRESERVE_EMPTY_ROWS);
$this->cellValueFormatter = $cellValueFormatter;
$this->entityFactory = $entityFactory;
$this->rowManager = $rowManager;
// Register all callbacks to process different nodes when reading the XML file
$this->xmlProcessor = $xmlProcessor;
@ -83,7 +105,6 @@ final class RowIterator implements RowIteratorInterface
$this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
$this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
$this->xmlProcessor->registerCallback(self::XML_NODE_TABLE, XMLProcessor::NODE_TYPE_END, [$this, 'processTableEndingNode']);
$this->options = $options;
}
/**
@ -92,8 +113,9 @@ final class RowIterator implements RowIteratorInterface
*
* @see http://php.net/manual/en/iterator.rewind.php
*
* @throws IteratorNotRewindableException If the iterator is rewound more than once
* @throws \OpenSpout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once
*/
#[\ReturnTypeWillChange]
public function rewind(): void
{
// Because sheet and row data is located in the file, we can't rewind both the
@ -117,6 +139,7 @@ final class RowIterator implements RowIteratorInterface
*
* @see http://php.net/manual/en/iterator.valid.php
*/
#[\ReturnTypeWillChange]
public function valid(): bool
{
return !$this->hasReachedEndOfFile;
@ -127,9 +150,10 @@ final class RowIterator implements RowIteratorInterface
*
* @see http://php.net/manual/en/iterator.next.php
*
* @throws SharedStringNotFoundException If a shared string was not found
* @throws IOException If unable to read the sheet data XML
* @throws \OpenSpout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
* @throws \OpenSpout\Common\Exception\IOException If unable to read the sheet data XML
*/
#[\ReturnTypeWillChange]
public function next(): void
{
if ($this->doesNeedDataForNextRowToBeProcessed()) {
@ -144,6 +168,7 @@ final class RowIterator implements RowIteratorInterface
*
* @see http://php.net/manual/en/iterator.current.php
*/
#[\ReturnTypeWillChange]
public function current(): Row
{
return $this->rowBuffer;
@ -154,11 +179,21 @@ final class RowIterator implements RowIteratorInterface
*
* @see http://php.net/manual/en/iterator.key.php
*/
#[\ReturnTypeWillChange]
public function key(): int
{
return $this->lastRowIndexProcessed;
}
/**
* Cleans up what was created to iterate over the object.
*/
#[\ReturnTypeWillChange]
public function end(): void
{
$this->xmlReader->close();
}
/**
* Returns whether we need data for the next row to be processed.
* We DO need to read data if:
@ -168,34 +203,39 @@ final class RowIterator implements RowIteratorInterface
*
* @return bool whether we need data for the next row to be processed
*/
private function doesNeedDataForNextRowToBeProcessed(): bool
protected function doesNeedDataForNextRowToBeProcessed()
{
$hasReadAtLeastOneRow = (0 !== $this->lastRowIndexProcessed);
return
!$hasReadAtLeastOneRow
|| $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1;
|| $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1
;
}
/**
* @throws SharedStringNotFoundException If a shared string was not found
* @throws IOException If unable to read the sheet data XML
* @throws \OpenSpout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
* @throws \OpenSpout\Common\Exception\IOException If unable to read the sheet data XML
*/
private function readDataForNextRow(): void
protected function readDataForNextRow()
{
$this->currentlyProcessedRow = new Row([], null);
$this->currentlyProcessedRow = $this->entityFactory->createRow();
try {
$this->xmlProcessor->readUntilStopped();
} catch (XMLProcessingException $exception) {
throw new IOException("The sheet's data cannot be read. [{$exception->getMessage()}]");
}
$this->rowBuffer = $this->currentlyProcessedRow;
}
/**
* @param XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
*
* @return int A return code that indicates what action should the processor take next
*/
private function processRowStartingNode(XMLReader $xmlReader): int
protected function processRowStartingNode($xmlReader)
{
// Reset data from current row
$this->hasAlreadyReadOneCellInCurrentRow = false;
@ -207,16 +247,16 @@ final class RowIterator implements RowIteratorInterface
}
/**
* @param XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
*
* @return int A return code that indicates what action should the processor take next
*/
private function processCellStartingNode(XMLReader $xmlReader): int
protected function processCellStartingNode($xmlReader)
{
$currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader);
// NOTE: expand() will automatically decode all XML entities of the child nodes
/** @var DOMElement $node */
/** @var \DOMElement $node */
$node = $xmlReader->expand();
$currentCell = $this->getCell($node);
@ -237,12 +277,12 @@ final class RowIterator implements RowIteratorInterface
/**
* @return int A return code that indicates what action should the processor take next
*/
private function processRowEndingNode(): int
protected function processRowEndingNode()
{
$isEmptyRow = $this->isEmptyRow($this->currentlyProcessedRow, $this->lastProcessedCell);
// if the fetched row is empty and we don't want to preserve it...
if (!$this->options->SHOULD_PRESERVE_EMPTY_ROWS && $isEmptyRow) {
if (!$this->shouldPreserveEmptyRows && $isEmptyRow) {
// ... skip it
return XMLProcessor::PROCESSING_CONTINUE;
}
@ -275,7 +315,7 @@ final class RowIterator implements RowIteratorInterface
/**
* @return int A return code that indicates what action should the processor take next
*/
private function processTableEndingNode(): int
protected function processTableEndingNode()
{
// The closing "</table:table>" marks the end of the file
$this->hasReachedEndOfFile = true;
@ -284,11 +324,11 @@ final class RowIterator implements RowIteratorInterface
}
/**
* @param XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node
*
* @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing
*/
private function getNumRowsRepeatedForCurrentNode(XMLReader $xmlReader): int
protected function getNumRowsRepeatedForCurrentNode($xmlReader)
{
$numRowsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED);
@ -296,11 +336,11 @@ final class RowIterator implements RowIteratorInterface
}
/**
* @param XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node
*
* @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing
*/
private function getNumColumnsRepeatedForCurrentNode(XMLReader $xmlReader): int
protected function getNumColumnsRepeatedForCurrentNode($xmlReader)
{
$numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED);
@ -310,15 +350,18 @@ final class RowIterator implements RowIteratorInterface
/**
* Returns the cell with (unescaped) correctly marshalled, cell value associated to the given XML node.
*
* @param \DOMElement $node
*
* @return Cell The cell holding the value extracted from the given XML node
*/
private function getCell(DOMElement $node): Cell
protected function getCell($node)
{
try {
$cellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);
$cell = Cell::fromValue($cellValue);
$cell = $this->entityFactory->createCell($cellValue);
} catch (InvalidValueException $exception) {
$cell = new Cell\ErrorCell($exception->getInvalidValue(), null);
$cell = $this->entityFactory->createCell($exception->getInvalidValue());
$cell->setType(Cell::TYPE_ERROR);
}
return $cell;
@ -330,14 +373,16 @@ final class RowIterator implements RowIteratorInterface
* After finishing processing each cell, the last read cell is not part of the
* row data yet (as we still need to apply the "num-columns-repeated" attribute).
*
* @param Row $currentRow
* @param null|Cell $lastReadCell The last read cell
*
* @return bool Whether the row is empty
*/
private function isEmptyRow(Row $currentRow, ?Cell $lastReadCell): bool
protected function isEmptyRow($currentRow, $lastReadCell)
{
return
$currentRow->isEmpty()
&& (null === $lastReadCell || $lastReadCell instanceof Cell\EmptyCell);
$this->rowManager->isEmpty($currentRow)
&& (!isset($lastReadCell) || $lastReadCell->isEmpty())
;
}
}

View File

@ -1,30 +1,31 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\ODS;
use OpenSpout\Reader\SheetWithVisibilityInterface;
use OpenSpout\Reader\SheetInterface;
/**
* @implements SheetWithVisibilityInterface<RowIterator>
* Represents a sheet within a ODS file.
*/
final readonly class Sheet implements SheetWithVisibilityInterface
class Sheet implements SheetInterface
{
/** @var RowIterator To iterate over sheet's rows */
private RowIterator $rowIterator;
/** @var \OpenSpout\Reader\ODS\RowIterator To iterate over sheet's rows */
protected $rowIterator;
/** @var int ID of the sheet */
protected $id;
/** @var int Index of the sheet, based on order in the workbook (zero-based) */
private int $index;
protected $index;
/** @var string Name of the sheet */
private string $name;
protected $name;
/** @var bool Whether the sheet was the active one */
private bool $isActive;
protected $isActive;
/** @var bool Whether the sheet is visible */
private bool $isVisible;
protected $isVisible;
/**
* @param RowIterator $rowIterator The corresponding row iterator
@ -33,7 +34,7 @@ final readonly class Sheet implements SheetWithVisibilityInterface
* @param bool $isSheetActive Whether the sheet was defined as active
* @param bool $isSheetVisible Whether the sheet is visible
*/
public function __construct(RowIterator $rowIterator, int $sheetIndex, string $sheetName, bool $isSheetActive, bool $isSheetVisible)
public function __construct($rowIterator, $sheetIndex, $sheetName, $isSheetActive, $isSheetVisible)
{
$this->rowIterator = $rowIterator;
$this->index = $sheetIndex;
@ -42,7 +43,10 @@ final readonly class Sheet implements SheetWithVisibilityInterface
$this->isVisible = $isSheetVisible;
}
public function getRowIterator(): RowIterator
/**
* @return \OpenSpout\Reader\ODS\RowIterator
*/
public function getRowIterator()
{
return $this->rowIterator;
}
@ -50,7 +54,7 @@ final readonly class Sheet implements SheetWithVisibilityInterface
/**
* @return int Index of the sheet, based on order in the workbook (zero-based)
*/
public function getIndex(): int
public function getIndex()
{
return $this->index;
}
@ -58,7 +62,7 @@ final readonly class Sheet implements SheetWithVisibilityInterface
/**
* @return string Name of the sheet
*/
public function getName(): string
public function getName()
{
return $this->name;
}
@ -66,7 +70,7 @@ final readonly class Sheet implements SheetWithVisibilityInterface
/**
* @return bool Whether the sheet was defined as active
*/
public function isActive(): bool
public function isActive()
{
return $this->isActive;
}
@ -74,7 +78,7 @@ final readonly class Sheet implements SheetWithVisibilityInterface
/**
* @return bool Whether the sheet is visible
*/
public function isVisible(): bool
public function isVisible()
{
return $this->isVisible;
}

View File

@ -1,31 +1,24 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\ODS;
use DOMElement;
use OpenSpout\Common\Exception\IOException;
use OpenSpout\Common\Helper\Escaper\ODS;
use OpenSpout\Reader\Common\XMLProcessor;
use OpenSpout\Reader\Exception\XMLProcessingException;
use OpenSpout\Reader\ODS\Helper\CellValueFormatter;
use OpenSpout\Reader\IteratorInterface;
use OpenSpout\Reader\ODS\Creator\InternalEntityFactory;
use OpenSpout\Reader\ODS\Helper\SettingsHelper;
use OpenSpout\Reader\SheetIteratorInterface;
use OpenSpout\Reader\Wrapper\XMLReader;
/**
* @implements SheetIteratorInterface<Sheet>
* Iterate over ODS sheet.
*/
final class SheetIterator implements SheetIteratorInterface
class SheetIterator implements IteratorInterface
{
public const CONTENT_XML_FILE_PATH = 'content.xml';
public const XML_STYLE_NAMESPACE = 'urn:oasis:names:tc:opendocument:xmlns:style:1.0';
/**
* Definition of XML nodes name and attribute used to parse sheet data.
*/
/** Definition of XML nodes name and attribute used to parse sheet data */
public const XML_NODE_AUTOMATIC_STYLES = 'office:automatic-styles';
public const XML_NODE_STYLE_TABLE_PROPERTIES = 'table-properties';
public const XML_NODE_TABLE = 'table:table';
@ -35,37 +28,45 @@ final class SheetIterator implements SheetIteratorInterface
public const XML_ATTRIBUTE_TABLE_DISPLAY = 'table:display';
/** @var string Path of the file to be read */
private readonly string $filePath;
protected $filePath;
private readonly Options $options;
/** @var \OpenSpout\Common\Manager\OptionsManagerInterface Reader's options manager */
protected $optionsManager;
/** @var InternalEntityFactory Factory to create entities */
protected $entityFactory;
/** @var XMLReader The XMLReader object that will help read sheet's XML data */
private readonly XMLReader $xmlReader;
protected $xmlReader;
/** @var ODS Used to unescape XML data */
private readonly ODS $escaper;
/** @var \OpenSpout\Common\Helper\Escaper\ODS Used to unescape XML data */
protected $escaper;
/** @var bool Whether there is still at least one sheet to be read */
private bool $hasFoundSheet;
protected $hasFoundSheet;
/** @var int The index of the sheet being read (zero-based) */
private int $currentSheetIndex;
protected $currentSheetIndex;
/** @var string The name of the sheet that was defined as active */
private readonly ?string $activeSheetName;
protected $activeSheetName;
/** @var array<string, bool> Associative array [STYLE_NAME] => [IS_SHEET_VISIBLE] */
private array $sheetsVisibility;
/** @var array Associative array [STYLE_NAME] => [IS_SHEET_VISIBLE] */
protected $sheetsVisibility;
public function __construct(
string $filePath,
Options $options,
ODS $escaper,
SettingsHelper $settingsHelper
) {
/**
* @param string $filePath Path of the file to be read
* @param \OpenSpout\Common\Manager\OptionsManagerInterface $optionsManager
* @param \OpenSpout\Common\Helper\Escaper\ODS $escaper Used to unescape XML data
* @param SettingsHelper $settingsHelper Helper to get data from "settings.xml"
* @param InternalEntityFactory $entityFactory Factory to create entities
*/
public function __construct($filePath, $optionsManager, $escaper, $settingsHelper, $entityFactory)
{
$this->filePath = $filePath;
$this->options = $options;
$this->xmlReader = new XMLReader();
$this->optionsManager = $optionsManager;
$this->entityFactory = $entityFactory;
$this->xmlReader = $entityFactory->createXMLReader();
$this->escaper = $escaper;
$this->activeSheetName = $settingsHelper->getActiveSheetName($filePath);
}
@ -75,9 +76,10 @@ final class SheetIterator implements SheetIteratorInterface
*
* @see http://php.net/manual/en/iterator.rewind.php
*
* @throws IOException If unable to open the XML file containing sheets' data
* @throws \OpenSpout\Common\Exception\IOException If unable to open the XML file containing sheets' data
*/
public function rewind(): void
#[\ReturnTypeWillChange]
public function rewind()
{
$this->xmlReader->close();
@ -101,15 +103,13 @@ final class SheetIterator implements SheetIteratorInterface
* Checks if current position is valid.
*
* @see http://php.net/manual/en/iterator.valid.php
*
* @return bool
*/
public function valid(): bool
#[\ReturnTypeWillChange]
public function valid()
{
$valid = $this->hasFoundSheet;
if (!$valid) {
$this->xmlReader->close();
}
return $valid;
return $this->hasFoundSheet;
}
/**
@ -117,7 +117,8 @@ final class SheetIterator implements SheetIteratorInterface
*
* @see http://php.net/manual/en/iterator.next.php
*/
public function next(): void
#[\ReturnTypeWillChange]
public function next()
{
$this->hasFoundSheet = $this->xmlReader->readUntilNodeFound(self::XML_NODE_TABLE);
@ -130,29 +131,27 @@ final class SheetIterator implements SheetIteratorInterface
* Return the current element.
*
* @see http://php.net/manual/en/iterator.current.php
*
* @return \OpenSpout\Reader\ODS\Sheet
*/
public function current(): Sheet
#[\ReturnTypeWillChange]
public function current()
{
$escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME);
\assert(null !== $escapedSheetName);
$sheetName = $this->escaper->unescape($escapedSheetName);
$isSheetActive = $this->isSheetActive($sheetName, $this->currentSheetIndex, $this->activeSheetName);
$sheetStyleName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_STYLE_NAME);
\assert(null !== $sheetStyleName);
$isSheetVisible = $this->isSheetVisible($sheetStyleName);
return new Sheet(
new RowIterator(
$this->options,
new CellValueFormatter($this->options->SHOULD_FORMAT_DATES, new ODS()),
new XMLProcessor($this->xmlReader)
),
return $this->entityFactory->createSheet(
$this->xmlReader,
$this->currentSheetIndex,
$sheetName,
$isSheetActive,
$isSheetVisible
$isSheetVisible,
$this->optionsManager
);
}
@ -160,33 +159,44 @@ final class SheetIterator implements SheetIteratorInterface
* Return the key of the current element.
*
* @see http://php.net/manual/en/iterator.key.php
*
* @return int
*/
public function key(): int
#[\ReturnTypeWillChange]
public function key()
{
return $this->currentSheetIndex + 1;
}
/**
* Cleans up what was created to iterate over the object.
*/
#[\ReturnTypeWillChange]
public function end()
{
$this->xmlReader->close();
}
/**
* Extracts the visibility of the sheets.
*
* @return array<string, bool> Associative array [STYLE_NAME] => [IS_SHEET_VISIBLE]
* @return array Associative array [STYLE_NAME] => [IS_SHEET_VISIBLE]
*/
private function readSheetsVisibility(): array
private function readSheetsVisibility()
{
$sheetsVisibility = [];
$this->xmlReader->readUntilNodeFound(self::XML_NODE_AUTOMATIC_STYLES);
/** @var \DOMElement $automaticStylesNode */
$automaticStylesNode = $this->xmlReader->expand();
\assert($automaticStylesNode instanceof DOMElement);
$tableStyleNodes = $automaticStylesNode->getElementsByTagNameNS(self::XML_STYLE_NAMESPACE, self::XML_NODE_STYLE_TABLE_PROPERTIES);
/** @var \DOMElement $tableStyleNode */
foreach ($tableStyleNodes as $tableStyleNode) {
$isSheetVisible = ('false' !== $tableStyleNode->getAttribute(self::XML_ATTRIBUTE_TABLE_DISPLAY));
$parentStyleNode = $tableStyleNode->parentNode;
\assert($parentStyleNode instanceof DOMElement);
$styleName = $parentStyleNode->getAttribute(self::XML_ATTRIBUTE_STYLE_NAME);
$sheetsVisibility[$styleName] = $isSheetVisible;
@ -204,13 +214,14 @@ final class SheetIterator implements SheetIteratorInterface
*
* @return bool Whether the current sheet was defined as the active one
*/
private function isSheetActive(string $sheetName, int $sheetIndex, ?string $activeSheetName): bool
private function isSheetActive($sheetName, $sheetIndex, $activeSheetName)
{
// The given sheet is active if its name matches the defined active sheet's name
// or if no information about the active sheet was found, it defaults to the first sheet.
return
(null === $activeSheetName && 0 === $sheetIndex)
|| ($activeSheetName === $sheetName);
|| ($activeSheetName === $sheetName)
;
}
/**
@ -220,7 +231,7 @@ final class SheetIterator implements SheetIteratorInterface
*
* @return bool Whether the current sheet is visible
*/
private function isSheetVisible(string $sheetStyleName): bool
private function isSheetVisible($sheetStyleName)
{
return $this->sheetsVisibility[$sheetStyleName] ??
true;

View File

@ -1,22 +1,65 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader;
use OpenSpout\Common\Exception\IOException;
use OpenSpout\Reader\Exception\ReaderException;
use OpenSpout\Common\Helper\GlobalFunctionsHelper;
use OpenSpout\Common\Manager\OptionsManagerInterface;
use OpenSpout\Reader\Common\Creator\InternalEntityFactoryInterface;
use OpenSpout\Reader\Common\Entity\Options;
use OpenSpout\Reader\Exception\ReaderNotOpenedException;
/**
* @template T of SheetIteratorInterface
*
* @implements ReaderInterface<T>
*/
abstract class AbstractReader implements ReaderInterface
abstract class ReaderAbstract implements ReaderInterface
{
/** @var bool Indicates whether the stream is currently open */
private bool $isStreamOpened = false;
protected $isStreamOpened = false;
/** @var InternalEntityFactoryInterface Factory to create entities */
protected $entityFactory;
/** @var \OpenSpout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;
/** @var OptionsManagerInterface Reader options manager */
protected $optionsManager;
public function __construct(
OptionsManagerInterface $optionsManager,
GlobalFunctionsHelper $globalFunctionsHelper,
InternalEntityFactoryInterface $entityFactory
) {
$this->optionsManager = $optionsManager;
$this->globalFunctionsHelper = $globalFunctionsHelper;
$this->entityFactory = $entityFactory;
}
/**
* Sets whether date/time values should be returned as PHP objects or be formatted as strings.
*
* @param bool $shouldFormatDates
*
* @return ReaderAbstract
*/
public function setShouldFormatDates($shouldFormatDates)
{
$this->optionsManager->setOption(Options::SHOULD_FORMAT_DATES, $shouldFormatDates);
return $this;
}
/**
* Sets whether empty rows should be returned or skipped.
*
* @param bool $shouldPreserveEmptyRows
*
* @return ReaderAbstract
*/
public function setShouldPreserveEmptyRows($shouldPreserveEmptyRows)
{
$this->optionsManager->setOption(Options::SHOULD_PRESERVE_EMPTY_ROWS, $shouldPreserveEmptyRows);
return $this;
}
/**
* Prepares the reader to read the given file. It also makes sure
@ -24,9 +67,9 @@ abstract class AbstractReader implements ReaderInterface
*
* @param string $filePath Path of the file to be read
*
* @throws IOException If the file at the given path does not exist, is not readable or is corrupted
* @throws \OpenSpout\Common\Exception\IOException If the file at the given path does not exist, is not readable or is corrupted
*/
public function open(string $filePath): void
public function open($filePath)
{
if ($this->isStreamWrapper($filePath) && (!$this->doesSupportStreamWrapper() || !$this->isSupportedStreamWrapper($filePath))) {
throw new IOException("Could not open {$filePath} for reading! Stream wrapper used is not supported for this type of file.");
@ -34,10 +77,10 @@ abstract class AbstractReader implements ReaderInterface
if (!$this->isPhpStream($filePath)) {
// we skip the checks if the provided file path points to a PHP stream
if (!file_exists($filePath)) {
if (!$this->globalFunctionsHelper->file_exists($filePath)) {
throw new IOException("Could not open {$filePath} for reading! File does not exist.");
}
if (!is_readable($filePath)) {
if (!$this->globalFunctionsHelper->is_readable($filePath)) {
throw new IOException("Could not open {$filePath} for reading! File is not readable.");
}
}
@ -46,66 +89,86 @@ abstract class AbstractReader implements ReaderInterface
$fileRealPath = $this->getFileRealPath($filePath);
$this->openReader($fileRealPath);
$this->isStreamOpened = true;
} catch (ReaderException $exception) {
throw new IOException(
"Could not open {$filePath} for reading!",
0,
$exception
);
} catch (\Exception $exception) {
throw new IOException("Could not open {$filePath} for reading! ({$exception->getMessage()})");
}
}
/**
* Returns an iterator to iterate over sheets.
*
* @throws \OpenSpout\Reader\Exception\ReaderNotOpenedException If called before opening the reader
*
* @return SheetIteratorInterface To iterate over sheets
*/
public function getSheetIterator()
{
if (!$this->isStreamOpened) {
throw new ReaderNotOpenedException('Reader should be opened first.');
}
return $this->getConcreteSheetIterator();
}
/**
* Closes the reader, preventing any additional reading.
*/
final public function close(): void
public function close()
{
if ($this->isStreamOpened) {
$this->closeReader();
$sheetIterator = $this->getConcreteSheetIterator();
if (null !== $sheetIterator) {
$sheetIterator->end();
}
$this->isStreamOpened = false;
}
}
/**
* Returns whether stream wrappers are supported.
*
* @return bool
*/
abstract protected function doesSupportStreamWrapper(): bool;
abstract protected function doesSupportStreamWrapper();
/**
* Opens the file at the given file path to make it ready to be read.
*
* @param string $filePath Path of the file to be read
*/
abstract protected function openReader(string $filePath): void;
abstract protected function openReader($filePath);
/**
* Returns an iterator to iterate over sheets.
*
* @return SheetIteratorInterface To iterate over sheets
*/
abstract protected function getConcreteSheetIterator();
/**
* Closes the reader. To be used after reading the file.
*/
abstract protected function closeReader(): void;
final protected function ensureStreamOpened(): void
{
if (!$this->isStreamOpened) {
throw new ReaderNotOpenedException('Reader should be opened first.');
}
}
abstract protected function closeReader();
/**
* Returns the real path of the given path.
* If the given path is a valid stream wrapper, returns the path unchanged.
*
* @param string $filePath
*
* @return string
*/
private function getFileRealPath(string $filePath): string
protected function getFileRealPath($filePath)
{
if ($this->isSupportedStreamWrapper($filePath)) {
return $filePath;
}
// Need to use realpath to fix "Can't open file" on some Windows setup
$realpath = realpath($filePath);
\assert(false !== $realpath);
return $realpath;
return realpath($filePath);
}
/**
@ -116,10 +179,10 @@ abstract class AbstractReader implements ReaderInterface
*
* @return null|string The stream wrapper scheme or NULL if not a stream wrapper
*/
private function getStreamWrapperScheme(string $filePath): ?string
protected function getStreamWrapperScheme($filePath)
{
$streamScheme = null;
if (1 === preg_match('/^(\w+):\/\//', $filePath, $matches)) {
if (preg_match('/^(\w+):\/\//', $filePath, $matches)) {
$streamScheme = $matches[1];
}
@ -134,7 +197,7 @@ abstract class AbstractReader implements ReaderInterface
*
* @return bool Whether the given path uses a stream wrapper scheme
*/
private function isStreamWrapper(string $filePath): bool
protected function isStreamWrapper($filePath)
{
return null !== $this->getStreamWrapperScheme($filePath);
}
@ -148,11 +211,13 @@ abstract class AbstractReader implements ReaderInterface
*
* @return bool Whether the given path is a supported stream wrapper
*/
private function isSupportedStreamWrapper(string $filePath): bool
protected function isSupportedStreamWrapper($filePath)
{
$streamScheme = $this->getStreamWrapperScheme($filePath);
return null === $streamScheme || \in_array($streamScheme, stream_get_wrappers(), true);
return (null !== $streamScheme) ?
\in_array($streamScheme, $this->globalFunctionsHelper->stream_get_wrappers(), true) :
true;
}
/**
@ -162,7 +227,7 @@ abstract class AbstractReader implements ReaderInterface
*
* @return bool Whether the given path maps to a PHP stream
*/
private function isPhpStream(string $filePath): bool
protected function isPhpStream($filePath)
{
$streamScheme = $this->getStreamWrapperScheme($filePath);

View File

@ -1,13 +1,9 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader;
use OpenSpout\Common\Exception\IOException;
/**
* @template T of SheetIteratorInterface
* Interface ReaderInterface.
*/
interface ReaderInterface
{
@ -17,21 +13,21 @@ interface ReaderInterface
*
* @param string $filePath Path of the file to be read
*
* @throws IOException
* @throws \OpenSpout\Common\Exception\IOException
*/
public function open(string $filePath): void;
public function open($filePath);
/**
* Returns an iterator to iterate over sheets.
*
* @return T
* @throws \OpenSpout\Reader\Exception\ReaderNotOpenedException If called before opening the reader
*
* @throws Exception\ReaderNotOpenedException If called before opening the reader
* @return SheetIteratorInterface To iterate over sheets
*/
public function getSheetIterator(): SheetIteratorInterface;
public function getSheetIterator();
/**
* Closes the reader, preventing any additional reading.
*/
public function close(): void;
public function close();
}

View File

@ -0,0 +1,22 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader;
use OpenSpout\Common\Entity\Row;
/**
* Contract for iterators that walk over rows.
*
* Extends IteratorInterface with an explicit clean-up method and documents
* the current element as a Row (or null).
*/
interface RowIteratorInterface extends IteratorInterface
{
/**
* Cleans up what was created to iterate over the object.
*
* NOTE(review): no return type is declared; implementations presumably return nothing - confirm.
*/
#[\ReturnTypeWillChange]
public function end();
/**
* Returns the element at the iterator's current position.
*
* @return null|Row The current row, or null (presumably when no row is available - confirm against implementations)
*/
#[\ReturnTypeWillChange]
public function current();
}

View File

@ -0,0 +1,34 @@
<?php
namespace OpenSpout\Reader;
/**
* Interface SheetInterface.
*
* Read-side description of a single sheet: gives access to its row iterator
* and exposes its index, name, and active/visible flags.
*/
interface SheetInterface
{
/**
* @return IteratorInterface Iterator to iterate over the sheet's rows
*/
public function getRowIterator();
/**
* @return int Index of the sheet
*/
public function getIndex();
/**
* @return string Name of the sheet
*/
public function getName();
/**
* @return bool Whether the sheet was defined as active
*/
public function isActive();
/**
* @return bool Whether the sheet is visible
*/
public function isVisible();
}

View File

@ -0,0 +1,23 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader;
/**
* Interface SheetIteratorInterface.
*
* Contract for iterators that walk over sheets. Extends IteratorInterface
* with an explicit clean-up method and documents the current element as a
* SheetInterface (or null).
*/
interface SheetIteratorInterface extends IteratorInterface
{
/**
* Cleans up what was created to iterate over the object.
*
* NOTE(review): no return type is declared; implementations presumably return nothing - confirm.
*/
#[\ReturnTypeWillChange]
public function end();
/**
* Returns the element at the iterator's current position.
*
* @return null|SheetInterface The current sheet, or null (presumably when no sheet is available - confirm against implementations)
*/
#[\ReturnTypeWillChange]
public function current();
}

View File

@ -1,24 +1,22 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\Wrapper;
use OpenSpout\Reader\Exception\XMLProcessingException;
/**
* @internal
* Trait XMLInternalErrorsHelper.
*/
trait XMLInternalErrorsHelper
{
/** @var bool Stores whether XML errors were initially stored internally - used to reset */
private bool $initialUseInternalErrorsValue;
protected $initialUseInternalErrorsValue;
/**
* To avoid displaying lots of warning/error messages on screen,
* stores errors internally instead.
*/
private function useXMLInternalErrors(): void
protected function useXMLInternalErrors()
{
libxml_clear_errors();
$this->initialUseInternalErrorsValue = libxml_use_internal_errors(true);
@ -28,9 +26,9 @@ trait XMLInternalErrorsHelper
* Throws an XMLProcessingException if an error occurred.
* It also always resets the "libxml_use_internal_errors" setting back to its initial value.
*
* @throws XMLProcessingException
* @throws \OpenSpout\Reader\Exception\XMLProcessingException
*/
private function resetXMLInternalErrorsSettingAndThrowIfXMLErrorOccured(): void
protected function resetXMLInternalErrorsSettingAndThrowIfXMLErrorOccured()
{
if ($this->hasXMLErrorOccured()) {
$this->resetXMLInternalErrorsSetting();
@ -41,7 +39,7 @@ trait XMLInternalErrorsHelper
$this->resetXMLInternalErrorsSetting();
}
private function resetXMLInternalErrorsSetting(): void
protected function resetXMLInternalErrorsSetting()
{
libxml_use_internal_errors($this->initialUseInternalErrorsValue);
}
@ -51,7 +49,7 @@ trait XMLInternalErrorsHelper
*
* @return bool TRUE if an error occurred, FALSE otherwise
*/
private function hasXMLErrorOccured(): bool
private function hasXMLErrorOccured()
{
return false !== libxml_get_last_error();
}
@ -61,11 +59,11 @@ trait XMLInternalErrorsHelper
*
* @see libxml_get_last_error
*
* @return string Last XML error message or null if no error
* @return null|string Last XML error message or null if no error
*/
private function getLastXMLErrorMessage(): string
private function getLastXMLErrorMessage()
{
$errorMessage = '';
$errorMessage = null;
$error = libxml_get_last_error();
if (false !== $error) {

View File

@ -1,17 +1,13 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\Wrapper;
use OpenSpout\Common\Exception\IOException;
use OpenSpout\Reader\Exception\XMLProcessingException;
use ZipArchive;
/**
* @internal
* Wrapper around the built-in XMLReader.
*
* @see \XMLReader
*/
final class XMLReader extends \XMLReader
class XMLReader extends \XMLReader
{
use XMLInternalErrorsHelper;
@ -25,13 +21,14 @@ final class XMLReader extends \XMLReader
*
* @return bool TRUE on success or FALSE on failure
*/
public function openFileInZip(string $zipFilePath, string $fileInsideZipPath): bool
public function openFileInZip($zipFilePath, $fileInsideZipPath)
{
$wasOpenSuccessful = false;
$realPathURI = $this->getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath);
// We need to check first that the file we are trying to read really exist because:
// - PHP emits a warning when trying to open a file that does not exist.
// - HHVM does not check if file exists within zip file (@link https://github.com/facebook/hhvm/issues/5779)
if ($this->fileExistsWithinZip($realPathURI)) {
$wasOpenSuccessful = $this->open($realPathURI, null, LIBXML_NONET);
}
@ -48,17 +45,12 @@ final class XMLReader extends \XMLReader
*
* @return string The real path URI
*/
public function getRealPathURIForFileInZip(string $zipFilePath, string $fileInsideZipPath): string
public function getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath)
{
// The file path should not start with a '/', otherwise it won't be found
$fileInsideZipPathWithoutLeadingSlash = ltrim($fileInsideZipPath, '/');
$realpath = realpath($zipFilePath);
if (false === $realpath) {
throw new IOException("Could not open {$zipFilePath} for reading! File does not exist.");
}
return self::ZIP_WRAPPER.$realpath.'#'.$fileInsideZipPathWithoutLeadingSlash;
return self::ZIP_WRAPPER.realpath($zipFilePath).'#'.$fileInsideZipPathWithoutLeadingSlash;
}
/**
@ -66,9 +58,12 @@ final class XMLReader extends \XMLReader
*
* @see \XMLReader::read
*
* @throws XMLProcessingException If an error/warning occurred
* @throws \OpenSpout\Reader\Exception\XMLProcessingException If an error/warning occurred
*
* @return bool TRUE on success or FALSE on failure
*/
public function read(): bool
#[\ReturnTypeWillChange]
public function read()
{
$this->useXMLInternalErrors();
@ -84,11 +79,11 @@ final class XMLReader extends \XMLReader
*
* @param string $nodeName Name of the node to find
*
* @return bool TRUE on success or FALSE on failure
* @throws \OpenSpout\Reader\Exception\XMLProcessingException If an error/warning occurred
*
* @throws XMLProcessingException If an error/warning occurred
* @return bool TRUE on success or FALSE on failure
*/
public function readUntilNodeFound(string $nodeName): bool
public function readUntilNodeFound($nodeName)
{
do {
$wasReadSuccessful = $this->read();
@ -105,9 +100,12 @@ final class XMLReader extends \XMLReader
*
* @param null|string $localName The name of the next node to move to
*
* @throws XMLProcessingException If an error/warning occurred
* @throws \OpenSpout\Reader\Exception\XMLProcessingException If an error/warning occurred
*
* @return bool TRUE on success or FALSE on failure
*/
public function next($localName = null): bool
#[\ReturnTypeWillChange]
public function next($localName = null)
{
$this->useXMLInternalErrors();
@ -119,17 +117,21 @@ final class XMLReader extends \XMLReader
}
/**
* @param string $nodeName
*
* @return bool Whether the XML Reader is currently positioned on the starting node with given name
*/
public function isPositionedOnStartingNode(string $nodeName): bool
public function isPositionedOnStartingNode($nodeName)
{
return $this->isPositionedOnNode($nodeName, self::ELEMENT);
}
/**
* @param string $nodeName
*
* @return bool Whether the XML Reader is currently positioned on the ending node with given name
*/
public function isPositionedOnEndingNode(string $nodeName): bool
public function isPositionedOnEndingNode($nodeName)
{
return $this->isPositionedOnNode($nodeName, self::END_ELEMENT);
}
@ -137,7 +139,7 @@ final class XMLReader extends \XMLReader
/**
* @return string The name of the current node, un-prefixed
*/
public function getCurrentNodeName(): string
public function getCurrentNodeName()
{
return $this->localName;
}
@ -149,16 +151,16 @@ final class XMLReader extends \XMLReader
*
* @return bool TRUE if the file exists, FALSE otherwise
*/
private function fileExistsWithinZip(string $zipStreamURI): bool
protected function fileExistsWithinZip($zipStreamURI)
{
$doesFileExists = false;
$pattern = '/zip:\/\/([^#]+)#(.*)/';
if (1 === preg_match($pattern, $zipStreamURI, $matches)) {
if (preg_match($pattern, $zipStreamURI, $matches)) {
$zipFilePath = $matches[1];
$innerFilePath = $matches[2];
$zip = new ZipArchive();
$zip = new \ZipArchive();
if (true === $zip->open($zipFilePath)) {
$doesFileExists = (false !== $zip->locateName($innerFilePath));
$zip->close();
@ -169,9 +171,12 @@ final class XMLReader extends \XMLReader
}
/**
* @param string $nodeName
* @param int $nodeType
*
* @return bool Whether the XML Reader is currently positioned on the node with given name and type
*/
private function isPositionedOnNode(string $nodeName, int $nodeType): bool
private function isPositionedOnNode($nodeName, $nodeType)
{
/**
* In some cases, the node has a prefix (for instance, "<sheet>" can also be "<x:sheet>").
@ -179,7 +184,7 @@ final class XMLReader extends \XMLReader
*
* @see https://github.com/box/spout/issues/233
*/
$hasPrefix = str_contains($nodeName, ':');
$hasPrefix = (false !== strpos($nodeName, ':'));
$currentNodeName = ($hasPrefix) ? $this->name : $this->localName;
return $this->nodeType === $nodeType && $currentNodeName === $nodeName;

View File

@ -0,0 +1,38 @@
<?php
namespace OpenSpout\Reader\XLSX\Creator;
use OpenSpout\Common\Helper\Escaper;
use OpenSpout\Reader\XLSX\Helper\CellValueFormatter;
use OpenSpout\Reader\XLSX\Manager\SharedStringsManager;
use OpenSpout\Reader\XLSX\Manager\StyleManager;
/**
 * Factory that builds the helper objects used by the XLSX reader.
 */
class HelperFactory extends \OpenSpout\Common\Creator\HelperFactory
{
    /**
     * Builds a CellValueFormatter, handing it a strings escaper obtained
     * from createStringsEscaper().
     *
     * @param SharedStringsManager $sharedStringsManager Manages shared strings
     * @param StyleManager         $styleManager         Manages styles
     * @param bool                 $shouldFormatDates    Whether date/time values should be returned as PHP objects or be formatted as strings
     * @param bool                 $shouldUse1904Dates   Whether date/time values should use a calendar starting in 1904 instead of 1900
     *
     * @return CellValueFormatter
     */
    public function createCellValueFormatter($sharedStringsManager, $styleManager, $shouldFormatDates, $shouldUse1904Dates)
    {
        return new CellValueFormatter(
            $sharedStringsManager,
            $styleManager,
            $shouldFormatDates,
            $shouldUse1904Dates,
            $this->createStringsEscaper()
        );
    }

    /**
     * Creates a new XLSX strings escaper.
     *
     * @return Escaper\XLSX
     */
    public function createStringsEscaper()
    {
        // @noinspection PhpUnnecessaryFullyQualifiedNameInspection
        $xlsxEscaper = new Escaper\XLSX();

        return $xlsxEscaper;
    }
}

View File

@ -0,0 +1,163 @@
<?php
namespace OpenSpout\Reader\XLSX\Creator;
use OpenSpout\Common\Entity\Cell;
use OpenSpout\Common\Entity\Row;
use OpenSpout\Reader\Common\Creator\InternalEntityFactoryInterface;
use OpenSpout\Reader\Common\Entity\Options;
use OpenSpout\Reader\Common\XMLProcessor;
use OpenSpout\Reader\Wrapper\XMLReader;
use OpenSpout\Reader\XLSX\Manager\SharedStringsManager;
use OpenSpout\Reader\XLSX\RowIterator;
use OpenSpout\Reader\XLSX\Sheet;
use OpenSpout\Reader\XLSX\SheetIterator;
/**
 * Factory building the entities (sheets, rows, cells) and the low-level
 * reader objects needed while reading an XLSX file.
 */
class InternalEntityFactory implements InternalEntityFactoryInterface
{
    /** @var HelperFactory Provides the cell value formatter */
    private $helperFactory;

    /** @var ManagerFactory Provides the sheet, style and row managers */
    private $managerFactory;

    public function __construct(ManagerFactory $managerFactory, HelperFactory $helperFactory)
    {
        $this->helperFactory = $helperFactory;
        $this->managerFactory = $managerFactory;
    }

    /**
     * Builds a sheet iterator backed by a sheet manager for the given file.
     *
     * @param string                                            $filePath             Path of the file to be read
     * @param \OpenSpout\Common\Manager\OptionsManagerInterface $optionsManager       Reader's options manager
     * @param SharedStringsManager                              $sharedStringsManager Manages shared strings
     *
     * @return SheetIterator
     */
    public function createSheetIterator($filePath, $optionsManager, $sharedStringsManager)
    {
        return new SheetIterator(
            $this->managerFactory->createSheetManager($filePath, $optionsManager, $sharedStringsManager, $this)
        );
    }

    /**
     * Builds a sheet together with the row iterator that reads its data.
     *
     * @param string                                            $filePath             Path of the XLSX file being read
     * @param string                                            $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param int                                               $sheetIndex           Index of the sheet, based on order in the workbook (zero-based)
     * @param string                                            $sheetName            Name of the sheet
     * @param bool                                              $isSheetActive        Whether the sheet was defined as active
     * @param bool                                              $isSheetVisible       Whether the sheet is visible
     * @param \OpenSpout\Common\Manager\OptionsManagerInterface $optionsManager       Reader's options manager
     * @param SharedStringsManager                              $sharedStringsManager Manages shared strings
     *
     * @return Sheet
     */
    public function createSheet(
        $filePath,
        $sheetDataXMLFilePath,
        $sheetIndex,
        $sheetName,
        $isSheetActive,
        $isSheetVisible,
        $optionsManager,
        $sharedStringsManager
    ) {
        return new Sheet(
            $this->createRowIterator($filePath, $sheetDataXMLFilePath, $optionsManager, $sharedStringsManager),
            $sheetIndex,
            $sheetName,
            $isSheetActive,
            $isSheetVisible
        );
    }

    /**
     * Builds a row holding the given cells; no style is attached (null is passed).
     *
     * @param Cell[] $cells
     *
     * @return Row
     */
    public function createRow(array $cells = [])
    {
        $rowStyle = null;

        return new Row($cells, $rowStyle);
    }

    /**
     * Wraps a value in a cell.
     *
     * @param mixed $cellValue
     *
     * @return Cell
     */
    public function createCell($cellValue)
    {
        $cell = new Cell($cellValue);

        return $cell;
    }

    /**
     * @return \ZipArchive A new, unopened archive object
     */
    public function createZipArchive()
    {
        $zipArchive = new \ZipArchive();

        return $zipArchive;
    }

    /**
     * @return XMLReader A new XML reader wrapper
     */
    public function createXMLReader()
    {
        $reader = new XMLReader();

        return $reader;
    }

    /**
     * Builds an XML processor driving the given reader.
     *
     * @param XMLReader $xmlReader
     *
     * @return XMLProcessor
     */
    public function createXMLProcessor($xmlReader)
    {
        $processor = new XMLProcessor($xmlReader);

        return $processor;
    }

    /**
     * Assembles a row iterator with all of its collaborators: XML reader and
     * processor, style manager, row manager and cell value formatter.
     *
     * @param string                                            $filePath             Path of the XLSX file being read
     * @param string                                            $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param \OpenSpout\Common\Manager\OptionsManagerInterface $optionsManager       Reader's options manager
     * @param SharedStringsManager                              $sharedStringsManager Manages shared strings
     *
     * @return RowIterator
     */
    private function createRowIterator($filePath, $sheetDataXMLFilePath, $optionsManager, $sharedStringsManager)
    {
        $reader = $this->createXMLReader();
        $processor = $this->createXMLProcessor($reader);

        $styles = $this->managerFactory->createStyleManager($filePath, $this);
        $rows = $this->managerFactory->createRowManager($this);

        // The date-related options only matter to the formatter, so they are read right where it is built.
        $formatter = $this->helperFactory->createCellValueFormatter(
            $sharedStringsManager,
            $styles,
            $optionsManager->getOption(Options::SHOULD_FORMAT_DATES),
            $optionsManager->getOption(Options::SHOULD_USE_1904_DATES)
        );

        return new RowIterator(
            $filePath,
            $sheetDataXMLFilePath,
            $optionsManager->getOption(Options::SHOULD_PRESERVE_EMPTY_ROWS),
            $reader,
            $processor,
            $formatter,
            $rows,
            $this
        );
    }
}

View File

@ -0,0 +1,109 @@
<?php
namespace OpenSpout\Reader\XLSX\Creator;
use OpenSpout\Reader\Common\Manager\RowManager;
use OpenSpout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyFactory;
use OpenSpout\Reader\XLSX\Manager\SharedStringsManager;
use OpenSpout\Reader\XLSX\Manager\SheetManager;
use OpenSpout\Reader\XLSX\Manager\StyleManager;
use OpenSpout\Reader\XLSX\Manager\WorkbookRelationshipsManager;
/**
 * Factory to create the managers used by the XLSX reader.
 */
class ManagerFactory
{
    /** @var HelperFactory Factory to create helpers */
    private $helperFactory;

    /** @var CachingStrategyFactory Factory to create shared strings caching strategies */
    private $cachingStrategyFactory;

    /** @var null|WorkbookRelationshipsManager Manager built by the last call, reused while the file path stays the same */
    private $cachedWorkbookRelationshipsManager;

    /** @var null|string File path the cached workbook relationships manager was built for */
    private $cachedWorkbookRelationshipsManagerFilePath;

    /**
     * @param HelperFactory          $helperFactory          Factory to create helpers
     * @param CachingStrategyFactory $cachingStrategyFactory Factory to create shared strings caching strategies
     */
    public function __construct(HelperFactory $helperFactory, CachingStrategyFactory $cachingStrategyFactory)
    {
        $this->helperFactory = $helperFactory;
        $this->cachingStrategyFactory = $cachingStrategyFactory;
    }

    /**
     * Builds the shared strings manager for the given file.
     *
     * @param string                $filePath      Path of the XLSX file being read
     * @param string                $tempFolder    Temporary folder where the temporary files to store shared strings will be stored
     * @param InternalEntityFactory $entityFactory Factory to create entities
     *
     * @return SharedStringsManager
     */
    public function createSharedStringsManager($filePath, $tempFolder, $entityFactory)
    {
        $workbookRelationshipsManager = $this->createWorkbookRelationshipsManager($filePath, $entityFactory);

        return new SharedStringsManager(
            $filePath,
            $tempFolder,
            $workbookRelationshipsManager,
            $entityFactory,
            $this->helperFactory,
            $this->cachingStrategyFactory
        );
    }

    /**
     * Builds the sheet manager for the given file, with a strings escaper obtained from the helper factory.
     *
     * @param string                                              $filePath             Path of the XLSX file being read
     * @param \OpenSpout\Common\Manager\OptionsManagerInterface   $optionsManager       Reader's options manager
     * @param \OpenSpout\Reader\XLSX\Manager\SharedStringsManager $sharedStringsManager Manages shared strings
     * @param InternalEntityFactory                               $entityFactory        Factory to create entities
     *
     * @return SheetManager
     */
    public function createSheetManager($filePath, $optionsManager, $sharedStringsManager, $entityFactory)
    {
        $escaper = $this->helperFactory->createStringsEscaper();

        return new SheetManager($filePath, $optionsManager, $sharedStringsManager, $escaper, $entityFactory);
    }

    /**
     * Builds the style manager for the given file.
     *
     * @param string                $filePath      Path of the XLSX file being read
     * @param InternalEntityFactory $entityFactory Factory to create entities
     *
     * @return StyleManager
     */
    public function createStyleManager($filePath, $entityFactory)
    {
        $workbookRelationshipsManager = $this->createWorkbookRelationshipsManager($filePath, $entityFactory);

        return new StyleManager($filePath, $workbookRelationshipsManager, $entityFactory);
    }

    /**
     * Builds a row manager.
     *
     * @param InternalEntityFactory $entityFactory Factory to create entities
     *
     * @return RowManager
     */
    public function createRowManager($entityFactory)
    {
        return new RowManager($entityFactory);
    }

    /**
     * Returns the workbook relationships manager for the given file.
     *
     * The instance is cached so that the shared strings manager and the style
     * manager of one file share it. The cache is tied to the file path: when
     * this factory is asked about a different file, a new manager is built
     * instead of handing back one that was constructed for an earlier file.
     *
     * @param string                $filePath      Path of the XLSX file being read
     * @param InternalEntityFactory $entityFactory Factory to create entities
     *
     * @return WorkbookRelationshipsManager
     */
    private function createWorkbookRelationshipsManager($filePath, $entityFactory)
    {
        $isCacheUsable = isset($this->cachedWorkbookRelationshipsManager)
            && $this->cachedWorkbookRelationshipsManagerFilePath === $filePath;

        if (!$isCacheUsable) {
            $this->cachedWorkbookRelationshipsManager = new WorkbookRelationshipsManager($filePath, $entityFactory);
            $this->cachedWorkbookRelationshipsManagerFilePath = $filePath;
        }

        return $this->cachedWorkbookRelationshipsManager;
    }
}

View File

@ -1,18 +1,16 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\XLSX\Helper;
use OpenSpout\Common\Exception\InvalidArgumentException;
/**
* @internal
* This class provides helper functions when working with cells.
*/
final class CellHelper
class CellHelper
{
// Using ord() is super slow... Using a pre-computed hash table instead.
private const columnLetterToIndexMapping = [
private static $columnLetterToIndexMapping = [
'A' => 0, 'B' => 1, 'C' => 2, 'D' => 3, 'E' => 4, 'F' => 5, 'G' => 6,
'H' => 7, 'I' => 8, 'J' => 9, 'K' => 10, 'L' => 11, 'M' => 12, 'N' => 13,
'O' => 14, 'P' => 15, 'Q' => 16, 'R' => 17, 'S' => 18, 'T' => 19, 'U' => 20,
@ -27,9 +25,11 @@ final class CellHelper
*
* @param string $cellIndex The Excel cell index ('A1', 'BC13', ...)
*
* @throws InvalidArgumentException When the given cell index is invalid
* @throws \OpenSpout\Common\Exception\InvalidArgumentException When the given cell index is invalid
*
* @return int
*/
public static function getColumnIndexFromCellIndex(string $cellIndex): int
public static function getColumnIndexFromCellIndex($cellIndex)
{
if (!self::isValidCellIndex($cellIndex)) {
throw new InvalidArgumentException('Cannot get column index from an invalid cell index.');
@ -48,21 +48,21 @@ final class CellHelper
// Also, not using the pow() function because it's slooooow...
switch ($columnLength) {
case 1:
$columnIndex = self::columnLetterToIndexMapping[$columnLetters];
$columnIndex = (self::$columnLetterToIndexMapping[$columnLetters]);
break;
case 2:
$firstLetterIndex = (self::columnLetterToIndexMapping[$columnLetters[0]] + 1) * 26;
$secondLetterIndex = self::columnLetterToIndexMapping[$columnLetters[1]];
$firstLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[0]] + 1) * 26;
$secondLetterIndex = self::$columnLetterToIndexMapping[$columnLetters[1]];
$columnIndex = $firstLetterIndex + $secondLetterIndex;
break;
case 3:
$firstLetterIndex = (self::columnLetterToIndexMapping[$columnLetters[0]] + 1) * 676;
$secondLetterIndex = (self::columnLetterToIndexMapping[$columnLetters[1]] + 1) * 26;
$thirdLetterIndex = self::columnLetterToIndexMapping[$columnLetters[2]];
$firstLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[0]] + 1) * 676;
$secondLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[1]] + 1) * 26;
$thirdLetterIndex = self::$columnLetterToIndexMapping[$columnLetters[2]];
$columnIndex = $firstLetterIndex + $secondLetterIndex + $thirdLetterIndex;
break;
@ -77,8 +77,10 @@ final class CellHelper
* There can only be 3 letters, as there can only be 16,384 columns, which is equivalent to 'XFD'.
*
* @param string $cellIndex The Excel cell index ('A1', 'BC13', ...)
*
* @return bool
*/
private static function isValidCellIndex(string $cellIndex): bool
protected static function isValidCellIndex($cellIndex)
{
return 1 === preg_match('/^[A-Z]{1,3}\d+$/', $cellIndex);
}

View File

@ -1,27 +1,17 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\XLSX\Helper;
use DateInterval;
use DateTimeImmutable;
use DOMElement;
use Exception;
use OpenSpout\Common\Entity\Cell;
use OpenSpout\Common\Helper\Escaper\XLSX;
use OpenSpout\Reader\Exception\InvalidValueException;
use OpenSpout\Reader\XLSX\Manager\SharedStringsManager;
use OpenSpout\Reader\XLSX\Manager\StyleManagerInterface;
use OpenSpout\Reader\XLSX\Manager\StyleManager;
/**
* This class provides helper functions to format cell values.
*/
final readonly class CellValueFormatter
class CellValueFormatter
{
/**
* Definition of all possible cell types.
*/
/** Definition of all possible cell types */
public const CELL_TYPE_INLINE_STRING = 'inlineStr';
public const CELL_TYPE_STR = 'str';
public const CELL_TYPE_SHARED_STRING = 's';
@ -30,53 +20,41 @@ final readonly class CellValueFormatter
public const CELL_TYPE_DATE = 'd';
public const CELL_TYPE_ERROR = 'e';
/**
* Definition of XML nodes names used to parse data.
*/
/** Definition of XML nodes names used to parse data */
public const XML_NODE_VALUE = 'v';
public const XML_NODE_INLINE_STRING_VALUE = 't';
public const XML_NODE_FORMULA = 'f';
/**
* Definition of XML attributes used to parse data.
*/
/** Definition of XML attributes used to parse data */
public const XML_ATTRIBUTE_TYPE = 't';
public const XML_ATTRIBUTE_STYLE_ID = 's';
/**
* Constants used for date formatting.
*/
/** Constants used for date formatting */
public const NUM_SECONDS_IN_ONE_DAY = 86400;
/** @var SharedStringsManager Manages shared strings */
private SharedStringsManager $sharedStringsManager;
protected $sharedStringsManager;
/** @var StyleManagerInterface Manages styles */
private StyleManagerInterface $styleManager;
/** @var StyleManager Manages styles */
protected $styleManager;
/** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
private bool $shouldFormatDates;
protected $shouldFormatDates;
/** @var bool Whether date/time values should use a calendar starting in 1904 instead of 1900 */
private bool $shouldUse1904Dates;
protected $shouldUse1904Dates;
/** @var XLSX Used to unescape XML data */
private XLSX $escaper;
/** @var \OpenSpout\Common\Helper\Escaper\XLSX Used to unescape XML data */
protected $escaper;
/**
* @param SharedStringsManager $sharedStringsManager Manages shared strings
* @param StyleManagerInterface $styleManager Manages styles
* @param StyleManager $styleManager Manages styles
* @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
* @param bool $shouldUse1904Dates Whether date/time values should use a calendar starting in 1904 instead of 1900
* @param XLSX $escaper Used to unescape XML data
* @param \OpenSpout\Common\Helper\Escaper\XLSX $escaper Used to unescape XML data
*/
public function __construct(
SharedStringsManager $sharedStringsManager,
StyleManagerInterface $styleManager,
bool $shouldFormatDates,
bool $shouldUse1904Dates,
XLSX $escaper
) {
public function __construct($sharedStringsManager, $styleManager, $shouldFormatDates, $shouldUse1904Dates, $escaper)
{
$this->sharedStringsManager = $sharedStringsManager;
$this->styleManager = $styleManager;
$this->shouldFormatDates = $shouldFormatDates;
@ -86,60 +64,72 @@ final readonly class CellValueFormatter
/**
* Returns the (unescaped) correctly marshalled, cell value associated to the given XML node.
*
* @param \DOMElement $node
*
* @throws InvalidValueException If the value is not valid
*
* @return bool|\DateTime|float|int|string The value associated with the cell
*/
public function extractAndFormatNodeValue(DOMElement $node): Cell
public function extractAndFormatNodeValue($node)
{
// Default cell type is "n"
$cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE);
if ('' === $cellType) {
$cellType = self::CELL_TYPE_NUMERIC;
}
$cellType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC;
$cellStyleId = (int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID);
$vNodeValue = $this->getVNodeValue($node);
$fNodeValue = $node->getElementsByTagName(self::XML_NODE_FORMULA)->item(0)?->nodeValue;
if (null !== $fNodeValue) {
$computedValue = $this->formatRawValueForCellType($cellType, $node, $vNodeValue);
return new Cell\FormulaCell(
'='.$fNodeValue,
null,
$computedValue instanceof Cell\ErrorCell ? null : $computedValue
);
if (('' === $vNodeValue) && (self::CELL_TYPE_INLINE_STRING !== $cellType)) {
return $vNodeValue;
}
if ('' === $vNodeValue && self::CELL_TYPE_INLINE_STRING !== $cellType) {
return Cell::fromValue($vNodeValue);
switch ($cellType) {
case self::CELL_TYPE_INLINE_STRING:
return $this->formatInlineStringCellValue($node);
case self::CELL_TYPE_SHARED_STRING:
return $this->formatSharedStringCellValue($vNodeValue);
case self::CELL_TYPE_STR:
return $this->formatStrCellValue($vNodeValue);
case self::CELL_TYPE_BOOLEAN:
return $this->formatBooleanCellValue($vNodeValue);
case self::CELL_TYPE_NUMERIC:
return $this->formatNumericCellValue($vNodeValue, $cellStyleId);
case self::CELL_TYPE_DATE:
return $this->formatDateCellValue($vNodeValue);
default:
throw new InvalidValueException($vNodeValue);
}
$rawValue = $this->formatRawValueForCellType($cellType, $node, $vNodeValue);
if ($rawValue instanceof Cell) {
return $rawValue;
}
return Cell::fromValue($rawValue);
}
/**
* Returns the cell's string value from a node's nested value node.
*
* @param \DOMElement $node
*
* @return string The value associated with the cell
*/
private function getVNodeValue(DOMElement $node): string
protected function getVNodeValue($node)
{
// for cell types having a "v" tag containing the value.
// if not, the returned value should be empty string.
$vNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);
return (string) $vNode?->nodeValue;
return (null !== $vNode) ? $vNode->nodeValue : '';
}
/**
* Returns the cell String value where string is inline.
*
* @param \DOMElement $node
*
* @return string The value associated with the cell
*/
private function formatInlineStringCellValue(DOMElement $node): string
protected function formatInlineStringCellValue($node)
{
// inline strings are formatted this way (they can contain any number of <t> nodes):
// <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t><t>[INLINE_STRING_2]</t></is></c>
@ -147,9 +137,8 @@ final readonly class CellValueFormatter
$cellValue = '';
for ($i = 0; $i < $tNodes->count(); ++$i) {
$nodeValue = $tNodes->item($i)->nodeValue;
\assert(null !== $nodeValue);
$cellValue .= $this->escaper->unescape($nodeValue);
$tNode = $tNodes->item($i);
$cellValue .= $this->escaper->unescape($tNode->nodeValue);
}
return $cellValue;
@ -158,9 +147,11 @@ final readonly class CellValueFormatter
/**
* Returns the cell String value from shared-strings file using nodeValue index.
*
* @param string $nodeValue
*
* @return string The value associated with the cell
*/
private function formatSharedStringCellValue(string $nodeValue): string
protected function formatSharedStringCellValue($nodeValue)
{
// shared strings are formatted this way:
// <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
@ -173,9 +164,11 @@ final readonly class CellValueFormatter
/**
* Returns the cell String value, where string is stored in value node.
*
* @param string $nodeValue
*
* @return string The value associated with the cell
*/
private function formatStrCellValue(string $nodeValue): string
protected function formatStrCellValue($nodeValue)
{
$escapedCellValue = trim($nodeValue);
@ -186,17 +179,18 @@ final readonly class CellValueFormatter
* Returns the cell Numeric value from string of nodeValue.
* The value can also represent a timestamp and a DateTime will be returned.
*
* @param string $nodeValue
* @param int $cellStyleId 0 being the default style
*
* @return \DateTime|float|int The value associated with the cell
*/
private function formatNumericCellValue(float|int|string $nodeValue, int $cellStyleId): DateInterval|DateTimeImmutable|float|int|string
protected function formatNumericCellValue($nodeValue, $cellStyleId)
{
// Numeric values can represent numbers as well as timestamps.
// We need to look at the style of the cell to determine whether it is one or the other.
$formatCode = $this->styleManager->getNumberFormatCode($cellStyleId);
$shouldFormatAsDate = $this->styleManager->shouldFormatNumericValueAsDate($cellStyleId);
if (DateIntervalFormatHelper::isDurationFormat($formatCode)) {
$cellValue = $this->formatExcelDateIntervalValue((float) $nodeValue, $formatCode);
} elseif ($this->styleManager->shouldFormatNumericValueAsDate($cellStyleId)) {
if ($shouldFormatAsDate) {
$cellValue = $this->formatExcelTimestampValue((float) $nodeValue, $cellStyleId);
} else {
$nodeIntValue = (int) $nodeValue;
@ -207,48 +201,48 @@ final readonly class CellValueFormatter
return $cellValue;
}
private function formatExcelDateIntervalValue(float $nodeValue, string $excelFormat): DateInterval|string
{
$dateInterval = DateIntervalFormatHelper::createDateIntervalFromHours($nodeValue);
if ($this->shouldFormatDates) {
return DateIntervalFormatHelper::formatDateInterval($dateInterval, $excelFormat);
}
return $dateInterval;
}
/**
* Returns a cell's PHP Date value, associated to the given timestamp.
* NOTE: The timestamp is a float representing the number of days since the base Excel date:
* Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
* NOTE: The timestamp can also represent a time, if it is a value between 0 and 1.
*
* @see ECMA-376 Part 1 - §18.17.4
*
* @param float $nodeValue
* @param int $cellStyleId 0 being the default style
*
* @throws InvalidValueException If the value is not a valid timestamp
*
* @see ECMA-376 Part 1 - §18.17.4
* @return \DateTime The value associated with the cell
*/
private function formatExcelTimestampValue(float $nodeValue, int $cellStyleId): DateTimeImmutable|string
protected function formatExcelTimestampValue($nodeValue, $cellStyleId)
{
if (!$this->isValidTimestampValue($nodeValue)) {
throw new InvalidValueException((string) $nodeValue);
if ($this->isValidTimestampValue($nodeValue)) {
$cellValue = $this->formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId);
} else {
throw new InvalidValueException($nodeValue);
}
return $this->formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId);
return $cellValue;
}
/**
* Returns whether the given timestamp is supported by SpreadsheetML.
*
* @see ECMA-376 Part 1 - §18.17.4 - this specifies the timestamp boundaries.
*
* @param float $timestampValue
*
* @return bool
*/
private function isValidTimestampValue(float $timestampValue): bool
protected function isValidTimestampValue($timestampValue)
{
// @NOTE: some versions of Excel don't support negative dates (e.g. Excel for Mac 2011)
return
$this->shouldUse1904Dates && $timestampValue >= -695055 && $timestampValue <= 2957003.9999884
|| !$this->shouldUse1904Dates && $timestampValue >= -693593 && $timestampValue <= 2958465.9999884;
|| !$this->shouldUse1904Dates && $timestampValue >= -693593 && $timestampValue <= 2958465.9999884
;
}
/**
@ -256,32 +250,22 @@ final readonly class CellValueFormatter
* Only the time value matters. The date part is set to the base Excel date:
* Dec 30th 1899, 1900 or Jan 1st, 1904, depending on the Workbook setting.
*
* @param float $nodeValue
* @param int $cellStyleId 0 being the default style
*
* @return \DateTime|string The value associated with the cell
*/
private function formatExcelTimestampValueAsDateTimeValue(float $nodeValue, int $cellStyleId): DateTimeImmutable|string
protected function formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId)
{
$baseDate = $this->shouldUse1904Dates ? '1904-01-01' : '1899-12-30';
$daysSinceBaseDate = (int) $nodeValue;
$daysSign = '+';
if ($daysSinceBaseDate < 0) {
$daysSinceBaseDate = abs($daysSinceBaseDate);
$daysSign = '-';
}
$timeRemainder = fmod($nodeValue, 1);
$secondsRemainder = round($timeRemainder * self::NUM_SECONDS_IN_ONE_DAY, 0);
$secondsSign = '+';
if ($secondsRemainder < 0) {
$secondsRemainder = abs($secondsRemainder);
$secondsSign = '-';
}
$dateObj = DateTimeImmutable::createFromFormat('|Y-m-d', $baseDate);
\assert(false !== $dateObj);
$dateObj = $dateObj->modify($daysSign.$daysSinceBaseDate.'days');
\assert(false !== $dateObj);
$dateObj = $dateObj->modify($secondsSign.$secondsRemainder.'seconds');
\assert(false !== $dateObj);
$dateObj = \DateTime::createFromFormat('|Y-m-d', $baseDate);
$dateObj->modify('+'.$daysSinceBaseDate.'days');
$dateObj->modify('+'.$secondsRemainder.'seconds');
if ($this->shouldFormatDates) {
//$styleNumberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId);
@ -301,9 +285,11 @@ final readonly class CellValueFormatter
/**
* Returns the cell Boolean value from a specific node's Value.
*
* @param string $nodeValue
*
* @return bool The value associated with the cell
*/
private function formatBooleanCellValue(string $nodeValue): bool
protected function formatBooleanCellValue($nodeValue)
{
return (bool) $nodeValue;
}
@ -314,35 +300,20 @@ final readonly class CellValueFormatter
* @see ECMA-376 Part 1 - §18.17.4
*
* @param string $nodeValue ISO 8601 Date string
*
* @throws InvalidValueException If the value is not a valid date
*
* @return \DateTime|string The value associated with the cell
*/
private function formatDateCellValue(string $nodeValue): Cell\ErrorCell|DateTimeImmutable|string
protected function formatDateCellValue($nodeValue)
{
// Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
try {
$cellValue = ($this->shouldFormatDates) ? $nodeValue : new DateTimeImmutable($nodeValue);
} catch (Exception) {
return new Cell\ErrorCell($nodeValue, null);
$cellValue = ($this->shouldFormatDates) ? $nodeValue : new \DateTime($nodeValue);
} catch (\Exception $e) {
throw new InvalidValueException($nodeValue);
}
return $cellValue;
}
private function formatRawValueForCellType(
string $cellType,
DOMElement $node,
string $vNodeValue
): bool|Cell\ErrorCell|DateInterval|DateTimeImmutable|float|int|string {
return match ($cellType) {
self::CELL_TYPE_INLINE_STRING => $this->formatInlineStringCellValue($node),
self::CELL_TYPE_SHARED_STRING => $this->formatSharedStringCellValue($vNodeValue),
self::CELL_TYPE_STR => $this->formatStrCellValue($vNodeValue),
self::CELL_TYPE_BOOLEAN => $this->formatBooleanCellValue($vNodeValue),
self::CELL_TYPE_NUMERIC => $this->formatNumericCellValue(
$vNodeValue,
(int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID)
),
self::CELL_TYPE_DATE => $this->formatDateCellValue($vNodeValue),
default => new Cell\ErrorCell($vNodeValue, null),
};
}
}

View File

@ -1,13 +1,11 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\XLSX\Helper;
/**
* @internal
* This class provides helper functions to format Excel dates.
*/
final class DateFormatHelper
class DateFormatHelper
{
public const KEY_GENERAL = 'general';
public const KEY_HOUR_12 = '12h';
@ -16,9 +14,10 @@ final class DateFormatHelper
/**
* This map is used to replace Excel format characters by their PHP equivalent.
* Keys should be ordered from longest to smallest.
* Mapping between Excel format characters and PHP format characters.
*
* @var array Mapping between Excel format characters and PHP format characters
*/
private const excelDateFormatToPHPDateFormatMapping = [
private static $excelDateFormatToPHPDateFormatMapping = [
self::KEY_GENERAL => [
// Time
'am/pm' => 'A', // Uppercase Ante meridiem and Post meridiem
@ -58,13 +57,12 @@ final class DateFormatHelper
*
* @return string PHP date format (as defined here: http://php.net/manual/en/function.date.php)
*/
public static function toPHPDateFormat(string $excelDateFormat): string
public static function toPHPDateFormat($excelDateFormat)
{
// Remove brackets potentially present at the beginning of the format string
// and text portion of the format at the end of it (starting with ";")
// See §18.8.31 of ECMA-376 for more detail.
$dateFormat = preg_replace('/^(?:\[\$[^\]]+?\])?([^;]*).*/', '$1', $excelDateFormat);
\assert(null !== $dateFormat);
// Double quotes are used to escape characters that must not be interpreted.
// For instance, ["Day " dd] should result in "Day 13" and we should not try to interpret "D", "a", "y"
@ -85,13 +83,13 @@ final class DateFormatHelper
$transformedPart = str_replace('\\', '', $transformedPart);
// Apply general transformation first...
$transformedPart = strtr($transformedPart, self::excelDateFormatToPHPDateFormatMapping[self::KEY_GENERAL]);
$transformedPart = strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[self::KEY_GENERAL]);
// ... then apply hour transformation, for 12-hour or 24-hour format
if (self::has12HourFormatMarker($dateFormatPart)) {
$transformedPart = strtr($transformedPart, self::excelDateFormatToPHPDateFormatMapping[self::KEY_HOUR_12]);
$transformedPart = strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[self::KEY_HOUR_12]);
} else {
$transformedPart = strtr($transformedPart, self::excelDateFormatToPHPDateFormatMapping[self::KEY_HOUR_24]);
$transformedPart = strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[self::KEY_HOUR_24]);
}
// overwrite the parts array with the new transformed part
@ -104,10 +102,9 @@ final class DateFormatHelper
// Finally, to have the date format compatible with the DateTime::format() function, we need to escape
// all characters that are inside double quotes (and double quotes must be removed).
// For instance, ["Day " dd] should become [\D\a\y\ dd]
return preg_replace_callback('/"(.+?)"/', static function ($matches): string {
return preg_replace_callback('/"(.+?)"/', function ($matches) {
$stringToEscape = $matches[1];
$letters = preg_split('//u', $stringToEscape, -1, PREG_SPLIT_NO_EMPTY);
\assert(false !== $letters);
return '\\'.implode('\\', $letters);
}, $phpDateFormat);
@ -118,7 +115,7 @@ final class DateFormatHelper
*
* @return bool Whether the given date format has the 12-hour format marker
*/
private static function has12HourFormatMarker(string $excelDateFormat): bool
private static function has12HourFormatMarker($excelDateFormat)
{
return false !== stripos($excelDateFormat, 'am/pm');
}

View File

@ -0,0 +1,36 @@
<?php
namespace OpenSpout\Reader\XLSX\Manager;
use OpenSpout\Common\Manager\OptionsManagerAbstract;
use OpenSpout\Reader\Common\Entity\Options;
/**
 * Options manager for the XLSX reader: declares which options are supported
 * and what their default values are.
 */
class OptionsManager extends OptionsManagerAbstract
{
    /**
     * {@inheritdoc}
     */
    protected function getSupportedOptions()
    {
        $supportedOptions = [];
        $supportedOptions[] = Options::TEMP_FOLDER;
        $supportedOptions[] = Options::SHOULD_FORMAT_DATES;
        $supportedOptions[] = Options::SHOULD_PRESERVE_EMPTY_ROWS;
        $supportedOptions[] = Options::SHOULD_USE_1904_DATES;

        return $supportedOptions;
    }

    /**
     * {@inheritdoc}
     */
    protected function setDefaultOptions()
    {
        // [option name, default value] pairs, applied in declaration order.
        $defaults = [
            [Options::TEMP_FOLDER, sys_get_temp_dir()],
            [Options::SHOULD_FORMAT_DATES, true],
            [Options::SHOULD_PRESERVE_EMPTY_ROWS, false],
            [Options::SHOULD_USE_1904_DATES, false],
        ];

        foreach ($defaults as [$optionName, $defaultValue]) {
            $this->setOption($optionName, $defaultValue);
        }
    }
}

View File

@ -1,13 +1,10 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\XLSX\Manager\SharedStringsCaching;
/**
* @internal
*/
final readonly class CachingStrategyFactory implements CachingStrategyFactoryInterface
use OpenSpout\Reader\XLSX\Creator\HelperFactory;
class CachingStrategyFactory
{
/**
* The memory amount needed to store a string was obtained empirically from this data:
@ -50,29 +47,23 @@ final readonly class CachingStrategyFactory implements CachingStrategyFactoryInt
*/
public const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000;
private MemoryLimit $memoryLimit;
public function __construct(MemoryLimit $memoryLimit)
{
$this->memoryLimit = $memoryLimit;
}
/**
* Returns the best caching strategy, given the number of unique shared strings
* and the amount of memory available.
*
* @param null|int $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
* @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored
* @param HelperFactory $helperFactory Factory to create helpers
*
* @return CachingStrategyInterface The best caching strategy
*/
public function createBestCachingStrategy(?int $sharedStringsUniqueCount, string $tempFolder): CachingStrategyInterface
public function createBestCachingStrategy($sharedStringsUniqueCount, $tempFolder, $helperFactory)
{
if ($this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)) {
return new InMemoryStrategy($sharedStringsUniqueCount);
}
return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE);
return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE, $helperFactory);
}
/**
@ -80,15 +71,17 @@ final readonly class CachingStrategyFactory implements CachingStrategyFactoryInt
* and the amount of memory available.
*
* @param null|int $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
*
* @return bool
*/
private function isInMemoryStrategyUsageSafe(?int $sharedStringsUniqueCount): bool
protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)
{
// if the number of shared strings is unknown, do not use "in memory" strategy
if (null === $sharedStringsUniqueCount) {
return false;
}
$memoryAvailable = $this->memoryLimit->getMemoryLimitInKB();
$memoryAvailable = $this->getMemoryLimitInKB();
if (-1 === (int) $memoryAvailable) {
// if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe
@ -100,4 +93,49 @@ final readonly class CachingStrategyFactory implements CachingStrategyFactoryInt
return $isInMemoryStrategyUsageSafe;
}
/**
 * Returns the PHP "memory_limit" in Kilobytes.
 *
 * The raw value comes from getMemoryLimitFromIni() and is trimmed and
 * lower-cased before being parsed. A number followed by one of the unit
 * letters b, k, m, g or t (optionally followed by "b") is converted to KB.
 * A number without any unit letter is interpreted as a number of bytes.
 *
 * @return float|int The limit in KB, or -1 when the limit is "-1" (no limit) or cannot be parsed
 */
protected function getMemoryLimitInKB()
{
    $memoryLimitFormatted = $this->getMemoryLimitFromIni();
    $memoryLimitFormatted = strtolower(trim($memoryLimitFormatted));

    // No memory limit
    if ('-1' === $memoryLimitFormatted) {
        return -1;
    }

    // The unit letter is optional: a bare integer is a valid memory_limit and expresses bytes.
    if (preg_match('/(\d+)([bkmgt]?)b?/', $memoryLimitFormatted, $matches)) {
        $amount = (int) ($matches[1]);
        $unit = $matches[2];

        switch ($unit) {
            case '':
            case 'b': return $amount / 1024;

            case 'k': return $amount;

            case 'm': return $amount * 1024;

            case 'g': return $amount * 1024 * 1024;

            case 't': return $amount * 1024 * 1024 * 1024;
        }
    }

    return -1;
}
/**
 * Reads the raw "memory_limit" setting from the PHP configuration.
 *
 * @return string Value reported by ini_get() for "memory_limit"
 */
protected function getMemoryLimitFromIni()
{
    $rawMemoryLimit = ini_get('memory_limit');

    return $rawMemoryLimit;
}
}

View File

@ -1,13 +1,9 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\XLSX\Manager\SharedStringsCaching;
use OpenSpout\Reader\Exception\SharedStringNotFoundException;
/**
* @internal
* Interface CachingStrategyInterface.
*/
interface CachingStrategyInterface
{
@ -17,27 +13,27 @@ interface CachingStrategyInterface
* @param string $sharedString The string to be added to the cache
* @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
*/
public function addStringForIndex(string $sharedString, int $sharedStringIndex): void;
public function addStringForIndex($sharedString, $sharedStringIndex);
/**
* Closes the cache after the last shared string was added.
* This prevents any additional string from being added to the cache.
*/
public function closeCache(): void;
public function closeCache();
/**
* Returns the string located at the given index from the cache.
*
* @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
*
* @return string The shared string at the given index
* @throws \OpenSpout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index
*
* @throws SharedStringNotFoundException If no shared string found for the given index
* @return string The shared string at the given index
*/
public function getStringAtIndex(int $sharedStringIndex): string;
public function getStringAtIndex($sharedStringIndex);
/**
* Destroys the cache, freeing memory and removing any created artifacts.
*/
public function clearCache(): void;
public function clearCache();
}

View File

@ -1,69 +1,67 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\XLSX\Manager\SharedStringsCaching;
use OpenSpout\Common\Helper\FileSystemHelper;
use OpenSpout\Reader\Exception\SharedStringNotFoundException;
use OpenSpout\Reader\XLSX\Creator\HelperFactory;
/**
* This class implements the file-based caching strategy for shared strings.
* Shared strings are stored in small files (with a max number of strings per file).
* This strategy is slower than an in-memory strategy but is used to avoid out of memory crashes.
*
* @internal
*/
final class FileBasedStrategy implements CachingStrategyInterface
class FileBasedStrategy implements CachingStrategyInterface
{
/**
* Value to use to escape the line feed character ("\n").
*/
/** Value to use to escape the line feed character ("\n") */
public const ESCAPED_LINE_FEED_CHARACTER = '_x000A_';
/** @var FileSystemHelper Helper to perform file system operations */
private readonly FileSystemHelper $fileSystemHelper;
/** @var \OpenSpout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;
/** @var \OpenSpout\Common\Helper\FileSystemHelper Helper to perform file system operations */
protected $fileSystemHelper;
/** @var string Temporary folder where the temporary files will be created */
private readonly string $tempFolder;
protected $tempFolder;
/**
* @var int Maximum number of strings that can be stored in one temp file
*
* @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE
*/
private readonly int $maxNumStringsPerTempFile;
protected $maxNumStringsPerTempFile;
/** @var null|resource Pointer to the last temp file a shared string was written to */
private $tempFilePointer;
protected $tempFilePointer;
/**
* @var string Path of the temporary file whose contents is currently stored in memory
*
* @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE
*/
private string $readMemoryTempFilePath = '';
/** @var string Path of the temporary file whose contents is currently being written to */
private string $writeMemoryTempFilePath = '';
protected $inMemoryTempFilePath;
/**
* @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE
* @var array Contents of the temporary file that was last read
*
* @var string[] Contents of the temporary file that was last read
* @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE
*/
private array $inMemoryTempFileContents;
protected $inMemoryTempFileContents;
/**
* @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored
* @param int $maxNumStringsPerTempFile Maximum number of strings that can be stored in one temp file
* @param HelperFactory $helperFactory Factory to create helpers
*/
public function __construct(string $tempFolder, int $maxNumStringsPerTempFile)
public function __construct($tempFolder, $maxNumStringsPerTempFile, $helperFactory)
{
$this->fileSystemHelper = new FileSystemHelper($tempFolder);
$this->fileSystemHelper = $helperFactory->createFileSystemHelper($tempFolder);
$this->tempFolder = $this->fileSystemHelper->createFolder($tempFolder, uniqid('sharedstrings'));
$this->maxNumStringsPerTempFile = $maxNumStringsPerTempFile;
$this->globalFunctionsHelper = $helperFactory->createGlobalFunctionsHelper();
$this->tempFilePointer = null;
}
/**
@ -72,37 +70,33 @@ final class FileBasedStrategy implements CachingStrategyInterface
* @param string $sharedString The string to be added to the cache
* @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
*/
public function addStringForIndex(string $sharedString, int $sharedStringIndex): void
public function addStringForIndex($sharedString, $sharedStringIndex)
{
$tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex);
if ($this->writeMemoryTempFilePath !== $tempFilePath) {
if (null !== $this->tempFilePointer) {
fclose($this->tempFilePointer);
if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) {
if ($this->tempFilePointer) {
$this->globalFunctionsHelper->fclose($this->tempFilePointer);
}
$resource = fopen($tempFilePath, 'w');
\assert(false !== $resource);
$this->tempFilePointer = $resource;
$this->writeMemoryTempFilePath = $tempFilePath;
$this->tempFilePointer = $this->globalFunctionsHelper->fopen($tempFilePath, 'w');
}
// The shared string retrieval logic expects each cell data to be on one line only
// Encoding the line feed character allows to preserve this assumption
$lineFeedEncodedSharedString = $this->escapeLineFeed($sharedString);
fwrite($this->tempFilePointer, $lineFeedEncodedSharedString.PHP_EOL);
$this->globalFunctionsHelper->fwrite($this->tempFilePointer, $lineFeedEncodedSharedString.PHP_EOL);
}
/**
* Closes the cache after the last shared string was added.
* This prevents any additional string from being added to the cache.
*/
public function closeCache(): void
public function closeCache()
{
// close pointer to the last temp file that was written
if (null !== $this->tempFilePointer) {
$this->writeMemoryTempFilePath = '';
fclose($this->tempFilePointer);
if ($this->tempFilePointer) {
$this->globalFunctionsHelper->fclose($this->tempFilePointer);
}
}
@ -111,22 +105,22 @@ final class FileBasedStrategy implements CachingStrategyInterface
*
* @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
*
* @return string The shared string at the given index
* @throws \OpenSpout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index
*
* @throws SharedStringNotFoundException If no shared string found for the given index
* @return string The shared string at the given index
*/
public function getStringAtIndex(int $sharedStringIndex): string
public function getStringAtIndex($sharedStringIndex)
{
$tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex);
$indexInFile = $sharedStringIndex % $this->maxNumStringsPerTempFile;
if ($this->readMemoryTempFilePath !== $tempFilePath) {
$contents = @file_get_contents($tempFilePath);
if (false === $contents) {
throw new SharedStringNotFoundException("Shared string temp file could not be read: {$tempFilePath} ; for index: {$sharedStringIndex}");
if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) {
throw new SharedStringNotFoundException("Shared string temp file not found: {$tempFilePath} ; for index: {$sharedStringIndex}");
}
$this->inMemoryTempFileContents = explode(PHP_EOL, $contents);
$this->readMemoryTempFilePath = $tempFilePath;
if ($this->inMemoryTempFilePath !== $tempFilePath) {
$this->inMemoryTempFileContents = explode(PHP_EOL, $this->globalFunctionsHelper->file_get_contents($tempFilePath));
$this->inMemoryTempFilePath = $tempFilePath;
}
$sharedString = null;
@ -147,10 +141,12 @@ final class FileBasedStrategy implements CachingStrategyInterface
/**
* Destroys the cache, freeing memory and removing any created artifacts.
*/
public function clearCache(): void
public function clearCache()
{
if ($this->tempFolder) {
$this->fileSystemHelper->deleteFolderRecursively($this->tempFolder);
}
}
/**
* Returns the path for the temp file that should contain the string for the given index.
@ -159,25 +155,33 @@ final class FileBasedStrategy implements CachingStrategyInterface
*
* @return string The temp file path for the given index
*/
private function getSharedStringTempFilePath(int $sharedStringIndex): string
protected function getSharedStringTempFilePath($sharedStringIndex)
{
$numTempFile = (int) ($sharedStringIndex / $this->maxNumStringsPerTempFile);
return $this->tempFolder.\DIRECTORY_SEPARATOR.'sharedstrings'.$numTempFile;
return $this->tempFolder.'/sharedstrings'.$numTempFile;
}
/**
* Escapes the line feed characters (\n).
*
* @param string $unescapedString
*
* @return string
*/
private function escapeLineFeed(string $unescapedString): string
private function escapeLineFeed($unescapedString)
{
return str_replace("\n", self::ESCAPED_LINE_FEED_CHARACTER, $unescapedString);
}
/**
* Unescapes the line feed characters (\n).
*
* @param string $escapedString
*
* @return string
*/
private function unescapeLineFeed(string $escapedString): string
private function unescapeLineFeed($escapedString)
{
return str_replace(self::ESCAPED_LINE_FEED_CHARACTER, "\n", $escapedString);
}

View File

@ -1,33 +1,28 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\XLSX\Manager\SharedStringsCaching;
use OpenSpout\Reader\Exception\SharedStringNotFoundException;
use RuntimeException;
use SplFixedArray;
/**
* This class implements the in-memory caching strategy for shared strings.
* This strategy is used when the number of unique strings is low, compared to the memory available.
*
* @internal
*/
final class InMemoryStrategy implements CachingStrategyInterface
class InMemoryStrategy implements CachingStrategyInterface
{
/** @var SplFixedArray<string> Array used to cache the shared strings */
private SplFixedArray $inMemoryCache;
/** @var \SplFixedArray Array used to cache the shared strings */
protected $inMemoryCache;
/** @var bool Whether the cache has been closed */
private bool $isCacheClosed = false;
protected $isCacheClosed;
/**
* @param int $sharedStringsUniqueCount Number of unique shared strings
*/
public function __construct(int $sharedStringsUniqueCount)
public function __construct($sharedStringsUniqueCount)
{
$this->inMemoryCache = new SplFixedArray($sharedStringsUniqueCount);
$this->inMemoryCache = new \SplFixedArray($sharedStringsUniqueCount);
$this->isCacheClosed = false;
}
/**
@ -36,7 +31,7 @@ final class InMemoryStrategy implements CachingStrategyInterface
* @param string $sharedString The string to be added to the cache
* @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
*/
public function addStringForIndex(string $sharedString, int $sharedStringIndex): void
public function addStringForIndex($sharedString, $sharedStringIndex)
{
if (!$this->isCacheClosed) {
$this->inMemoryCache->offsetSet($sharedStringIndex, $sharedString);
@ -47,7 +42,7 @@ final class InMemoryStrategy implements CachingStrategyInterface
* Closes the cache after the last shared string was added.
* This prevents any additional string from being added to the cache.
*/
public function closeCache(): void
public function closeCache()
{
$this->isCacheClosed = true;
}
@ -57,15 +52,15 @@ final class InMemoryStrategy implements CachingStrategyInterface
*
* @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
*
* @return string The shared string at the given index
* @throws \OpenSpout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index
*
* @throws SharedStringNotFoundException If no shared string found for the given index
* @return string The shared string at the given index
*/
public function getStringAtIndex(int $sharedStringIndex): string
public function getStringAtIndex($sharedStringIndex)
{
try {
return $this->inMemoryCache->offsetGet($sharedStringIndex);
} catch (RuntimeException) {
} catch (\RuntimeException $e) {
throw new SharedStringNotFoundException("Shared string not found for index: {$sharedStringIndex}");
}
}
@ -73,9 +68,9 @@ final class InMemoryStrategy implements CachingStrategyInterface
/**
* Destroys the cache, freeing memory and removing any created artifacts.
*/
public function clearCache(): void
public function clearCache()
{
$this->inMemoryCache = new SplFixedArray(0);
$this->inMemoryCache = new \SplFixedArray(0);
$this->isCacheClosed = false;
}
}

View File

@ -1,69 +1,83 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\XLSX\Manager;
use DOMElement;
use OpenSpout\Common\Exception\IOException;
use OpenSpout\Reader\Exception\SharedStringNotFoundException;
use OpenSpout\Reader\Exception\XMLProcessingException;
use OpenSpout\Reader\Wrapper\XMLReader;
use OpenSpout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyFactoryInterface;
use OpenSpout\Reader\XLSX\Creator\HelperFactory;
use OpenSpout\Reader\XLSX\Creator\InternalEntityFactory;
use OpenSpout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyFactory;
use OpenSpout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyInterface;
use OpenSpout\Reader\XLSX\Options;
/**
* @internal
* This class manages the shared strings defined in the associated XML file.
*/
final class SharedStringsManager
class SharedStringsManager
{
/**
* Definition of XML nodes names used to parse data.
*/
/** Definition of XML nodes names used to parse data */
public const XML_NODE_SST = 'sst';
public const XML_NODE_SI = 'si';
public const XML_NODE_R = 'r';
public const XML_NODE_T = 't';
/**
* Definition of XML attributes used to parse data.
*/
/** Definition of XML attributes used to parse data */
public const XML_ATTRIBUTE_COUNT = 'count';
public const XML_ATTRIBUTE_UNIQUE_COUNT = 'uniqueCount';
public const XML_ATTRIBUTE_XML_SPACE = 'xml:space';
public const XML_ATTRIBUTE_VALUE_PRESERVE = 'preserve';
/** @var string Path of the XLSX file being read */
private readonly string $filePath;
protected $filePath;
private readonly Options $options;
/** @var string Temporary folder where the temporary files to store shared strings will be stored */
protected $tempFolder;
/** @var WorkbookRelationshipsManager Helps retrieving workbook relationships */
private readonly WorkbookRelationshipsManager $workbookRelationshipsManager;
protected $workbookRelationshipsManager;
/** @var CachingStrategyFactoryInterface Factory to create shared strings caching strategies */
private readonly CachingStrategyFactoryInterface $cachingStrategyFactory;
/** @var InternalEntityFactory Factory to create entities */
protected $entityFactory;
/** @var HelperFactory Factory to create helpers */
protected $helperFactory;
/** @var CachingStrategyFactory Factory to create shared strings caching strategies */
protected $cachingStrategyFactory;
/** @var CachingStrategyInterface The best caching strategy for storing shared strings */
private CachingStrategyInterface $cachingStrategy;
protected $cachingStrategy;
/**
* @param string $filePath Path of the XLSX file being read
* @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored
* @param WorkbookRelationshipsManager $workbookRelationshipsManager Helps retrieving workbook relationships
* @param InternalEntityFactory $entityFactory Factory to create entities
* @param HelperFactory $helperFactory Factory to create helpers
* @param CachingStrategyFactory $cachingStrategyFactory Factory to create shared strings caching strategies
*/
public function __construct(
string $filePath,
Options $options,
WorkbookRelationshipsManager $workbookRelationshipsManager,
CachingStrategyFactoryInterface $cachingStrategyFactory
$filePath,
$tempFolder,
$workbookRelationshipsManager,
$entityFactory,
$helperFactory,
$cachingStrategyFactory
) {
$this->filePath = $filePath;
$this->options = $options;
$this->tempFolder = $tempFolder;
$this->workbookRelationshipsManager = $workbookRelationshipsManager;
$this->entityFactory = $entityFactory;
$this->helperFactory = $helperFactory;
$this->cachingStrategyFactory = $cachingStrategyFactory;
}
/**
* Returns whether the XLSX file contains a shared strings XML file.
*
* @return bool
*/
public function hasSharedStrings(): bool
public function hasSharedStrings()
{
return $this->workbookRelationshipsManager->hasSharedStringsXMLFile();
}
@ -78,12 +92,12 @@ final class SharedStringsManager
* The XML file can be really big with sheets containing a lot of data. That is why
* we need to use a XML reader that provides streaming like the XMLReader library.
*
* @throws IOException If shared strings XML file can't be read
* @throws \OpenSpout\Common\Exception\IOException If shared strings XML file can't be read
*/
public function extractSharedStrings(): void
public function extractSharedStrings()
{
$sharedStringsXMLFilePath = $this->workbookRelationshipsManager->getSharedStringsXMLFilePath();
$xmlReader = new XMLReader();
$xmlReader = $this->entityFactory->createXMLReader();
$sharedStringIndex = 0;
if (false === $xmlReader->openFileInZip($this->filePath, $sharedStringsXMLFilePath)) {
@ -117,11 +131,11 @@ final class SharedStringsManager
*
* @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
*
* @return string The shared string at the given index
* @throws \OpenSpout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index
*
* @throws SharedStringNotFoundException If no shared string found for the given index
* @return string The shared string at the given index
*/
public function getStringAtIndex(int $sharedStringIndex): string
public function getStringAtIndex($sharedStringIndex)
{
return $this->cachingStrategy->getStringAtIndex($sharedStringIndex);
}
@ -129,9 +143,9 @@ final class SharedStringsManager
/**
* Destroys the cache, freeing memory and removing any created artifacts.
*/
public function cleanup(): void
public function cleanup()
{
if (isset($this->cachingStrategy)) {
if (null !== $this->cachingStrategy) {
$this->cachingStrategy->clearCache();
}
}
@ -139,13 +153,13 @@ final class SharedStringsManager
/**
* Returns the shared strings unique count, as specified in <sst> tag.
*
* @param XMLReader $xmlReader XMLReader instance
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader instance
*
* @throws \OpenSpout\Common\Exception\IOException If sharedStrings.xml is invalid and can't be read
*
* @return null|int Number of unique shared strings in the sharedStrings.xml file
*
* @throws IOException If sharedStrings.xml is invalid and can't be read
*/
private function getSharedStringsUniqueCount(XMLReader $xmlReader): ?int
protected function getSharedStringsUniqueCount($xmlReader)
{
$xmlReader->next(self::XML_NODE_SST);
@ -169,38 +183,37 @@ final class SharedStringsManager
* Returns the best shared strings caching strategy.
*
* @param null|int $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
*
* @return CachingStrategyInterface
*/
private function getBestSharedStringsCachingStrategy(?int $sharedStringsUniqueCount): CachingStrategyInterface
protected function getBestSharedStringsCachingStrategy($sharedStringsUniqueCount)
{
return $this->cachingStrategyFactory
->createBestCachingStrategy($sharedStringsUniqueCount, $this->options->getTempFolder())
->createBestCachingStrategy($sharedStringsUniqueCount, $this->tempFolder, $this->helperFactory)
;
}
/**
* Processes the shared strings item XML node which the given XML reader is positioned on.
*
* @param XMLReader $xmlReader XML Reader positioned on a "<si>" node
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on a "<si>" node
* @param int $sharedStringIndex Index of the processed shared strings item
*/
private function processSharedStringsItem(XMLReader $xmlReader, int $sharedStringIndex): void
protected function processSharedStringsItem($xmlReader, $sharedStringIndex)
{
$sharedStringValue = '';
// NOTE: expand() will automatically decode all XML entities of the child nodes
/** @var \DOMElement $siNode */
$siNode = $xmlReader->expand();
\assert($siNode instanceof DOMElement);
$textNodes = $siNode->getElementsByTagName(self::XML_NODE_T);
foreach ($textNodes as $textNode) {
if ($this->shouldExtractTextNodeValue($textNode)) {
$textNodeValue = $textNode->nodeValue;
\assert(null !== $textNodeValue);
$shouldPreserveWhitespace = $this->shouldPreserveWhitespace($textNode);
$sharedStringValue .= $shouldPreserveWhitespace
? $textNodeValue
: trim($textNodeValue);
$sharedStringValue .= ($shouldPreserveWhitespace) ? $textNodeValue : trim($textNodeValue);
}
}
@ -212,15 +225,13 @@ final class SharedStringsManager
* Some text nodes are part of a node describing the pronunciation for instance.
* We'll only consider the nodes whose parents are "<si>" or "<r>".
*
* @param DOMElement $textNode Text node to check
* @param \DOMElement $textNode Text node to check
*
* @return bool Whether the given text node's value must be extracted
*/
private function shouldExtractTextNodeValue(DOMElement $textNode): bool
protected function shouldExtractTextNodeValue($textNode)
{
$parentNode = $textNode->parentNode;
\assert(null !== $parentNode);
$parentTagName = $parentNode->localName;
$parentTagName = $textNode->parentNode->localName;
return self::XML_NODE_SI === $parentTagName || self::XML_NODE_R === $parentTagName;
}
@ -228,11 +239,11 @@ final class SharedStringsManager
/**
* If the text node has the attribute 'xml:space="preserve"', then preserve whitespace.
*
* @param DOMElement $textNode The text node element (<t>) whose whitespace may be preserved
* @param \DOMElement $textNode The text node element (<t>) whose whitespace may be preserved
*
* @return bool Whether whitespace should be preserved
*/
private function shouldPreserveWhitespace(DOMElement $textNode): bool
protected function shouldPreserveWhitespace($textNode)
{
$spaceValue = $textNode->getAttribute(self::XML_ATTRIBUTE_XML_SPACE);

View File

@ -1,43 +1,29 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\XLSX\Manager;
use OpenSpout\Common\Helper\Escaper\XLSX;
use OpenSpout\Reader\Common\Manager\RowManager;
use OpenSpout\Reader\Common\Entity\Options;
use OpenSpout\Reader\Common\XMLProcessor;
use OpenSpout\Reader\Wrapper\XMLReader;
use OpenSpout\Reader\XLSX\Helper\CellValueFormatter;
use OpenSpout\Reader\XLSX\Options;
use OpenSpout\Reader\XLSX\RowIterator;
use OpenSpout\Reader\XLSX\Creator\InternalEntityFactory;
use OpenSpout\Reader\XLSX\Sheet;
use OpenSpout\Reader\XLSX\SheetHeaderReader;
use OpenSpout\Reader\XLSX\SheetMergeCellsReader;
/**
* @internal
* This class manages XLSX sheets.
*/
final class SheetManager
class SheetManager
{
/**
* Paths of XML files relative to the XLSX file root.
*/
/** Paths of XML files relative to the XLSX file root */
public const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels';
public const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml';
/**
* Definition of XML node names used to parse data.
*/
/** Definition of XML node names used to parse data */
public const XML_NODE_WORKBOOK_PROPERTIES = 'workbookPr';
public const XML_NODE_WORKBOOK_VIEW = 'workbookView';
public const XML_NODE_SHEET = 'sheet';
public const XML_NODE_SHEETS = 'sheets';
public const XML_NODE_RELATIONSHIP = 'Relationship';
/**
* Definition of XML attributes used to parse data.
*/
/** Definition of XML attributes used to parse data */
public const XML_ATTRIBUTE_DATE_1904 = 'date1904';
public const XML_ATTRIBUTE_ACTIVE_TAB = 'activeTab';
public const XML_ATTRIBUTE_R_ID = 'r:id';
@ -46,41 +32,51 @@ final class SheetManager
public const XML_ATTRIBUTE_ID = 'Id';
public const XML_ATTRIBUTE_TARGET = 'Target';
/**
* State value to represent a hidden sheet.
*/
/** State value to represent a hidden sheet */
public const SHEET_STATE_HIDDEN = 'hidden';
/** @var string Path of the XLSX file being read */
private readonly string $filePath;
protected $filePath;
private readonly Options $options;
/** @var \OpenSpout\Common\Manager\OptionsManagerInterface Reader's options manager */
protected $optionsManager;
/** @var SharedStringsManager Manages shared strings */
private readonly SharedStringsManager $sharedStringsManager;
/** @var \OpenSpout\Reader\XLSX\Manager\SharedStringsManager Manages shared strings */
protected $sharedStringsManager;
/** @var XLSX Used to unescape XML data */
private readonly XLSX $escaper;
/** @var \OpenSpout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
protected $globalFunctionsHelper;
/** @var Sheet[] List of sheets */
private array $sheets;
/** @var InternalEntityFactory Factory to create entities */
protected $entityFactory;
/** @var \OpenSpout\Common\Helper\Escaper\XLSX Used to unescape XML data */
protected $escaper;
/** @var array List of sheets */
protected $sheets;
/** @var int Index of the sheet currently read */
private int $currentSheetIndex;
protected $currentSheetIndex;
/** @var int Index of the active sheet (0 by default) */
private int $activeSheetIndex;
protected $activeSheetIndex;
public function __construct(
string $filePath,
Options $options,
SharedStringsManager $sharedStringsManager,
XLSX $escaper
) {
/**
* @param string $filePath Path of the XLSX file being read
* @param \OpenSpout\Common\Manager\OptionsManagerInterface $optionsManager Reader's options manager
* @param \OpenSpout\Reader\XLSX\Manager\SharedStringsManager $sharedStringsManager Manages shared strings
* @param \OpenSpout\Common\Helper\Escaper\XLSX $escaper Used to unescape XML data
* @param InternalEntityFactory $entityFactory Factory to create entities
* @param mixed $sharedStringsManager
*/
public function __construct($filePath, $optionsManager, $sharedStringsManager, $escaper, $entityFactory)
{
$this->filePath = $filePath;
$this->options = $options;
$this->optionsManager = $optionsManager;
$this->sharedStringsManager = $sharedStringsManager;
$this->escaper = $escaper;
$this->entityFactory = $entityFactory;
}
/**
@ -89,14 +85,14 @@ final class SheetManager
*
* @return Sheet[] Sheets within the XLSX file
*/
public function getSheets(): array
public function getSheets()
{
$this->sheets = [];
$this->currentSheetIndex = 0;
$this->activeSheetIndex = 0; // By default, the first sheet is active
$xmlReader = new XMLReader();
$xmlProcessor = new XMLProcessor($xmlReader);
$xmlReader = $this->entityFactory->createXMLReader();
$xmlProcessor = $this->entityFactory->createXMLProcessor($xmlReader);
$xmlProcessor->registerCallback(self::XML_NODE_WORKBOOK_PROPERTIES, XMLProcessor::NODE_TYPE_START, [$this, 'processWorkbookPropertiesStartingNode']);
$xmlProcessor->registerCallback(self::XML_NODE_WORKBOOK_VIEW, XMLProcessor::NODE_TYPE_START, [$this, 'processWorkbookViewStartingNode']);
@ -112,26 +108,26 @@ final class SheetManager
}
/**
* @param XMLReader $xmlReader XMLReader object, positioned on a "<workbookPr>" starting node
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<workbookPr>" starting node
*
* @return int A return code that indicates what action should the processor take next
*/
private function processWorkbookPropertiesStartingNode(XMLReader $xmlReader): int
protected function processWorkbookPropertiesStartingNode($xmlReader)
{
// Using "filter_var($x, FILTER_VALIDATE_BOOLEAN)" here because the value of the "date1904" attribute
// may be the string "false", that is not mapped to the boolean "false" by default...
$shouldUse1904Dates = filter_var($xmlReader->getAttribute(self::XML_ATTRIBUTE_DATE_1904), FILTER_VALIDATE_BOOLEAN);
$this->options->SHOULD_USE_1904_DATES = $shouldUse1904Dates;
$this->optionsManager->setOption(Options::SHOULD_USE_1904_DATES, $shouldUse1904Dates);
return XMLProcessor::PROCESSING_CONTINUE;
}
/**
* @param XMLReader $xmlReader XMLReader object, positioned on a "<workbookView>" starting node
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<workbookView>" starting node
*
* @return int A return code that indicates what action should the processor take next
*/
private function processWorkbookViewStartingNode(XMLReader $xmlReader): int
protected function processWorkbookViewStartingNode($xmlReader)
{
// The "workbookView" node is located before "sheet" nodes, ensuring that
// the active sheet is known before parsing sheets data.
@ -141,11 +137,11 @@ final class SheetManager
}
/**
* @param XMLReader $xmlReader XMLReader object, positioned on a "<sheet>" starting node
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<sheet>" starting node
*
* @return int A return code that indicates what action should the processor take next
*/
private function processSheetStartingNode(XMLReader $xmlReader): int
protected function processSheetStartingNode($xmlReader)
{
$isSheetActive = ($this->currentSheetIndex === $this->activeSheetIndex);
$this->sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $this->currentSheetIndex, $isSheetActive);
@ -157,7 +153,7 @@ final class SheetManager
/**
* @return int A return code that indicates what action should the processor take next
*/
private function processSheetsEndingNode(): int
protected function processSheetsEndingNode()
{
return XMLProcessor::PROCESSING_STOP;
}
@ -167,44 +163,33 @@ final class SheetManager
* We can find the XML file path describing the sheet inside "workbook.xml.rels", by mapping with the sheet ID
* ("r:id" in "workbook.xml", "Id" in "workbook.xml.rels").
*
* @param XMLReader $xmlReaderOnSheetNode XML Reader instance, pointing on the node describing the sheet, as defined in "workbook.xml"
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReaderOnSheetNode XML Reader instance, pointing on the node describing the sheet, as defined in "workbook.xml"
* @param int $sheetIndexZeroBased Index of the sheet, based on order of appearance in the workbook (zero-based)
* @param bool $isSheetActive Whether this sheet was defined as active
*
* @return Sheet Sheet instance
* @return \OpenSpout\Reader\XLSX\Sheet Sheet instance
*/
private function getSheetFromSheetXMLNode(XMLReader $xmlReaderOnSheetNode, int $sheetIndexZeroBased, bool $isSheetActive): Sheet
protected function getSheetFromSheetXMLNode($xmlReaderOnSheetNode, $sheetIndexZeroBased, $isSheetActive)
{
$sheetId = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_R_ID);
\assert(null !== $sheetId);
$sheetState = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_STATE);
$isSheetVisible = (self::SHEET_STATE_HIDDEN !== $sheetState);
$escapedSheetName = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_NAME);
\assert(null !== $escapedSheetName);
$sheetName = $this->escaper->unescape($escapedSheetName);
$sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId);
$mergeCells = [];
if ($this->options->SHOULD_LOAD_MERGE_CELLS) {
$mergeCells = (new SheetMergeCellsReader(
return $this->entityFactory->createSheet(
$this->filePath,
$sheetDataXMLFilePath,
$xmlReader = new XMLReader(),
new XMLProcessor($xmlReader)
))->getMergeCells();
}
return new Sheet(
$this->createRowIterator($this->filePath, $sheetDataXMLFilePath, $this->options, $this->sharedStringsManager),
$this->createSheetHeaderReader($this->filePath, $sheetDataXMLFilePath),
$sheetIndexZeroBased,
$sheetName,
$isSheetActive,
$isSheetVisible,
$mergeCells
$this->optionsManager,
$this->sharedStringsManager
);
}
@ -213,12 +198,12 @@ final class SheetManager
*
* @return string The XML file path describing the sheet inside "workbook.xml.rels", for the given sheet ID
*/
private function getSheetDataXMLFilePathForSheetId(string $sheetId): string
protected function getSheetDataXMLFilePathForSheetId($sheetId)
{
$sheetDataXMLFilePath = '';
// find the file path of the sheet, by looking at the "workbook.xml.rels" file
$xmlReader = new XMLReader();
$xmlReader = $this->entityFactory->createXMLReader();
if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_RELS_FILE_PATH)) {
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_RELATIONSHIP)) {
@ -228,10 +213,9 @@ final class SheetManager
// In workbook.xml.rels, it is only "worksheets/sheet1.xml"
// In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
$sheetDataXMLFilePath = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TARGET);
\assert(null !== $sheetDataXMLFilePath);
// sometimes, the sheet data file path already contains "/xl/"...
if (!str_starts_with($sheetDataXMLFilePath, '/xl/')) {
if (0 !== strpos($sheetDataXMLFilePath, '/xl/')) {
$sheetDataXMLFilePath = '/xl/'.$sheetDataXMLFilePath;
break;
@ -245,51 +229,4 @@ final class SheetManager
return $sheetDataXMLFilePath;
}
/**
 * Assembles the row iterator for one sheet, together with the style
 * manager and the cell value formatter it depends on.
 *
 * @param string               $filePath             Path of the XLSX file being read
 * @param string               $sheetDataXMLFilePath Path of the sheet data XML file
 * @param Options              $options              Reader options (date formatting, 1904 dates, empty rows)
 * @param SharedStringsManager $sharedStringsManager Manages shared strings
 */
private function createRowIterator(
    string $filePath,
    string $sheetDataXMLFilePath,
    Options $options,
    SharedStringsManager $sharedStringsManager
): RowIterator {
    $relationships = new WorkbookRelationshipsManager($filePath);

    // Only ask for the styles path when the workbook declares a styles file
    $stylesXMLFilePath = null;
    if ($relationships->hasStylesXMLFile()) {
        $stylesXMLFilePath = $relationships->getStylesXMLFilePath();
    }

    $formatter = new CellValueFormatter(
        $sharedStringsManager,
        new StyleManager($filePath, $stylesXMLFilePath),
        $options->SHOULD_FORMAT_DATES,
        $options->SHOULD_USE_1904_DATES,
        new XLSX()
    );

    $preserveEmptyRows = $options->SHOULD_PRESERVE_EMPTY_ROWS;

    // The XML processor must wrap the very same reader instance the iterator receives
    $reader = new XMLReader();
    $processor = new XMLProcessor($reader);

    return new RowIterator(
        $filePath,
        $sheetDataXMLFilePath,
        $preserveEmptyRows,
        $reader,
        $processor,
        $formatter,
        new RowManager()
    );
}
/**
 * Builds the header reader for one sheet, backed by a fresh XML reader
 * and an XML processor bound to that same reader.
 *
 * @param string $filePath             Path of the XLSX file being read
 * @param string $sheetDataXMLFilePath Path of the sheet data XML file
 */
private function createSheetHeaderReader(
    string $filePath,
    string $sheetDataXMLFilePath
): SheetHeaderReader {
    $reader = new XMLReader();
    $processor = new XMLProcessor($reader);

    return new SheetHeaderReader($filePath, $sheetDataXMLFilePath, $reader, $processor);
}
}

View File

@ -1,42 +1,36 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\XLSX\Manager;
use OpenSpout\Reader\Wrapper\XMLReader;
use OpenSpout\Reader\XLSX\Creator\InternalEntityFactory;
class StyleManager implements StyleManagerInterface
/**
* This class manages XLSX styles.
*/
class StyleManager
{
/**
* Nodes used to find relevant information in the styles XML file.
*/
final public const XML_NODE_NUM_FMTS = 'numFmts';
final public const XML_NODE_NUM_FMT = 'numFmt';
final public const XML_NODE_CELL_XFS = 'cellXfs';
final public const XML_NODE_XF = 'xf';
/** Nodes used to find relevant information in the styles XML file */
public const XML_NODE_NUM_FMTS = 'numFmts';
public const XML_NODE_NUM_FMT = 'numFmt';
public const XML_NODE_CELL_XFS = 'cellXfs';
public const XML_NODE_XF = 'xf';
/** Attributes used to find relevant information in the styles XML file */
public const XML_ATTRIBUTE_NUM_FMT_ID = 'numFmtId';
public const XML_ATTRIBUTE_FORMAT_CODE = 'formatCode';
public const XML_ATTRIBUTE_APPLY_NUMBER_FORMAT = 'applyNumberFormat';
/** By convention, default style ID is 0 */
public const DEFAULT_STYLE_ID = 0;
public const NUMBER_FORMAT_GENERAL = 'General';
/**
* Attributes used to find relevant information in the styles XML file.
*/
final public const XML_ATTRIBUTE_NUM_FMT_ID = 'numFmtId';
final public const XML_ATTRIBUTE_FORMAT_CODE = 'formatCode';
final public const XML_ATTRIBUTE_APPLY_NUMBER_FORMAT = 'applyNumberFormat';
final public const XML_ATTRIBUTE_COUNT = 'count';
/**
* By convention, default style ID is 0.
*/
final public const DEFAULT_STYLE_ID = 0;
final public const NUMBER_FORMAT_GENERAL = 'General';
/**
* Mapping between built-in numFmtId and the associated format - for dates only.
*
* @see https://msdn.microsoft.com/en-us/library/ff529597(v=office.12).aspx
*
* @var array Mapping between built-in numFmtId and the associated format - for dates only
*/
private const builtinNumFmtIdToNumFormatMapping = [
protected static $builtinNumFmtIdToNumFormatMapping = [
14 => 'm/d/yyyy', // @NOTE: ECMA spec is 'mm-dd-yy'
15 => 'd-mmm-yy',
16 => 'd-mmm',
@ -52,32 +46,56 @@ class StyleManager implements StyleManagerInterface
];
/** @var string Path of the XLSX file being read */
private readonly string $filePath;
protected $filePath;
/** @var bool Whether the XLSX file contains a styles XML file */
protected $hasStylesXMLFile;
/** @var null|string Path of the styles XML file */
private readonly ?string $stylesXMLFilePath;
protected $stylesXMLFilePath;
/** @var array<int, string> Array containing a mapping NUM_FMT_ID => FORMAT_CODE */
private array $customNumberFormats;
/** @var InternalEntityFactory Factory to create entities */
protected $entityFactory;
/** @var array<array-key, array<string, null|bool|int>> Array containing a mapping STYLE_ID => [STYLE_ATTRIBUTES] */
private array $stylesAttributes;
/** @var array Array containing the IDs of built-in number formats indicating a date */
protected $builtinNumFmtIdIndicatingDates;
/** @var array<int, bool> Cache containing a mapping NUM_FMT_ID => IS_DATE_FORMAT. Used to avoid lots of recalculations */
private array $numFmtIdToIsDateFormatCache = [];
/** @var null|array Array containing a mapping NUM_FMT_ID => FORMAT_CODE */
protected $customNumberFormats;
/** @var null|array Array containing a mapping STYLE_ID => [STYLE_ATTRIBUTES] */
protected $stylesAttributes;
/** @var array Cache containing a mapping NUM_FMT_ID => IS_DATE_FORMAT. Used to avoid lots of recalculations */
protected $numFmtIdToIsDateFormatCache = [];
/**
* @param string $filePath Path of the XLSX file being read
* @param WorkbookRelationshipsManager $workbookRelationshipsManager Helps retrieving workbook relationships
* @param InternalEntityFactory $entityFactory Factory to create entities
*/
public function __construct(string $filePath, ?string $stylesXMLFilePath)
public function __construct($filePath, $workbookRelationshipsManager, $entityFactory)
{
$this->filePath = $filePath;
$this->stylesXMLFilePath = $stylesXMLFilePath;
$this->entityFactory = $entityFactory;
$this->builtinNumFmtIdIndicatingDates = array_keys(self::$builtinNumFmtIdToNumFormatMapping);
$this->hasStylesXMLFile = $workbookRelationshipsManager->hasStylesXMLFile();
if ($this->hasStylesXMLFile) {
$this->stylesXMLFilePath = $workbookRelationshipsManager->getStylesXMLFilePath();
}
}
public function shouldFormatNumericValueAsDate(int $styleId): bool
/**
* Returns whether the style with the given ID should consider
* numeric values as timestamps and format the cell as a date.
*
* @param int $styleId Zero-based style ID
*
* @return bool Whether a cell with the given style should display a date instead of a numeric value
*/
public function shouldFormatNumericValueAsDate($styleId)
{
if (null === $this->stylesXMLFilePath) {
if (!$this->hasStylesXMLFile) {
return false;
}
@ -95,65 +113,43 @@ class StyleManager implements StyleManagerInterface
return $this->doesStyleIndicateDate($styleAttributes);
}
public function getNumberFormatCode(int $styleId): string
/**
* Returns the format as defined in "styles.xml" of the given style.
* NOTE: It is assumed that the style DOES have a number format associated to it.
*
* @param int $styleId Zero-based style ID
*
* @return string The number format code associated with the given style
*/
public function getNumberFormatCode($styleId)
{
if (null === $this->stylesXMLFilePath) {
return '';
}
$stylesAttributes = $this->getStylesAttributes();
$styleAttributes = $stylesAttributes[$styleId];
$numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];
\assert(\is_int($numFmtId));
if ($this->isNumFmtIdBuiltInDateFormat($numFmtId)) {
$numberFormatCode = self::builtinNumFmtIdToNumFormatMapping[$numFmtId];
$numberFormatCode = self::$builtinNumFmtIdToNumFormatMapping[$numFmtId];
} else {
$customNumberFormats = $this->getCustomNumberFormats();
$numberFormatCode = $customNumberFormats[$numFmtId] ?? '';
$numberFormatCode = $customNumberFormats[$numFmtId];
}
return $numberFormatCode;
}
/**
 * Gives the NUM_FMT_ID => FORMAT_CODE mapping, extracting the styles
 * information first when the mapping has not been populated yet.
 *
 * @return array<int, string> The custom number formats
 */
protected function getCustomNumberFormats(): array
{
    $alreadyExtracted = isset($this->customNumberFormats);
    if (!$alreadyExtracted) {
        // The mapping is filled in as a side effect of the extraction
        $this->extractRelevantInfo();
    }

    return $this->customNumberFormats;
}
/**
 * Gives the STYLE_ID => [STYLE_ATTRIBUTES] mapping, extracting the styles
 * information first when the mapping has not been populated yet.
 *
 * @return array<array-key, array<string, null|bool|int>> The styles attributes
 */
protected function getStylesAttributes(): array
{
    $alreadyExtracted = isset($this->stylesAttributes);
    if (!$alreadyExtracted) {
        // The mapping is filled in as a side effect of the extraction
        $this->extractRelevantInfo();
    }

    return $this->stylesAttributes;
}
/**
* Reads the styles.xml file and extract the relevant information from the file.
*/
private function extractRelevantInfo(): void
protected function extractRelevantInfo()
{
$this->customNumberFormats = [];
$this->stylesAttributes = [];
$xmlReader = new XMLReader();
$xmlReader = $this->entityFactory->createXMLReader();
if ($xmlReader->openFileInZip($this->filePath, $this->stylesXMLFilePath)) {
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)
&& '0' !== $xmlReader->getAttribute(self::XML_ATTRIBUTE_COUNT)) {
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)) {
$this->extractNumberFormats($xmlReader);
} elseif ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL_XFS)) {
$this->extractStyleAttributes($xmlReader);
@ -169,15 +165,14 @@ class StyleManager implements StyleManagerInterface
* For simplicity, the styles attributes are kept in memory. This is possible thanks
* to the reuse of formats. So 1 million cells should not use 1 million formats.
*
* @param XMLReader $xmlReader XML Reader positioned on the "numFmts" node
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on the "numFmts" node
*/
private function extractNumberFormats(XMLReader $xmlReader): void
protected function extractNumberFormats($xmlReader)
{
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMT)) {
$numFmtId = (int) $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID);
$numFmtId = (int) ($xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID));
$formatCode = $xmlReader->getAttribute(self::XML_ATTRIBUTE_FORMAT_CODE);
\assert(null !== $formatCode);
$this->customNumberFormats[$numFmtId] = $formatCode;
} elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_NUM_FMTS)) {
// Once done reading "numFmts" node's children
@ -191,9 +186,9 @@ class StyleManager implements StyleManagerInterface
* For simplicity, the styles attributes are kept in memory. This is possible thanks
* to the reuse of styles. So 1 million cells should not use 1 million styles.
*
* @param XMLReader $xmlReader XML Reader positioned on the "cellXfs" node
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on the "cellXfs" node
*/
private function extractStyleAttributes(XMLReader $xmlReader): void
protected function extractStyleAttributes($xmlReader)
{
while ($xmlReader->read()) {
if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_XF)) {
@ -215,11 +210,35 @@ class StyleManager implements StyleManagerInterface
}
/**
* @param array<string, null|bool|int> $styleAttributes Array containing the style attributes (2 keys: "applyNumberFormat" and "numFmtId")
* @return array The custom number formats
*/
protected function getCustomNumberFormats()
{
    // Extract the styles information on first use only; the extraction
    // fills in the custom number formats as a side effect.
    $alreadyExtracted = isset($this->customNumberFormats);
    if (!$alreadyExtracted) {
        $this->extractRelevantInfo();
    }

    return $this->customNumberFormats;
}
/**
 * Gives the STYLE_ID => [STYLE_ATTRIBUTES] mapping, extracting the styles
 * information first when the mapping has not been populated yet.
 *
 * @return array The styles attributes
 */
protected function getStylesAttributes()
{
    $alreadyExtracted = isset($this->stylesAttributes);
    if (!$alreadyExtracted) {
        // The mapping is filled in as a side effect of the extraction
        $this->extractRelevantInfo();
    }

    return $this->stylesAttributes;
}
/**
* @param array $styleAttributes Array containing the style attributes (2 keys: "applyNumberFormat" and "numFmtId")
*
* @return bool Whether the style with the given attributes indicates that the number is a date
*/
private function doesStyleIndicateDate(array $styleAttributes): bool
protected function doesStyleIndicateDate($styleAttributes)
{
$applyNumberFormat = $styleAttributes[self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT];
$numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];
@ -229,7 +248,7 @@ class StyleManager implements StyleManagerInterface
// - "numFmtId" attribute set
// This is a preliminary check, as having "numFmtId" set just means the style should apply a specific number format,
// but this is not necessarily a date.
if (false === $applyNumberFormat || !\is_int($numFmtId)) {
if (false === $applyNumberFormat || null === $numFmtId) {
return false;
}
@ -241,9 +260,11 @@ class StyleManager implements StyleManagerInterface
* The result is cached to avoid recomputing the same thing over and over, as
* "numFmtId" attributes can be shared between multiple styles.
*
* @param int $numFmtId
*
* @return bool Whether the number format ID indicates that the number is a date
*/
private function doesNumFmtIdIndicateDate(int $numFmtId): bool
protected function doesNumFmtIdIndicateDate($numFmtId)
{
if (!isset($this->numFmtIdToIsDateFormatCache[$numFmtId])) {
$formatCode = $this->getFormatCodeForNumFmtId($numFmtId);
@ -258,28 +279,34 @@ class StyleManager implements StyleManagerInterface
}
/**
* @param int $numFmtId
*
* @return null|string The custom number format or NULL if none defined for the given numFmtId
*/
private function getFormatCodeForNumFmtId(int $numFmtId): ?string
protected function getFormatCodeForNumFmtId($numFmtId)
{
$customNumberFormats = $this->getCustomNumberFormats();
// Using isset here because it is way faster than array_key_exists...
return $customNumberFormats[$numFmtId] ?? null;
return (isset($customNumberFormats[$numFmtId])) ? $customNumberFormats[$numFmtId] : null;
}
/**
* @param int $numFmtId
*
* @return bool Whether the number format ID indicates that the number is a date
*/
private function isNumFmtIdBuiltInDateFormat(int $numFmtId): bool
protected function isNumFmtIdBuiltInDateFormat($numFmtId)
{
return \array_key_exists($numFmtId, self::builtinNumFmtIdToNumFormatMapping);
return \in_array($numFmtId, $this->builtinNumFmtIdIndicatingDates, true);
}
/**
* @param null|string $formatCode
*
* @return bool Whether the given format code indicates that the number is a date
*/
private function isFormatCodeCustomDateFormat(?string $formatCode): bool
protected function isFormatCodeCustomDateFormat($formatCode)
{
// if no associated format code or if using the default "General" format
if (null === $formatCode || 0 === strcasecmp($formatCode, self::NUMBER_FORMAT_GENERAL)) {
@ -290,18 +317,15 @@ class StyleManager implements StyleManagerInterface
}
/**
* @param string $formatCode
*
* @return bool Whether the given format code matches a date format pattern
*/
private function isFormatCodeMatchingDateFormatPattern(string $formatCode): bool
protected function isFormatCodeMatchingDateFormatPattern($formatCode)
{
// Remove extra formatting (what's between [ ], the brackets should not be preceded by a "\")
$pattern = '((?<!\\\)\[.+?(?<!\\\)\])';
$formatCode = preg_replace($pattern, '', $formatCode);
\assert(null !== $formatCode);
// Remove strings in double quotes, as they won't be interpreted as date format characters
$formatCode = preg_replace('/"[^"]+"/', '', $formatCode);
\assert(null !== $formatCode);
// custom date formats contain specific characters to represent the date:
// e - yy - m - d - h - s
@ -313,7 +337,7 @@ class StyleManager implements StyleManagerInterface
// character not preceded by "\" (case insensitive)
$pattern = '/(?<!\\\)'.$dateFormatCharacter.'/i';
if (1 === preg_match($pattern, $formatCode)) {
if (preg_match($pattern, $formatCode)) {
$hasFoundDateFormatCharacter = true;
break;

View File

@ -1,64 +1,62 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\XLSX\Manager;
use OpenSpout\Common\Exception\IOException;
use OpenSpout\Reader\Wrapper\XMLReader;
use OpenSpout\Reader\XLSX\Creator\InternalEntityFactory;
/**
* @internal
* This class manages the workbook relationships defined in the associated XML file.
*/
final class WorkbookRelationshipsManager
class WorkbookRelationshipsManager
{
public const BASE_PATH = 'xl/';
/**
* Path of workbook relationships XML file inside the XLSX file.
*/
/** Path of workbook relationships XML file inside the XLSX file */
public const WORKBOOK_RELS_XML_FILE_PATH = 'xl/_rels/workbook.xml.rels';
/**
* Relationships types - For Transitional and Strict OOXML.
*/
/** Relationships types - For Transitional and Strict OOXML */
public const RELATIONSHIP_TYPE_SHARED_STRINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings';
public const RELATIONSHIP_TYPE_STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles';
public const RELATIONSHIP_TYPE_SHARED_STRINGS_STRICT = 'http://purl.oclc.org/ooxml/officeDocument/relationships/sharedStrings';
public const RELATIONSHIP_TYPE_STYLES_STRICT = 'http://purl.oclc.org/ooxml/officeDocument/relationships/styles';
/**
* Nodes and attributes used to find relevant information in the workbook relationships XML file.
*/
/** Nodes and attributes used to find relevant information in the workbook relationships XML file */
public const XML_NODE_RELATIONSHIP = 'Relationship';
public const XML_ATTRIBUTE_TYPE = 'Type';
public const XML_ATTRIBUTE_TARGET = 'Target';
/** @var string Path of the XLSX file being read */
private readonly string $filePath;
private $filePath;
/** @var array<string, string> Cache of the already read workbook relationships: [TYPE] => [FILE_NAME] */
private array $cachedWorkbookRelationships;
/** @var InternalEntityFactory Factory to create entities */
private $entityFactory;
/** @var null|array Cache of the already read workbook relationships: [TYPE] => [FILE_NAME] */
private $cachedWorkbookRelationships;
/**
* @param string $filePath Path of the XLSX file being read
* @param InternalEntityFactory $entityFactory Factory to create entities
*/
public function __construct(string $filePath)
public function __construct($filePath, $entityFactory)
{
$this->filePath = $filePath;
$this->entityFactory = $entityFactory;
}
/**
* @return string The path of the shared string XML file
*/
public function getSharedStringsXMLFilePath(): string
public function getSharedStringsXMLFilePath()
{
$workbookRelationships = $this->getWorkbookRelationships();
$sharedStringsXMLFilePath = $workbookRelationships[self::RELATIONSHIP_TYPE_SHARED_STRINGS]
?? $workbookRelationships[self::RELATIONSHIP_TYPE_SHARED_STRINGS_STRICT];
// the file path can be relative (e.g. "styles.xml") or absolute (e.g. "/xl/styles.xml")
$doesContainBasePath = str_contains($sharedStringsXMLFilePath, self::BASE_PATH);
$doesContainBasePath = (false !== strpos($sharedStringsXMLFilePath, self::BASE_PATH));
if (!$doesContainBasePath) {
// make sure we return an absolute file path
$sharedStringsXMLFilePath = self::BASE_PATH.$sharedStringsXMLFilePath;
@ -70,7 +68,7 @@ final class WorkbookRelationshipsManager
/**
* @return bool Whether the XLSX file contains a shared string XML file
*/
public function hasSharedStringsXMLFile(): bool
public function hasSharedStringsXMLFile()
{
$workbookRelationships = $this->getWorkbookRelationships();
@ -81,7 +79,7 @@ final class WorkbookRelationshipsManager
/**
* @return bool Whether the XLSX file contains a styles XML file
*/
public function hasStylesXMLFile(): bool
public function hasStylesXMLFile()
{
$workbookRelationships = $this->getWorkbookRelationships();
@ -92,14 +90,14 @@ final class WorkbookRelationshipsManager
/**
* @return string The path of the styles XML file
*/
public function getStylesXMLFilePath(): string
public function getStylesXMLFilePath()
{
$workbookRelationships = $this->getWorkbookRelationships();
$stylesXMLFilePath = $workbookRelationships[self::RELATIONSHIP_TYPE_STYLES]
?? $workbookRelationships[self::RELATIONSHIP_TYPE_STYLES_STRICT];
// the file path can be relative (e.g. "styles.xml") or absolute (e.g. "/xl/styles.xml")
$doesContainBasePath = str_contains($stylesXMLFilePath, self::BASE_PATH);
$doesContainBasePath = (false !== strpos($stylesXMLFilePath, self::BASE_PATH));
if (!$doesContainBasePath) {
// make sure we return a full path
$stylesXMLFilePath = self::BASE_PATH.$stylesXMLFilePath;
@ -112,14 +110,14 @@ final class WorkbookRelationshipsManager
* Reads the workbook.xml.rels and extracts the filename associated to the different types.
* It caches the result so that the file is read only once.
*
* @return array<string, string>
* @throws \OpenSpout\Common\Exception\IOException If workbook.xml.rels can't be read
*
* @throws IOException If workbook.xml.rels can't be read
* @return array
*/
private function getWorkbookRelationships(): array
private function getWorkbookRelationships()
{
if (!isset($this->cachedWorkbookRelationships)) {
$xmlReader = new XMLReader();
$xmlReader = $this->entityFactory->createXMLReader();
if (false === $xmlReader->openFileInZip($this->filePath, self::WORKBOOK_RELS_XML_FILE_PATH)) {
throw new IOException('Could not open "'.self::WORKBOOK_RELS_XML_FILE_PATH.'".');
@ -137,12 +135,13 @@ final class WorkbookRelationshipsManager
/**
* Extracts and store the data of the current workbook relationship.
*
* @param XMLReader $xmlReader
*/
private function processWorkbookRelationship(XMLReader $xmlReader): void
private function processWorkbookRelationship($xmlReader)
{
$type = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TYPE);
$target = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TARGET);
\assert(null !== $target);
// @NOTE: if a type is defined more than once, we overwrite the previous value
// To be changed if we want to get the file paths of sheet XML files for instance.

View File

@ -0,0 +1,122 @@
<?php
namespace OpenSpout\Reader\XLSX;
use OpenSpout\Common\Exception\IOException;
use OpenSpout\Common\Helper\GlobalFunctionsHelper;
use OpenSpout\Common\Manager\OptionsManagerInterface;
use OpenSpout\Reader\Common\Creator\InternalEntityFactoryInterface;
use OpenSpout\Reader\Common\Entity\Options;
use OpenSpout\Reader\ReaderAbstract;
use OpenSpout\Reader\XLSX\Creator\InternalEntityFactory;
use OpenSpout\Reader\XLSX\Creator\ManagerFactory;
/**
 * XLSX reader: opens the workbook archive, loads its shared strings when
 * it has any, and exposes an iterator over its sheets.
 */
class Reader extends ReaderAbstract
{
    /** @var ManagerFactory Factory used to create the shared strings manager */
    protected $managerFactory;

    /** @var \ZipArchive Archive handle on the XLSX file */
    protected $zip;

    /** @var \OpenSpout\Reader\XLSX\Manager\SharedStringsManager Manages shared strings */
    protected $sharedStringsManager;

    /** @var SheetIterator Iterator over the XLSX sheets */
    protected $sheetIterator;

    public function __construct(
        OptionsManagerInterface $optionsManager,
        GlobalFunctionsHelper $globalFunctionsHelper,
        InternalEntityFactoryInterface $entityFactory,
        ManagerFactory $managerFactory
    ) {
        parent::__construct($optionsManager, $globalFunctionsHelper, $entityFactory);

        $this->managerFactory = $managerFactory;
    }

    /**
     * Stores the folder in which temporary files will be created.
     *
     * @param string $tempFolder Temporary folder where the temporary files will be created
     *
     * @return Reader The reader itself, to allow call chaining
     */
    public function setTempFolder($tempFolder)
    {
        $this->optionsManager->setOption(Options::TEMP_FOLDER, $tempFolder);

        return $this;
    }

    /**
     * Reports that this reader does not support stream wrappers.
     *
     * @return bool Always false
     */
    protected function doesSupportStreamWrapper()
    {
        return false;
    }

    /**
     * Opens the file at the given path to make it ready to be read.
     * The shared strings are extracted up front when the workbook has any,
     * then the sheet iterator is created.
     *
     * @param string $filePath Path of the file to be read
     *
     * @throws \OpenSpout\Common\Exception\IOException            If the file at the given path or its content cannot be read
     * @throws \OpenSpout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
     */
    protected function openReader($filePath)
    {
        /** @var InternalEntityFactory $xlsxEntityFactory */
        $xlsxEntityFactory = $this->entityFactory;

        $this->zip = $xlsxEntityFactory->createZipArchive();

        // ZipArchive::open() yields an error code on failure, hence the strict comparison
        if (true !== $this->zip->open($filePath)) {
            throw new IOException("Could not open {$filePath} for reading.");
        }

        $this->sharedStringsManager = $this->managerFactory->createSharedStringsManager(
            $filePath,
            $this->optionsManager->getOption(Options::TEMP_FOLDER),
            $xlsxEntityFactory
        );

        if ($this->sharedStringsManager->hasSharedStrings()) {
            // Extract every shared string now, for easy access while reading the sheets
            $this->sharedStringsManager->extractSharedStrings();
        }

        $this->sheetIterator = $xlsxEntityFactory->createSheetIterator(
            $filePath,
            $this->optionsManager,
            $this->sharedStringsManager
        );
    }

    /**
     * Gives access to the iterator created while opening the file.
     *
     * @return SheetIterator To iterate over sheets
     */
    protected function getConcreteSheetIterator()
    {
        return $this->sheetIterator;
    }

    /**
     * Closes the reader. To be used after reading the file.
     * Each resource is released only if it was actually created.
     */
    protected function closeReader()
    {
        $archive = $this->zip;
        if (null !== $archive) {
            $archive->close();
        }

        $sharedStrings = $this->sharedStringsManager;
        if (null !== $sharedStrings) {
            $sharedStrings->cleanup();
        }
    }
}

View File

@ -1,88 +1,85 @@
<?php
declare(strict_types=1);
namespace OpenSpout\Reader\XLSX;
use DOMElement;
use OpenSpout\Common\Entity\Cell;
use OpenSpout\Common\Entity\Row;
use OpenSpout\Common\Exception\InvalidArgumentException;
use OpenSpout\Common\Exception\IOException;
use OpenSpout\Reader\Common\Manager\RowManager;
use OpenSpout\Reader\Common\XMLProcessor;
use OpenSpout\Reader\Exception\SharedStringNotFoundException;
use OpenSpout\Reader\RowIteratorInterface;
use OpenSpout\Reader\Exception\InvalidValueException;
use OpenSpout\Reader\Exception\XMLProcessingException;
use OpenSpout\Reader\IteratorInterface;
use OpenSpout\Reader\Wrapper\XMLReader;
use OpenSpout\Reader\XLSX\Creator\InternalEntityFactory;
use OpenSpout\Reader\XLSX\Helper\CellHelper;
use OpenSpout\Reader\XLSX\Helper\CellValueFormatter;
final class RowIterator implements RowIteratorInterface
class RowIterator implements IteratorInterface
{
/**
* Definition of XML nodes names used to parse data.
*/
/** Definition of XML nodes names used to parse data */
public const XML_NODE_DIMENSION = 'dimension';
public const XML_NODE_WORKSHEET = 'worksheet';
public const XML_NODE_ROW = 'row';
public const XML_NODE_CELL = 'c';
/**
* Definition of XML attributes used to parse data.
*/
/** Definition of XML attributes used to parse data */
public const XML_ATTRIBUTE_REF = 'ref';
public const XML_ATTRIBUTE_SPANS = 'spans';
public const XML_ATTRIBUTE_ROW_INDEX = 'r';
public const XML_ATTRIBUTE_CELL_INDEX = 'r';
/** @var string Path of the XLSX file being read */
private readonly string $filePath;
protected $filePath;
/** @var string Path of the sheet data XML file as in [Content_Types].xml */
private readonly string $sheetDataXMLFilePath;
protected $sheetDataXMLFilePath;
/** @var XMLReader The XMLReader object that will help read sheet's XML data */
private readonly XMLReader $xmlReader;
/** @var \OpenSpout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
protected $xmlReader;
/** @var XMLProcessor Helper Object to process XML nodes */
private readonly XMLProcessor $xmlProcessor;
/** @var \OpenSpout\Reader\Common\XMLProcessor Helper Object to process XML nodes */
protected $xmlProcessor;
/** @var CellValueFormatter Helper to format cell values */
private readonly CellValueFormatter $cellValueFormatter;
/** @var Helper\CellValueFormatter Helper to format cell values */
protected $cellValueFormatter;
/** @var RowManager Manages rows */
private readonly RowManager $rowManager;
/** @var \OpenSpout\Reader\Common\Manager\RowManager Manages rows */
protected $rowManager;
/** @var \OpenSpout\Reader\XLSX\Creator\InternalEntityFactory Factory to create entities */
protected $entityFactory;
/**
* TODO: This variable can be deleted when row indices get preserved.
*
* @var int Number of read rows
*/
private int $numReadRows = 0;
protected $numReadRows = 0;
/** @var Row Contains the row currently processed */
private Row $currentlyProcessedRow;
protected $currentlyProcessedRow;
/** @var null|Row Buffer used to store the current row, while checking if there are more rows to read */
private ?Row $rowBuffer = null;
protected $rowBuffer;
/** @var bool Indicates whether all rows have been read */
private bool $hasReachedEndOfFile = false;
protected $hasReachedEndOfFile = false;
/** @var int The number of columns the sheet has (0 meaning undefined) */
private int $numColumns = 0;
protected $numColumns = 0;
/** @var bool Whether empty rows should be returned or skipped */
private readonly bool $shouldPreserveEmptyRows;
protected $shouldPreserveEmptyRows;
/** @var int Last row index processed (one-based) */
private int $lastRowIndexProcessed = 0;
protected $lastRowIndexProcessed = 0;
/** @var int Row index to be processed next (one-based) */
private int $nextRowIndexToBeProcessed = 0;
protected $nextRowIndexToBeProcessed = 0;
/** @var int Last column index processed (zero-based) */
private int $lastColumnIndexProcessed = -1;
protected $lastColumnIndexProcessed = -1;
/**
* @param string $filePath Path of the XLSX file being read
@ -92,15 +89,17 @@ final class RowIterator implements RowIteratorInterface
* @param XMLProcessor $xmlProcessor Helper to process XML files
* @param CellValueFormatter $cellValueFormatter Helper to format cell values
* @param RowManager $rowManager Manages rows
* @param InternalEntityFactory $entityFactory Factory to create entities
*/
public function __construct(
string $filePath,
string $sheetDataXMLFilePath,
bool $shouldPreserveEmptyRows,
XMLReader $xmlReader,
$filePath,
$sheetDataXMLFilePath,
$shouldPreserveEmptyRows,
$xmlReader,
XMLProcessor $xmlProcessor,
CellValueFormatter $cellValueFormatter,
RowManager $rowManager
RowManager $rowManager,
InternalEntityFactory $entityFactory
) {
$this->filePath = $filePath;
$this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
@ -108,6 +107,7 @@ final class RowIterator implements RowIteratorInterface
$this->xmlReader = $xmlReader;
$this->cellValueFormatter = $cellValueFormatter;
$this->rowManager = $rowManager;
$this->entityFactory = $entityFactory;
// Register all callbacks to process different nodes when reading the XML file
$this->xmlProcessor = $xmlProcessor;
@ -125,8 +125,9 @@ final class RowIterator implements RowIteratorInterface
*
* @see http://php.net/manual/en/iterator.rewind.php
*
* @throws IOException If the sheet data XML cannot be read
* @throws \OpenSpout\Common\Exception\IOException If the sheet data XML cannot be read
*/
#[\ReturnTypeWillChange]
public function rewind(): void
{
$this->xmlReader->close();
@ -150,14 +151,10 @@ final class RowIterator implements RowIteratorInterface
*
* @see http://php.net/manual/en/iterator.valid.php
*/
#[\ReturnTypeWillChange]
public function valid(): bool
{
$valid = !$this->hasReachedEndOfFile;
if (!$valid) {
$this->xmlReader->close();
}
return $valid;
return !$this->hasReachedEndOfFile;
}
/**
@ -165,9 +162,10 @@ final class RowIterator implements RowIteratorInterface
*
* @see http://php.net/manual/en/iterator.next.php
*
* @throws SharedStringNotFoundException If a shared string was not found
* @throws IOException If unable to read the sheet data XML
* @throws \OpenSpout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
* @throws \OpenSpout\Common\Exception\IOException If unable to read the sheet data XML
*/
#[\ReturnTypeWillChange]
public function next(): void
{
++$this->nextRowIndexToBeProcessed;
@ -182,7 +180,8 @@ final class RowIterator implements RowIteratorInterface
*
* @see http://php.net/manual/en/iterator.current.php
*/
public function current(): Row
#[\ReturnTypeWillChange]
public function current(): ?Row
{
$rowToBeProcessed = $this->rowBuffer;
@ -194,12 +193,10 @@ final class RowIterator implements RowIteratorInterface
if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) {
// return empty row if mismatch between last processed row
// and the row that needs to be returned
$rowToBeProcessed = new Row([], null);
$rowToBeProcessed = $this->entityFactory->createRow();
}
}
\assert(null !== $rowToBeProcessed);
return $rowToBeProcessed;
}
@ -208,6 +205,7 @@ final class RowIterator implements RowIteratorInterface
*
* @see http://php.net/manual/en/iterator.key.php
*/
#[\ReturnTypeWillChange]
public function key(): int
{
// TODO: This should return $this->nextRowIndexToBeProcessed
@ -218,13 +216,22 @@ final class RowIterator implements RowIteratorInterface
$this->numReadRows;
}
/**
 * Cleans up what was created to iterate over the object.
 *
 * The only resource released here is the XML reader, which is closed.
 * Note: on PHP versions older than 8.0 the `#[...]` line below is parsed
 * as an ordinary `#` comment and has no effect.
 */
#[\ReturnTypeWillChange]
public function end(): void
{
$this->xmlReader->close();
}
/**
* @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
*
* @return string path of the XML file containing the sheet data,
* without the leading slash
*/
private function normalizeSheetDataXMLFilePath(string $sheetDataXMLFilePath): string
protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
{
return ltrim($sheetDataXMLFilePath, '/');
}
@ -241,40 +248,44 @@ final class RowIterator implements RowIteratorInterface
*
* @return bool whether we need data for the next row to be processed
*/
private function doesNeedDataForNextRowToBeProcessed(): bool
protected function doesNeedDataForNextRowToBeProcessed()
{
$hasReadAtLeastOneRow = (0 !== $this->lastRowIndexProcessed);
return
!$hasReadAtLeastOneRow
|| !$this->shouldPreserveEmptyRows
|| $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed;
|| $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed
;
}
/**
* @throws SharedStringNotFoundException If a shared string was not found
* @throws IOException If unable to read the sheet data XML
* @throws \OpenSpout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
* @throws \OpenSpout\Common\Exception\IOException If unable to read the sheet data XML
*/
private function readDataForNextRow(): void
protected function readDataForNextRow()
{
$this->currentlyProcessedRow = new Row([], null);
$this->currentlyProcessedRow = $this->entityFactory->createRow();
try {
$this->xmlProcessor->readUntilStopped();
} catch (XMLProcessingException $exception) {
throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]");
}
$this->rowBuffer = $this->currentlyProcessedRow;
}
/**
* @param XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
*
* @return int A return code that indicates what action should the processor take next
*/
private function processDimensionStartingNode(XMLReader $xmlReader): int
protected function processDimensionStartingNode($xmlReader)
{
// Read dimensions of the sheet
$dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)
\assert(null !== $dimensionRef);
if (1 === preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches)) {
if (preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches)) {
$this->numColumns = CellHelper::getColumnIndexFromCellIndex($matches[1]) + 1;
}
@ -282,11 +293,11 @@ final class RowIterator implements RowIteratorInterface
}
/**
* @param XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
*
* @return int A return code that indicates what action should the processor take next
*/
private function processRowStartingNode(XMLReader $xmlReader): int
protected function processRowStartingNode($xmlReader)
{
// Reset index of the last processed column
$this->lastColumnIndexProcessed = -1;
@ -297,30 +308,30 @@ final class RowIterator implements RowIteratorInterface
// Read spans info if present
$numberOfColumnsForRow = $this->numColumns;
$spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
if (null !== $spans && '' !== $spans) {
if ($spans) {
[, $numberOfColumnsForRow] = explode(':', $spans);
$numberOfColumnsForRow = (int) $numberOfColumnsForRow;
}
$cells = array_fill(0, $numberOfColumnsForRow, Cell::fromValue(''));
$cells = array_fill(0, $numberOfColumnsForRow, $this->entityFactory->createCell(''));
$this->currentlyProcessedRow->setCells($cells);
return XMLProcessor::PROCESSING_CONTINUE;
}
/**
* @param XMLReader $xmlReader XMLReader object, positioned on a "<cell>" starting node
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<cell>" starting node
*
* @return int A return code that indicates what action should the processor take next
*/
private function processCellStartingNode(XMLReader $xmlReader): int
protected function processCellStartingNode($xmlReader)
{
$currentColumnIndex = $this->getColumnIndex($xmlReader);
// NOTE: expand() will automatically decode all XML entities of the child nodes
/** @var \DOMElement $node */
$node = $xmlReader->expand();
\assert($node instanceof DOMElement);
$cell = $this->cellValueFormatter->extractAndFormatNodeValue($node);
$cell = $this->getCell($node);
$this->currentlyProcessedRow->setCellAtIndex($cell, $currentColumnIndex);
$this->lastColumnIndexProcessed = $currentColumnIndex;
@ -331,10 +342,10 @@ final class RowIterator implements RowIteratorInterface
/**
* @return int A return code that indicates what action should the processor take next
*/
private function processRowEndingNode(): int
protected function processRowEndingNode()
{
// if the fetched row is empty and we don't want to preserve it..,
if (!$this->shouldPreserveEmptyRows && $this->currentlyProcessedRow->isEmpty()) {
if (!$this->shouldPreserveEmptyRows && $this->rowManager->isEmpty($this->currentlyProcessedRow)) {
// ... skip it
return XMLProcessor::PROCESSING_CONTINUE;
}
@ -343,7 +354,7 @@ final class RowIterator implements RowIteratorInterface
// If needed, we fill the empty cells
if (0 === $this->numColumns) {
$this->rowManager->fillMissingIndexesWithEmptyCells($this->currentlyProcessedRow);
$this->currentlyProcessedRow = $this->rowManager->fillMissingIndexesWithEmptyCells($this->currentlyProcessedRow);
}
// at this point, we have all the data we need for the row
@ -354,7 +365,7 @@ final class RowIterator implements RowIteratorInterface
/**
* @return int A return code that indicates what action should the processor take next
*/
private function processWorksheetEndingNode(): int
protected function processWorksheetEndingNode()
{
// The closing "</worksheet>" marks the end of the file
$this->hasReachedEndOfFile = true;
@ -363,13 +374,13 @@ final class RowIterator implements RowIteratorInterface
}
/**
* @param XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
*
* @throws \OpenSpout\Common\Exception\InvalidArgumentException When the given cell index is invalid
*
* @return int Row index
*
* @throws InvalidArgumentException When the given cell index is invalid
*/
private function getRowIndex(XMLReader $xmlReader): int
protected function getRowIndex($xmlReader)
{
// Get "r" attribute if present (from something like <row r="3"...>
$currentRowIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX);
@ -380,13 +391,13 @@ final class RowIterator implements RowIteratorInterface
}
/**
* @param XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
* @param \OpenSpout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
*
* @throws \OpenSpout\Common\Exception\InvalidArgumentException When the given cell index is invalid
*
* @return int Column index
*
* @throws InvalidArgumentException When the given cell index is invalid
*/
private function getColumnIndex(XMLReader $xmlReader): int
protected function getColumnIndex($xmlReader)
{
// Get "r" attribute if present (from something like <c r="A1"...>
$currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);
@ -395,4 +406,24 @@ final class RowIterator implements RowIteratorInterface
CellHelper::getColumnIndexFromCellIndex($currentCellIndex) :
$this->lastColumnIndexProcessed + 1;
}
/**
 * Builds the cell that corresponds to the given XML node.
 *
 * The node value is extracted and formatted by the cell value formatter and
 * wrapped in a cell created by the entity factory. When an
 * InvalidValueException is raised along the way, a cell holding the
 * offending value is created instead and flagged as an error cell.
 *
 * @param \DOMElement $node
 *
 * @return Cell The cell holding the value associated with the node
 */
protected function getCell($node)
{
    try {
        $formattedValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);

        return $this->entityFactory->createCell($formattedValue);
    } catch (InvalidValueException $invalidValue) {
        // Keep the raw value but mark the cell so callers can tell it is invalid
        $errorCell = $this->entityFactory->createCell($invalidValue->getInvalidValue());
        $errorCell->setType(Cell::TYPE_ERROR);

        return $errorCell;
    }
}
}

View File

@ -0,0 +1,82 @@
<?php
namespace OpenSpout\Reader\XLSX;
use OpenSpout\Reader\SheetInterface;
/**
 * Read-side description of a single sheet of an XLSX file.
 *
 * The object only stores the values received by its constructor (row
 * iterator, position, name, active flag, visibility flag) and hands them
 * back through the accessors below; it performs no processing of its own.
 */
class Sheet implements SheetInterface
{
    /** @var \OpenSpout\Reader\XLSX\RowIterator Iterator over the rows of this sheet */
    protected $rowIterator;

    /** @var int Zero-based position of the sheet, following the order in the workbook */
    protected $index;

    /** @var string Sheet name */
    protected $name;

    /** @var bool True when the sheet was the active one */
    protected $isActive;

    /** @var bool True when the sheet is visible */
    protected $isVisible;

    /**
     * Stores the row iterator and the sheet metadata as given, without validation.
     *
     * @param RowIterator $rowIterator    The corresponding row iterator
     * @param int         $sheetIndex     Index of the sheet, based on order in the workbook (zero-based)
     * @param string      $sheetName      Name of the sheet
     * @param bool        $isSheetActive  Whether the sheet was defined as active
     * @param bool        $isSheetVisible Whether the sheet is visible
     */
    public function __construct($rowIterator, $sheetIndex, $sheetName, $isSheetActive, $isSheetVisible)
    {
        // Metadata first, then the iterator; the assignments are independent of each other
        $this->index = $sheetIndex;
        $this->name = $sheetName;
        $this->isActive = $isSheetActive;
        $this->isVisible = $isSheetVisible;
        $this->rowIterator = $rowIterator;
    }

    /**
     * @return \OpenSpout\Reader\XLSX\RowIterator The iterator passed to the constructor
     */
    public function getRowIterator()
    {
        return $this->rowIterator;
    }

    /**
     * @return int Zero-based position of the sheet in the workbook
     */
    public function getIndex()
    {
        return $this->index;
    }

    /**
     * @return string The sheet name
     */
    public function getName()
    {
        return $this->name;
    }

    /**
     * @return bool True when the sheet was defined as active
     */
    public function isActive()
    {
        return $this->isActive;
    }

    /**
     * @return bool True when the sheet is visible
     */
    public function isVisible()
    {
        return $this->isVisible;
    }
}

View File

@ -0,0 +1,113 @@
<?php
namespace OpenSpout\Reader\XLSX;
use OpenSpout\Reader\Exception\NoSheetsFoundException;
use OpenSpout\Reader\IteratorInterface;
use OpenSpout\Reader\XLSX\Manager\SheetManager;
/**
 * Iterates over the sheets of an XLSX file.
 *
 * The sheets are fetched once from the sheet manager at construction time.
 * Moving to the next sheet, or calling end(), calls end() on the row
 * iterator of the sheet(s) concerned so that their resources are released.
 */
class SheetIterator implements IteratorInterface
{
    /** @var \OpenSpout\Reader\XLSX\Sheet[] The list of sheet present in the file */
    protected $sheets;

    /**
     * @var int The index of the sheet being read (zero-based)
     *
     * Initialised to 0 so that the iterator is usable even when rewind() has
     * not been called first. Left uninitialised it would be null: current()
     * would then look up a non-existent key and next() would never advance,
     * because its isset() guard would always fail.
     */
    protected $currentSheetIndex = 0;

    /**
     * @param SheetManager $sheetManager Manages sheets
     *
     * @throws \OpenSpout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
     */
    public function __construct($sheetManager)
    {
        // Fetch all available sheets
        $this->sheets = $sheetManager->getSheets();

        if (0 === \count($this->sheets)) {
            throw new NoSheetsFoundException('The file must contain at least one sheet.');
        }
    }

    /**
     * Rewind the Iterator to the first element.
     *
     * @see http://php.net/manual/en/iterator.rewind.php
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->currentSheetIndex = 0;
    }

    /**
     * Checks if current position is valid.
     *
     * @see http://php.net/manual/en/iterator.valid.php
     *
     * @return bool True while the current index is below the number of sheets
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return $this->currentSheetIndex < \count($this->sheets);
    }

    /**
     * Move forward to next element.
     *
     * The row iterator of the sheet being left is ended before the index is
     * incremented. Nothing happens when the current index does not designate
     * a sheet (for instance once the end has been reached).
     *
     * @see http://php.net/manual/en/iterator.next.php
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        // Using isset here because it is way faster than array_key_exists...
        if (isset($this->sheets[$this->currentSheetIndex])) {
            $currentSheet = $this->sheets[$this->currentSheetIndex];
            $currentSheet->getRowIterator()->end();

            ++$this->currentSheetIndex;
        }
    }

    /**
     * Return the current element.
     *
     * @see http://php.net/manual/en/iterator.current.php
     *
     * @return \OpenSpout\Reader\XLSX\Sheet
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->sheets[$this->currentSheetIndex];
    }

    /**
     * Return the key of the current element.
     *
     * The key is one-based: it is the zero-based sheet index plus one.
     *
     * @see http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->currentSheetIndex + 1;
    }

    /**
     * Cleans up what was created to iterate over the object.
     */
    #[\ReturnTypeWillChange]
    public function end()
    {
        // make sure we are not leaking memory in case the iteration stopped before the end
        foreach ($this->sheets as $sheet) {
            $sheet->getRowIterator()->end();
        }
    }
}

View File

@ -0,0 +1,34 @@
<?php
namespace OpenSpout\Writer\CSV\Manager;
use OpenSpout\Common\Manager\OptionsManagerAbstract;
use OpenSpout\Writer\Common\Entity\Options;
/**
 * Options manager for the CSV writer.
 *
 * Declares which options the CSV writer understands and the value each of
 * them starts with.
 */
class OptionsManager extends OptionsManagerAbstract
{
    /**
     * Lists the options supported by the CSV writer: field delimiter,
     * field enclosure and whether a BOM should be added.
     *
     * @return array The supported option identifiers
     */
    protected function getSupportedOptions()
    {
        $supportedOptions = [
            Options::FIELD_DELIMITER,
            Options::FIELD_ENCLOSURE,
            Options::SHOULD_ADD_BOM,
        ];

        return $supportedOptions;
    }

    /**
     * Applies the default values: "," as delimiter, '"' as enclosure and
     * BOM enabled.
     */
    protected function setDefaultOptions()
    {
        // Pairs (rather than an option => value map) so that the option
        // identifiers are passed to setOption() exactly as declared.
        $defaults = [
            [Options::FIELD_DELIMITER, ','],
            [Options::FIELD_ENCLOSURE, '"'],
            [Options::SHOULD_ADD_BOM, true],
        ];

        foreach ($defaults as [$optionName, $defaultValue]) {
            $this->setOption($optionName, $defaultValue);
        }
    }
}

View File

@ -0,0 +1,109 @@
<?php
namespace OpenSpout\Writer\CSV;
use OpenSpout\Common\Entity\Row;
use OpenSpout\Common\Exception\IOException;
use OpenSpout\Common\Helper\EncodingHelper;
use OpenSpout\Writer\Common\Entity\Options;
use OpenSpout\Writer\WriterAbstract;
/**
 * Writer producing CSV output.
 *
 * Exposes fluent setters for the CSV-specific options and implements the
 * open / add row / close hooks of the abstract writer.
 */
class Writer extends WriterAbstract
{
    /** Number of rows to write before flushing */
    public const FLUSH_THRESHOLD = 500;

    /** @var string Content-Type value for the header */
    protected static $headerContentType = 'text/csv; charset=UTF-8';

    /** @var int Number of rows written so far; reset to 0 by closeWriter() */
    protected $lastWrittenRowIndex = 0;

    /**
     * Stores the field delimiter option.
     *
     * @param string $fieldDelimiter Character that delimits fields
     *
     * @return Writer This writer, to allow chaining
     */
    public function setFieldDelimiter($fieldDelimiter)
    {
        $options = $this->optionsManager;
        $options->setOption(Options::FIELD_DELIMITER, $fieldDelimiter);

        return $this;
    }

    /**
     * Stores the field enclosure option.
     *
     * @param string $fieldEnclosure Character that enclose fields
     *
     * @return Writer This writer, to allow chaining
     */
    public function setFieldEnclosure($fieldEnclosure)
    {
        $options = $this->optionsManager;
        $options->setOption(Options::FIELD_ENCLOSURE, $fieldEnclosure);

        return $this;
    }

    /**
     * Stores whether a BOM has to be added to the file.
     * The given value is cast to bool before being stored.
     *
     * @param bool $shouldAddBOM
     *
     * @return Writer This writer, to allow chaining
     */
    public function setShouldAddBOM($shouldAddBOM)
    {
        $addBom = (bool) $shouldAddBOM;
        $this->optionsManager->setOption(Options::SHOULD_ADD_BOM, $addBom);

        return $this;
    }

    /**
     * Prepares the output: when the BOM option is enabled, the UTF-8 BOM is
     * written to the file pointer; otherwise nothing is written.
     */
    protected function openWriter()
    {
        if (!$this->optionsManager->getOption(Options::SHOULD_ADD_BOM)) {
            return;
        }

        // Adds UTF-8 BOM for Unicode compatibility
        $this->globalFunctionsHelper->fputs($this->filePointer, EncodingHelper::BOM_UTF8);
    }

    /**
     * Writes one row as a CSV line, using the configured delimiter and
     * enclosure, and flushes the file pointer every FLUSH_THRESHOLD rows.
     *
     * @param Row $row The row containing cells and styles
     *
     * @throws IOException If unable to write data
     */
    protected function addRowToWriter(Row $row)
    {
        $delimiter = $this->optionsManager->getOption(Options::FIELD_DELIMITER);
        $enclosure = $this->optionsManager->getOption(Options::FIELD_ENCLOSURE);

        $writeResult = $this->globalFunctionsHelper->fputcsv(
            $this->filePointer,
            $row->getCells(),
            $delimiter,
            $enclosure
        );

        if (false === $writeResult) {
            throw new IOException('Unable to write data');
        }

        ++$this->lastWrittenRowIndex;

        // Only flush when the row count hits a multiple of the threshold
        if (0 !== $this->lastWrittenRowIndex % self::FLUSH_THRESHOLD) {
            return;
        }

        $this->globalFunctionsHelper->fflush($this->filePointer);
    }

    /**
     * Closing hook of the CSV writer: resets the written-row counter so the
     * next use starts counting from zero.
     */
    protected function closeWriter()
    {
        $this->lastWrittenRowIndex = 0;
    }
}

View File

@ -0,0 +1,52 @@
<?php
namespace OpenSpout\Writer\Common\Creator;
use OpenSpout\Writer\Common\Entity\Sheet;
use OpenSpout\Writer\Common\Entity\Workbook;
use OpenSpout\Writer\Common\Entity\Worksheet;
use OpenSpout\Writer\Common\Manager\SheetManager;
/**
 * Factory for the entities used internally by the writers.
 *
 * Every method simply instantiates the corresponding class with the
 * arguments it was given.
 */
class InternalEntityFactory
{
    /**
     * @return Workbook A new, empty workbook entity
     */
    public function createWorkbook()
    {
        $workbook = new Workbook();

        return $workbook;
    }

    /**
     * @param string $worksheetFilePath Passed as-is to the Worksheet constructor
     * @param Sheet  $externalSheet     Sheet entity associated with the new worksheet
     *
     * @return Worksheet
     */
    public function createWorksheet($worksheetFilePath, Sheet $externalSheet)
    {
        $worksheet = new Worksheet($worksheetFilePath, $externalSheet);

        return $worksheet;
    }

    /**
     * @param int          $sheetIndex           Index of the sheet, based on order in the workbook (zero-based)
     * @param string       $associatedWorkbookId ID of the sheet's associated workbook
     * @param SheetManager $sheetManager         To manage sheets
     *
     * @return Sheet
     */
    public function createSheet($sheetIndex, $associatedWorkbookId, $sheetManager)
    {
        $sheet = new Sheet($sheetIndex, $associatedWorkbookId, $sheetManager);

        return $sheet;
    }

    /**
     * @return \ZipArchive A fresh archive object; no file is opened here
     */
    public function createZipArchive()
    {
        $zipArchive = new \ZipArchive();

        return $zipArchive;
    }
}

View File

@ -0,0 +1,23 @@
<?php
namespace OpenSpout\Writer\Common\Creator;
use OpenSpout\Common\Manager\OptionsManagerInterface;
use OpenSpout\Writer\Common\Manager\SheetManager;
use OpenSpout\Writer\Common\Manager\WorkbookManagerInterface;
/**
 * Contract for factories that create the manager objects declared below:
 * a workbook manager and a sheet manager.
 */
interface ManagerFactoryInterface
{
/**
 * Creates a workbook manager.
 *
 * @param OptionsManagerInterface $optionsManager Options manager given to the factory;
 *                                                how it is used is up to the implementation
 *
 * @return WorkbookManagerInterface
 */
public function createWorkbookManager(OptionsManagerInterface $optionsManager);
/**
 * Creates a sheet manager.
 *
 * @return SheetManager
 */
public function createSheetManager();
}

Some files were not shown because too many files have changed in this diff Show More