Skip to content

Commit bfe3d05

Browse files
authored
Merge pull request #297 from mikehaertl/291-fix-update-info
Issue #291 Fix updateInfo() to accept any updateable data
2 parents c856d2d + d7cd2c8 commit bfe3d05

File tree

5 files changed

+281
-23
lines changed

5 files changed

+281
-23
lines changed

src/InfoFile.php

Lines changed: 149 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
<?php
22
namespace mikehaertl\pdftk;
33

4+
use Exception;
45
use mikehaertl\tmp\File;
56

67
/**
@@ -14,48 +15,176 @@
1415
*/
1516
class InfoFile extends File
1617
{
18+
/**
19+
* @var string[] list of valid keys for the document information directory of
20+
* the PDF. These will be converted into `InfoBegin... InfoKey... InfoValue`
21+
* blocks on the output.
22+
*
23+
* See section 14.3.3 in https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf
24+
*/
25+
public static $documentInfoFields = array(
26+
'Title',
27+
'Author',
28+
'Subject',
29+
'Keywords',
30+
'Creator',
31+
'Producer',
32+
'CreationDate',
33+
'ModDate',
34+
'Trapped',
35+
);
1736

1837
/**
 * Constructor
 *
 * Creates a temporary file and writes the rendered info blocks to it.
 *
 * @param array|InfoFields $data the data in this format:
 * ```
 * [
 *     'Info' => [
 *         'Title' => '...',
 *         'Author' => '...',
 *         'Subject' => '...',
 *         'Keywords' => '...',
 *         'Creator' => '...',
 *         'Producer' => '...',
 *         'CreationDate' => '...',
 *         'ModDate' => '...',
 *         'Trapped' => '...',
 *     ],
 *     'Bookmark' => [
 *         [
 *             'Title' => '...',
 *             'Level' => ...,
 *             'PageNumber' => ...,
 *         ],
 *     ],
 *     'PageMedia' => [ ... ],
 *     'PageLabel' => [ ... ],
 *     // ...
 * ]
 * ```
 * This is the same format as the InfoFields object that is returned
 * by `getData()` if you cast it to an array. You can also pass such an
 * (optionally modified) object as input. Some fields like 'NumberOfPages'
 * or 'PdfID0' are ignored as those are not part of the PDF's metadata.
 * All array elements are optional.
 * @param string|null $suffix the optional suffix for the tmp file. If null
 * '.txt' is used.
 * @param string|null $prefix the optional prefix for the tmp file. If null
 * 'php_pdftk_info_' is used.
 * @param string|null $directory directory where the file should be
 * created. Autodetected if not provided.
 * @param string|null $encoding of the data. Default is 'UTF-8'. If the
 * data has another encoding it will be converted to UTF-8. This requires
 * the mbstring extension to be installed.
 * @throws Exception if the mbstring extension is missing and data must be
 * converted, or if the temporary file can not be created or written
 */
public function __construct($data, $suffix = null, $prefix = null, $directory = null, $encoding = 'UTF-8')
{
    // Check the precondition first, before any file is created
    if ($encoding !== 'UTF-8' && !function_exists('mb_convert_encoding')) {
        throw new Exception('mbstring extension required.');
    }

    if ($suffix === null) {
        $suffix = '.txt';
    }
    if ($prefix === null) {
        $prefix = 'php_pdftk_info_';
    }
    if ($directory === null) {
        $directory = self::getTempDir();
    }

    // tempnam() can't add a suffix, so the file is renamed after creation
    $tempName = tempnam($directory, $prefix);
    if ($tempName === false) {
        throw new Exception("Could not create temporary file in '$directory'.");
    }
    $newName = $tempName . $suffix;
    if (!rename($tempName, $newName)) {
        // Don't leave the empty file created by tempnam() behind
        if (is_file($tempName)) {
            unlink($tempName);
        }
        throw new Exception("Could not rename temporary file to '$newName'.");
    }
    $this->_fileName = $newName;

    $fields = '';
    foreach (self::normalize($data) as $block => $items) {
        $fields .= self::renderBlock($block, $items, $encoding);
    }

    // Use fwrite, since file_put_contents() messes around with character encoding
    $fp = fopen($this->_fileName, 'w');
    if ($fp === false) {
        throw new Exception("Could not open '{$this->_fileName}' for writing.");
    }
    $written = fwrite($fp, $fields);
    fclose($fp);
    if ($written === false) {
        throw new Exception("Could not write to '{$this->_fileName}'.");
    }
}
115+
116+
/**
 * Normalize the input data
 *
 * This also converts data from the legacy format (<0.13.0) to the new
 * input format described in the constructor: top level keys that are listed
 * in `self::$documentInfoFields` are moved below the 'Info' key. Any other
 * key is only kept if its value is an array; it is merged into what was
 * already collected for that key (relevant for 'Info' when legacy keys and
 * the new format are mixed). All remaining values are dropped.
 *
 * @param array|InfoFields $data the data to normalize
 * @return array a normalized array in the format described in the constructor
 */
private static function normalize($data)
{
    $normalized = array();
    foreach ($data as $key => $value) {
        // Strict check: with a loose comparison an integer key like 0
        // equals any non-numeric string on PHP < 8 and would therefore be
        // mistaken for a document info field.
        if (in_array($key, self::$documentInfoFields, true)) {
            $normalized['Info'][$key] = $value;
        } elseif (is_array($value)) {
            $existing = isset($normalized[$key]) ? $normalized[$key] : array();
            $normalized[$key] = array_merge($existing, $value);
        }
    }
    return $normalized;
}
140+
141+
/**
 * Render all items of one block type
 *
 * For the 'Info' block every item is a single key/value field. For any
 * other block every item is a list of fields that is introduced by a
 * `<block>Begin` line.
 *
 * @param string $block like 'Info', 'Bookmark', etc.
 * @param array $items the field items to render
 * @param string $encoding the encoding of the item data
 * @return string the rendered fields
 */
private static function renderBlock($block, $items, $encoding)
{
    $isInfo = $block === 'Info';
    $lines = array();
    foreach ($items as $name => $item) {
        if ($isInfo) {
            $lines[] = self::renderField($block, $name, $item, $encoding, true);
            continue;
        }
        $lines[] = "{$block}Begin\n";
        foreach ($item as $fieldName => $fieldValue) {
            $lines[] = self::renderField($block, $fieldName, $fieldValue, $encoding, false);
        }
    }
    return implode('', $lines);
}
164+
165+
/**
 * Render a field in a given input block
 *
 * If the encoding is not 'UTF-8', key and value are converted to UTF-8 and
 * XML-escaped before they are rendered.
 *
 * @param string $prefix the prefix to use for the field. Only used if
 * `$isInfo` is `false`.
 * @param string $key the field key
 * @param string $value the field value
 * @param string $encoding the encoding of key and value
 * @param bool $isInfo whether it's an 'Info' field. Info fields are rendered
 * as a `InfoBegin / InfoKey / InfoValue` block, all others as a single
 * `<prefix><key>: <value>` line.
 * @return string the rendered field
 */
private static function renderField($prefix, $key, $value, $encoding, $isInfo)
{
    if ($encoding !== 'UTF-8') {
        $key = mb_convert_encoding($key, 'UTF-8', $encoding);
        $value = mb_convert_encoding($value, 'UTF-8', $encoding);
        // Key and value must each be escaped from their own content.
        // Crossing the two assignments would replace the value with the key.
        // NOTE(review): escaping is only applied to converted data, same as
        // before - confirm whether UTF-8 input should be escaped, too.
        if (defined('ENT_XML1')) {
            $key = htmlspecialchars($key, ENT_XML1, 'UTF-8');
            $value = htmlspecialchars($value, ENT_XML1, 'UTF-8');
        } else {
            $key = htmlspecialchars($key);
            $value = htmlspecialchars($value);
        }
    }
    if ($isInfo) {
        return "InfoBegin\nInfoKey: $key\nInfoValue: $value\n";
    }
    return "{$prefix}{$key}: $value\n";
}
61190
}

src/Pdf.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ public function fillForm($data, $encoding = 'UTF-8', $dropXfa = true, $format =
321321
public function updateInfo($data, $encoding = 'UTF-8')
322322
{
323323
$this->constrainSingleFile();
324-
if (is_array($data)) {
324+
if (is_array($data) || $data instanceof InfoFields) {
325325
$data = new InfoFile($data, null, null, $this->tempDir, $encoding);
326326
}
327327
$this->getCommand()

tests/InfoFileTest.php

Lines changed: 86 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,100 @@
77
class InfoFileTest extends TestCase
88
{
99
public function testInfoFileCreation()
{
    // Document information fields, in the order expected in the fixture
    $info = array(
        'Creator' => 'php-pdftk',
        'Subject' => 'öäüÖÄÜ',
        'Title' => 'Title x',
    );
    $bookmarks = array(
        array('Title' => 'Title 1', 'Level' => 1, 'PageNumber' => 1),
        array('Title' => 'Title 2', 'Level' => 2, 'PageNumber' => 10),
    );
    $pageMedia = array(
        array(
            'Number' => '1',
            'Rotation' => '0',
            'Rect' => '0 0 595 842',
            'Dimensions' => '595 842'
        ),
    );
    $pageLabels = array(
        array(
            'NewIndex' => '1',
            'Start' => '1',
            'Prefix' => 'some name 1',
            'NumStyle' => 'NoNumber',
        ),
    );

    $input = array(
        'Info' => $info,
        'PdfID0' => '8b93f76a0b28b720d0dee9a6eb2a780a',
        'PdfID1' => '8b93f76a0b28b720d0dee9a6eb2a780a',
        'NumberOfPages' => '5',
        'Bookmark' => $bookmarks,
        'PageMedia' => $pageMedia,
        'PageLabel' => $pageLabels,
    );

    // Keep the object in a variable while the file is inspected
    $infoFile = new InfoFile($input, null, null, __DIR__);
    $fileName = $infoFile->getFileName();

    $this->assertFileExists($fileName);
    $this->assertFileEquals(__DIR__ . '/files/InfoFileTest.txt', $fileName);
}
56+
57+
public function testInfoFileCreationFromLegacyFormat()
{
    // Legacy format: document info fields live on the top level. Each key
    // is listed once only; a repeated key in an array literal is silently
    // overwritten by its last occurrence.
    $data = array(
        'Creator' => 'php-pdftk',
        'Subject' => 'öäüÖÄÜ',
        'PdfID0' => '8b93f76a0b28b720d0dee9a6eb2a780a',
        'PdfID1' => '8b93f76a0b28b720d0dee9a6eb2a780a',
        'NumberOfPages' => '5',
        // Mix-in new format
        'Info' => array(
            'Title' => 'Title x',
        ),
        'Bookmark' => array(
            array(
                'Title' => 'Title 1',
                'Level' => 1,
                'PageNumber' => 1,
            ),
            array(
                'Title' => 'Title 2',
                'Level' => 2,
                'PageNumber' => 10,
            ),
        ),
        'PageMedia' => array(
            array(
                'Number' => '1',
                'Rotation' => '0',
                'Rect' => '0 0 595 842',
                'Dimensions' => '595 842'
            ),
        ),
        'PageLabel' => array(
            array(
                'NewIndex' => '1',
                'Start' => '1',
                'Prefix' => 'some name 1',
                'NumStyle' => 'NoNumber',
            ),
        ),
    );

    $oInfoFile = new InfoFile($data, null, null, __DIR__);
    $sInfoFilename = $oInfoFile->getFileName();

    // Must produce the same file as the new input format
    $this->assertFileExists($sInfoFilename);
    $this->assertFileEquals(__DIR__ . '/files/InfoFileTest.txt', $sInfoFilename);
}
22106
}

tests/PdfTest.php

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,30 @@ public function testCanGetData()
606606
$this->assertEquals($this->formDataArray, (array)$data);
607607
}
608608

609+
public function testCanUpdateInfoFromDumpedData()
{
    $source = $this->getDocument1();
    $target = $this->getOutFile();

    // Dump the data of the source document
    $pdf = new Pdf($source);
    $info = $pdf->getData();
    $this->assertInstanceOf('\mikehaertl\pdftk\InfoFields', $info);
    $this->assertEquals($this->formDataArray, (array)$info);

    // Modify the dumped data ...
    $info['Info']['Creator'] = 'php-pdftk';

    // ... and feed it back into a fresh instance
    $pdf = new Pdf($source);
    $this->assertInstanceOf('mikehaertl\pdftk\Pdf', $pdf->updateInfo($info));

    $this->assertTrue($pdf->saveAs($target));

    $this->assertFileExists($target);

    // The written file must contain the modified field
    $pdf = new Pdf($target);
    $info = $pdf->getData();
    $this->assertEquals('php-pdftk', $info['Info']['Creator']);
}
632+
609633
public function testCanGetDataFields()
610634
{
611635
$form = $this->getForm();

tests/files/InfoFileTest.txt

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,24 @@ InfoValue: php-pdftk
44
InfoBegin
55
InfoKey: Subject
66
InfoValue: öäüÖÄÜ
7+
InfoBegin
8+
InfoKey: Title
9+
InfoValue: Title x
10+
BookmarkBegin
11+
BookmarkTitle: Title 1
12+
BookmarkLevel: 1
13+
BookmarkPageNumber: 1
14+
BookmarkBegin
15+
BookmarkTitle: Title 2
16+
BookmarkLevel: 2
17+
BookmarkPageNumber: 10
18+
PageMediaBegin
19+
PageMediaNumber: 1
20+
PageMediaRotation: 0
21+
PageMediaRect: 0 0 595 842
22+
PageMediaDimensions: 595 842
23+
PageLabelBegin
24+
PageLabelNewIndex: 1
25+
PageLabelStart: 1
26+
PageLabelPrefix: some name 1
27+
PageLabelNumStyle: NoNumber

0 commit comments

Comments
 (0)