|
1 | 1 | <?php
|
2 | 2 | namespace mikehaertl\pdftk;
|
3 | 3 |
|
| 4 | +use Exception; |
4 | 5 | use mikehaertl\tmp\File;
|
5 | 6 |
|
6 | 7 | /**
|
|
14 | 15 | */
|
class InfoFile extends File
{
    /**
     * @var string[] list of valid keys for the document information directory of
     * the PDF. These will be converted into `InfoBegin... InfoKey... InfoValue`
     * blocks on the output.
     *
     * See section 14.3.3 in https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf
     */
    public static $documentInfoFields = array(
        'Title',
        'Author',
        'Subject',
        'Keywords',
        'Creator',
        'Producer',
        'CreationDate',
        'ModDate',
        'Trapped',
    );

    /**
     * Constructor
     *
     * Creates a temporary file and writes the rendered info blocks to it.
     *
     * @param array|InfoFields $data the data in this format:
     * ```
     * [
     *     'Info' => [
     *         'Title' => '...',
     *         'Author' => '...',
     *         'Subject' => '...',
     *         'Keywords' => '...',
     *         'Creator' => '...',
     *         'Producer' => '...',
     *         'CreationDate' => '...',
     *         'ModDate' => '...',
     *         'Trapped' => '...',
     *     ],
     *     'Bookmark' => [
     *         [
     *             'Title' => '...',
     *             'Level' => ...,
     *             'PageNumber' => ...,
     *         ],
     *     ],
     *     'PageMedia' => [ ... ],
     *     'PageLabel' => [ ... ],
     *     // ...
     * ]
     * ```
     * This is the same format as the InfoFields object that is returned
     * by `getData()` if you cast it to an array. You can also pass such an
     * (optionally modified) object as input. Some fields like 'NumberOfPages'
     * or 'PdfID0' are ignored as those are not part of the PDF's metadata.
     * All array elements are optional.
     * @param string|null $suffix the optional suffix for the tmp file. If null
     * '.txt' is used.
     * @param string|null $prefix the optional prefix for the tmp file. If null
     * 'php_pdftk_info_' is used.
     * @param string|null $directory directory where the file should be
     * created. Autodetected if not provided.
     * @param string|null $encoding of the data. Default is 'UTF-8'. If the
     * data has another encoding it will be converted to UTF-8. This requires
     * the mbstring extension to be installed.
     * @throws Exception on invalid data format, if the mbstring extension is
     * missing and data must be converted, or if the temporary file can not
     * be created or written
     */
    public function __construct($data, $suffix = null, $prefix = null, $directory = null, $encoding = 'UTF-8')
    {
        if ($suffix === null) {
            $suffix = '.txt';
        }
        if ($prefix === null) {
            $prefix = 'php_pdftk_info_';
        }
        if ($directory === null) {
            $directory = self::getTempDir();
        }

        $tempName = tempnam($directory, $prefix);
        if ($tempName === false) {
            // Without this guard the code below would end up writing to a
            // file that is literally named like the suffix (e.g. '.txt').
            throw new Exception('Could not create temporary file.');
        }
        // tempnam() can not add a suffix, so the created file is renamed
        $newName = $tempName . $suffix;
        rename($tempName, $newName);
        $this->_fileName = $newName;

        if ($encoding !== 'UTF-8' && !function_exists('mb_convert_encoding')) {
            throw new Exception('mbstring extension required.');
        }

        $fields = '';
        foreach (self::normalize($data) as $block => $items) {
            $fields .= self::renderBlock($block, $items, $encoding);
        }

        // Use fwrite, since file_put_contents() messes around with character encoding
        $fp = fopen($this->_fileName, 'w');
        if ($fp === false) {
            throw new Exception('Could not open temporary file for writing.');
        }
        $written = fwrite($fp, $fields);
        fclose($fp);
        if ($written === false) {
            throw new Exception('Could not write to temporary file.');
        }
    }

    /**
     * Normalize the input data
     *
     * This also converts data from the legacy format (<0.13.0) to the new
     * input format described in the constructor: top level entries whose key
     * is listed in `$documentInfoFields` are moved into the 'Info' block.
     * Other top level entries are only kept if their value is an array.
     *
     * @param array $data the data to normalize
     * @return array a normalized array in the format described in the constructor
     */
    private static function normalize($data)
    {
        $normalized = array();
        foreach ($data as $key => $value) {
            // Strict comparison: with loose comparison an integer key like 0
            // equals any non-numeric string on PHP < 8 and would be
            // misinterpreted as a document info field.
            if (in_array($key, self::$documentInfoFields, true)) {
                $normalized['Info'][$key] = $value;
            } elseif (is_array($value)) {
                if (!isset($normalized[$key])) {
                    $normalized[$key] = array();
                }
                $normalized[$key] = array_merge($normalized[$key], $value);
            }
        }
        return $normalized;
    }

    /**
     * Render a set of block fields
     *
     * For the 'Info' block each item is a key/value pair that is rendered as
     * its own `InfoBegin` record. For all other blocks each item is a list of
     * fields that is rendered as one `<Block>Begin` record.
     *
     * @param string $block like 'Info', 'Bookmark', etc.
     * @param array $items the field items to render
     * @param string $encoding the encoding of the item data
     * @return string the rendered fields
     * @throws Exception if an item of a non-'Info' block is not a list of fields
     */
    private static function renderBlock($block, $items, $encoding)
    {
        $fields = '';
        foreach ($items as $key => $value) {
            if ($block === 'Info') {
                $fields .= self::renderField($block, $key, $value, $encoding, true);
                continue;
            }
            if (!is_array($value) && !($value instanceof \Traversable)) {
                throw new Exception("Invalid data format: items of '$block' must be arrays.");
            }
            $fields .= "{$block}Begin\n";
            foreach ($value as $subKey => $subValue) {
                $fields .= self::renderField($block, $subKey, $subValue, $encoding, false);
            }
        }
        return $fields;
    }

    /**
     * Render a field in a given input block
     *
     * @param string $prefix the prefix to use for the field
     * @param string $key the field key
     * @param string $value the field value
     * @param string $encoding the encoding of key and value
     * @param bool $isInfo whether it's an 'Info' field
     * @return string the rendered field
     */
    private static function renderField($prefix, $key, $value, $encoding, $isInfo)
    {
        // NOTE(review): XML escaping is only applied together with the
        // encoding conversion; UTF-8 input is written as-is. Confirm whether
        // this is intended.
        if ($encoding !== 'UTF-8') {
            $key = self::escape(mb_convert_encoding($key, 'UTF-8', $encoding));
            $value = self::escape(mb_convert_encoding($value, 'UTF-8', $encoding));
        }
        if ($isInfo) {
            return "InfoBegin\nInfoKey: $key\nInfoValue: $value\n";
        }
        return "{$prefix}{$key}: $value\n";
    }

    /**
     * Escape special characters in a UTF-8 string
     *
     * @param string $string the UTF-8 string to escape
     * @return string the escaped string
     */
    private static function escape($string)
    {
        // ENT_XML1 is not available on old PHP versions
        return defined('ENT_XML1')
            ? htmlspecialchars($string, ENT_XML1, 'UTF-8')
            : htmlspecialchars($string);
    }
}
|
0 commit comments