1: <?php
2: /**
3: * CakePHP(tm) : Rapid Development Framework (https://cakephp.org)
4: * Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
5: *
6: * Licensed under The MIT License
7: * For full copyright and license information, please see the LICENSE.txt
8: * Redistributions of files must retain the above copyright notice.
9: *
10: * @copyright Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
11: * @link https://cakephp.org CakePHP(tm) Project
12: * @since 3.0.0
13: * @license https://opensource.org/licenses/mit-license.php MIT License
14: */
15: namespace Cake\I18n\Parser;
16:
17: use Cake\I18n\Translator;
18:
19: /**
20: * Parses file in PO format
21: *
22: * @copyright Copyright (c) 2010, Union of RAD http://union-of-rad.org (http://lithify.me/)
23: * @copyright Copyright (c) 2012, Clemens Tolboom
24: * @copyright Copyright (c) 2014, Fabien Potencier https://github.com/symfony/Translation/blob/master/LICENSE
25: */
class PoFileParser
{
    /**
     * Parses portable object (PO) format.
     *
     * From https://www.gnu.org/software/gettext/manual/gettext.html#PO-Files
     * we should be able to parse files having:
     *
     *     white-space
     *     # translator-comments
     *     #. extracted-comments
     *     #: reference...
     *     #, flag...
     *     #| msgid previous-untranslated-string
     *     msgid untranslated-string
     *     msgstr translated-string
     *
     * extra or different lines are:
     *
     *     #| msgctxt previous-context
     *     #| msgid previous-untranslated-string
     *     msgctxt context
     *
     *     #| msgid previous-untranslated-string-singular
     *     #| msgid_plural previous-untranslated-string-plural
     *     msgid untranslated-string-singular
     *     msgid_plural untranslated-string-plural
     *     msgstr[0] translated-string-case-0
     *     ...
     *     msgstr[N] translated-string-case-n
     *
     * The definition states:
     * - white-space and comments are optional.
     * - an entry whose msgid is the empty string defines the header.
     *
     * How this parser behaves:
     * - Lines that match none of the keywords above (this includes every
     *   `#` comment line) are skipped.
     * - Message ids are not checked for being US-ASCII.
     * - Items with an empty id (such as the header) are ignored.
     * - A blank line, or the start of the next `msgctxt`/`msgid` after an
     *   entry that already has an id, completes the current entry.
     * - A quoted continuation line that does not follow any keyword line is
     *   ignored.
     * - If the file cannot be opened, an empty array is returned (`fopen()`
     *   itself reports the failure).
     *
     * The result is keyed by the unescaped singular id; each value has a
     * `_context` array mapping a context (`''` when none was given) to the
     * translation. Plural entries are additionally stored under
     * `Translator::PLURAL_PREFIX . <plural id>` with the list of plural forms.
     *
     * @param string $resource The file name to parse
     *
     * @return array
     */
    public function parse($resource)
    {
        $stream = fopen($resource, 'rb');
        if ($stream === false) {
            // Nothing to read; avoid passing `false` to fgets()/fclose().
            return [];
        }

        $defaults = [
            'ids' => [],
            'translated' => null
        ];

        $messages = [];
        $item = $defaults;
        // Path inside $item of the field that continuation lines append to.
        $stage = null;

        try {
            // Compare against false so a last line such as "0" is not
            // mistaken for the end of the file.
            while (($line = fgets($stream)) !== false) {
                $line = trim($line);

                if ($line === '') {
                    // Whitespace indicates the current item is done.
                    $this->_addMessage($messages, $item);
                    $item = $defaults;
                    $stage = null;
                } elseif (substr($line, 0, 7) === 'msgid "') {
                    // A second msgid without a separating blank line starts a
                    // new entry: store the previous one and start clean. An
                    // item that only holds a msgctxt so far is kept, because
                    // the context line precedes its msgid.
                    if (isset($item['ids']['singular'])) {
                        $this->_addMessage($messages, $item);
                        $item = $defaults;
                    }
                    $item['ids']['singular'] = (string)substr($line, 7, -1);
                    $stage = ['ids', 'singular'];
                } elseif (substr($line, 0, 8) === 'msgstr "') {
                    $item['translated'] = (string)substr($line, 8, -1);
                    $stage = ['translated'];
                } elseif (substr($line, 0, 9) === 'msgctxt "') {
                    // msgctxt opens a new entry; do not attach it to an entry
                    // that already has its id.
                    if (isset($item['ids']['singular'])) {
                        $this->_addMessage($messages, $item);
                        $item = $defaults;
                    }
                    $item['context'] = (string)substr($line, 9, -1);
                    $stage = ['context'];
                } elseif ($line[0] === '"') {
                    // Continuation of the previous keyword line. Without a
                    // preceding keyword there is nothing to append to.
                    if ($stage !== null) {
                        $chunk = (string)substr($line, 1, -1);
                        if (count($stage) === 2) {
                            $item[$stage[0]][$stage[1]] .= $chunk;
                        } else {
                            $item[$stage[0]] .= $chunk;
                        }
                    }
                } elseif (substr($line, 0, 14) === 'msgid_plural "') {
                    $item['ids']['plural'] = (string)substr($line, 14, -1);
                    $stage = ['ids', 'plural'];
                } elseif (substr($line, 0, 7) === 'msgstr[') {
                    $close = strpos($line, ']');
                    if ($close !== false) {
                        // Read every digit between the brackets so indexes
                        // above 9 are not truncated.
                        $row = (int)substr($line, 7, $close - 7);
                        if (!is_array($item['translated'])) {
                            $item['translated'] = [];
                        }
                        // Skip `] "` to reach the first character of the value.
                        $item['translated'][$row] = (string)substr($line, $close + 3, -1);
                        $stage = ['translated', $row];
                    }
                }
            }

            // Save the last item.
            $this->_addMessage($messages, $item);
        } finally {
            fclose($stream);
        }

        return $messages;
    }

    /**
     * Saves a translation item to the messages.
     *
     * Items without a singular or plural id are skipped. For a given
     * singular id and context the first translation seen is kept; later
     * duplicates are ignored. Plural forms are stored under the plural id
     * prefixed with `Translator::PLURAL_PREFIX`, with missing indexes
     * filled by empty strings.
     *
     * @param array $messages The messages array being collected from the file
     * @param array $item The current item being inspected
     * @return void
     */
    protected function _addMessage(array &$messages, array $item)
    {
        $singularId = isset($item['ids']['singular']) ? (string)$item['ids']['singular'] : '';
        $pluralId = isset($item['ids']['plural']) ? (string)$item['ids']['plural'] : '';

        // Compare with '' instead of using empty(): "0" is a valid message id.
        if ($singularId === '' && $pluralId === '') {
            return;
        }

        $singular = stripcslashes($singularId);
        // Messages without a msgctxt are stored under the empty context.
        $contextKey = isset($item['context']) ? $item['context'] : '';
        $translated = isset($item['translated']) ? $item['translated'] : null;

        if (is_array($translated)) {
            // For plural entries the first form is the singular translation.
            $translation = isset($translated[0]) ? $translated[0] : '';
        } else {
            $translation = $translated;
        }
        $translation = stripcslashes((string)$translation);

        if (!isset($messages[$singular]['_context'][$contextKey])) {
            $messages[$singular]['_context'][$contextKey] = $translation;
        }

        if (isset($item['ids']['plural'])) {
            // A plural id without msgstr[N] lines leaves a string or null here.
            $plurals = (array)$translated;
            // PO are by definition indexed so sort by index.
            ksort($plurals);

            // Make sure every index up to the highest one is filled.
            end($plurals);
            $highest = (int)key($plurals);

            // Fill missing spots with an empty string.
            $plurals += array_fill(0, $highest + 1, '');
            ksort($plurals);

            $plurals = array_map('stripcslashes', $plurals);
            $key = stripcslashes($pluralId);

            $messages[Translator::PLURAL_PREFIX . $key]['_context'][$contextKey] = $plurals;
        }
    }
}
185: