Skip to content

Commit f91754b

Browse files
authored
Enclose white spaces in references (#1105)
Since version 0.22, gettext encloses file names in references that contain spaces or tabs between a First Strong Isolate (U+2068) and a Pop Directional Isolate (U+2069) character. This commit adds the same behavior to Babel.
1 parent b42344d commit f91754b

File tree

2 files changed

+179
-2
lines changed

2 files changed

+179
-2
lines changed

babel/messages/pofile.py

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,50 @@ def denormalize(string: str) -> str:
8080
return unescape(string)
8181

8282

83+
def _extract_locations(line: str) -> list[str]:
84+
"""Extract locations from location comments.
85+
86+
Locations are extracted while properly handling First Strong
87+
Isolate (U+2068) and Pop Directional Isolate (U+2069), used by
88+
gettext to enclose filenames with spaces and tabs in their names.
89+
"""
90+
if "\u2068" not in line and "\u2069" not in line:
91+
return line.lstrip().split()
92+
93+
locations = []
94+
location = ""
95+
in_filename = False
96+
for c in line:
97+
if c == "\u2068":
98+
if in_filename:
99+
raise ValueError("location comment contains more First Strong Isolate "
100+
"characters, than Pop Directional Isolate characters")
101+
in_filename = True
102+
continue
103+
elif c == "\u2069":
104+
if not in_filename:
105+
raise ValueError("location comment contains more Pop Directional Isolate "
106+
"characters, than First Strong Isolate characters")
107+
in_filename = False
108+
continue
109+
elif c == " ":
110+
if in_filename:
111+
location += c
112+
elif location:
113+
locations.append(location)
114+
location = ""
115+
else:
116+
location += c
117+
else:
118+
if location:
119+
if in_filename:
120+
raise ValueError("location comment contains more First Strong Isolate "
121+
"characters, than Pop Directional Isolate characters")
122+
locations.append(location)
123+
124+
return locations
125+
126+
83127
class PoFileError(Exception):
84128
"""Exception thrown by PoParser when an invalid po file is encountered."""
85129

@@ -269,7 +313,7 @@ def _process_comment(self, line) -> None:
269313
self._finish_current_message()
270314

271315
if line[1:].startswith(':'):
272-
for location in line[2:].lstrip().split():
316+
for location in _extract_locations(line[2:]):
273317
pos = location.rfind(':')
274318
if pos >= 0:
275319
try:
@@ -307,7 +351,10 @@ def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None:
307351
if line[1:].startswith('~'):
308352
self._process_message_line(lineno, line[2:].lstrip(), obsolete=True)
309353
else:
310-
self._process_comment(line)
354+
try:
355+
self._process_comment(line)
356+
except ValueError as exc:
357+
self._invalid_pofile(line, lineno, str(exc))
311358
else:
312359
self._process_message_line(lineno, line)
313360

@@ -474,6 +521,23 @@ def normalize(string: str, prefix: str = '', width: int = 76) -> str:
474521
return '""\n' + '\n'.join([(prefix + escape(line)) for line in lines])
475522

476523

524+
def _enclose_filename_if_necessary(filename: str) -> str:
525+
"""Enclose filenames which include white spaces or tabs.
526+
527+
Do the same as gettext and enclose filenames which contain white
528+
spaces or tabs with First Strong Isolate (U+2068) and Pop
529+
Directional Isolate (U+2069).
530+
"""
531+
if " " not in filename and "\t" not in filename:
532+
return filename
533+
534+
if not filename.startswith("\u2068"):
535+
filename = "\u2068" + filename
536+
if not filename.endswith("\u2069"):
537+
filename += "\u2069"
538+
return filename
539+
540+
477541
def write_po(
478542
fileobj: SupportsWrite[bytes],
479543
catalog: Catalog,
@@ -626,6 +690,7 @@ def _format_message(message, prefix=''):
626690

627691
for filename, lineno in locations:
628692
location = filename.replace(os.sep, '/')
693+
location = _enclose_filename_if_necessary(location)
629694
if lineno and include_lineno:
630695
location = f"{location}:{lineno:d}"
631696
if location not in locs:

tests/messages/test_pofile.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from babel.core import Locale
2020
from babel.messages import pofile
2121
from babel.messages.catalog import Catalog, Message
22+
from babel.messages.pofile import _enclose_filename_if_necessary, _extract_locations
2223
from babel.util import FixedOffsetTimezone
2324

2425

@@ -438,6 +439,19 @@ def test_missing_plural_in_the_middle(self):
438439
assert message.string[1] == ''
439440
assert message.string[2] == 'Vohs [text]'
440441

442+
def test_with_location(self):
    # A location comment mixing a plain filename with one enclosed in
    # U+2068/U+2069 must yield both locations, the latter without the
    # isolate characters.
    po_source = StringIO('''\
#: main.py:1 \u2068filename with whitespace.py\u2069:123
msgid "foo"
msgstr "bar"
''')
    expected_locations = [("main.py", 1), ("filename with whitespace.py", 123)]

    catalog = pofile.read_po(po_source, locale='de_DE')

    assert len(catalog) == 1
    entry = catalog['foo']
    assert entry.string == 'bar'
    assert entry.locations == expected_locations
453+
454+
441455
def test_abort_invalid_po_file(self):
442456
invalid_po = '''
443457
msgctxt ""
@@ -841,6 +855,59 @@ def test_no_include_lineno(self):
841855
msgid "foo"
842856
msgstr ""'''
843857

858+
def test_white_space_in_location(self):
    # A filename containing a space is written enclosed in U+2068/U+2069.
    expected = b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
msgid "foo"
msgstr ""'''
    catalog = Catalog()
    for filename, lineno in (('main.py', 1), ('utils b.py', 3)):
        catalog.add('foo', locations=[(filename, lineno)])

    out = BytesIO()
    pofile.write_po(out, catalog, omit_header=True, include_lineno=True)

    assert out.getvalue().strip() == expected
867+
868+
def test_white_space_in_location_already_enclosed(self):
    # A filename that already carries the isolate characters must not be
    # enclosed a second time.
    expected = b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
msgid "foo"
msgstr ""'''
    catalog = Catalog()
    for filename, lineno in (('main.py', 1), ('\u2068utils b.py\u2069', 3)):
        catalog.add('foo', locations=[(filename, lineno)])

    out = BytesIO()
    pofile.write_po(out, catalog, omit_header=True, include_lineno=True)

    assert out.getvalue().strip() == expected
877+
878+
def test_tab_in_location(self):
    # A filename containing a tab is written enclosed in U+2068/U+2069.
    # NOTE(review): the expected output shows a space where the input
    # filename has a tab -- confirm this matches what the writer emits.
    expected = b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
msgid "foo"
msgstr ""'''
    catalog = Catalog()
    for filename, lineno in (('main.py', 1), ('utils\tb.py', 3)):
        catalog.add('foo', locations=[(filename, lineno)])

    out = BytesIO()
    pofile.write_po(out, catalog, omit_header=True, include_lineno=True)

    assert out.getvalue().strip() == expected
887+
888+
def test_tab_in_location_already_enclosed(self):
    # A tab-containing filename that already carries the isolate
    # characters must not be enclosed a second time.
    # NOTE(review): the expected output shows a space where the input
    # filename has a tab -- confirm this matches what the writer emits.
    expected = b'''#: main.py:1 \xe2\x81\xa8utils b.py\xe2\x81\xa9:3
msgid "foo"
msgstr ""'''
    catalog = Catalog()
    for filename, lineno in (('main.py', 1), ('\u2068utils\tb.py\u2069', 3)):
        catalog.add('foo', locations=[(filename, lineno)])

    out = BytesIO()
    pofile.write_po(out, catalog, omit_header=True, include_lineno=True)

    assert out.getvalue().strip() == expected
897+
898+
899+
class RoundtripPoTestCase(unittest.TestCase):
    # Tests that write a catalog and read it back again.

    def test_enclosed_filenames_in_location_comment(self):
        # Locations with and without spaces in the filename must survive
        # a write_po/read_po round trip unchanged.
        entries = (
            ("foo", 2, [("main 1.py", 1)]),
            ("bar", 6, [("other.py", 2)]),
            ("baz", 10, [("main 1.py", 3), ("other.py", 4)]),
        )
        original = Catalog()
        for msgid, lineno, locations in entries:
            original.add(msgid, lineno=lineno, locations=locations, string="")

        stream = BytesIO()
        pofile.write_po(stream, original, omit_header=True, include_lineno=True)
        stream.seek(0)  # rewind so read_po starts at the beginning
        reread = pofile.read_po(stream)

        assert original.is_identical(reread) is True
844911

845912
class PofileFunctionsTestCase(unittest.TestCase):
846913

@@ -864,6 +931,51 @@ def test_denormalize_on_msgstr_without_empty_first_line(self):
864931
assert expected_denormalized == pofile.denormalize(f'""\n{msgstr}')
865932

866933

934+
@pytest.mark.parametrize(
    "line, locations",
    [
        ("\u2068file1.po\u2069", ["file1.po"]),
        (
            "file1.po \u2068file 2.po\u2069 file3.po",
            ["file1.po", "file 2.po", "file3.po"],
        ),
        (
            "file1.po:1 \u2068file 2.po\u2069:2 file3.po:3",
            ["file1.po:1", "file 2.po:2", "file3.po:3"],
        ),
        (
            "\u2068file1.po\u2069:1 \u2068file\t2.po\u2069:2 file3.po:3",
            ["file1.po:1", "file\t2.po:2", "file3.po:3"],
        ),
        ("file1.po file2.po", ["file1.po", "file2.po"]),
        ("file1.po \u2068\u2069 file2.po", ["file1.po", "file2.po"]),
    ],
)
def test_extract_locations_valid_location_comment(line, locations):
    # Well-formed location comments: isolate characters are dropped and
    # whitespace inside an isolate pair stays part of the filename.
    extracted = _extract_locations(line)
    assert extracted == locations
945+
946+
947+
@pytest.mark.parametrize(
    "line",
    [
        "\u2068file 1.po",
        "file 1.po\u2069",
        "\u2069file 1.po\u2068",
        "\u2068file 1.po:1 \u2068file 2.po\u2069:2",
        "\u2068file 1.po\u2069:1 file 2.po\u2069:2",
    ],
)
def test_extract_locations_invalid_location_comment(line):
    # Unbalanced or nested isolate characters must be rejected.
    with pytest.raises(ValueError):
        _extract_locations(line)
957+
958+
959+
@pytest.mark.parametrize(
    "filename",
    [
        "file.po",
        "file_a.po",
        "file-a.po",
        "file\n.po",
        "\u2068file.po\u2069",
        "\u2068file a.po\u2069",
    ],
)
def test_enclose_filename_if_necessary_no_change(filename):
    # Names without a space or tab, and names that are already enclosed,
    # come back exactly as they went in.
    result = _enclose_filename_if_necessary(filename)
    assert result == filename
969+
970+
971+
@pytest.mark.parametrize(
    "filename",
    [
        "file a.po",
        "file\ta.po",
    ],
)
def test_enclose_filename_if_necessary_enclosed(filename):
    # Names containing a space or tab are wrapped in U+2068 ... U+2069.
    expected = f"\u2068{filename}\u2069"
    assert _enclose_filename_if_necessary(filename) == expected
977+
978+
867979
def test_unknown_language_roundtrip():
868980
buf = StringIO(r'''
869981
msgid ""

0 commit comments

Comments
 (0)