Skip to content

Commit 4df7e66

Browse files
committed
Allow extracting nested gettext calls in JavaScript
Currently, the JavaScript extractor does not support nested gettext calls at all. The extraction code was refactored to resemble the Python extractor as closely as possible and to support this use case.
1 parent 71b33d0 commit 4df7e66

File tree

2 files changed

+156
-77
lines changed

2 files changed

+156
-77
lines changed

babel/messages/extract.py

Lines changed: 131 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -706,54 +706,109 @@ def extract_javascript(
706706
:param lineno: line number offset (for parsing embedded fragments)
707707
"""
708708
from babel.messages.jslexer import Token, tokenize, unquote_string
709-
funcname = message_lineno = None
710-
messages = []
711-
last_argument = None
712-
translator_comments = []
713-
concatenate_next = False
709+
714710
encoding = options.get('encoding', 'utf-8')
715-
last_token = None
716-
call_stack = -1
717711
dotted = any('.' in kw for kw in keywords)
712+
last_token = None
713+
# Keep a stack of all function calls and their related contextual variables,
714+
# so we can handle nested gettext calls.
715+
function_stack = []
716+
# Keep track of whether we're in a class or function definition
717+
in_def = False
718+
# Keep track of whether we're in a block of translator comments
719+
in_translator_comments = False
720+
# Keep track of the last encountered translator comments
721+
translator_comments = []
722+
# Keep track of the (split) strings encountered
723+
message_buffer = []
724+
718725
for token in tokenize(
719726
fileobj.read().decode(encoding),
720-
jsx=options.get("jsx", True),
721-
template_string=options.get("template_string", True),
727+
jsx=options.get('jsx', True),
728+
template_string=options.get('template_string', True),
722729
dotted=dotted,
723730
lineno=lineno,
724731
):
725-
if ( # Turn keyword`foo` expressions into keyword("foo") calls:
726-
funcname and # have a keyword...
727-
(last_token and last_token.type == 'name') and # we've seen nothing after the keyword...
728-
token.type == 'template_string' # this is a template string
732+
if token.type == 'name' and token.value in ('class', 'function'):
733+
# We're entering a class or function definition
734+
in_def = True
735+
736+
elif in_def and token.type == 'operator' and token.value in ('(', '{'):
737+
# We're in a class or function definition and should not do anything
738+
in_def = False
739+
continue
740+
741+
elif (
742+
last_token
743+
and last_token.type == 'name'
744+
and last_token.value in keywords
745+
and token.type == 'template_string'
729746
):
730-
message_lineno = token.lineno
731-
messages = [unquote_string(token.value)]
732-
call_stack = 0
747+
# Turn keyword`foo` expressions into keyword("foo") function calls
748+
string_value = unquote_string(token.value)
749+
cur_translator_comments = translator_comments
750+
if function_stack and function_stack[-1]['function_line_no'] == last_token.lineno:
751+
# If our current function call is on the same line as the previous one,
752+
# copy their translator comments, since they also apply to us.
753+
cur_translator_comments = function_stack[-1]['translator_comments']
754+
755+
# We add all information needed later for the current function call
756+
function_stack.append({
757+
'function_line_no': last_token.lineno,
758+
'function_name': last_token.value,
759+
'message_line_no': token.lineno,
760+
'messages': [string_value],
761+
'translator_comments': cur_translator_comments,
762+
})
763+
translator_comments = []
764+
765+
# We act as if we are closing the function call now
733766
token = Token('operator', ')', token.lineno)
734767

735-
if options.get('parse_template_string') and not funcname and token.type == 'template_string':
768+
if (
769+
options.get('parse_template_string')
770+
and (not last_token or last_token.type != 'name' or last_token.value not in keywords)
771+
and token.type == 'template_string'
772+
):
736773
yield from parse_template_string(token.value, keywords, comment_tags, options, token.lineno)
737774

738775
elif token.type == 'operator' and token.value == '(':
739-
if funcname:
740-
message_lineno = token.lineno
741-
call_stack += 1
776+
if last_token.type == 'name':
777+
# We're entering a function call
778+
cur_translator_comments = translator_comments
779+
if function_stack and function_stack[-1]['function_line_no'] == token.lineno:
780+
# If our current function call is on the same line as the previous one,
781+
# copy their translator comments, since they also apply to us.
782+
cur_translator_comments = function_stack[-1]['translator_comments']
783+
784+
# We add all information needed later for the current function call
785+
function_stack.append({
786+
'function_line_no': token.lineno,
787+
'function_name': last_token.value,
788+
'message_line_no': None,
789+
'messages': [],
790+
'translator_comments': cur_translator_comments,
791+
})
792+
translator_comments = []
742793

743-
elif call_stack == -1 and token.type == 'linecomment':
794+
elif token.type == 'linecomment':
795+
# Strip the comment token from the line
744796
value = token.value[2:].strip()
745-
if translator_comments and \
746-
translator_comments[-1][0] == token.lineno - 1:
797+
if in_translator_comments and translator_comments[-1][0] == token.lineno - 1:
798+
# We're already inside a translator comment, continue appending
747799
translator_comments.append((token.lineno, value))
748800
continue
749801

750802
for comment_tag in comment_tags:
751803
if value.startswith(comment_tag):
752-
translator_comments.append((token.lineno, value.strip()))
804+
# Comment starts with one of the comment tags,
805+
# so let's start capturing it
806+
in_translator_comments = True
807+
translator_comments.append((token.lineno, value))
753808
break
754809

755810
elif token.type == 'multilinecomment':
756-
# only one multi-line comment may precede a translation
811+
# Only one multi-line comment may precede a translation
757812
translator_comments = []
758813
value = token.value[2:-2].strip()
759814
for comment_tag in comment_tags:
@@ -763,68 +818,67 @@ def extract_javascript(
763818
lines[0] = lines[0].strip()
764819
lines[1:] = dedent('\n'.join(lines[1:])).splitlines()
765820
for offset, line in enumerate(lines):
766-
translator_comments.append((token.lineno + offset,
767-
line))
821+
translator_comments.append((token.lineno + offset, line))
768822
break
769823

770-
elif funcname and call_stack == 0:
824+
elif function_stack and function_stack[-1]['function_name'] in keywords:
825+
# We're inside a translation function call
771826
if token.type == 'operator' and token.value == ')':
772-
if last_argument is not None:
773-
messages.append(last_argument)
774-
if len(messages) > 1:
775-
messages = tuple(messages)
776-
elif messages:
777-
messages = messages[0]
827+
# The call has ended, so we yield the translatable term(s)
828+
messages = function_stack[-1]['messages']
829+
line_no = (
830+
function_stack[-1]['message_line_no']
831+
or function_stack[-1]['function_line_no']
832+
)
833+
cur_translator_comments = function_stack[-1]['translator_comments']
834+
835+
if message_buffer:
836+
messages.append(''.join(message_buffer))
837+
message_buffer.clear()
778838
else:
779-
messages = None
839+
messages.append(None)
780840

781-
# Comments don't apply unless they immediately precede the
782-
# message
783-
if translator_comments and \
784-
translator_comments[-1][0] < message_lineno - 1:
785-
translator_comments = []
841+
messages = tuple(messages) if len(messages) > 1 else messages[0]
842+
if (
843+
cur_translator_comments
844+
and cur_translator_comments[-1][0] < line_no - 1
845+
):
846+
# The translator comments are not immediately preceding the current
847+
# term, so we skip them.
848+
cur_translator_comments = []
786849

787-
if messages is not None:
788-
yield (message_lineno, funcname, messages,
789-
[comment[1] for comment in translator_comments])
850+
yield (
851+
line_no,
852+
function_stack[-1]['function_name'],
853+
messages,
854+
[comment[1] for comment in cur_translator_comments],
855+
)
790856

791-
funcname = message_lineno = last_argument = None
792-
concatenate_next = False
793-
translator_comments = []
794-
messages = []
795-
call_stack = -1
857+
function_stack.pop()
796858

797859
elif token.type in ('string', 'template_string'):
798-
new_value = unquote_string(token.value)
799-
if concatenate_next:
800-
last_argument = (last_argument or '') + new_value
801-
concatenate_next = False
860+
# We've encountered a string inside a translation function call
861+
string_value = unquote_string(token.value)
862+
if not function_stack[-1]['message_line_no']:
863+
function_stack[-1]['message_line_no'] = token.lineno
864+
if string_value is not None:
865+
message_buffer.append(string_value)
866+
867+
elif token.type == 'operator' and token.value == ',':
868+
# End of a function call argument
869+
if message_buffer:
870+
function_stack[-1]['messages'].append(''.join(message_buffer))
871+
message_buffer.clear()
802872
else:
803-
last_argument = new_value
804-
805-
elif token.type == 'operator':
806-
if token.value == ',':
807-
if last_argument is not None:
808-
messages.append(last_argument)
809-
last_argument = None
810-
else:
811-
messages.append(None)
812-
concatenate_next = False
813-
elif token.value == '+':
814-
concatenate_next = True
815-
816-
elif call_stack > 0 and token.type == 'operator' \
817-
and token.value == ')':
818-
call_stack -= 1
819-
820-
elif funcname and call_stack == -1:
821-
funcname = None
822-
823-
elif call_stack == -1 and token.type == 'name' and \
824-
token.value in keywords and \
825-
(last_token is None or last_token.type != 'name' or
826-
last_token.value != 'function'):
827-
funcname = token.value
873+
function_stack[-1]['messages'].append(None)
874+
875+
elif function_stack and token.type == 'operator' and token.value == ')':
876+
function_stack.pop()
877+
878+
if in_translator_comments and translator_comments[-1][0] < token.lineno:
879+
# We have a newline in between the comments, so they don't belong
880+
# together anymore
881+
in_translator_comments = False
828882

829883
last_token = token
830884

tests/messages/test_js_extract.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,3 +191,28 @@ def test_inside_nested_template_string():
191191
)
192192

193193
assert messages == [(1, 'Greetings!', [], None), (1, 'This is a lovely evening.', [], None), (1, 'The day is really nice!', [], None)]
194+
195+
def test_nested_gettext_calls():
196+
buf = BytesIO(b"""\
197+
gettext("Hello %s", gettext("User"));
198+
gettext("Hello %(user)s", { user: gettext("User") });
199+
gettext("Hello %s", dummy.dummyFunction(gettext("User")));
200+
gettext(
201+
"Hello %(user)s",
202+
{ user: dummy.dummyFunction(gettext("User")) },
203+
);
204+
""")
205+
messages = list(
206+
extract.extract('javascript', buf, {"gettext": None}, [], {}),
207+
)
208+
209+
assert messages == [
210+
(1, 'User', [], None),
211+
(1, 'Hello %s', [], None),
212+
(2, 'User', [], None),
213+
(2, 'Hello %(user)s', [], None),
214+
(3, 'User', [], None),
215+
(3, 'Hello %s', [], None),
216+
(6, 'User', [], None),
217+
(5, 'Hello %(user)s', [], None),
218+
]

0 commit comments

Comments
 (0)