Skip to content

Commit 9131a83

Browse files
committed
Allow extracting nested gettext calls in JavaScript
Currently, the JavaScript extractor does not support nested gettext calls at all. The extraction code was refactored to resemble the Python extractor as closely as possible and to support this use case.
1 parent 4eea9dd commit 9131a83

File tree

2 files changed

+156
-77
lines changed

2 files changed

+156
-77
lines changed

babel/messages/extract.py

+131-77
Original file line numberDiff line numberDiff line change
@@ -704,54 +704,109 @@ def extract_javascript(
704704
:param lineno: line number offset (for parsing embedded fragments)
705705
"""
706706
from babel.messages.jslexer import Token, tokenize, unquote_string
707-
funcname = message_lineno = None
708-
messages = []
709-
last_argument = None
710-
translator_comments = []
711-
concatenate_next = False
707+
712708
encoding = options.get('encoding', 'utf-8')
713-
last_token = None
714-
call_stack = -1
715709
dotted = any('.' in kw for kw in keywords)
710+
last_token = None
711+
# Keep the stack of all function calls and their related contextual variables,
712+
# so we can handle nested gettext calls.
713+
function_stack = []
714+
# Keep track of whether we're in a class or function definition
715+
in_def = False
716+
# Keep track of whether we're in a block of translator comments
717+
in_translator_comments = False
718+
# Keep track of the last encountered translator comments
719+
translator_comments = []
720+
# Keep track of the (split) strings encountered
721+
message_buffer = []
722+
716723
for token in tokenize(
717724
fileobj.read().decode(encoding),
718-
jsx=options.get("jsx", True),
719-
template_string=options.get("template_string", True),
725+
jsx=options.get('jsx', True),
726+
template_string=options.get('template_string', True),
720727
dotted=dotted,
721728
lineno=lineno,
722729
):
723-
if ( # Turn keyword`foo` expressions into keyword("foo") calls:
724-
funcname and # have a keyword...
725-
(last_token and last_token.type == 'name') and # we've seen nothing after the keyword...
726-
token.type == 'template_string' # this is a template string
730+
if token.type == 'name' and token.value in ('class', 'function'):
731+
# We're entering a class or function definition
732+
in_def = True
733+
734+
elif in_def and token.type == 'operator' and token.value in ('(', '{'):
735+
# We're in a class or function definition and should not do anything
736+
in_def = False
737+
continue
738+
739+
elif (
740+
last_token
741+
and last_token.type == 'name'
742+
and last_token.value in keywords
743+
and token.type == 'template_string'
727744
):
728-
message_lineno = token.lineno
729-
messages = [unquote_string(token.value)]
730-
call_stack = 0
745+
# Turn keyword`foo` expressions into keyword("foo") function calls
746+
string_value = unquote_string(token.value)
747+
cur_translator_comments = translator_comments
748+
if function_stack and function_stack[-1]['function_line_no'] == last_token.lineno:
749+
# If our current function call is on the same line as the previous one,
750+
# copy their translator comments, since they also apply to us.
751+
cur_translator_comments = function_stack[-1]['translator_comments']
752+
753+
# We add all information needed later for the current function call
754+
function_stack.append({
755+
'function_line_no': last_token.lineno,
756+
'function_name': last_token.value,
757+
'message_line_no': token.lineno,
758+
'messages': [string_value],
759+
'translator_comments': cur_translator_comments,
760+
})
761+
translator_comments = []
762+
763+
# We act as if we are closing the function call now
731764
token = Token('operator', ')', token.lineno)
732765

733-
if options.get('parse_template_string') and not funcname and token.type == 'template_string':
766+
if (
767+
options.get('parse_template_string')
768+
and (not last_token or last_token.type != 'name' or last_token.value not in keywords)
769+
and token.type == 'template_string'
770+
):
734771
yield from parse_template_string(token.value, keywords, comment_tags, options, token.lineno)
735772

736773
elif token.type == 'operator' and token.value == '(':
737-
if funcname:
738-
message_lineno = token.lineno
739-
call_stack += 1
774+
if last_token.type == 'name':
775+
# We're entering a function call
776+
cur_translator_comments = translator_comments
777+
if function_stack and function_stack[-1]['function_line_no'] == token.lineno:
778+
# If our current function call is on the same line as the previous one,
779+
# copy their translator comments, since they also apply to us.
780+
cur_translator_comments = function_stack[-1]['translator_comments']
781+
782+
# We add all information needed later for the current function call
783+
function_stack.append({
784+
'function_line_no': token.lineno,
785+
'function_name': last_token.value,
786+
'message_line_no': None,
787+
'messages': [],
788+
'translator_comments': cur_translator_comments,
789+
})
790+
translator_comments = []
740791

741-
elif call_stack == -1 and token.type == 'linecomment':
792+
elif token.type == 'linecomment':
793+
# Strip the comment token from the line
742794
value = token.value[2:].strip()
743-
if translator_comments and \
744-
translator_comments[-1][0] == token.lineno - 1:
795+
if in_translator_comments and translator_comments[-1][0] == token.lineno - 1:
796+
# We're already inside a translator comment, continue appending
745797
translator_comments.append((token.lineno, value))
746798
continue
747799

748800
for comment_tag in comment_tags:
749801
if value.startswith(comment_tag):
750-
translator_comments.append((token.lineno, value.strip()))
802+
# Comment starts with one of the comment tags,
803+
# so let's start capturing it
804+
in_translator_comments = True
805+
translator_comments.append((token.lineno, value))
751806
break
752807

753808
elif token.type == 'multilinecomment':
754-
# only one multi-line comment may precede a translation
809+
# Only one multi-line comment may precede a translation
755810
translator_comments = []
756811
value = token.value[2:-2].strip()
757812
for comment_tag in comment_tags:
@@ -761,68 +816,67 @@ def extract_javascript(
761816
lines[0] = lines[0].strip()
762817
lines[1:] = dedent('\n'.join(lines[1:])).splitlines()
763818
for offset, line in enumerate(lines):
764-
translator_comments.append((token.lineno + offset,
765-
line))
819+
translator_comments.append((token.lineno + offset, line))
766820
break
767821

768-
elif funcname and call_stack == 0:
822+
elif function_stack and function_stack[-1]['function_name'] in keywords:
823+
# We're inside a translation function call
769824
if token.type == 'operator' and token.value == ')':
770-
if last_argument is not None:
771-
messages.append(last_argument)
772-
if len(messages) > 1:
773-
messages = tuple(messages)
774-
elif messages:
775-
messages = messages[0]
825+
# The call has ended, so we yield the translatable term(s)
826+
messages = function_stack[-1]['messages']
827+
line_no = (
828+
function_stack[-1]['message_line_no']
829+
or function_stack[-1]['function_line_no']
830+
)
831+
cur_translator_comments = function_stack[-1]['translator_comments']
832+
833+
if message_buffer:
834+
messages.append(''.join(message_buffer))
835+
message_buffer.clear()
776836
else:
777-
messages = None
837+
messages.append(None)
778838

779-
# Comments don't apply unless they immediately precede the
780-
# message
781-
if translator_comments and \
782-
translator_comments[-1][0] < message_lineno - 1:
783-
translator_comments = []
839+
messages = tuple(messages) if len(messages) > 1 else messages[0]
840+
if (
841+
cur_translator_comments
842+
and cur_translator_comments[-1][0] < line_no - 1
843+
):
844+
# The translator comments are not immediately preceding the current
845+
# term, so we skip them.
846+
cur_translator_comments = []
784847

785-
if messages is not None:
786-
yield (message_lineno, funcname, messages,
787-
[comment[1] for comment in translator_comments])
848+
yield (
849+
line_no,
850+
function_stack[-1]['function_name'],
851+
messages,
852+
[comment[1] for comment in cur_translator_comments],
853+
)
788854

789-
funcname = message_lineno = last_argument = None
790-
concatenate_next = False
791-
translator_comments = []
792-
messages = []
793-
call_stack = -1
855+
function_stack.pop()
794856

795857
elif token.type in ('string', 'template_string'):
796-
new_value = unquote_string(token.value)
797-
if concatenate_next:
798-
last_argument = (last_argument or '') + new_value
799-
concatenate_next = False
858+
# We've encountered a string inside a translation function call
859+
string_value = unquote_string(token.value)
860+
if not function_stack[-1]['message_line_no']:
861+
function_stack[-1]['message_line_no'] = token.lineno
862+
if string_value is not None:
863+
message_buffer.append(string_value)
864+
865+
elif token.type == 'operator' and token.value == ',':
866+
# End of a function call argument
867+
if message_buffer:
868+
function_stack[-1]['messages'].append(''.join(message_buffer))
869+
message_buffer.clear()
800870
else:
801-
last_argument = new_value
802-
803-
elif token.type == 'operator':
804-
if token.value == ',':
805-
if last_argument is not None:
806-
messages.append(last_argument)
807-
last_argument = None
808-
else:
809-
messages.append(None)
810-
concatenate_next = False
811-
elif token.value == '+':
812-
concatenate_next = True
813-
814-
elif call_stack > 0 and token.type == 'operator' \
815-
and token.value == ')':
816-
call_stack -= 1
817-
818-
elif funcname and call_stack == -1:
819-
funcname = None
820-
821-
elif call_stack == -1 and token.type == 'name' and \
822-
token.value in keywords and \
823-
(last_token is None or last_token.type != 'name' or
824-
last_token.value != 'function'):
825-
funcname = token.value
871+
function_stack[-1]['messages'].append(None)
872+
873+
elif function_stack and token.type == 'operator' and token.value == ')':
874+
function_stack.pop()
875+
876+
if in_translator_comments and translator_comments[-1][0] < token.lineno:
877+
# We have a newline in between the comments, so they don't belong
878+
# together anymore
879+
in_translator_comments = False
826880

827881
last_token = token
828882

tests/messages/test_js_extract.py

+25
Original file line numberDiff line numberDiff line change
@@ -191,3 +191,28 @@ def test_inside_nested_template_string():
191191
)
192192

193193
assert messages == [(1, 'Greetings!', [], None), (1, 'This is a lovely evening.', [], None), (1, 'The day is really nice!', [], None)]
194+
195+
def test_nested_gettext_calls():
196+
buf = BytesIO(b"""\
197+
gettext("Hello %s", gettext("User"));
198+
gettext("Hello %(user)s", { user: gettext("User") });
199+
gettext("Hello %s", dummy.dummyFunction(gettext("User")));
200+
gettext(
201+
"Hello %(user)s",
202+
{ user: dummy.dummyFunction(gettext("User")) },
203+
);
204+
""")
205+
messages = list(
206+
extract.extract('javascript', buf, {"gettext": None}, [], {}),
207+
)
208+
209+
assert messages == [
210+
(1, 'User', [], None),
211+
(1, 'Hello %s', [], None),
212+
(2, 'User', [], None),
213+
(2, 'Hello %(user)s', [], None),
214+
(3, 'User', [], None),
215+
(3, 'Hello %s', [], None),
216+
(6, 'User', [], None),
217+
(5, 'Hello %(user)s', [], None),
218+
]

0 commit comments

Comments
 (0)