@@ -706,54 +706,109 @@ def extract_javascript(
706
706
:param lineno: line number offset (for parsing embedded fragments)
707
707
"""
708
708
from babel .messages .jslexer import Token , tokenize , unquote_string
709
- funcname = message_lineno = None
710
- messages = []
711
- last_argument = None
712
- translator_comments = []
713
- concatenate_next = False
709
+
714
710
encoding = options .get ('encoding' , 'utf-8' )
715
- last_token = None
716
- call_stack = - 1
717
711
dotted = any ('.' in kw for kw in keywords )
712
+ last_token = None
713
+ # Keep the stack of all function calls and its related contextual variables,
714
+ # so we can handle nested gettext calls.
715
+ function_stack = []
716
+ # Keep track of whether we're in a class or function definition
717
+ in_def = False
718
+ # Keep track of whether we're in a block of translator comments
719
+ in_translator_comments = False
720
+ # Keep track of the last encountered translator comments
721
+ translator_comments = []
722
+ # Keep track of the (split) strings encountered
723
+ message_buffer = []
724
+
718
725
for token in tokenize (
719
726
fileobj .read ().decode (encoding ),
720
- jsx = options .get (" jsx" , True ),
721
- template_string = options .get (" template_string" , True ),
727
+ jsx = options .get (' jsx' , True ),
728
+ template_string = options .get (' template_string' , True ),
722
729
dotted = dotted ,
723
730
lineno = lineno ,
724
731
):
725
- if ( # Turn keyword`foo` expressions into keyword("foo") calls:
726
- funcname and # have a keyword...
727
- (last_token and last_token .type == 'name' ) and # we've seen nothing after the keyword...
728
- token .type == 'template_string' # this is a template string
732
+ if token .type == 'name' and token .value in ('class' , 'function' ):
733
+ # We're entering a class or function definition
734
+ in_def = True
735
+
736
+ elif in_def and token .type == 'operator' and token .value in ('(' , '{' ):
737
+ # We're in a class or function definition and should not do anything
738
+ in_def = False
739
+ continue
740
+
741
+ elif (
742
+ last_token
743
+ and last_token .type == 'name'
744
+ and last_token .value in keywords
745
+ and token .type == 'template_string'
729
746
):
730
- message_lineno = token .lineno
731
- messages = [unquote_string (token .value )]
732
- call_stack = 0
747
+ # Turn keyword`foo` expressions into keyword("foo") function calls
748
+ string_value = unquote_string (token .value )
749
+ cur_translator_comments = translator_comments
750
+ if function_stack and function_stack [- 1 ]['function_line_no' ] == last_token .lineno :
751
+ # If our current function call is on the same line as the previous one,
752
+ # copy their translator comments, since they also apply to us.
753
+ cur_translator_comments = function_stack [- 1 ]['translator_comments' ]
754
+
755
+ # We add all information needed later for the current function call
756
+ function_stack .append ({
757
+ 'function_line_no' : last_token .lineno ,
758
+ 'function_name' : last_token .value ,
759
+ 'message_line_no' : token .lineno ,
760
+ 'messages' : [string_value ],
761
+ 'translator_comments' : cur_translator_comments ,
762
+ })
763
+ translator_comments = []
764
+
765
+ # We act as if we are closing the function call now
733
766
token = Token ('operator' , ')' , token .lineno )
734
767
735
- if options .get ('parse_template_string' ) and not funcname and token .type == 'template_string' :
768
+ if (
769
+ options .get ('parse_template_string' )
770
+ and (not last_token or last_token .type != 'name' or last_token .value not in keywords )
771
+ and token .type == 'template_string'
772
+ ):
736
773
yield from parse_template_string (token .value , keywords , comment_tags , options , token .lineno )
737
774
738
775
elif token .type == 'operator' and token .value == '(' :
739
- if funcname :
740
- message_lineno = token .lineno
741
- call_stack += 1
776
+ if last_token .type == 'name' :
777
+ # We're entering a function call
778
+ cur_translator_comments = translator_comments
779
+ if function_stack and function_stack [- 1 ]['function_line_no' ] == token .lineno :
780
+ # If our current function call is on the same line as the previous one,
781
+ # copy their translator comments, since they also apply to us.
782
+ cur_translator_comments = function_stack [- 1 ]['translator_comments' ]
783
+
784
+ # We add all information needed later for the current function call
785
+ function_stack .append ({
786
+ 'function_line_no' : token .lineno ,
787
+ 'function_name' : last_token .value ,
788
+ 'message_line_no' : None ,
789
+ 'messages' : [],
790
+ 'translator_comments' : cur_translator_comments ,
791
+ })
792
+ translator_comments = []
742
793
743
- elif call_stack == - 1 and token .type == 'linecomment' :
794
+ elif token .type == 'linecomment' :
795
+ # Strip the comment token from the line
744
796
value = token .value [2 :].strip ()
745
- if translator_comments and \
746
- translator_comments [ - 1 ][ 0 ] == token . lineno - 1 :
797
+ if in_translator_comments and translator_comments [ - 1 ][ 0 ] == token . lineno - 1 :
798
+ # We're already inside a translator comment, continue appending
747
799
translator_comments .append ((token .lineno , value ))
748
800
continue
749
801
750
802
for comment_tag in comment_tags :
751
803
if value .startswith (comment_tag ):
752
- translator_comments .append ((token .lineno , value .strip ()))
804
+ # Comment starts with one of the comment tags,
805
+ # so let's start capturing it
806
+ in_translator_comments = True
807
+ translator_comments .append ((token .lineno , value ))
753
808
break
754
809
755
810
elif token .type == 'multilinecomment' :
756
- # only one multi-line comment may precede a translation
811
+ # Only one multi-line comment may precede a translation
757
812
translator_comments = []
758
813
value = token .value [2 :- 2 ].strip ()
759
814
for comment_tag in comment_tags :
@@ -763,68 +818,67 @@ def extract_javascript(
763
818
lines [0 ] = lines [0 ].strip ()
764
819
lines [1 :] = dedent ('\n ' .join (lines [1 :])).splitlines ()
765
820
for offset , line in enumerate (lines ):
766
- translator_comments .append ((token .lineno + offset ,
767
- line ))
821
+ translator_comments .append ((token .lineno + offset , line ))
768
822
break
769
823
770
- elif funcname and call_stack == 0 :
824
+ elif function_stack and function_stack [- 1 ]['function_name' ] in keywords :
825
+ # We're inside a translation function call
771
826
if token .type == 'operator' and token .value == ')' :
772
- if last_argument is not None :
773
- messages .append (last_argument )
774
- if len (messages ) > 1 :
775
- messages = tuple (messages )
776
- elif messages :
777
- messages = messages [0 ]
827
+ # The call has ended, so we yield the translatable term(s)
828
+ messages = function_stack [- 1 ]['messages' ]
829
+ line_no = (
830
+ function_stack [- 1 ]['message_line_no' ]
831
+ or function_stack [- 1 ]['function_line_no' ]
832
+ )
833
+ cur_translator_comments = function_stack [- 1 ]['translator_comments' ]
834
+
835
+ if message_buffer :
836
+ messages .append ('' .join (message_buffer ))
837
+ message_buffer .clear ()
778
838
else :
779
- messages = None
839
+ messages . append ( None )
780
840
781
- # Comments don't apply unless they immediately precede the
782
- # message
783
- if translator_comments and \
784
- translator_comments [- 1 ][0 ] < message_lineno - 1 :
785
- translator_comments = []
841
+ messages = tuple (messages ) if len (messages ) > 1 else messages [0 ]
842
+ if (
843
+ cur_translator_comments
844
+ and cur_translator_comments [- 1 ][0 ] < line_no - 1
845
+ ):
846
+ # The translator comments are not immediately preceding the current
847
+ # term, so we skip them.
848
+ cur_translator_comments = []
786
849
787
- if messages is not None :
788
- yield (message_lineno , funcname , messages ,
789
- [comment [1 ] for comment in translator_comments ])
850
+ yield (
851
+ line_no ,
852
+ function_stack [- 1 ]['function_name' ],
853
+ messages ,
854
+ [comment [1 ] for comment in cur_translator_comments ],
855
+ )
790
856
791
- funcname = message_lineno = last_argument = None
792
- concatenate_next = False
793
- translator_comments = []
794
- messages = []
795
- call_stack = - 1
857
+ function_stack .pop ()
796
858
797
859
elif token .type in ('string' , 'template_string' ):
798
- new_value = unquote_string (token .value )
799
- if concatenate_next :
800
- last_argument = (last_argument or '' ) + new_value
801
- concatenate_next = False
860
+ # We've encountered a string inside a translation function call
861
+ string_value = unquote_string (token .value )
862
+ if not function_stack [- 1 ]['message_line_no' ]:
863
+ function_stack [- 1 ]['message_line_no' ] = token .lineno
864
+ if string_value is not None :
865
+ message_buffer .append (string_value )
866
+
867
+ elif token .type == 'operator' and token .value == ',' :
868
+ # End of a function call argument
869
+ if message_buffer :
870
+ function_stack [- 1 ]['messages' ].append ('' .join (message_buffer ))
871
+ message_buffer .clear ()
802
872
else :
803
- last_argument = new_value
804
-
805
- elif token .type == 'operator' :
806
- if token .value == ',' :
807
- if last_argument is not None :
808
- messages .append (last_argument )
809
- last_argument = None
810
- else :
811
- messages .append (None )
812
- concatenate_next = False
813
- elif token .value == '+' :
814
- concatenate_next = True
815
-
816
- elif call_stack > 0 and token .type == 'operator' \
817
- and token .value == ')' :
818
- call_stack -= 1
819
-
820
- elif funcname and call_stack == - 1 :
821
- funcname = None
822
-
823
- elif call_stack == - 1 and token .type == 'name' and \
824
- token .value in keywords and \
825
- (last_token is None or last_token .type != 'name' or
826
- last_token .value != 'function' ):
827
- funcname = token .value
873
+ function_stack [- 1 ]['messages' ].append (None )
874
+
875
+ elif function_stack and token .type == 'operator' and token .value == ')' :
876
+ function_stack .pop ()
877
+
878
+ if in_translator_comments and translator_comments [- 1 ][0 ] < token .lineno :
879
+ # We have a newline in between the comments, so they don't belong
880
+ # together anymore
881
+ in_translator_comments = False
828
882
829
883
last_token = token
830
884
0 commit comments