30
30
Mapping ,
31
31
MutableSequence ,
32
32
)
33
+ from dataclasses import dataclass
33
34
from functools import lru_cache
34
35
from os .path import relpath
35
36
from textwrap import dedent
36
- from tokenize import COMMENT , NAME , NL , OP , STRING , generate_tokens
37
+ from tokenize import COMMENT , NAME , OP , STRING , generate_tokens
37
38
from typing import TYPE_CHECKING , Any
38
39
39
40
from babel .messages ._compat import find_entrypoints
@@ -99,6 +100,15 @@ def tell(self) -> int: ...
99
100
FSTRING_END = getattr (tokenize , "FSTRING_END" , None )
100
101
101
102
103
+ @dataclass
104
+ class FunctionStackItem :
105
+ function_lineno : int
106
+ function_name : str
107
+ message_lineno : int | None
108
+ messages : list [str | None ]
109
+ translator_comments : list [tuple [int , str ]]
110
+
111
+
102
112
def _strip_comment_tags (comments : MutableSequence [str ], tags : Iterable [str ]):
103
113
"""Helper function for `extract` that strips comment tags from strings
104
114
in a list of comment lines. This function operates in-place.
@@ -507,14 +517,6 @@ def extract_python(
507
517
:param options: a dictionary of additional options (optional)
508
518
:rtype: ``iterator``
509
519
"""
510
- funcname = lineno = message_lineno = None
511
- call_stack = - 1
512
- buf = []
513
- messages = []
514
- translator_comments = []
515
- in_def = in_translator_comments = False
516
- comment_tag = None
517
-
518
520
encoding = parse_encoding (fileobj ) or options .get ('encoding' , 'UTF-8' )
519
521
future_flags = parse_future_flags (fileobj , encoding )
520
522
next_line = lambda : fileobj .readline ().decode (encoding )
@@ -525,108 +527,148 @@ def extract_python(
525
527
# currently parsing one.
526
528
current_fstring_start = None
527
529
530
+ # Keep the stack of all function calls and their related contextual variables,
531
+ # so we can handle nested gettext calls.
532
+ function_stack : list [FunctionStackItem ] = []
533
+ # Keep the last encountered function/variable name for when we encounter
534
+ # an opening parenthesis
535
+ last_name = None
536
+ # Keep track of whether we're in a class or function definition
537
+ in_def = False
538
+ # Keep track of whether we're in a block of translator comments
539
+ in_translator_comments = False
540
+ # Keep track of the last encountered translator comments
541
+ translator_comments = []
542
+ # Keep track of the (split) strings encountered
543
+ message_buffer = []
544
+
528
545
for tok , value , (lineno , _ ), _ , _ in tokens :
529
- if call_stack == - 1 and tok == NAME and value in ('def' , 'class' ):
546
+ if tok == NAME and value in ('def' , 'class' ):
547
+ # We're entering a class or function definition
530
548
in_def = True
531
- elif tok == OP and value == '(' :
532
- if in_def :
533
- # Avoid false positives for declarations such as:
534
- # def gettext(arg='message'):
535
- in_def = False
536
- continue
537
- if funcname :
538
- call_stack += 1
539
- elif in_def and tok == OP and value == ':' :
540
- # End of a class definition without parens
549
+ continue
550
+
551
+ elif in_def and tok == OP and value in ('(' , ':' ):
552
+ # We're in a class or function definition and should not do anything
541
553
in_def = False
542
554
continue
543
- elif call_stack == - 1 and tok == COMMENT :
555
+
556
+ elif tok == OP and value == '(' and last_name :
557
+ # We're entering a function call
558
+ cur_translator_comments = translator_comments
559
+ if function_stack and function_stack [- 1 ].function_lineno == lineno :
560
+ # If our current function call is on the same line as the previous one,
561
+ # copy their translator comments, since they also apply to us.
562
+ cur_translator_comments = function_stack [- 1 ].translator_comments
563
+
564
+ # We add all information needed later for the current function call
565
+ function_stack .append (FunctionStackItem (
566
+ function_lineno = lineno ,
567
+ function_name = last_name ,
568
+ message_lineno = None ,
569
+ messages = [],
570
+ translator_comments = cur_translator_comments ,
571
+ ))
572
+ translator_comments = []
573
+ message_buffer .clear ()
574
+
575
+ elif tok == COMMENT :
544
576
# Strip the comment token from the line
545
577
value = value [1 :].strip ()
546
- if in_translator_comments and \
547
- translator_comments [- 1 ][0 ] == lineno - 1 :
578
+ if in_translator_comments and translator_comments [- 1 ][0 ] == lineno - 1 :
548
579
# We're already inside a translator comment, continue appending
549
580
translator_comments .append ((lineno , value ))
550
581
continue
551
- # If execution reaches this point, let's see if comment line
552
- # starts with one of the comment tags
582
+
553
583
for comment_tag in comment_tags :
554
584
if value .startswith (comment_tag ):
585
+ # Comment starts with one of the comment tags,
586
+ # so let's start capturing it
555
587
in_translator_comments = True
556
588
translator_comments .append ((lineno , value ))
557
589
break
558
- elif funcname and call_stack == 0 :
559
- nested = (tok == NAME and value in keywords )
560
- if (tok == OP and value == ')' ) or nested :
561
- if buf :
562
- messages .append ('' .join (buf ))
563
- del buf [:]
590
+
591
+ elif function_stack and function_stack [- 1 ].function_name in keywords :
592
+ # We're inside a translation function call
593
+ if tok == OP and value == ')' :
594
+ # The call has ended, so we yield the translatable term(s)
595
+ messages = function_stack [- 1 ].messages
596
+ lineno = (
597
+ function_stack [- 1 ].message_lineno
598
+ or function_stack [- 1 ].function_lineno
599
+ )
600
+ cur_translator_comments = function_stack [- 1 ].translator_comments
601
+
602
+ if message_buffer :
603
+ messages .append ('' .join (message_buffer ))
604
+ message_buffer .clear ()
564
605
else :
565
606
messages .append (None )
566
607
567
608
messages = tuple (messages ) if len (messages ) > 1 else messages [0 ]
568
- # Comments don't apply unless they immediately
569
- # precede the message
570
- if translator_comments and \
571
- translator_comments [- 1 ][0 ] < message_lineno - 1 :
572
- translator_comments = []
609
+ if (
610
+ cur_translator_comments
611
+ and cur_translator_comments [- 1 ][0 ] < lineno - 1
612
+ ):
613
+ # The translator comments are not immediately preceding the current
614
+ # term, so we skip them.
615
+ cur_translator_comments = []
616
+
617
+ yield (
618
+ lineno ,
619
+ function_stack [- 1 ].function_name ,
620
+ messages ,
621
+ [comment [1 ] for comment in cur_translator_comments ],
622
+ )
623
+
624
+ function_stack .pop ()
573
625
574
- yield (message_lineno , funcname , messages ,
575
- [comment [1 ] for comment in translator_comments ])
576
-
577
- funcname = lineno = message_lineno = None
578
- call_stack = - 1
579
- messages = []
580
- translator_comments = []
581
- in_translator_comments = False
582
- if nested :
583
- funcname = value
584
626
elif tok == STRING :
585
- val = _parse_python_string (value , encoding , future_flags )
586
- if val is not None :
587
- if not message_lineno :
588
- message_lineno = lineno
589
- buf .append (val )
627
+ # We've encountered a string inside a translation function call
628
+ string_value = _parse_python_string (value , encoding , future_flags )
629
+ if not function_stack [- 1 ].message_lineno :
630
+ function_stack [- 1 ].message_lineno = lineno
631
+ if string_value is not None :
632
+ message_buffer .append (string_value )
590
633
591
634
# Python 3.12+, see https://peps.python.org/pep-0701/#new-tokens
592
635
elif tok == FSTRING_START :
593
636
current_fstring_start = value
594
- if not message_lineno :
595
- message_lineno = lineno
596
637
elif tok == FSTRING_MIDDLE :
597
638
if current_fstring_start is not None :
598
639
current_fstring_start += value
599
640
elif tok == FSTRING_END :
600
641
if current_fstring_start is not None :
601
642
fstring = current_fstring_start + value
602
- val = _parse_python_string (fstring , encoding , future_flags )
603
- if val is not None :
604
- buf .append (val )
643
+ string_value = _parse_python_string (fstring , encoding , future_flags )
644
+ if string_value is not None :
645
+ message_buffer .append (string_value )
605
646
606
647
elif tok == OP and value == ',' :
607
- if buf :
608
- messages .append ('' .join (buf ))
609
- del buf [:]
648
+ # End of a function call argument
649
+ if message_buffer :
650
+ function_stack [- 1 ].messages .append ('' .join (message_buffer ))
651
+ message_buffer .clear ()
610
652
else :
611
- messages .append (None )
612
- if translator_comments :
613
- # We have translator comments, and since we're on a
614
- # comma(,) user is allowed to break into a new line
615
- # Let's increase the last comment's lineno in order
616
- # for the comment to still be a valid one
617
- old_lineno , old_comment = translator_comments .pop ()
618
- translator_comments .append ((old_lineno + 1 , old_comment ))
619
-
620
- elif tok != NL and not message_lineno :
621
- message_lineno = lineno
622
- elif call_stack > 0 and tok == OP and value == ')' :
623
- call_stack -= 1
624
- elif funcname and call_stack == - 1 :
625
- funcname = None
626
- elif tok == NAME and value in keywords :
627
- funcname = value
653
+ function_stack [- 1 ].messages .append (None )
628
654
629
- if current_fstring_start is not None and tok not in {FSTRING_START , FSTRING_MIDDLE }:
655
+ elif function_stack and tok == OP and value == ')' :
656
+ function_stack .pop ()
657
+
658
+ if in_translator_comments and translator_comments [- 1 ][0 ] < lineno :
659
+ # We have a newline in between the comments, so they don't belong
660
+ # together anymore
661
+ in_translator_comments = False
662
+
663
+ if tok == NAME :
664
+ last_name = value
665
+ if function_stack and not function_stack [- 1 ].message_lineno :
666
+ function_stack [- 1 ].message_lineno = lineno
667
+
668
+ if (
669
+ current_fstring_start is not None
670
+ and tok not in {FSTRING_START , FSTRING_MIDDLE }
671
+ ):
630
672
# In Python 3.12, tokens other than FSTRING_* mean the
631
673
# f-string is dynamic, so we don't want to extract it.
632
674
# And if it's FSTRING_END, we've already handled it above.
0 commit comments