@@ -106,47 +106,113 @@ def preserve_html_and_escape_text(text):
106
106
escaped_text += html .escape (text [last_end :])
107
107
return escaped_text
108
108
109
- # markdown to html parsing ( v0.737.2)
109
+ # v0.7615
110
110
def markdown_to_html(text):
    """
    Convert a simple subset of Markdown to HTML.

    Supported constructs:
      * fenced code blocks (``` with an optional language) -> <pre><code>,
        with the code HTML-escaped;
      * inline code (`code`) -> <code>;
      * headings (# .. ######) at the start of a line -> "➤ <b>…</b>";
      * links [text](url) -> <a href="url">text</a> (both parts escaped);
      * **bold** -> <b>, *italic* / _italic_ -> <i>.

    Code blocks, inline code and the generated <a href="…"> tags are
    swapped for opaque placeholders while the remaining rules run, so
    that underscores/asterisks/hashes inside code or URLs are never
    rewritten as markup. The placeholders are resolved at the very end.

    Args:
        text: Markdown source string.

    Returns:
        The converted HTML string.
    """
    # Placeholders are delimited by NUL characters. NUL carries no meaning
    # in message text or HTML, so any stray ones are dropped up front to
    # guarantee that user input can never collide with a placeholder.
    text = text.replace("\x00", "")

    protected = []  # finished HTML fragments, indexed by placeholder number

    def protect(fragment):
        """Store *fragment* and return the placeholder that stands in for it."""
        protected.append(fragment)
        return f"\x00{len(protected) - 1}\x00"

    # 1) Fenced code blocks come out first so that nothing inside them is
    #    interpreted as Markdown.
    def stash_codeblock(match):
        language = match.group(1)  # e.g. "python"; None when omitted
        escaped_code = html.escape(match.group(2).strip())
        if language:
            return protect(
                f'<pre><code class="language-{language}">{escaped_code}</code></pre>'
            )
        return protect(f'<pre><code>{escaped_code}</code></pre>')

    text = re.sub(r'```(\w+)?\n([\s\S]*?)```', stash_codeblock, text)

    # 2) Inline code is protected as well; otherwise `snake_case_name`
    #    would be italicised by the underscore rule further down.
    text = re.sub(
        r'`([^`]*)`',
        lambda match: protect(f'<code>{match.group(1)}</code>'),
        text,
    )

    # 3) Headings: one to six '#' at the start of a line, followed by
    #    spaces/tabs. [ \t] rather than \s keeps an empty heading from
    #    swallowing the following line.
    text = re.sub(
        r'^#{1,6}[ \t]+(.*)', r'➤ <b>\1</b>', text, flags=re.MULTILINE
    )

    # 4) Links of the form [text](url). Only the opening tag is protected:
    #    the URL must stay untouched, while the link text may still
    #    receive bold/italic formatting.
    def replace_markdown_link(match):
        link_text = match.group(1)
        url = match.group(2)
        opening_tag = protect(f'<a href="{html.escape(url)}">')
        return f'{opening_tag}{html.escape(link_text)}</a>'

    text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', replace_markdown_link, text)

    # 5) Bold must run before italics so '**' is not consumed as two '*'.
    text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', text)
    text = re.sub(r'\*(.*?)\*', r'<i>\1</i>', text)
    text = re.sub(r'_(.*?)_', r'<i>\1</i>', text)

    # 6) Resolve the placeholders. A fragment may itself contain an
    #    earlier placeholder (e.g. inline code inside a link URL), hence
    #    the recursive substitution.
    placeholder = re.compile(r'\x00(\d+)\x00')

    def restore(match):
        return placeholder.sub(restore, protected[int(match.group(1))])

    return placeholder.sub(restore, text)
174
+
175
+ # # markdown to html parsing (v0.737.2)
176
+ # def markdown_to_html(text):
177
+ # try:
178
+ # # Handle the code blocks with optional language specification first
179
+ # def replace_codeblock(match):
180
+ # codeblock = match.group(2) # Get the actual code inside the block
181
+ # language = match.group(1) # Get the language identifier
182
+ # escaped_code = html.escape(codeblock.strip())
183
+ # if language:
184
+ # return f'<pre><code class="language-{language}">{escaped_code}</code></pre>'
185
+ # else:
186
+ # return f'<pre><code>{escaped_code}</code></pre>'
187
+
188
+ # # Replace code blocks with <pre><code> tags
189
+ # text = re.sub(r'```(\w+)?\n([\s\S]*?)```', replace_codeblock, text)
190
+
191
+ # # Now handle Markdown links and convert them to HTML
192
+ # def replace_markdown_link(match):
193
+ # link_text = match.group(1) # The text to display
194
+ # url = match.group(2) # The URL
195
+ # return f'<a href="{html.escape(url)}">{html.escape(link_text)}</a>'
196
+
197
+ # # Replace Markdown links [text](url) with HTML <a> tags
198
+ # text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', replace_markdown_link, text)
199
+
200
+ # # Handle inline code and other markdown elements
201
+ # text = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', text)
202
+ # text = re.sub(r'\*(.*?)\*', r'<i>\1</i>', text)
203
+ # text = re.sub(r'_(.*?)_', r'<i>\1</i>', text)
204
+ # text = re.sub(r'`([^`]*)`', r'<code>\1</code>', text)
205
+ # text = re.sub(r'######\s*(.*)', r'➤ <b>\1</b>', text)
206
+ # text = re.sub(r'#####\s*(.*)', r'➤ <b>\1</b>', text)
207
+ # text = re.sub(r'####\s*(.*)', r'➤ <b>\1</b>', text)
208
+ # text = re.sub(r'###\s*(.*)', r'➤ <b>\1</b>', text)
209
+ # text = re.sub(r'##\s*(.*)', r'➤ <b>\1</b>', text)
210
+ # text = re.sub(r'#\s*(.*)', r'➤ <b>\1</b>', text)
211
+
212
+ # return text
213
+
214
+ # except Exception as e:
215
+ # return str(e)
150
216
151
217
# Check and update the global rate limit.
152
218
def check_global_rate_limit (max_requests_per_minute , global_request_count , rate_limit_reset_time ):
0 commit comments