Skip to content

Commit a8d971f

Browse files
add check version diff tools (#2050)
## Versions - [ ] dev - [ ] 3.0 - [ ] 2.1 - [ ] 2.0 ## Languages - [ ] Chinese - [ ] English ## Docs Checklist - [ ] Checked by AI - [ ] Test Cases Built
1 parent db77c1d commit a8d971f

File tree

1 file changed

+140
-0
lines changed

1 file changed

+140
-0
lines changed

check_docs_version_diff.py

+140
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import json
2+
import os
3+
import filecmp
4+
5+
# Sidebar entries whose text contains any of these substrings are left out of
# the version comparison.
exclude_keywords = [
    "sql-manual", "releasenotes", "ecosystem", "admin-manual", "faq",
    "data-operate", "query-data", "table-design", "gettingStarted",
    "query-acceleration", "lakehouse", "compute-storage-decoupled",
    "benchmark", "db-connect", "deploy-on-kubernetes",
]

# Root directories of the zh-CN docs for 2.1, 3.0 and dev (current).
# All three live under "./i18n/"; the 2.1 prefix previously read "./ii18n/",
# which made every zh-CN 2.1 document look missing.
version_21_prefix_cn = "./i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/"
version_30_prefix_cn = "./i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/"
version_dev_prefix_cn = "./i18n/zh-CN/docusaurus-plugin-content-docs/current/"

# Root directories of the English docs for 2.1, 3.0 and dev.
version_21_prefix_en = "./versioned_docs/version-2.1/"
version_30_prefix_en = "./versioned_docs/version-3.0/"
version_dev_prefix_en = "./docs/"
18+
19+
20+
21+
def extract_items_from_file(file_path):
    """Collect the string entries found under every 'items' key of a JSON file.

    The file is parsed as JSON and walked depth-first. Each time a dict with
    an 'items' key is met, the strings sitting directly in that value are
    kept, unless they contain one of the module-level ``exclude_keywords``
    as a substring. Non-string entries are not kept themselves but are still
    descended into.

    Args:
        file_path: Path of the JSON file to read (opened as UTF-8).

    Returns:
        The kept strings in traversal order. If opening, parsing or walking
        the file raises, the error is printed and an empty list is returned.
    """
    collected = []

    def is_wanted(entry):
        # Non-strings are rejected before the keyword list is consulted.
        if not isinstance(entry, str):
            return False
        for keyword in exclude_keywords:
            if keyword in entry:
                return False
        return True

    def walk(node):
        if isinstance(node, list):
            for child in node:
                walk(child)
            return
        if not isinstance(node, dict):
            return
        if "items" in node:
            collected.extend(filter(is_wanted, node["items"]))
        # Every value is visited, including 'items' itself, so nested
        # categories inside an 'items' list are reached as well.
        for value in node.values():
            walk(value)

    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
        walk(parsed)
        return collected
    except (FileNotFoundError, json.JSONDecodeError) as err:
        print(f"Error: {err}")
        return []
    except Exception as err:
        print(f"An unexpected error occurred: {err}")
        return []
55+
56+
57+
def _report_version_diffs(directories, prefix_v21, prefix_v30, prefix_dev):
    """Print missing files and content mismatches across three doc versions.

    For every entry, ``<entry>.md`` is looked up under each prefix. A line is
    printed for each version where the file does not exist. Where both a
    released version (2.1 / 3.0) and the dev copy exist, their contents are
    compared byte-for-byte and a mismatch report is printed if they differ.

    Args:
        directories: Iterable of document paths relative to a version root,
            without the ``.md`` extension.
        prefix_v21: Root directory of the 2.1 docs.
        prefix_v30: Root directory of the 3.0 docs.
        prefix_dev: Root directory of the dev (current) docs.
    """
    for directory in directories:
        file_name = directory + ".md"
        path_dev = os.path.join(prefix_dev, file_name)
        released = (
            ("2.1", "v21", os.path.join(prefix_v21, file_name)),
            ("3.0", "v30", os.path.join(prefix_v30, file_name)),
        )

        # Probe each path once and remember which released copies exist.
        present = []
        for version, tag, path in released:
            if os.path.exists(path):
                present.append((tag, path))
            else:
                print(f"Missing in version {version}: {path}")

        if not os.path.exists(path_dev):
            print(f"Missing in current (dev) version: {path_dev}")
            continue  # nothing to compare the released copies against

        for tag, path in present:
            # shallow=False forces a content comparison instead of trusting
            # matching os.stat() signatures.
            if not filecmp.cmp(path, path_dev, shallow=False):
                print(f"File mismatch between {tag} and dev for: {directory}")
                print("path_dev: " + path_dev)
                print(f"path_{tag}: " + path)
                print("-" * 50)


def diff_doc_cn(directories):
    """Report missing or differing zh-CN docs between 2.1, 3.0 and dev.

    Args:
        directories: Iterable of document paths (no ``.md`` extension),
            resolved against the ``version_*_prefix_cn`` roots.
    """
    _report_version_diffs(
        directories,
        version_21_prefix_cn,
        version_30_prefix_cn,
        version_dev_prefix_cn,
    )


def diff_doc_en(directories):
    """Report missing or differing English docs between 2.1, 3.0 and dev.

    Args:
        directories: Iterable of document paths (no ``.md`` extension),
            resolved against the ``version_*_prefix_en`` roots.
    """
    _report_version_diffs(
        directories,
        version_21_prefix_en,
        version_30_prefix_en,
        version_dev_prefix_en,
    )
125+
126+
127+
128+
129+
130+
if __name__ == "__main__":
    # The sidebar definition is always read from the current working directory.
    sidebar_docs = extract_items_from_file("./sidebars.json")

    # Run the zh-CN check first, then the English one, each under its banner.
    checks = (
        ("Checking CN Doc >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>", diff_doc_cn),
        ("Checking EN Doc >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>", diff_doc_en),
    )
    for banner, run_check in checks:
        print(banner)
        run_check(sidebar_docs)

0 commit comments

Comments
 (0)