需求

TeX 文件中手动输入的列表项不会自动按照字母顺序进行排序,需要借助程序节省排序的工作量。同样地,对于数学论文,一般使用英文标点,每次手动替换也比较麻烦。

利用 Copilot 自动生成了 Python 代码,在半个小时之内就完成了编写,非常方便。不禁让人思考:编程作为与计算机沟通的语言,已经被大语言模型完美地实现了,那么想要超越 AI,就需要更加深入和充满智慧才行。

实现

利用以下代码可以识别 description 列表环境,并对 \item[] 中的项按字母排序。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import re
"""
This script sorts the items within LaTeX description environments in a given .tex file.
Functions:
sort_description_items(tex_content):
Sorts the items within all description environments in the provided LaTeX content.
Args:
tex_content (str): The content of the LaTeX file as a string.
Returns:
str: The LaTeX content with sorted description items.
main(input_file):
Reads the LaTeX file, sorts the description items, and writes the sorted content back to the file.
Args:
input_file (str): The path to the LaTeX file to be processed.
Usage:
python sortDescription.py <input_file>
<input_file>: The path to the LaTeX file to be processed.
"""
import sys

def sort_description_items(tex_content):
    """Sort the top-level items of every LaTeX description environment.

    Each ``\\begin{description}[...] ... \\end{description}`` span is split
    at its top-level ``\\item`` commands and the resulting chunks are
    reordered by the text inside ``\\item[...]`` (plain string comparison,
    stable for equal labels).  The chunks themselves are moved verbatim, so
    no text is added or removed:

    * whatever precedes the first ``\\item`` stays right after ``\\begin``;
    * items without a ``[label]`` are kept and sort as an empty label;
    * an environment containing no ``\\item`` is returned untouched;
    * ``\\itemsep``, ``\\itemindent`` and similar commands are not mistaken
      for an item boundary.

    Nested enumerate/itemize environments are temporarily replaced by
    placeholders so that their own ``\\item`` commands travel with the
    parent item instead of being sorted.

    Limitation: the environment is matched non-greedily up to the first
    ``\\end{description}``, so a description nested inside another
    description is not handled.

    Args:
        tex_content (str): Full text of the LaTeX document.

    Returns:
        str: The document text with every description environment sorted.
    """
    description_re = re.compile(
        r'(\\begin\{description\}(?:\[.*?\])?)(.*?)(\\end\{description\})',
        re.DOTALL,
    )
    list_env_re = re.compile(
        r'(\\begin\{(enumerate|itemize|enumrate)\}.*?\\end\{\2\})',
        re.DOTALL,
    )
    # "\item" not followed by a letter, i.e. not \itemsep, \itemindent, ...
    item_start_re = re.compile(r'\\item(?![A-Za-z])')
    label_re = re.compile(r'\\item\[(.*?)\]', re.DOTALL)

    def label_of(chunk):
        """Return the [label] of an item chunk, or '' when it has none."""
        found = label_re.match(chunk)
        return found.group(1) if found else ''

    def sort_one(match):
        """Return one description environment with its items reordered."""
        begin, content, end = match.groups()

        # Hide nested list environments behind placeholders so that their
        # \item commands are not treated as items of this description.
        protected = {}

        def protect(env_match):
            token = f"%%LIST_{len(protected)}%%"
            protected[token] = env_match.group(0)
            return token

        flat = list_env_re.sub(protect, content)

        starts = [m.start() for m in item_start_re.finditer(flat)]
        if not starts:
            # Nothing to sort: leave the environment exactly as written.
            return match.group(0)

        preamble = flat[:starts[0]]
        bounds = starts + [len(flat)]
        chunks = [flat[lo:hi] for lo, hi in zip(bounds, bounds[1:])]
        chunks.sort(key=label_of)

        body = preamble + ''.join(chunks)

        # Put the protected list environments back in place.
        for token, env_text in protected.items():
            body = body.replace(token, env_text)

        return begin + body + end

    return description_re.sub(sort_one, tex_content)

def main(input_file):
    """Sort the description items of a LaTeX file in place.

    Args:
        input_file (str): Path of the .tex file.  It is read as UTF-8,
            passed through sort_description_items, and then overwritten
            with the returned text.
    """
    with open(input_file, encoding='utf-8') as source:
        original_text = source.read()

    # Compute the result before reopening the file for writing.
    updated_text = sort_description_items(original_text)

    with open(input_file, 'w', encoding='utf-8') as target:
        target.write(updated_text)

if __name__ == "__main__":
    # Exactly one command-line argument is expected: the file to process.
    if len(sys.argv) == 2:
        main(sys.argv[1])
    else:
        print("Usage: python sortDescription.py <input_file>")

同样利用如下的代码可以实现自动替换标点:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import sys
import re

"""
该脚本用于替换给定 .tex 文件中的中文逗号“,”为英文逗号加空格", ",
以及中文句号“。”为英文句号加空格". ".
用法:
python replaceChineseComma.py <input_file>
<input_file>: 要处理的 .tex 文件路径.
"""

def replace_chinese_punctuation(tex_content):
    """Replace Chinese commas and full stops with their English forms.

    The fullwidth comma (U+FF0C) becomes ", " and the ideographic full
    stop (U+3002) becomes ". ".  ASCII commas and periods already present
    in the text are left untouched, so running the function again on its
    own output changes nothing.

    Args:
        tex_content (str): Text of the .tex file.

    Returns:
        str: The text with both punctuation marks replaced.
    """
    # Written as escapes so the characters cannot be silently normalised
    # to their ASCII look-alikes when this source file is copied around.
    fullwidth_comma = '\uff0c'
    ideographic_full_stop = '\u3002'

    text = tex_content.replace(fullwidth_comma, ', ')
    return text.replace(ideographic_full_stop, '. ')

def main(input_file):
    """Replace Chinese punctuation in a file, overwriting it in place.

    Args:
        input_file (str): Path of the .tex file.  It is read as UTF-8,
            passed through replace_chinese_punctuation, and then
            overwritten with the returned text.
    """
    with open(input_file, encoding='utf-8') as source:
        original_text = source.read()

    # Compute the result before reopening the file for writing.
    converted_text = replace_chinese_punctuation(original_text)

    with open(input_file, 'w', encoding='utf-8') as target:
        target.write(converted_text)

if __name__ == "__main__":
    # Exactly one command-line argument is expected: the file to process.
    if len(sys.argv) == 2:
        main(sys.argv[1])
    else:
        print("Usage: python replaceChineseComma.py <input_file>")

在工作区主目录下的 .vscode 中添加 tasks.json 文件,加入以下代码即可将 Python 文件作为任务,更方便地调用。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
{
"version": "2.0.0",
"tasks": [
{
"label": "sort-description-items",
"type": "shell",
"command": "python",
"args": [
"${workspaceFolder}/sortDescription.py",
"${file}"
],
"presentation": {
"echo": true,
"reveal": "silent",
"focus": false,
"panel": "shared"
},
"problemMatcher": []
},
{
"label": "replace-chinese-punctuation",
"type": "shell",
"command": "python",
"args": [
"${workspaceFolder}/replaceChineseComma.py",
"${file}"
],
"presentation": {
"echo": true,
"reveal": "silent",
"focus": false,
"panel": "shared"
},
"problemMatcher": []
}
]
}