Markdown 图片路径批量处理脚本

——专为 MkDocs 等静态站点生成器设计的智能路径转换工具

📌 功能概述

本脚本用于自动化处理 Markdown 文档中的 <img> 标签图片路径，解决本地开发预览与线上网站部署时路径不一致的痛点问题。

✨ 核心特性

双向智能转换：支持正向（添加 ../）和反向（移除 ../）路径转换
代码块保护：自动识别并跳过围栏代码块（```...```）内的所有内容
语法兼容：仅处理 <img> HTML 标签，完全忽略 ![]() Markdown 语法
灵活输入：支持单个文件处理和目录递归处理
安全输出：可选择原地修改或输出到新位置，保护原始文件
结构保持：目录模式下完整保留原有的文件夹层级结构

🌟脚本代码

img_path_processor.py代码如下：


x
1
#!/usr/bin/env python3
2
# -*- coding: utf-8 -*-
3

4
import re
5
import argparse
6
from pathlib import Path
7

8
def split_code_blocks(content: str) -> list:
    """
    Split Markdown text into alternating prose and fenced-code segments.

    A fenced code block starts on a line whose first non-blank characters
    (optionally after indentation or ``>`` quote markers) are three or more
    backticks or tildes, followed by an arbitrary info string such as
    ``python title="x.py"``.  It ends at the next line that consists of the
    same fence (or a longer one) and nothing else.  Both LF and CRLF line
    endings are accepted.  An opening fence without a matching closing fence
    is not treated as code.

    Args:
        content: Full Markdown text.

    Returns:
        A list of ``(kind, text)`` tuples where ``kind`` is ``'text'`` or
        ``'code'``.  Entries strictly alternate, starting and ending with a
        ``'text'`` entry (which may be the empty string), so that joining
        every ``text`` field reproduces ``content`` exactly.
    """
    fence_pattern = re.compile(
        # Opening fence line: indentation / quote markers, the fence itself,
        # then any info string.  Backticks are excluded from the info string
        # so that inline ```code``` on a single line is not taken as a fence.
        r'^[ \t>]*(?P<fence>`{3,}|~{3,})[^`\n]*\n'
        # Block body, as short as possible.
        r'[\s\S]*?'
        # Closing fence must sit on its own line and use the same marker.
        r'^[ \t>]*(?P=fence)[`~]*[ \t\r]*(?:\n|$)',
        re.MULTILINE,
    )

    result = []
    cursor = 0
    for match in fence_pattern.finditer(content):
        # Always emit the (possibly empty) prose that precedes a code block
        # so that even indices are prose and odd indices are code.
        result.append(('text', content[cursor:match.start()]))
        result.append(('code', match.group(0)))
        cursor = match.end()
    result.append(('text', content[cursor:]))

    return result
23

24
def process_non_code_content(content: str, reverse: bool) -> str:
    """
    Rewrite the ``src`` of every HTML ``<img>`` tag in a prose segment.

    Only the ``src`` value is replaced; the rest of the tag, including the
    original quote character and attribute order, is left untouched.
    Markdown image syntax (``![alt](path)``) is never matched.

    Args:
        content: Text that lies outside fenced code blocks.
        reverse: When true, strip one leading ``../`` from each ``src`` that
            has it.  When false, prepend ``../`` to each relative ``src`` that
            does not already start with ``../``.

    Returns:
        The text with the affected ``src`` values rewritten.

    Notes:
        In forward mode, sources that are not document-relative (a URL with a
        scheme such as ``https:`` or ``data:``, a path starting with ``/``, or
        a ``#`` fragment) are left unchanged, because prefixing them with
        ``../`` would produce a broken reference.
    """
    img_src_pattern = re.compile(
        # Tag start plus any attributes that precede src.  src must follow
        # whitespace, so "data-src" and "srcset" are not mistaken for it.
        r'(<img\s+(?:[^>]*?\s)?src\s*=\s*)'
        # Quoted value; the closing quote must equal the opening quote.
        r'(["\'])([^"\'>]+)\2'
        # Only act on tags that are actually closed.
        r'(?=[^>]*>)',
        re.IGNORECASE,
    )
    non_relative = re.compile(r'[a-zA-Z][a-zA-Z0-9+.\-]*:|/|#')

    def replace_img_path(match):
        prefix, quote, src_path = match.groups()

        if reverse:
            if not src_path.startswith('../'):
                return match.group(0)
            new_src = src_path[3:]
        else:
            if src_path.startswith('../') or non_relative.match(src_path):
                return match.group(0)
            new_src = '../' + src_path

        return prefix + quote + new_src + quote

    return img_src_pattern.sub(replace_img_path, content)
60

61
def process_md_content(content: str, reverse: bool) -> str:
    """
    Convert the image paths of a whole Markdown document.

    The document is split into prose and fenced-code segments; prose
    segments are passed through ``process_non_code_content`` while code
    segments are copied verbatim.  The segments are then concatenated in
    their original order.

    Args:
        content: Full Markdown text.
        reverse: Conversion direction, forwarded unchanged to
            ``process_non_code_content``.

    Returns:
        The reassembled document text.
    """
    return ''.join(
        segment if kind != 'text' else process_non_code_content(segment, reverse)
        for kind, segment in split_code_blocks(content)
    )
75

76
def process_single_file(input_file: Path, output_file: Path, reverse: bool) -> bool:
    """
    Convert one Markdown file and write the result.

    The file is read as UTF-8, converted with ``process_md_content`` and
    written to ``output_file`` (parent directories are created on demand).
    When the conversion changes nothing and the output path is the input
    path, the file is not rewritten, so an untouched document keeps its
    modification time and line endings.

    Args:
        input_file: Markdown file to read.
        output_file: Destination path; may be the same as ``input_file`` for
            in-place modification.
        reverse: Conversion direction, forwarded to ``process_md_content``.

    Returns:
        True when the converted text differs from the original text.
        False when nothing changed, and also when reading or writing failed
        (the failure is reported on standard output).
    """
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            content = f.read()

        processed_content = process_md_content(content, reverse)
        changed = content != processed_content

        # In-place mode with nothing to change: leave the file alone.
        if not changed and Path(output_file).resolve() == Path(input_file).resolve():
            return False

        # Create the output directory if needed.
        output_file.parent.mkdir(parents=True, exist_ok=True)

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(processed_content)

        return changed

    except (OSError, UnicodeError) as e:
        # I/O and decoding problems are reported and treated as "not
        # modified" so that a batch run can continue with the next file.
        print(f"处理文件 {input_file} 时出错: {e}")
        return False
97

98
def process_directory(input_dir: Path, output_dir: Path, reverse: bool):
    """
    Convert every ``.md`` file found recursively below ``input_dir``.

    Args:
        input_dir: Directory that is searched recursively for ``*.md`` files.
        output_dir: Directory that receives the converted files with the same
            relative layout, or a falsy value (``None``) to modify the files
            in place.
        reverse: Conversion direction, forwarded to ``process_single_file``.

    Returns:
        None.  Progress and a summary are printed to standard output.

    Notes:
        When ``output_dir`` lies inside ``input_dir``, files already located
        under ``output_dir`` are skipped.  Otherwise a second run would pick
        up the results of the first one and nest them ever deeper.
        Files are processed in sorted order so the output is reproducible.
    """
    if not input_dir.exists():
        print(f"错误: 输入目录 {input_dir} 不存在")
        return

    md_files = sorted(input_dir.rglob("*.md"))

    if output_dir:
        resolved_in = input_dir.resolve()
        resolved_out = output_dir.resolve()
        # Only filter when the output really is a sub-directory of the input;
        # an output that equals or contains the input must not hide anything.
        if resolved_in in resolved_out.parents:
            md_files = [
                candidate for candidate in md_files
                if resolved_out not in candidate.resolve().parents
            ]

    if not md_files:
        print(f"在目录 {input_dir} 中未找到 .md 文件")
        return

    modified_count = 0
    total_count = len(md_files)

    print(f"找到 {total_count} 个 .md 文件")
    print(f"输入目录: {input_dir}")
    if output_dir:
        print(f"输出目录: {output_dir}")
        output_dir.mkdir(parents=True, exist_ok=True)
    else:
        print("输出模式: 原地修改")
    print("-" * 60)

    for md_file in md_files:
        relative_path = md_file.relative_to(input_dir)
        output_file = output_dir / relative_path if output_dir else md_file

        if process_single_file(md_file, output_file, reverse):
            modified_count += 1
            if output_dir:
                print(f"✓ 已处理: {relative_path}")
            else:
                print(f"✓ 已修改: {relative_path}")
        elif not output_dir:
            print(f"○ 无变化: {relative_path}")

    if output_dir:
        print(f"\n处理完成！共处理了 {modified_count}/{total_count} 个文件")
        print(f"输出文件位于: {output_dir}")
    else:
        print(f"\n处理完成！共修改了 {modified_count}/{total_count} 个文件")
146

147
def main():
    """
    Command-line entry point.

    Parses the arguments and runs either single-file mode (``--file``) or
    directory mode (optional positional ``input_path``, default ``.``).
    Exactly one of ``--forward`` / ``--reverse`` is required.  ``--output``
    selects a destination file or directory; without it the input is
    modified in place.

    Raises:
        SystemExit: With status 1 when the file given to ``--file`` does not
            exist, so that shell scripts and CI jobs can detect the failure.
            ``argparse`` itself exits with status 2 on invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description="批量处理 Markdown 文件中的 <img> 标签路径（支持单个文件和目录）",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
使用示例:

【单个文件处理】
  # 原地修改单个文件（反向转换）
  python img_path_processor.py --file document.md --reverse
  
  # 处理单个文件并输出到新文件
  python img_path_processor.py --file input.md --reverse --output output.md

【目录处理】
  # 原地修改目录（正向转换）
  python img_path_processor.py --forward
  
  # 目录输出到新位置
  python img_path_processor.py ./docs --reverse --output ./processed
        """
    )

    # Mutually exclusive group: file mode vs. directory mode.
    mode_group = parser.add_mutually_exclusive_group()
    mode_group.add_argument(
        '--file',
        '-f',
        type=str,
        help='处理单个 Markdown 文件'
    )
    mode_group.add_argument(
        'input_path',
        nargs='?',
        default='.',
        help='输入目录路径（默认为当前目录）'
    )

    parser.add_argument(
        '--output',
        '-o',
        type=str,
        help='输出路径（文件模式：输出文件路径；目录模式：输出目录路径）'
    )

    # The conversion direction is mandatory and exclusive.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        '--forward',
        action='store_true',
        help='正向转换：为图片路径添加 "../" 前缀'
    )
    group.add_argument(
        '--reverse',
        action='store_true',
        help='反向转换：移除图片路径开头的 "../" 前缀'
    )

    args = parser.parse_args()

    reverse = args.reverse
    mode_label = '反向转换（移除 ../）' if reverse else '正向转换（添加 ../）'

    if args.file:
        # Single-file mode.
        input_file = Path(args.file).resolve()
        if not input_file.exists():
            print(f"错误: 文件 {input_file} 不存在")
            raise SystemExit(1)

        if input_file.suffix.lower() != '.md':
            print(f"警告: 文件 {input_file} 不是 .md 文件，但仍会尝试处理")

        # Without --output the input file itself is overwritten.
        output_file = Path(args.output).resolve() if args.output else input_file

        print(f"处理单个文件: {input_file}")
        print(f"转换模式: {mode_label}")
        if args.output:
            print(f"输出文件: {output_file}")
        else:
            print("输出模式: 原地修改")
        print("注意: 代码块（```...```）内的内容将被忽略")
        print("-" * 60)

        changed = process_single_file(input_file, output_file, reverse)
        if changed and args.output:
            print(f"✓ 处理完成！输出文件: {output_file}")
        elif changed:
            print("✓ 文件已修改！")
        elif args.output:
            print("✓ 处理完成（内容无变化）")
        else:
            print("○ 文件无需修改")

    else:
        # Directory mode.
        input_dir = Path(args.input_path).resolve()
        output_dir = Path(args.output).resolve() if args.output else None

        print("开始处理 Markdown 文件")
        print(f"转换模式: {mode_label}")
        print("注意: 代码块（```...```）内的内容将被忽略")

        process_directory(input_dir, output_dir, reverse)
254
if __name__ == "__main__":
    # Run the command-line interface only when executed as a script,
    # not when the module is imported.
    main()

🚀 使用方法

基本语法


```bash
python img_path_processor.py [选项]
```

重要：--forward 和 --reverse 必须且只能选择其一

📁 处理模式

模式一：单个文件处理

原地修改（直接修改原文件）


```bash
# 反向转换：移除 ../ 前缀
python img_path_processor.py --file document.md --reverse

# 正向转换：添加 ../ 前缀
python img_path_processor.py --file ./docs/readme.md --forward
```

安全输出（生成新文件）


```bash
# 处理 input.md，结果保存到 output.md
python img_path_processor.py --file input.md --reverse --output output.md

# 使用短参数
python img_path_processor.py -f source.md --forward -o target.md
```

模式二：目录批量处理

原地修改（修改目录内所有文件）


```bash
# 当前目录反向转换
python img_path_processor.py --reverse

# 指定目录正向转换
python img_path_processor.py ./docs --forward
```

安全输出（输出到新目录）


```bash
# 反向转换，输出到 processed 目录
python img_path_processor.py --reverse --output ./processed

# 指定输入输出目录
python img_path_processor.py ./source_docs --forward --output ./build_docs
```

🔁 转换规则详解

正向转换（`--forward`）

适用场景：准备网站部署版本
操作：为图片路径添加 ../ 前缀（如果尚未包含）

转换示例：


```html
<!-- 转换前 -->
<img src="assets/image.png" alt="示例图片" />

<!-- 转换后 -->
<img src="../assets/image.png" alt="示例图片" />
```

反向转换（`--reverse`）

适用场景：恢复本地开发版本
操作：移除图片路径开头的 ../ 前缀（如果存在）

转换示例：


```html
<!-- 转换前 -->
<img src="../assets/image.png" alt="示例图片" />

<!-- 转换后 -->
<img src="assets/image.png" alt="示例图片" />
```

🛡️ 智能保护机制

1. 代码块内容完全跳过


````markdown
这是一个正常图片：
<img src="../assets/normal.png" />  <!-- 会被处理 -->

代码块中的内容不受影响：
```html
<img src="../assets/code-image.png" />  <!-- 完全不会被处理 -->
```
````

```python
print('<img src="../assets/python-code.png" />')  <!-- 也不会被处理 -->
```

2. Markdown 语法图片自动忽略


```markdown
![这是Markdown图片](../assets/markdown-image.png)  <!-- 不会被处理 -->

<img src="../assets/html-image.png" />  <!-- 只有这个会被处理 -->
```

3. 引号格式兼容

同时支持单引号和双引号：


```html
<img src="../assets/image1.png" />
<img src='../assets/image2.png' />
```

📊 参数说明

参数	简写	类型	必需	说明
`--file`	`-f`	字符串	否	单个文件模式：指定要处理的 Markdown 文件
`input_path`	-	路径	否	目录模式：输入目录路径（默认当前目录）
`--output`	`-o`	路径	否	输出路径（文件模式：输出文件；目录模式：输出目录）
`--forward`	-	标志	二选一	正向转换：添加 `../` 前缀
`--reverse`	-	标志	二选一	反向转换：移除 `../` 前缀

注意：--file 和 input_path 互斥，只能使用其中一种模式

🎯 典型使用场景

场景 1：MkDocs 工作流


```bash
# 1. 本地开发：使用 assets/ 路径
# 2. 部署前：生成包含 ../ 的版本
python img_path_processor.py ./docs --forward --output ./site_docs

# 3. 部署后：如需修改，恢复本地版本
python img_path_processor.py ./site_docs --reverse --output ./docs
```

场景 2：快速修复单个文档


```bash
# 修复某个文档的图片路径问题
python img_path_processor.py --file problem_doc.md --forward
python img_path_processor.py --file problem_doc.md --reverse
```

场景 3：CI/CD 自动化


```bash
# 在构建脚本中使用
python img_path_processor.py --forward --output ./_build
```

💡 输出信息解读

脚本执行时会显示清晰的状态信息：

✓ 已修改/已处理：文件内容被成功转换
○ 无变化：原地修改模式下，文件无需任何改动
统计摘要：显示处理总数和实际修改数量
错误提示：文件不存在或读写错误时的详细信息

⚠️ 注意事项

文件编码：默认使用 UTF-8 编码，确保文档编码一致
路径验证：脚本会自动创建不存在的输出目录
扩展名检查：处理非 .md 文件时会显示警告（但仍会尝试处理）
备份建议：原地修改前建议备份重要文件
相对路径：转换只对 <img> 标签的 src 字符串做文本处理（添加或移除开头的 ../），不会检查图片文件是否真实存在

📝 版本信息

适用环境：Python 3.6+
依赖库：仅使用标准库（re, argparse, pathlib）
设计理念：安全、可靠、无副作用的 Markdown 路径处理工具

本工具专为解决 MkDocs 用户的实际痛点而设计，让本地开发与线上部署无缝切换！