在处理 Markdown 文章时,有时候我们需要提取文章中的标题,以便生成目录或进行其他处理。本文将介绍如何通过编程的方式提取 Markdown 文章的标题,包括使用正则表达式和简单的字符串处理。
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Command-line utility that extracts the ATX headings ({@code #} through {@code ######}) of a
 * Markdown document and prints them as an indented bullet list, i.e. a simple table of contents.
 *
 * <p>Usage: {@code java MarkdownParser [path-to-markdown-file]}. When no argument is given the
 * built-in default path is read.
 */
public class MarkdownParser {

    /** File read when no command-line argument is supplied. */
    private static final String DEFAULT_FILE_PATH = "/文件路径/xxx.md";

    /** Deepest heading level that Markdown defines. */
    private static final int MAX_HEADING_LEVEL = 6;

    /** One to six leading '#', at least one whitespace character, then the heading text. */
    private static final Pattern HEADING_PATTERN = Pattern.compile("^(#{1,6})\\s+(.*)$");

    /** Code fence line: up to three spaces of indent, then 3+ backticks or 3+ tildes. */
    private static final Pattern FENCE_PATTERN = Pattern.compile("^ {0,3}(`{3,}|~{3,})(.*)$");

    /** Windows (CR LF), Unix (LF) and classic Mac (CR) line endings. */
    private static final Pattern LINE_BREAK = Pattern.compile("\\r?\\n|\\r");

    /**
     * Reads a Markdown file as UTF-8 and prints one table-of-contents line per heading.
     *
     * <p>If the file cannot be read, a message is written to standard error and the JVM exits
     * with status 1.
     *
     * @param args optional; {@code args[0]} is the path of the Markdown file to read
     */
    public static void main(String[] args) {
        String location = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE_PATH;
        Path filePath = Paths.get(location);
        try {
            // Decode explicitly as UTF-8; the platform default charset may corrupt non-ASCII titles.
            String markdownContent =
                    new String(Files.readAllBytes(filePath), StandardCharsets.UTF_8);
            for (TOCEntry entry : extractMarkdownTOC(markdownContent)) {
                System.out.println(entry);
            }
        } catch (IOException e) {
            System.err.println("Failed to read Markdown file '" + filePath + "': " + e);
            System.exit(1);
        }
    }

    /**
     * Extracts every heading (levels 1 to 6) from the given Markdown text, in document order.
     *
     * @param markdownContent the full Markdown text; must not be {@code null}
     * @return the headings found; empty when there are none
     */
    private static List<TOCEntry> extractMarkdownTOC(String markdownContent) {
        return extractMarkdownTOC(markdownContent, MAX_HEADING_LEVEL);
    }

    /**
     * Extracts the headings whose level does not exceed {@code maxLevel}, in document order.
     *
     * <p>Lines inside fenced code blocks are ignored, so a {@code #} comment in a code sample is
     * not mistaken for a heading. An unclosed fence runs to the end of the document.
     *
     * @param markdownContent the full Markdown text; must not be {@code null}
     * @param maxLevel deepest heading level to keep, e.g. 2 keeps only {@code #} and {@code ##}
     * @return the headings found; empty when there are none
     */
    private static List<TOCEntry> extractMarkdownTOC(String markdownContent, int maxLevel) {
        List<TOCEntry> toc = new ArrayList<>();

        // A leading byte-order mark (U+FEFF) would otherwise hide a heading on the first line.
        String content = markdownContent.startsWith("\uFEFF")
                ? markdownContent.substring(1)
                : markdownContent;

        char fenceChar = '\0';
        int fenceLength = 0; // 0 means "not inside a fenced code block"

        for (String line : LINE_BREAK.split(content)) {
            Matcher fence = FENCE_PATTERN.matcher(line);
            boolean isFenceLine = fence.matches();
            String marker = isFenceLine ? fence.group(1) : "";

            if (fenceLength > 0) {
                // A closing fence uses the same character, is at least as long as the opening
                // fence, and carries no trailing text.
                boolean closes = isFenceLine
                        && marker.charAt(0) == fenceChar
                        && marker.length() >= fenceLength
                        && fence.group(2).trim().isEmpty();
                if (closes) {
                    fenceLength = 0;
                }
                continue;
            }

            // A backtick fence whose trailing text contains a backtick is inline code, not a fence.
            if (isFenceLine && !(marker.charAt(0) == '`' && fence.group(2).indexOf('`') >= 0)) {
                fenceChar = marker.charAt(0);
                fenceLength = marker.length();
                continue;
            }

            Matcher heading = HEADING_PATTERN.matcher(line);
            if (heading.matches()) {
                int level = heading.group(1).length();
                if (level <= maxLevel) {
                    toc.add(new TOCEntry(level, heading.group(2).trim()));
                }
            }
        }
        return toc;
    }

    /** Immutable table-of-contents entry: a heading level and its title text. */
    static class TOCEntry {
        private final int level;
        private final String title;

        /**
         * @param level heading depth, where 1 corresponds to a single {@code #}
         * @param title heading text without the leading {@code #} markers
         */
        public TOCEntry(int level, String title) {
            this.level = level;
            this.title = title;
        }

        /**
         * Renders the entry as a bullet item indented by one space per level below the first.
         *
         * @return {@code level - 1} spaces followed by {@code "- "} and the title
         */
        @Override
        public String toString() {
            StringBuilder line = new StringBuilder();
            for (int i = 0; i < level - 1; i++) {
                line.append(" ");
            }
            return line.append("- ").append(title).toString();
        }
    }
}