/**
 * Parses an HTML fragment and builds the nested outline of its headings.
 *
 * <p>All {@code h1}–{@code h6} elements selected from the parsed fragment are
 * handed to {@link #getLinkNodes(Elements)}, which performs the nesting.
 *
 * @param html HTML body fragment to scan; may be {@code null} or blank
 * @return the outline built by {@code getLinkNodes}, or a new empty list when
 *         {@code html} is blank according to {@code StringUtils.isBlank}
 */
public static List<ArticleOutline> getLinkNodesNew(String html) {
    boolean hasContent = !StringUtils.isBlank(html);
    if (hasContent) {
        Elements headings = Jsoup.parseBodyFragment(html).select("h1,h2,h3,h4,h5,h6");
        return getLinkNodes(headings);
    }
    // Nothing to parse: hand back a fresh empty list.
    return new ArrayList<>();
}
/**
 * Turns a flat sequence of heading elements into a nested outline.
 *
 * <p>The list is walked in the order given. For each element, the run of
 * following elements whose heading number (see {@code getHTagNum}) is strictly
 * greater is collected as its descendants; the run ends at the first element
 * whose number is less than or equal to the current one. The descendants are
 * then nested by a recursive call, and scanning resumes after the run.
 *
 * @param allTag heading elements to organise; every tag name must be
 *               parseable by {@code getHTagNum}
 * @return top-level outline entries, each carrying the element's text, id and
 *         tag name plus its nested children (an empty list when it has none)
 */
public static List<ArticleOutline> getLinkNodes(Elements allTag) {
    List<ArticleOutline> outline = new ArrayList<>();
    int cursor = 0;
    while (cursor < allTag.size()) {
        Element heading = allTag.get(cursor);
        String headingTag = heading.tagName();
        Integer headingLevel = getHTagNum(headingTag);

        // Gather every following element that sits deeper than this heading.
        Elements descendants = new Elements();
        int next = cursor + 1;
        while (next < allTag.size()) {
            Element candidate = allTag.get(next);
            if (getHTagNum(candidate.tagName()) <= headingLevel) {
                break;
            }
            descendants.add(candidate);
            next++;
        }

        ArticleOutline node =
                new ArticleOutline(heading.text(), heading.id(), headingTag, new ArrayList<>());
        if (!descendants.isEmpty()) {
            node.setChildren(getLinkNodes(descendants));
        }
        outline.add(node);

        // Resume at the first element that was not absorbed as a descendant.
        cursor = next;
    }
    return outline;
}
/**
 * Extracts the numeric level from a heading tag name, e.g. {@code "h2"} gives 2.
 *
 * <p>Every {@code h} and {@code H} character is removed and the remainder is
 * parsed as a decimal integer, so upper-case names such as {@code "H2"} are
 * accepted as well as lower-case ones.
 *
 * @param tagName heading tag name such as {@code "h1"} or {@code "H3"}; must not be {@code null}
 * @return the heading level encoded in the tag name
 * @throws NumberFormatException if the remainder is not a decimal integer
 *                               (for example a non-heading tag like {@code "div"})
 */
public static Integer getHTagNum(String tagName) {
    // Strip both cases so a name that kept its original casing ("H2") resolves like "h2".
    String digits = tagName.replace("h", "").replace("H", "");
    return Integer.valueOf(digits);
}
/**
 * One entry in an article's heading outline, holding its own data and its nested entries.
 *
 * <p>Accessors and the no-args / all-args constructors are generated by the
 * Lombok annotations below; the all-args constructor takes the fields in
 * declaration order (label, id, type, children).
 *
 * <p>NOTE(review): no explicit {@code serialVersionUID} is declared. Also, if this
 * class is nested inside the class that holds the static outline helpers, it would
 * need to be {@code static} for them to instantiate it — confirm where it is declared.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class ArticleOutline implements Serializable {
// Display text of the entry; getLinkNodes fills it with the heading element's text.
private String label;
// Identifier of the entry; getLinkNodes fills it with the heading element's id.
private String id;
// Kind of entry; getLinkNodes fills it with the heading element's tag name.
private String type;
// Nested entries beneath this one; getLinkNodes leaves it as an empty list when there are none.
private List<ArticleOutline> children;
}