使用 Fork/Join 框架进行词云统计
创建线程池并提交主任务
ForkJoinPool forkJoinPool = new ForkJoinPool();
Map<String, Integer> map = forkJoinPool.invoke(new ForkRecursiveTask(crawlerData));
主任务(fork):ForkRecursiveTask
- 继承 RecursiveTask<Map<String, Integer>>,重写 compute 方法
- 将待分词的文本按每组 200 条进行拆分,每组封装为一个子任务(WordCloudTask)并 fork 提交到线程池
- 依次 join 各子任务,合并各子任务返回的词频结果
package com.ikfti.task;
import com.ikfti.model.OriginalPostVo;
import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ForkJoinTask;
import java.util.concurrent.RecursiveTask;
/**
 * Root fork/join task of the word-cloud computation.
 *
 * <p>Collects the non-blank text of every post, splits the texts into
 * fixed-size groups, runs one {@link WordCloudTask} per group and merges the
 * per-group word counts into a single map.
 */
public class ForkRecursiveTask extends RecursiveTask<Map<String, Integer>> {

    private static final long serialVersionUID = 1L;

    /** Number of texts handed to a single {@link WordCloudTask}. */
    private static final int DEFAULT_GROUP_SIZE = 200;

    /** Posts to analyse; may be {@code null}, which is treated as "no posts". */
    private final List<OriginalPostVo> originalPostVoList;

    /**
     * Creates the root task.
     *
     * @param originalPostVos posts whose title and content are to be counted;
     *                        {@code null} is treated like an empty list
     */
    public ForkRecursiveTask(List<OriginalPostVo> originalPostVos) {
        originalPostVoList = originalPostVos;
    }

    /**
     * Runs one subtask per group of texts and sums their word counts.
     *
     * @return word to total occurrence count; empty (never {@code null}) when
     *         there is nothing to count
     */
    @Override
    protected Map<String, Integer> compute() {
        List<WordCloudTask> tasks = new ArrayList<>();
        for (List<String> group : groupList(collectTexts())) {
            tasks.add(new WordCloudTask(group));
        }

        // Forks the subtasks and returns once all of them are done; an
        // exception thrown by any subtask is rethrown here.
        ForkJoinTask.invokeAll(tasks);

        Map<String, Integer> result = new HashMap<>();
        for (WordCloudTask task : tasks) {
            mergeCounts(result, task.join());
        }
        return result;
    }

    /** Extracts the text of every post that has a non-blank title or content. */
    private List<String> collectTexts() {
        List<String> texts = new ArrayList<>();
        if (originalPostVoList == null) {
            return texts;
        }
        for (OriginalPostVo post : originalPostVoList) {
            if (post == null) {
                continue;
            }
            String text = joinText(post.getTitle(), post.getContent());
            if (text != null) {
                texts.add(text);
            }
        }
        return texts;
    }

    /**
     * Joins the non-blank parts of a post.
     *
     * <p>A blank or {@code null} part is dropped instead of being concatenated,
     * so the literal text "null" never reaches the segmenter. When both parts
     * are present they are separated by a line break so that a token cannot
     * span the end of the title and the start of the content.
     *
     * @return the text to segment, or {@code null} when both parts are blank
     */
    private static String joinText(String title, String content) {
        boolean hasTitle = StringUtils.isNotBlank(title);
        boolean hasContent = StringUtils.isNotBlank(content);
        if (hasTitle && hasContent) {
            return title + "\n" + content;
        }
        if (hasTitle) {
            return title;
        }
        return hasContent ? content : null;
    }

    /** Adds every count of {@code partial} to the running totals in {@code target}. */
    private static void mergeCounts(Map<String, Integer> target, Map<String, Integer> partial) {
        if (partial == null) {
            return;
        }
        for (Map.Entry<String, Integer> entry : partial.entrySet()) {
            Integer current = target.get(entry.getKey());
            target.put(entry.getKey(),
                    current == null ? entry.getValue() : current + entry.getValue());
        }
    }

    /**
     * Splits {@code list} into consecutive groups of at most 200 elements.
     *
     * @param list elements to split; {@code null} yields an empty result
     * @return the groups in original order; each group is a
     *         {@link List#subList(int, int) view} of {@code list}
     */
    public static List<List<String>> groupList(List<String> list) {
        return groupList(list, DEFAULT_GROUP_SIZE);
    }

    /**
     * Splits {@code list} into consecutive groups of at most {@code groupSize}
     * elements. Only the last group may be smaller.
     *
     * @param list      elements to split; {@code null} yields an empty result
     * @param groupSize maximum number of elements per group, must be positive
     * @return the groups in original order; each group is a
     *         {@link List#subList(int, int) view} of {@code list}
     * @throws IllegalArgumentException if {@code groupSize} is not positive
     */
    public static List<List<String>> groupList(List<String> list, int groupSize) {
        if (groupSize <= 0) {
            throw new IllegalArgumentException("groupSize must be positive: " + groupSize);
        }
        List<List<String>> listGroup = new ArrayList<List<String>>();
        if (list == null) {
            return listGroup;
        }
        int listSize = list.size();
        int fromIndex = 0;
        while (fromIndex < listSize) {
            // Computed from the remaining length so a huge groupSize cannot overflow.
            int toIndex = fromIndex + Math.min(groupSize, listSize - fromIndex);
            listGroup.add(list.subList(fromIndex, toIndex));
            fromIndex = toIndex;
        }
        return listGroup;
    }

    /** Small manual check of {@link #groupList(List)}: prints a 101-element list and its groups. */
    public static void main(String[] args) {
        List<String> list = new ArrayList<>();
        for (int i = 0; i < 101; i++) {
            list.add(i + "");
        }
        List<List<String>> lists = groupList(list);
        System.out.println("list:" + list.toString());
        System.out.println(lists);
    }
}
子任务(join):WordCloudTask
- 继承 RecursiveTask<Map<String, Integer>>,重写 compute 方法
- 业务逻辑处理:使用 IK 分词器对每条文本分词,统计长度大于 1 的词语的出现次数
package com.ikfti.task;
import com.ikfti.model.OriginalPostVo;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;
import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.RecursiveTask;
/**
 * Fork/join subtask that segments a group of texts with IK Analyzer and
 * counts how often each lexeme occurs.
 */
public class WordCloudTask extends RecursiveTask<Map<String, Integer>> {

    private static final long serialVersionUID = 1L;

    /** Texts to segment; may be {@code null}, which is treated as "no texts". */
    private final List<String> contents;

    /**
     * Creates the subtask.
     *
     * @param file texts to segment and count; {@code null} is treated like an
     *             empty list, and {@code null} entries are skipped
     */
    public WordCloudTask(List<String> file) {
        contents = file;
    }

    /**
     * Segments every text and counts the lexemes longer than one character.
     *
     * @return lexeme text to occurrence count; empty (never {@code null}) when
     *         there is nothing to count
     * @throws IllegalStateException if the segmenter reports an I/O error, so
     *                               that a partial count is never returned as
     *                               if it were complete
     */
    @Override
    protected Map<String, Integer> compute() {
        Map<String, Integer> counts = new HashMap<>();
        if (contents == null) {
            return counts;
        }
        for (String content : contents) {
            // StringReader rejects null, and an empty text has no lexemes.
            if (content == null || content.isEmpty()) {
                continue;
            }
            // NOTE(review): the second argument presumably enables IK's "smart"
            // segmentation mode - confirm against the IK Analyzer documentation.
            IKSegmenter ikSegmenter = new IKSegmenter(new StringReader(content), true);
            try {
                Lexeme lexeme;
                while ((lexeme = ikSegmenter.next()) != null) {
                    final String text = lexeme.getLexemeText();
                    // Single-character lexemes are not counted.
                    if (text.length() > 1) {
                        Integer seen = counts.get(text);
                        counts.put(text, seen == null ? 1 : seen + 1);
                    }
                }
            } catch (IOException e) {
                throw new IllegalStateException("Failed to segment text for word cloud", e);
            }
        }
        return counts;
    }
}
效果图

- 参考 https://blog.youkuaiyun.com/mn960mn/article/details/52595844?utm_source=blogxgwz7