根据目前的学习,对于中文断句,Stanford NLP 可以实现中文分词和断句;下面有不同 API 的例子,大家可以试试。
package com.example.utils;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.util.List;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.WordToSentenceProcessor;
import edu.stanford.nlp.util.CoreMap;
import opennlp.tools.sentdetect.SentenceDetectorEvaluator;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.sentdetect.SentenceSample;
import opennlp.tools.sentdetect.SentenceSampleStream;
import opennlp.tools.util.MarkableFileInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
import opennlp.tools.util.TrainingParameters;
/**
* Filename: NlpSBD.java
* Description: 语句边界消岐 sentence boundary disambiguation
* Copyright: Copyright (c) 2019 All Rights Reserved.
* @author: wangk
* @version: 1.0
* Create at: 2019年5月7日 上午9:26:36
*
* Modification History:
* Date Author Version Description
* ------------------------------------------------------------------
* 2019年5月7日 wangk 1.0 1.0 Version
*
*/
public class NlpSBD {
// Besides the NLP APIs, plain Java offers two ways to split simple text into
// sentences: (1) regular expressions, (2) the BreakIterator class. Search for
// them if you want details.
// English sample text: several sentences, including one ending in "IBM." and
// one that starts with a lowercase letter.
static String paragraph = "A simple approach to create a class to hold and remove stopwords. Let's IBM. this is a cat.";
// Chinese sample text: two sentences, each terminated by the full-width stop "。".
static String chineseLanguage = "第一个括号子表达式捕获 Web 地址的协议部分。 该子表达式匹配在冒号和两个正斜杠前面的任何单词。";
/**
 * Demo entry point: feeds the Chinese sample text to the StanfordCoreNLP example.
 *
 * @param args command-line arguments; not read
 */
public static void main(String[] args) {
    // Alternative demos, left disabled (enable one at a time to try it):
    //   new NlpSBD().sentDetect(paragraph);
    //   new NlpSBD().trainText();
    //   System.out.println(" 内存信息 :" + toMemoryInfo());
    final NlpSBD demo = new NlpSBD();
    demo.StanfordCoreNLP(chineseLanguage);
}
/**
 * Summarizes the memory figures reported by the current JVM's {@link Runtime}.
 *
 * @return free and total memory, each truncated to whole units of
 *         1024 * 1024 bytes, formatted as {@code "<free>M/<total>M(free/total)"}
 */
public static String toMemoryInfo() {
    final long bytesPerUnit = 1024L * 1024L;
    final Runtime jvm = Runtime.getRuntime();
    // Query free before total, in the same order as the label in the result.
    final int freeUnits = (int) (jvm.freeMemory() / bytesPerUnit);
    final int totalUnits = (int) (jvm.totalMemory() / bytesPerUnit);
    return new StringBuilder()
            .append(freeUnits)
            .append("M/")
            .append(totalUnits)
            .append("M(free/total)")
            .toString();
}
/**
* @Description: