TextExtract (3): OpenNLP Token Name Finder
All the models are available at http://opennlp.sourceforge.net/models-1.5/
For name finders, the following models are provided:
Date name finder model en-ner-date.bin
Location name finder model en-ner-location.bin
Money name finder model en-ner-money.bin
Organization name finder model en-ner-organization.bin
Percentage name finder model en-ner-percentage.bin
Person name finder model en-ner-person.bin
Time name finder model en-ner-time.bin
package com.sillycat.resumeparse;
import java.io.IOException;
import java.io.InputStream;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.util.Span;
/**
 * Demo entry point: runs the OpenNLP person name finder over a fixed,
 * pre-tokenized sentence and prints every detected name together with the
 * probability reported by the name finder for that span.
 */
public class OpenNLPPersonNameMain {

    /** Classpath location of the person name finder model. */
    private static final String PERSON_MODEL_RESOURCE = "models/en-ner-person.bin";

    /**
     * Loads the person model, finds name spans in the sample tokens and
     * prints one line per span in the form {@code "<name> <probability> "}.
     *
     * @param args command line arguments (unused)
     */
    public static void main(String[] args) {
        String[] data = new String[] { "John", "Smith", "works", "for", "the",
                "United", "Nations", "." };

        TokenNameFinderModel model;
        try {
            model = loadModel(PERSON_MODEL_RESOURCE);
        } catch (IOException e) {
            // Without a model nothing useful can be done; report the failure
            // and stop instead of continuing with a null model.
            System.err.println("Unable to load name finder model '"
                    + PERSON_MODEL_RESOURCE + "': " + e);
            e.printStackTrace();
            return;
        }

        NameFinderME tokenNameFinder = new NameFinderME(model);
        Span[] spans = tokenNameFinder.find(data);
        double[] probs = tokenNameFinder.probs();
        for (int i = 0; i < spans.length; i++) {
            String value = joinTokens(data, spans[i].getStart(), spans[i].getEnd());
            System.out.println(value + " " + probs[i] + " ");
        }
    }

    /**
     * Reads a name finder model from the classpath.
     *
     * @param resourcePath classpath-relative path of the model file
     * @return the loaded model, never {@code null}
     * @throws IOException if the resource is not on the classpath or cannot be read
     */
    private static TokenNameFinderModel loadModel(String resourcePath) throws IOException {
        // Use this class's own loader so the example does not depend on a
        // sibling class being present.
        try (InputStream modelIn = OpenNLPPersonNameMain.class.getClassLoader()
                .getResourceAsStream(resourcePath)) {
            // getResourceAsStream signals "not found" by returning null.
            if (modelIn == null) {
                throw new IOException("Model resource not found on classpath: " + resourcePath);
            }
            return new TokenNameFinderModel(modelIn);
        }
    }

    /**
     * Joins {@code tokens[start..end)} with single spaces.
     *
     * @param tokens the token array
     * @param start index of the first token, inclusive
     * @param end index after the last token, exclusive
     * @return the selected tokens separated by a single space
     */
    private static String joinTokens(String[] tokens, int start, int end) {
        StringBuilder buffer = new StringBuilder();
        for (int j = start; j < end; j++) {
            if (j > start) {
                buffer.append(' ');
            }
            buffer.append(tokens[j]);
        }
        return buffer.toString();
    }
}
The output will be: John Smith 0.789394314903262
References:
http://sillycat.iteye.com/admin/blogs/2248952
All the models are available at http://opennlp.sourceforge.net/models-1.5/
For name finders, the following models are provided:
Date name finder model en-ner-date.bin
Location name finder model en-ner-location.bin
Money name finder model en-ner-money.bin
Organization name finder model en-ner-organization.bin
Percentage name finder model en-ner-percentage.bin
Person name finder model en-ner-person.bin
Time name finder model en-ner-time.bin
package com.sillycat.resumeparse;
import java.io.IOException;
import java.io.InputStream;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.util.Span;
/**
 * Demo entry point: runs the OpenNLP person name finder over a fixed,
 * pre-tokenized sentence and prints every detected name together with the
 * probability reported by the name finder for that span.
 */
public class OpenNLPPersonNameMain {

    /** Classpath location of the person name finder model. */
    private static final String PERSON_MODEL_RESOURCE = "models/en-ner-person.bin";

    /**
     * Loads the person model, finds name spans in the sample tokens and
     * prints one line per span in the form {@code "<name> <probability> "}.
     *
     * @param args command line arguments (unused)
     */
    public static void main(String[] args) {
        String[] data = new String[] { "John", "Smith", "works", "for", "the",
                "United", "Nations", "." };

        TokenNameFinderModel model;
        try {
            model = loadModel(PERSON_MODEL_RESOURCE);
        } catch (IOException e) {
            // Without a model nothing useful can be done; report the failure
            // and stop instead of continuing with a null model.
            System.err.println("Unable to load name finder model '"
                    + PERSON_MODEL_RESOURCE + "': " + e);
            e.printStackTrace();
            return;
        }

        NameFinderME tokenNameFinder = new NameFinderME(model);
        Span[] spans = tokenNameFinder.find(data);
        double[] probs = tokenNameFinder.probs();
        for (int i = 0; i < spans.length; i++) {
            String value = joinTokens(data, spans[i].getStart(), spans[i].getEnd());
            System.out.println(value + " " + probs[i] + " ");
        }
    }

    /**
     * Reads a name finder model from the classpath.
     *
     * @param resourcePath classpath-relative path of the model file
     * @return the loaded model, never {@code null}
     * @throws IOException if the resource is not on the classpath or cannot be read
     */
    private static TokenNameFinderModel loadModel(String resourcePath) throws IOException {
        // Use this class's own loader so the example does not depend on a
        // sibling class being present.
        try (InputStream modelIn = OpenNLPPersonNameMain.class.getClassLoader()
                .getResourceAsStream(resourcePath)) {
            // getResourceAsStream signals "not found" by returning null.
            if (modelIn == null) {
                throw new IOException("Model resource not found on classpath: " + resourcePath);
            }
            return new TokenNameFinderModel(modelIn);
        }
    }

    /**
     * Joins {@code tokens[start..end)} with single spaces.
     *
     * @param tokens the token array
     * @param start index of the first token, inclusive
     * @param end index after the last token, exclusive
     * @return the selected tokens separated by a single space
     */
    private static String joinTokens(String[] tokens, int start, int end) {
        StringBuilder buffer = new StringBuilder();
        for (int j = start; j < end; j++) {
            if (j > start) {
                buffer.append(' ');
            }
            buffer.append(tokens[j]);
        }
        return buffer.toString();
    }
}
The output will be: John Smith 0.789394314903262
References:
http://sillycat.iteye.com/admin/blogs/2248952
本文介绍如何利用OpenNLP库实现姓名识别,通过加载特定的命名实体识别模型,对给定文本进行姓名提取,并输出识别结果及其置信度。
707

被折叠的 条评论
为什么被折叠?



