# Source: https://github.com/larsmans/seqlearn
"""
Generic sequence prediction script using CoNLL format.
"""
from __future__ import print_function
import fileinput
from glob import glob
import sys
from seqlearn.datasets import load_conll
from seqlearn.evaluation import bio_f_score
from seqlearn.perceptron import StructuredPerceptron
from sklearn.metrics import accuracy_score
from sklearn.externals import joblib
if __name__ == "__main__":
    # Script entry point.
    #
    # Usage: <script> TRAIN_FILE TEST_FILE
    #
    # Trains a StructuredPerceptron on TRAIN_FILE, saves the fitted model
    # with joblib, reloads it, then prints the prediction time in
    # milliseconds and the token-level accuracy (as a percentage) on
    # TEST_FILE.
    import time

    if len(sys.argv) < 3:
        # Fail with a usage message instead of an IndexError on sys.argv.
        sys.exit("usage: %s TRAIN_FILE TEST_FILE" % sys.argv[0])
    train_path, test_path = sys.argv[1], sys.argv[2]

    # Where the fitted model is persisted and reloaded from.
    model_path = 'trunk.pkl'

    # NOTE(review): `features` is not defined in the visible part of this
    # file; it must be provided elsewhere (a feature-extraction callable
    # passed through to load_conll) -- confirm before running.
    # NOTE(review): the positional arguments 100000 and True are presumably
    # load_conll's `n_features` and `split` -- verify against the seqlearn
    # version in use.
    X_train, y_train, lengths_train = load_conll(
        train_path, features, 100000, True)
    X_test, y_test, lengths_test = load_conll(
        test_path, features, 100000, True)

    clf = StructuredPerceptron(verbose=True, max_iter=5)
    print("Training %s" % clf)
    clf.fit(X_train, y_train, lengths_train)

    # Round-trip the model through disk so that prediction is measured on
    # the reloaded estimator rather than the in-memory one.
    # NOTE(review): `sklearn.externals.joblib` is unavailable in recent
    # scikit-learn releases; the standalone `joblib` package may be needed.
    joblib.dump(clf, model_path)
    xclf = joblib.load(model_path)

    # Time only the predict() call; the value printed is in milliseconds.
    start = time.time()
    y_pred = xclf.predict(X_test, lengths_test)
    end = time.time()
    print((end - start) * 1000)

    print("Accuracy: %.3f" % (100 * accuracy_score(y_test, y_pred)))
# Notes:
#   - Input files are in CoNLL format and are read with seqlearn's load_conll.
#   - Reference: http://larsmans.github.io/seqlearn/reference.html