use of edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector in project cogcomp-nlp by CogComp.
the class ChunkerTrain method trainModelsWithParser.
/**
 * Trains the chunker on everything the given parser yields, making {@code iter} passes over
 * the data.
 *
 * <p>Each object returned by the parser is treated as a {@code LinkedVector}; every element
 * of that vector is handed to {@code chunker.learn}. After each pass the parser is reset and
 * the chunker is told the round is over; once all passes are done, learning is finalized.
 *
 * @param parser Parser for the training data. Initialized in trainModels(String trainingData)
 */
public void trainModelsWithParser(Parser parser) {
    Chunker.isTraining = true;

    int round = 1;
    while (round <= iter) {
        // One full pass: the parser signals exhaustion by returning null.
        for (LinkedVector vector = (LinkedVector) parser.next();
                vector != null;
                vector = (LinkedVector) parser.next()) {
            int position = 0;
            while (position < vector.size()) {
                chunker.learn(vector.get(position));
                position++;
            }
        }

        // Rewind the data and close out this round before reporting it.
        parser.reset();
        chunker.doneWithRound();
        logger.info("Iteration number : " + round);
        round++;
    }

    chunker.doneLearning();
}
use of edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector in project cogcomp-nlp by CogComp.
the class ChunkerTrain method trainModelsWithParser.
/**
 * Trains the chunker while using a held-out tail of the data to pick the number of rounds.
 *
 * <p>The method works in three phases:
 * <ol>
 *   <li>Count the examples produced by {@code parser} and compute the train/dev boundary
 *       {@code idx = round(count * (1 - dev_ratio))}, clamped to {@code [0, count]}.</li>
 *   <li>For each of {@code iter} rounds: learn from the first {@code idx} examples, write the
 *       model to disk, reload it from the {@code .lc}/{@code .lex} paths and evaluate it on
 *       whatever the parser has left (the dev portion). Element 2 of the overall stats is
 *       recorded as that round's F1.</li>
 *   <li>Pick the round with the highest recorded F1 (earliest wins on ties; 0 if no round
 *       scored above 0) and run that many further rounds over the complete data.</li>
 * </ol>
 *
 * @param parser    source of training examples; each item is cast to {@code LinkedVector}
 * @param modeldir  directory used to build the {@code .lc} and {@code .lex} model paths
 * @param modelname base file name of the model (without extension)
 * @param dev_ratio fraction of the examples held out for evaluation
 */
public void trainModelsWithParser(Parser parser, String modeldir, String modelname, double dev_ratio) {
    Chunker.isTraining = true;
    double bestF1 = 0;
    int bestIter = 0;
    double[] F1array = new double[iter];
    String lcpath = modeldir + File.separator + modelname + ".lc";
    String lexpath = modeldir + File.separator + modelname + ".lex";

    // Get the total number of training examples
    int total = 0;
    LinkedVector ex;
    while (parser.next() != null) {
        total++;
    }
    parser.reset();

    // Get the boundary between train and dev, clamped to the valid range
    long idx = Math.max(0L, Math.min(Math.round(total * (1 - dev_ratio)), (long) total));

    // Run the learner and save F1 for each iteration
    for (int i = 1; i <= iter; i++) {
        // Train on exactly the first idx examples. The bound is tested BEFORE pulling from the
        // parser, so the first dev example is neither learned from nor silently consumed.
        for (long seen = 0; seen < idx && (ex = (LinkedVector) parser.next()) != null; seen++) {
            for (int j = 0; j < ex.size(); j++) {
                chunker.learn(ex.get(j));
            }
        }
        chunker.doneWithRound();
        // Presumably persists the model to the .lc/.lex paths reloaded below - confirm.
        writeModelsToDisk(modeldir, modelname);

        // Test on dev set: the parser is positioned at the boundary, so the tester only sees
        // the examples that were not used for training in this round.
        BIOTester tester = new BIOTester(new Chunker(lcpath, lexpath), new ChunkLabel(), new ChildrenFromVectors(parser));
        double[] result = tester.test().getOverallStats();
        double f1 = result[2];
        F1array[i - 1] = f1;
        System.out.println("Iteration number : " + i + ". F1 score on devset: " + f1);
        parser.reset();
    }

    // Get the best F1 score and corresponding iter (strict '>' keeps the earliest best round)
    for (int i = 0; i < iter; i++) {
        if (F1array[i] > bestF1) {
            bestF1 = F1array[i];
            bestIter = i + 1;
        }
    }
    System.out.println("Best #Iter = " + bestIter + " (F1=" + bestF1 + ")");
    System.out.println("Rerun the learner using best #Iter...");

    // Rerun the learner on the full data set.
    // NOTE(review): nothing here resets the chunker, so these rounds continue from the weights
    // learned during model selection above - confirm whether a fresh start is intended.
    for (int i = 1; i <= bestIter; i++) {
        while ((ex = (LinkedVector) parser.next()) != null) {
            for (int j = 0; j < ex.size(); j++) {
                chunker.learn(ex.get(j));
            }
        }
        parser.reset();
        chunker.doneWithRound();
        System.out.println("Iteration number : " + i);
    }
    chunker.doneLearning();
}
use of edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector in project cogcomp-nlp by CogComp.
the class Reuters2003Parser method next.
/**
 * Produces the next object parsed from the input file; in this case, that object is guaranteed
 * to be a <code>LinkedVector</code> populated by <code>Token</code>s representing a sentence.
 *
 * <p>Rows with fewer than two columns, and rows whose column 4 equals <code>-X-</code>, are
 * skipped before a sentence starts. Column 3 supplies each token's label, and columns 5 and 4
 * are passed to the <code>Word</code> constructor. A label starting with <code>I</code> is
 * rewritten to start with <code>B</code> on the first token, and on any later token whose
 * predecessor's label does not end with the same suffix (the text after the first two
 * characters). The sentence ends at the first empty row or at end of input.
 *
 * @return the parsed sentence, or <code>null</code> when the input is exhausted
 **/
public Object next() {
    String[] fields = (String[]) super.next();

    // Discard separator / document-marker rows that precede the sentence.
    while (fields != null) {
        boolean skippable = fields.length < 2 || fields[4].equals("-X-");
        if (!skippable) {
            break;
        }
        fields = (String[]) super.next();
    }
    if (fields == null) {
        return null;
    }

    // The first token of a sentence can never continue a chunk.
    String firstLabel = fields[3];
    if (firstLabel.charAt(0) == 'I') {
        fields[3] = "B" + firstLabel.substring(1);
    }
    Token tail = new Token(new Word(fields[5], fields[4]), null, fields[3]);
    String previousLabel = fields[3];

    while (true) {
        fields = (String[]) super.next();
        if (fields == null || fields.length == 0) {
            break;
        }

        String currentLabel = fields[3];
        boolean keepLabel =
                currentLabel.charAt(0) != 'I' || previousLabel.endsWith(currentLabel.substring(2));
        if (!keepLabel) {
            fields[3] = "B" + currentLabel.substring(1);
        }

        Token appended = new Token(new Word(fields[5], fields[4]), tail, fields[3]);
        tail.next = appended;
        tail = appended;
        previousLabel = fields[3];
    }

    // The vector is built from the last token of the chain.
    return new LinkedVector(tail);
}
use of edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector in project cogcomp-nlp by CogComp.
the class POSBracketToToken method next.
/**
 * Returns the next labeled word in the data.
 *
 * <p>When no sentence is pending, the next non-empty <code>LinkedVector</code> is pulled from
 * the superclass and converted into a chain of <code>Token</code>s. Each token is created with
 * its word's <code>partOfSpeech</code> as the third constructor argument, after which the
 * token's own <code>partOfSpeech</code> field is cleared. Tokens are then handed out one per
 * call.
 *
 * @return the next token, or <code>null</code> when the input is exhausted
 */
public Object next() {
    if (currentWord == null) {
        LinkedVector vector = (LinkedVector) super.next();
        // Skip empty sentences.
        while (vector != null && vector.size() == 0) {
            vector = (LinkedVector) super.next();
        }
        if (vector == null) {
            return null;
        }

        Word w = (Word) vector.get(0);
        Token t = currentWord = new Token(w, null, w.partOfSpeech);
        t.partOfSpeech = null;
        while (w.next != null) {
            w = (Word) w.next;
            Token appended = new Token(w, t, w.partOfSpeech);
            // Clear the field on the NEW token. Clearing it on the previous one (as before)
            // left the last token of every multi-word sentence with its part of speech intact.
            appended.partOfSpeech = null;
            t.next = appended;
            t = appended;
        }
    }

    Token result = currentWord;
    currentWord = (Token) currentWord.next;
    return result;
}
use of edu.illinois.cs.cogcomp.lbjava.parse.LinkedVector in project cogcomp-nlp by CogComp.
the class PlainToTokenParser method next.
/**
 * This method returns {@link Token}s until the input is exhausted, at
 * which point it returns <code>null</code>.
 *
 * <p>When no token is pending, the next <code>LinkedVector</code> of <code>Word</code>s is
 * read from the wrapped parser and converted into a chain of unlabeled tokens (both the label
 * argument and, for the first token, the predecessor are <code>null</code>). Empty vectors
 * are skipped.
 *
 * @return the next token, or <code>null</code> once the wrapped parser returns <code>null</code>
 **/
public Object next() {
    while (next == null) {
        LinkedVector words = (LinkedVector) parser.next();
        if (words == null) {
            return null;
        }
        // An empty vector has no first word to read; move on to the next one instead of
        // failing on get(0).
        if (words.size() == 0) {
            continue;
        }

        Word w = (Word) words.get(0);
        Token t = new Token(w, null, null);
        for (w = (Word) w.next; w != null; w = (Word) w.next) {
            t.next = new Token(w, t, null);
            t = (Token) t.next;
        }

        // Build the vector from the last token, then start handing out from its first element.
        LinkedVector tokens = new LinkedVector(t);
        next = (Token) tokens.get(0);
    }

    Token result = next;
    next = (Token) next.next;
    return result;
}
Aggregations