Use of edu.umd.cloud9.util.array.ArrayListOfInts in project mavuno by metzlerd.
Class DIRTExtractor, method extractPairs.
/**
 * Recursively walks the subtree rooted at {@code root} and collects every
 * path that runs from a node in that subtree up to {@code root}.
 *
 * While walking, each path coming from the child currently being processed
 * is combined with each path coming from an earlier child of the same node;
 * the results of {@code getContext} for both orderings of the two paths are
 * appended to {@code mDependPairs}.
 *
 * @param children adjacency array; {@code children[n]} lists the child ids of node {@code n}
 * @param root     id of the node whose subtree is processed
 * @param tokens   sentence tokens, forwarded unchanged to {@code getContext}
 * @param chunks   per-position chunk text, forwarded unchanged to {@code getContext}
 * @return all paths that end at {@code root}, the last one being the
 *         single-element path holding only {@code root}
 */
private List<ArrayListOfInts> extractPairs(ArrayListOfInts[] children, int root, List<TratzParsedTokenWritable> tokens, Text[] chunks) {
  List<ArrayListOfInts> collected = new ArrayList<ArrayListOfInts>();

  ArrayListOfInts kids = children[root];
  int kidCount = kids.size();

  for (int k = 0; k < kidCount; k++) {
    // paths that climb from somewhere below this child up to the child itself
    List<ArrayListOfInts> branch = extractPairs(children, kids.get(k), tokens, chunks);

    // extend each of them by one step so that they end at the current node
    for (ArrayListOfInts climbing : branch) {
      climbing.add(root);
    }

    // pair the new paths with those gathered from earlier children; both
    // sides must contain more than just a single node
    for (ArrayListOfInts earlier : collected) {
      if (earlier.size() > 1) {
        for (ArrayListOfInts current : branch) {
          if (current.size() > 1) {
            mDependPairs.addAll(getContext(earlier, current, tokens, chunks));
            mDependPairs.addAll(getContext(current, earlier, tokens, chunks));
          }
        }
      }
    }

    // later children may now pair up with these paths as well
    collected.addAll(branch);
  }

  // finally, the path that starts (and ends) at the current node
  ArrayListOfInts self = new ArrayListOfInts();
  self.add(root);
  collected.add(self);

  return collected;
}
Use of edu.umd.cloud9.util.array.ArrayListOfInts in project mavuno by metzlerd.
Class DIRTExtractor, method loadDependPairs.
/**
 * Pulls the next sentence from {@code mSentIter}, rebuilds
 * {@code mDependPairs} from its dependency parse and points
 * {@code mDependPairsIter} at the start of the refreshed collection.
 *
 * Steps: (1) build, for every token position, the text of the chunk that
 * position belongs to; (2) build a child-list array from each token's
 * dependency index, skipping punctuation; (3) run {@code extractPairs} on
 * every node listed under slot 0 of that array.
 */
private void loadDependPairs() {
  // start from an empty result set
  mDependPairs.clear();

  // fetch the sentence and its tokens
  SentenceWritable<TratzParsedTokenWritable> sentence = mSentIter.next();
  List<TratzParsedTokenWritable> tokens = sentence.getTokens();
  final int numTokens = tokens.size();

  // chunk id of every token position
  int[] chunkIds = NLProcTools.getChunkIds(tokens);

  // position -> chunk text; all positions inside one chunk share a single
  // Text instance that keeps growing as further tokens are appended
  Text[] chunks = new Text[numTokens];
  Text running = null;
  for (int pos = 0; pos < numTokens; pos++) {
    Text word = tokens.get(pos).getToken();
    boolean continuesChunk = pos > 0 && chunkIds[pos] == chunkIds[pos - 1];
    if (continuesChunk) {
      running.append(MavunoUtils.SPACE_BYTES, 0, MavunoUtils.SPACE_BYTES_LENGTH);
      running.append(word.getBytes(), 0, word.getLength());
    } else {
      running = new Text(word);
    }
    chunks[pos] = running;
  }

  // one child list per node; tokens are stored under ids 1..numTokens and
  // slot 0 serves as the entry point (presumably the artificial root of
  // the parse -- confirm against the parser's index convention)
  ArrayListOfInts[] children = new ArrayListOfInts[numTokens + 1];
  for (int slot = 0; slot < children.length; slot++) {
    children[slot] = new ArrayListOfInts();
  }
  for (int pos = 0; pos < numTokens; pos++) {
    TratzParsedTokenWritable token = tokens.get(pos);
    if (token.getDependType().equals(PUNCTUATION_TYPE)) {
      // punctuation is left out of the tree
      continue;
    }
    children[token.getDependIndex()].add(pos + 1);
  }

  // extract (context, pattern) pairs from every subtree hanging off slot 0
  ArrayListOfInts topLevel = children[0];
  for (int r = 0; r < topLevel.size(); r++) {
    extractPairs(children, topLevel.get(r), tokens, chunks);
  }

  // expose the freshly built pairs through the iterator
  mDependPairsIter = mDependPairs.iterator();
}
Aggregations