Search in sources :

Example 1 with ArrayListOfInts

use of edu.umd.cloud9.util.array.ArrayListOfInts in project mavuno by metzlerd.

The method extractPairs of the class DIRTExtractor.

/**
 * Recursively walks the subtree below {@code root} and collects every path that
 * starts at a node of that subtree and ends at {@code root}.
 *
 * <p>While walking, each path arriving through one child of {@code root} is paired
 * with each path that arrived through an earlier child; the results of
 * {@code getContext} for both orderings of the pair are appended to
 * {@code mDependPairs}.
 *
 * @param children per-node child id lists, indexed by node id
 * @param root     id of the node whose subtree is processed
 * @param tokens   tokens of the sentence, forwarded to {@code getContext}
 * @param chunks   chunk text per token position, forwarded to {@code getContext}
 * @return all paths ending at {@code root}, including the single-node path
 *         consisting of {@code root} alone (added last)
 */
private List<ArrayListOfInts> extractPairs(ArrayListOfInts[] children, int root, List<TratzParsedTokenWritable> tokens, Text[] chunks) {
    final ArrayListOfInts kids = children[root];
    final List<ArrayListOfInts> pathsThroughRoot = new ArrayList<ArrayListOfInts>();

    // Descend into each child in turn (a leaf has no children and skips this loop).
    for (int k = 0; k < kids.size(); k++) {
        final List<ArrayListOfInts> childPaths = extractPairs(children, kids.get(k), tokens, chunks);

        // Extend every path coming up from this child so that it ends at the current node.
        for (ArrayListOfInts childPath : childPaths) {
            childPath.add(root);
        }

        // Pair the new paths with the ones gathered from earlier children; both
        // members of a pair must consist of more than just a root.
        for (ArrayListOfInts earlier : pathsThroughRoot) {
            if (earlier.size() > 1) {
                for (ArrayListOfInts childPath : childPaths) {
                    if (childPath.size() > 1) {
                        mDependPairs.addAll(getContext(earlier, childPath, tokens, chunks));
                        mDependPairs.addAll(getContext(childPath, earlier, tokens, chunks));
                    }
                }
            }
        }

        // Only now do this child's paths become visible to later siblings.
        pathsThroughRoot.addAll(childPaths);
    }

    // The current node also starts a path of its own.
    final ArrayListOfInts self = new ArrayListOfInts();
    self.add(root);
    pathsThroughRoot.add(self);

    return pathsThroughRoot;
}
Also used : ArrayListOfInts(edu.umd.cloud9.util.array.ArrayListOfInts) ArrayList(java.util.ArrayList)

Example 2 with ArrayListOfInts

use of edu.umd.cloud9.util.array.ArrayListOfInts in project mavuno by metzlerd.

The method loadDependPairs of the class DIRTExtractor.

/**
 * Rebuilds {@code mDependPairs} from the next sentence supplied by {@code mSentIter}
 * and points {@code mDependPairsIter} at the refreshed collection.
 *
 * <p>Steps: clear the previous pairs, fetch the sentence, compute the chunk text
 * for every token position, build the child lists of the parse tree, and run
 * {@code extractPairs} from every node listed under index 0.
 */
private void loadDependPairs() {
    // Start from an empty set of pairs.
    mDependPairs.clear();

    // Pull the next sentence and its tokens.
    SentenceWritable<TratzParsedTokenWritable> sentence = mSentIter.next();
    List<TratzParsedTokenWritable> tokens = sentence.getTokens();
    final int tokenCount = tokens.size();

    // Chunk id of every token.
    int[] chunkIds = NLProcTools.getChunkIds(tokens);

    // Map each position to its chunk text. Consecutive tokens with the same chunk id
    // share one Text instance, which keeps growing as later tokens are appended.
    Text[] chunks = new Text[tokenCount];
    Text running = null;
    for (int pos = 0; pos < tokenCount; pos++) {
        Text word = tokens.get(pos).getToken();
        boolean startsNewChunk = pos == 0 || chunkIds[pos] != chunkIds[pos - 1];
        if (startsNewChunk) {
            running = new Text(word);
        } else {
            running.append(MavunoUtils.SPACE_BYTES, 0, MavunoUtils.SPACE_BYTES_LENGTH);
            running.append(word.getBytes(), 0, word.getLength());
        }
        chunks[pos] = running;
    }

    // Build the parse tree: one child list per token plus one extra slot at index 0.
    ArrayListOfInts[] children = new ArrayListOfInts[tokenCount + 1];
    for (int slot = 0; slot < children.length; slot++) {
        children[slot] = new ArrayListOfInts();
    }

    // Tokens are registered under their dependency index using 1-based ids;
    // punctuation is left out of the tree.
    int nodeId = 1;
    for (TratzParsedTokenWritable token : tokens) {
        if (!token.getDependType().equals(PUNCTUATION_TYPE)) {
            children[token.getDependIndex()].add(nodeId);
        }
        nodeId++;
    }

    // Extract (context, pattern) pairs starting from every node attached to index 0.
    ArrayListOfInts roots = children[0];
    for (int r = 0; r < roots.size(); r++) {
        extractPairs(children, roots.get(r), tokens, chunks);
    }

    // Expose the freshly collected pairs.
    mDependPairsIter = mDependPairs.iterator();
}
Also used : TratzParsedTokenWritable(edu.isi.mavuno.util.TratzParsedTokenWritable) ArrayListOfInts(edu.umd.cloud9.util.array.ArrayListOfInts) Text(org.apache.hadoop.io.Text)

Aggregations

ArrayListOfInts (edu.umd.cloud9.util.array.ArrayListOfInts): 2, TratzParsedTokenWritable (edu.isi.mavuno.util.TratzParsedTokenWritable): 1, ArrayList (java.util.ArrayList): 1, Text (org.apache.hadoop.io.Text): 1