Search in sources:

Example 1 with Record

use of edu.illinois.cs.cogcomp.thrift.curator.Record in project cogcomp-nlp by CogComp.

the class CuratorClient method addRecordViewFromCurator.

/**
 * Does the network call to the Curator and fetches a record that has a particular view.
 * <p>
 * A new framed Thrift transport is opened for every call and is always closed before this
 * method returns, including when opening the transport or the Curator call itself fails.
 *
 * @param text The raw text (sent to the Curator if {@link #respectTokenization} is false).
 * @param sentences The list of tokenized sentences (sent to the Curator if
 *        {@link #respectTokenization} is true).
 * @param viewName The view to get (according to the Curator lingo).
 * @return A {@link edu.illinois.cs.cogcomp.thrift.curator.Record} with the requested view
 * @throws SocketException if address reuse cannot be enabled on the underlying socket
 * @throws TException if the transport cannot be opened or the Thrift call fails
 */
private Record addRecordViewFromCurator(String text, List<String> sentences, String viewName) throws ServiceUnavailableException, AnnotationFailedException, TException, SocketException {
    viewName = convertCuratorViewName(viewName);
    TSocket socket = new TSocket(this.curatorHost, this.curatorPort);
    TTransport transport = new TFramedTransport(socket);
    logger.debug("Calling curator on host '" + curatorHost + "', port '" + curatorPort + "' for view '" + viewName + "'...");
    try {
        try {
            socket.getSocket().setReuseAddress(true);
        } catch (SocketException e) {
            logger.error("Unable to setReuseAddress!", e);
            throw e;
        }
        TProtocol protocol = new TBinaryProtocol(transport);
        transport.open();
        Curator.Client client = new Curator.Client(protocol);
        if (respectTokenization) {
            // Pre-tokenized input: send the sentence list so the Curator keeps our tokenization.
            return client.wsprovide(viewName, sentences, forceUpdate);
        }
        return client.provide(viewName, text, forceUpdate);
    } finally {
        // Release the socket on every exit path; previously a failed call leaked the connection.
        transport.close();
    }
}
Also used : SocketException(java.net.SocketException) TBinaryProtocol(org.apache.thrift.protocol.TBinaryProtocol) TProtocol(org.apache.thrift.protocol.TProtocol) TFramedTransport(org.apache.thrift.transport.TFramedTransport) Curator(edu.illinois.cs.cogcomp.thrift.curator.Curator) Record(edu.illinois.cs.cogcomp.thrift.curator.Record) TTransport(org.apache.thrift.transport.TTransport) TSocket(org.apache.thrift.transport.TSocket)

Example 2 with Record

use of edu.illinois.cs.cogcomp.thrift.curator.Record in project cogcomp-nlp by CogComp.

the class CuratorClient method getTextAnnotation.

/**
 * Builds a
 * {@link edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation} for the
 * given {@code text}, tagged with the supplied {@code corpusId} and {@code textId}.
 * <p>
 * The underlying {@code Record} is obtained from {@link #getRecord(String)}; its
 * {@link edu.illinois.cs.cogcomp.core.datastructures.ViewNames#TOKENS} and
 * {@link edu.illinois.cs.cogcomp.core.datastructures.ViewNames#SENTENCE} label views are then
 * handed to {@code CuratorDataStructureInterface.getTextAnnotationFromRecord}. See
 * {@link #getRecord(String)} for how tokens and sentences are produced depending on
 * {@link #respectTokenization}.
 * <p>
 * <b>Note:</b> only the token and sentence views are requested here. Other views have to be
 * fetched separately through the appropriate {@link CuratorClient} methods.
 *
 * @param corpusId Identifier for the corpus
 * @param textId Identifier for the text
 * @param text The raw text
 * @return A {@code TextAnnotation} built from the record's token and sentence label views
 */
public TextAnnotation getTextAnnotation(String corpusId, String textId, String text) throws ServiceUnavailableException, AnnotationFailedException, TException, SocketException {
    final Record curatorRecord = getRecord(text);
    final Map<String, Labeling> labelViews = curatorRecord.getLabelViews();
    final Labeling tokens = labelViews.get(ViewNames.TOKENS);
    final Labeling sentences = labelViews.get(ViewNames.SENTENCE);
    return CuratorDataStructureInterface.getTextAnnotationFromRecord(corpusId, textId, curatorRecord, tokens, sentences);
}
Also used : Record(edu.illinois.cs.cogcomp.thrift.curator.Record)

Example 3 with Record

use of edu.illinois.cs.cogcomp.thrift.curator.Record in project cogcomp-nlp by CogComp.

the class CuratorClient method getRecord.

/**
 * Creates a fresh {@link edu.illinois.cs.cogcomp.thrift.curator.Record} for {@code text},
 * holding only sentence and token label views.
 * <p>
 * When {@link #respectTokenization} is {@code true} no Curator call is made for these views:
 * the text is split on the platform line separator (the {@code line.separator} system
 * property), empty lines are dropped, and the remaining lines are passed to
 * {@code RecordUtils.sentences} and {@code RecordUtils.tokenize}. Otherwise both views are
 * requested from the Curator through {@link #addRecordView(Record, String)}.
 * <p>
 * <b>Note:</b> other views have to be added separately through the appropriate
 * {@link CuratorClient} methods.
 *
 * @param text The text (tokenized or not)
 * @return A {@link edu.illinois.cs.cogcomp.thrift.curator.Record} with
 *         {@link edu.illinois.cs.cogcomp.core.datastructures.ViewNames#TOKENS} and
 *         {@link edu.illinois.cs.cogcomp.core.datastructures.ViewNames#SENTENCE} views.
 */
private Record getRecord(String text) throws ServiceUnavailableException, AnnotationFailedException, TException, SocketException {
    // Start from an empty record: raw text, empty view collections and an identifier
    // derived from the text and the tokenization mode.
    final Record result = new Record();
    result.setRawText(text);
    result.setLabelViews(new TreeMap<String, Labeling>());
    result.setParseViews(new TreeMap<String, Forest>());
    result.setClusterViews(new TreeMap<String, Clustering>());
    result.setViews(new TreeMap<String, View>());
    result.setIdentifier(Identifier.getId(text, respectTokenization));

    if (!respectTokenization) {
        // Let the Curator produce the tokenization and the sentence split.
        addRecordView(result, ViewNames.TOKENS);
        addRecordView(result, ViewNames.SENTENCE);
        return result;
    }

    // Caller-provided tokenization: every non-empty line is treated as one sentence.
    final List<String> nonEmptyLines = new LinkedList<>();
    final String[] lines = text.split(System.getProperty("line.separator"));
    for (String line : lines) {
        if (line.length() > 0) {
            nonEmptyLines.add(line);
        }
    }
    final Map<String, Labeling> labelViews = result.getLabelViews();
    labelViews.put(ViewNames.SENTENCE, RecordUtils.sentences(nonEmptyLines));
    labelViews.put(ViewNames.TOKENS, RecordUtils.tokenize(nonEmptyLines));
    return result;
}
Also used : LinkedList(java.util.LinkedList) Record(edu.illinois.cs.cogcomp.thrift.curator.Record)

Example 4 with Record

use of edu.illinois.cs.cogcomp.thrift.curator.Record in project cogcomp-nlp by CogComp.

the class CuratorClient method getTextAnnotationView.

/**
 * Requests the view {@code viewName} from the Curator for the text of {@code ta} and converts
 * the returned Curator structure into a {@code TextAnnotation} view aligned to {@code ta}.
 * <p>
 * The conversion is chosen by the view type reported by {@code ViewNames.getViewType}: label
 * views are read from the record's label views, dependency, parse and predicate-argument
 * views from its parse views, and coreference views from its cluster views.
 *
 * @param ta The text annotation the view should be aligned to
 * @param viewName The name of the view to fetch
 * @return The view built from the Curator's response
 * @throws AnnotationFailedException if the view type is not one of the handled types, or if a
 *         dependency/parse forest contains more trees than {@code ta} has sentences
 */
public edu.illinois.cs.cogcomp.core.datastructures.textannotation.View getTextAnnotationView(TextAnnotation ta, String viewName) throws TException, AnnotationFailedException, ServiceUnavailableException, SocketException {
    final Record fetched = addRecordViewFromCurator(ta.getText(), TextAnnotationUtilities.getSentenceList(ta), viewName);
    final ViewTypes kind = ViewNames.getViewType(viewName);

    if (kind == ViewTypes.TOKEN_LABEL_VIEW) {
        Labeling tokenLabels = fetched.getLabelViews().get(convertCuratorViewName(viewName));
        return CuratorDataStructureInterface.alignLabelingToTokenLabelView(viewName, ta, tokenLabels);
    }

    if (kind == ViewTypes.SPAN_LABEL_VIEW) {
        // Only the Wikifier view is allowed to contain overlapping spans.
        final boolean overlapAllowed = viewName.equals(ViewNames.WIKIFIER);
        Labeling spanLabels = fetched.getLabelViews().get(convertCuratorViewName(viewName));
        return CuratorDataStructureInterface.alignLabelingToSpanLabelView(viewName, ta, spanLabels, overlapAllowed);
    }

    if (kind == ViewTypes.DEPENDENCY_VIEW) {
        Forest dependencies = fetched.getParseViews().get(convertCuratorViewName(viewName));
        // More trees than sentences cannot be aligned; fewer trees are let through.
        if (dependencies.trees.size() > TextAnnotationUtilities.getSentenceList(ta).size()) {
            throw new AnnotationFailedException("mismatched number of trees and sentences.");
        }
        return CuratorDataStructureInterface.alignForestToDependencyView(viewName, ta, dependencies);
    }

    if (kind == ViewTypes.PARSE_VIEW) {
        Forest parses = fetched.getParseViews().get(convertCuratorViewName(viewName));
        // More trees than sentences cannot be aligned; fewer trees are let through.
        if (parses.trees.size() > TextAnnotationUtilities.getSentenceList(ta).size()) {
            throw new AnnotationFailedException("mismatched number of trees and sentences.");
        }
        return CuratorDataStructureInterface.alignForestToParseTreeView(viewName, ta, parses);
    }

    if (kind == ViewTypes.PREDICATE_ARGUMENT_VIEW) {
        Forest predicateArguments = fetched.getParseViews().get(convertCuratorViewName(viewName));
        return CuratorDataStructureInterface.alignForestToPredicateArgumentView(viewName, ta, predicateArguments);
    }

    if (kind == ViewTypes.COREF_VIEW) {
        Clustering clusters = fetched.getClusterViews().get(convertCuratorViewName(viewName));
        return CuratorDataStructureInterface.alignClusteringToCoreferenceView(viewName, ta, clusters);
    }

    throw new AnnotationFailedException("Unrecognised view type " + kind);
}
Also used : ViewTypes(edu.illinois.cs.cogcomp.core.datastructures.ViewTypes) Record(edu.illinois.cs.cogcomp.thrift.curator.Record)

Example 5 with Record

use of edu.illinois.cs.cogcomp.thrift.curator.Record in project cogcomp-nlp by CogComp.

the class CuratorClient method addRecordView.

/**
 * Fetches {@code viewName} from the Curator and copies it into {@code record}.
 * <p>
 * The view is stored under {@code viewName} in the collection matching its view type: label
 * views for token/span label views, cluster views for coreference views, and parse views for
 * dependency, parse and predicate-argument views. For any other view type the record is left
 * unchanged.
 *
 * @param record The {@link edu.illinois.cs.cogcomp.thrift.curator.Record} to annotate
 * @param viewName The view to add
 */
private void addRecordView(Record record, String viewName) throws TException, AnnotationFailedException, ServiceUnavailableException, SocketException {
    final Record fetched = addRecordViewFromCurator(record.getRawText(), RecordUtils.getSentenceList(record), viewName);
    final ViewTypes kind = ViewNames.getViewType(viewName);

    final boolean isLabelView = kind == ViewTypes.TOKEN_LABEL_VIEW || kind == ViewTypes.SPAN_LABEL_VIEW;
    final boolean isClusterView = kind == ViewTypes.COREF_VIEW;
    final boolean isParseView = kind == ViewTypes.DEPENDENCY_VIEW || kind == ViewTypes.PARSE_VIEW || kind == ViewTypes.PREDICATE_ARGUMENT_VIEW;

    // The Curator's response is keyed by the Curator-side view name, while the target record
    // is keyed by the caller-facing view name.
    if (isLabelView) {
        record.labelViews.put(viewName, fetched.getLabelViews().get(convertCuratorViewName(viewName)));
    } else if (isClusterView) {
        record.clusterViews.put(viewName, fetched.getClusterViews().get(convertCuratorViewName(viewName)));
    } else if (isParseView) {
        record.parseViews.put(viewName, fetched.getParseViews().get(convertCuratorViewName(viewName)));
    }
}
Also used : Record(edu.illinois.cs.cogcomp.thrift.curator.Record) TreeMap(java.util.TreeMap) Map(java.util.Map)

Aggregations

Record (edu.illinois.cs.cogcomp.thrift.curator.Record)5 ViewTypes (edu.illinois.cs.cogcomp.core.datastructures.ViewTypes)1 Curator (edu.illinois.cs.cogcomp.thrift.curator.Curator)1 SocketException (java.net.SocketException)1 LinkedList (java.util.LinkedList)1 Map (java.util.Map)1 TreeMap (java.util.TreeMap)1 TBinaryProtocol (org.apache.thrift.protocol.TBinaryProtocol)1 TProtocol (org.apache.thrift.protocol.TProtocol)1 TFramedTransport (org.apache.thrift.transport.TFramedTransport)1 TSocket (org.apache.thrift.transport.TSocket)1 TTransport (org.apache.thrift.transport.TTransport)1