Search in sources:

Example 21 with AttributeType

use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.

the class LineChartSink method open.

/**
 * Opens this sink and its input operator, then assembles the output schema.
 *
 * <p>Does nothing when the cursor is not {@code CLOSED}. Otherwise the input
 * operator is opened and its output schema is inspected: the name column named
 * by the predicate must be of type STRING or TEXT, and every data column named
 * by the predicate must be of type DOUBLE or INTEGER. The accepted attributes
 * are appended to {@code attributes} (name column first, then the data columns
 * in predicate order) and the output schema is built from that list.
 *
 * @throws TexeraException if no input operator has been set
 * @throws DataflowException if the name column or any data column has an
 *         unsupported type
 */
@Override
public void open() throws TexeraException {
    // Only a closed sink needs opening.
    if (cursor != CLOSED) {
        return;
    }
    if (inputOperator == null) {
        throw new TexeraException(ErrorMessages.INPUT_OPERATOR_NOT_SPECIFIED);
    }
    inputOperator.open();

    Schema inputSchema = inputOperator.getOutputSchema();

    // The name column must be textual.
    Attribute labelAttribute = inputSchema.getAttribute(predicate.getNameColumn());
    AttributeType labelType = labelAttribute.getType();
    boolean labelIsTextual = labelType.equals(AttributeType.STRING) || labelType.equals(AttributeType.TEXT);
    if (!labelIsTextual) {
        throw new DataflowException("Type of name column should be string or text.");
    }
    attributes.add(labelAttribute);

    // Every data column must be numeric.
    for (String columnName : predicate.getDataColumn()) {
        Attribute valueAttribute = inputSchema.getAttribute(columnName);
        AttributeType valueType = valueAttribute.getType();
        boolean valueIsNumeric = valueType.equals(AttributeType.DOUBLE) || valueType.equals(AttributeType.INTEGER);
        if (!valueIsNumeric) {
            throw new DataflowException("Type of data column should be integer or double.");
        }
        attributes.add(valueAttribute);
    }

    outputSchema = new Schema.Builder().add(attributes).build();
    cursor = OPENED;
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) Schema(edu.uci.ics.texera.api.schema.Schema) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) TexeraException(edu.uci.ics.texera.api.exception.TexeraException)

Example 22 with AttributeType

use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.

the class DictionaryMatcher method appendConjunctionMatchingSpans4Dictionary.

/**
 * Collects the spans of {@code inputTuple} that match dictionary entries under
 * conjunction semantics, looking only at the given attributes.
 *
 * <p>For a STRING attribute, a single span covering the whole field value is
 * added when {@code queryList} contains that exact value. For a TEXT attribute,
 * each entry of the map produced by {@code filterRelevantSpans} is narrowed to
 * the spans belonging to that attribute; those spans are added when
 * {@code DataflowUtils.isAllQueryTokensPresent} accepts them against the token
 * set at the same index in {@code queryTokenSetList}.
 *
 * @param inputTuple tuple whose payload and fields are examined
 * @param attributeNames names of the attributes to match against
 * @param queryTokenSetList token sets, indexed by the keys of the relevant-span map
 * @param queryList dictionary entries compared against whole STRING field values
 * @return the matching spans, in the order they were found
 * @throws DataflowException if an attribute is neither STRING nor TEXT
 */
private List<Span> appendConjunctionMatchingSpans4Dictionary(Tuple inputTuple, List<String> attributeNames, List<Set<String>> queryTokenSetList, List<String> queryList) throws DataflowException {
    List<Span> collected = new ArrayList<>();

    ListField<Span> payloadField = inputTuple.getField(SchemaConstants.PAYLOAD);
    Map<Integer, List<Span>> spansByQueryIndex = filterRelevantSpans(payloadField.getValue(), queryTokenSetList);

    for (String attributeName : attributeNames) {
        AttributeType attributeType = inputTuple.getSchema().getAttribute(attributeName).getType();
        String fieldText = inputTuple.getField(attributeName).getValue().toString();

        // Only STRING and TEXT attributes are handled; anything else is rejected for now.
        boolean isSupported = attributeType == AttributeType.STRING || attributeType == AttributeType.TEXT;
        if (!isSupported) {
            throw new DataflowException("KeywordMatcher: Fields other than STRING and TEXT are not supported yet");
        }

        if (attributeType == AttributeType.STRING) {
            // STRING: the dictionary must contain the complete field value.
            if (queryList.contains(fieldText)) {
                collected.add(new Span(attributeName, 0, fieldText.length(), fieldText, fieldText));
            }
        } else if (attributeType == AttributeType.TEXT) {
            // TEXT: every token of the query must be present among this attribute's spans.
            for (Map.Entry<Integer, List<Span>> entry : spansByQueryIndex.entrySet()) {
                int queryIndex = entry.getKey();
                List<Span> spansOfAttribute = entry.getValue().stream()
                        .filter(candidate -> candidate.getAttributeName().equals(attributeName))
                        .collect(Collectors.toList());
                if (DataflowUtils.isAllQueryTokensPresent(spansOfAttribute, queryTokenSetList.get(queryIndex))) {
                    collected.addAll(spansOfAttribute);
                }
            }
        }
    }
    return collected;
}
Also used : ListField(edu.uci.ics.texera.api.field.ListField) java.util(java.util) Tuple(edu.uci.ics.texera.api.tuple.Tuple) TexeraException(edu.uci.ics.texera.api.exception.TexeraException) Collectors(java.util.stream.Collectors) Span(edu.uci.ics.texera.api.span.Span) Matcher(java.util.regex.Matcher) SchemaConstants(edu.uci.ics.texera.api.constants.SchemaConstants) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) AbstractSingleInputOperator(edu.uci.ics.texera.dataflow.common.AbstractSingleInputOperator) ErrorMessages(edu.uci.ics.texera.api.constants.ErrorMessages) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) Schema(edu.uci.ics.texera.api.schema.Schema) Pattern(java.util.regex.Pattern) DataflowUtils(edu.uci.ics.texera.dataflow.utils.DataflowUtils) KeywordMatchingType(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatchingType) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) Span(edu.uci.ics.texera.api.span.Span)

Example 23 with AttributeType

use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.

the class DataWriter method getLuceneDocument.

/**
 * Converts a Texera tuple to a Lucene document.
 *
 * <p>Fields and schema attributes are paired by position; for each pair one
 * Lucene field is produced by {@code StorageUtils.getLuceneField} from the
 * attribute's type, the attribute's name and the field's value.
 *
 * @param tuple the tuple to convert
 * @return a new document holding one Lucene field per tuple field
 */
private static Document getLuceneDocument(Tuple tuple) {
    List<IField> tupleFields = tuple.getFields();
    List<Attribute> schemaAttributes = tuple.getSchema().getAttributes();
    Document luceneDocument = new Document();

    int position = 0;
    for (IField tupleField : tupleFields) {
        // The attribute at the same position describes this field.
        Attribute attribute = schemaAttributes.get(position);
        luceneDocument.add(StorageUtils.getLuceneField(attribute.getType(), attribute.getName(), tupleField.getValue()));
        position++;
    }
    return luceneDocument;
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) IField(edu.uci.ics.texera.api.field.IField) Document(org.apache.lucene.document.Document)

Example 24 with AttributeType

use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.

the class NlpSplitOperator method open.

/**
 * Opens this operator and its input operator, derives the output schema and
 * validates the type of the input attribute.
 *
 * <p>Does nothing when the cursor is not {@code CLOSED}. The output schema is
 * obtained by passing the input operator's output schema through
 * {@code transformToOutputSchema}. The attribute named by the predicate must
 * be of type STRING or TEXT.
 *
 * @throws DataflowException if no input operator has been set, or if the input
 *         attribute is neither STRING nor TEXT
 * @throws TexeraException declared by the overridden method
 */
@Override
public void open() throws TexeraException {
    // Only a closed operator needs opening.
    if (cursor != CLOSED) {
        return;
    }
    if (inputOperator == null) {
        throw new DataflowException(ErrorMessages.INPUT_OPERATOR_NOT_SPECIFIED);
    }
    inputOperator.open();

    Schema upstreamSchema = inputOperator.getOutputSchema();

    // The output schema is the input schema reshaped according to the chosen
    // output format (OneToOne vs. OneToMany).
    outputSchema = transformToOutputSchema(inputOperator.getOutputSchema());

    // Reject input attributes that are not textual.
    AttributeType actualType = upstreamSchema.getAttribute(predicate.getInputAttributeName()).getType();
    if (!actualType.equals(AttributeType.STRING) && !actualType.equals(AttributeType.TEXT)) {
        throw new DataflowException(String.format("input attribute %s must have type String or Text, its actual type is %s", predicate.getInputAttributeName(), actualType));
    }

    cursor = OPENED;
}
Also used : AttributeType(edu.uci.ics.texera.api.schema.AttributeType) Schema(edu.uci.ics.texera.api.schema.Schema) DataflowException(edu.uci.ics.texera.api.exception.DataflowException)

Example 25 with AttributeType

use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.

the class RegexMatcher method computeMatchingResultsWithPattern.

/**
 * Finds every match of {@code pattern} in the attributes named by the predicate
 * and reports each one as a span.
 *
 * <p>Each span records the attribute name, the start and end offsets of the
 * match within the field value's string form, the predicate's regex as the
 * key, and the matched text as the value.
 *
 * @param inputTuple tuple whose fields are searched
 * @param predicate supplies the attribute names to search and the regex recorded in each span
 * @param pattern compiled pattern applied to each field value
 * @return all matches found, attribute by attribute, in match order
 * @throws DataflowException if an attribute is neither STRING nor TEXT
 */
public static List<Span> computeMatchingResultsWithPattern(Tuple inputTuple, RegexPredicate predicate, Pattern pattern) {
    List<Span> foundSpans = new ArrayList<>();

    for (String attributeName : predicate.getAttributeNames()) {
        AttributeType attributeType = inputTuple.getSchema().getAttribute(attributeName).getType();
        String text = inputTuple.getField(attributeName).getValue().toString();

        // Only STRING and TEXT attributes are handled; anything else is rejected for now.
        boolean isSupported = attributeType == AttributeType.STRING || attributeType == AttributeType.TEXT;
        if (!isSupported) {
            throw new DataflowException("KeywordMatcher: Fields other than STRING and TEXT are not supported yet");
        }

        // Walk through successive matches; group() is the text between start() and end().
        for (Matcher occurrence = pattern.matcher(text); occurrence.find(); ) {
            foundSpans.add(new Span(attributeName, occurrence.start(), occurrence.end(), predicate.getRegex(), occurrence.group()));
        }
    }
    return foundSpans;
}
Also used : Matcher(java.util.regex.Matcher) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) Span(edu.uci.ics.texera.api.span.Span)

Aggregations

AttributeType (edu.uci.ics.texera.api.schema.AttributeType)31 DataflowException (edu.uci.ics.texera.api.exception.DataflowException)21 Schema (edu.uci.ics.texera.api.schema.Schema)16 TexeraException (edu.uci.ics.texera.api.exception.TexeraException)14 Attribute (edu.uci.ics.texera.api.schema.Attribute)13 Span (edu.uci.ics.texera.api.span.Span)10 Tuple (edu.uci.ics.texera.api.tuple.Tuple)7 SchemaConstants (edu.uci.ics.texera.api.constants.SchemaConstants)6 ListField (edu.uci.ics.texera.api.field.ListField)6 ArrayList (java.util.ArrayList)6 Collectors (java.util.stream.Collectors)6 ErrorMessages (edu.uci.ics.texera.api.constants.ErrorMessages)5 AbstractSingleInputOperator (edu.uci.ics.texera.dataflow.common.AbstractSingleInputOperator)5 DataflowUtils (edu.uci.ics.texera.dataflow.utils.DataflowUtils)5 IField (edu.uci.ics.texera.api.field.IField)4 java.util (java.util)4 Matcher (java.util.regex.Matcher)4 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)2 IOperator (edu.uci.ics.texera.api.dataflow.IOperator)2 KeywordMatchingType (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatchingType)2