Search in sources :

Example 1 with Schema

use of edu.uci.ics.textdb.api.schema.Schema in project textdb by TextDB.

the class TupleJsonDeserializer method deserialize.

/**
 * Deserializes a JSON object into a {@link Tuple}.
 *
 * The JSON tree is read from the parser, and two children are looked up by
 * {@code JsonConstants.SCHEMA} and {@code JsonConstants.FIELDS}. The schema child is bound to a
 * {@link Schema}; then, for each attribute of that schema, the element at the same index of the
 * fields child is bound to the field class reported by the attribute's type.
 *
 * @param p parser from which the tuple's JSON tree is read
 * @param ctxt deserialization context (not used by this method)
 * @return a tuple built from the decoded schema and the decoded fields, in schema order
 * @throws IOException if the JSON tree cannot be read from the parser
 * @throws JsonProcessingException if the schema or one of the fields cannot be bound
 */
@Override
public Tuple deserialize(JsonParser p, DeserializationContext ctxt) throws IOException, JsonProcessingException {
    JsonNode node = p.getCodec().readTree(p);
    JsonNode schemaNode = node.get(JsonConstants.SCHEMA);
    JsonNode fieldsNode = node.get(JsonConstants.FIELDS);

    // A single mapper serves the schema and every field; constructing an ObjectMapper is
    // comparatively expensive, so it must not be rebuilt once per field.
    ObjectMapper mapper = new ObjectMapper();
    Schema schema = mapper.treeToValue(schemaNode, Schema.class);

    int attributeCount = schema.getAttributes().size();
    ArrayList<IField> fields = new ArrayList<>(attributeCount);
    for (int i = 0; i < attributeCount; i++) {
        // The i-th schema attribute decides which concrete field class the i-th JSON field becomes.
        AttributeType attributeType = schema.getAttributes().get(i).getAttributeType();
        JsonNode fieldNode = fieldsNode.get(i);
        IField field = mapper.treeToValue(fieldNode, attributeType.getFieldClass());
        fields.add(field);
    }
    return new Tuple(schema, fields);
}
Also used : AttributeType(edu.uci.ics.textdb.api.schema.AttributeType) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) JsonNode(com.fasterxml.jackson.databind.JsonNode) IField(edu.uci.ics.textdb.api.field.IField) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)

Example 2 with Schema

use of edu.uci.ics.textdb.api.schema.Schema in project textdb by TextDB.

the class Utils method removeFields.

/**
 * Builds a new tuple from the given one with the named attributes, and the field values stored
 * at their positions, left out.
 *
 * @param tuple the tuple to copy attributes and fields from
 * @param removeFields names of the attributes to leave out
 * @return a new tuple holding only the attributes and fields that were not named
 */
public static Tuple removeFields(Tuple tuple, String... removeFields) {
    List<String> namesToDrop = Arrays.asList(removeFields);

    // Translate every name into its position within the tuple's schema.
    List<Integer> positionsToDrop = namesToDrop.stream()
            .map(name -> tuple.getSchema().getIndex(name))
            .collect(Collectors.toList());

    // Schema side: keep each attribute whose name was not listed.
    Attribute[] keptAttributes = tuple.getSchema().getAttributes().stream()
            .filter(attribute -> !namesToDrop.contains(attribute.getAttributeName()))
            .toArray(Attribute[]::new);
    Schema trimmedSchema = new Schema(keptAttributes);

    // Value side: keep each field whose position was not resolved above.
    int fieldCount = tuple.getSchema().getAttributes().size();
    IField[] keptFields = IntStream.range(0, fieldCount)
            .filter(position -> !positionsToDrop.contains(position))
            .mapToObj(position -> tuple.getField(position))
            .toArray(IField[]::new);

    return new Tuple(trimmedSchema, keptFields);
}
Also used : IntStream(java.util.stream.IntStream) SchemaConstants(edu.uci.ics.textdb.api.constants.SchemaConstants) DataConstants(edu.uci.ics.textdb.api.constants.DataConstants) Arrays(java.util.Arrays) Attribute(edu.uci.ics.textdb.api.schema.Attribute) TextdbProject(edu.uci.ics.textdb.api.constants.DataConstants.TextdbProject) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) Schema(edu.uci.ics.textdb.api.schema.Schema) List(java.util.List) Paths(java.nio.file.Paths) IField(edu.uci.ics.textdb.api.field.IField) StorageException(edu.uci.ics.textdb.api.exception.StorageException) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) IField(edu.uci.ics.textdb.api.field.IField) Tuple(edu.uci.ics.textdb.api.tuple.Tuple)

Example 3 with Schema

use of edu.uci.ics.textdb.api.schema.Schema in project textdb by TextDB.

the class NlpSentimentOperator method open.

/**
 * Opens this operator.
 *
 * Does nothing unless the cursor is {@code CLOSED}. Otherwise it opens the input operator,
 * verifies that the predicate's input attribute exists in the input schema and has type
 * STRING or TEXT, computes the output schema via {@code transformSchema}, and builds the
 * Stanford CoreNLP pipeline with the "tokenize, ssplit, parse, sentiment" annotators.
 *
 * The cursor is switched to {@code OPENED} only after every step above has succeeded, so a
 * failure while building the pipeline does not leave the operator marked as opened without a
 * usable pipeline (which would also make later calls to this method return early).
 *
 * @throws DataFlowException if no input operator has been set
 * @throws RuntimeException if the input attribute is missing from the input schema or its
 *         type is neither STRING nor TEXT
 * @throws TextDBException declared by the signature; NOTE(review): presumably propagated from
 *         the input operator - confirm
 */
@Override
public void open() throws TextDBException {
    if (cursor != CLOSED) {
        return;
    }
    if (inputOperator == null) {
        throw new DataFlowException(ErrorMessages.INPUT_OPERATOR_NOT_SPECIFIED);
    }
    inputOperator.open();
    Schema inputSchema = inputOperator.getOutputSchema();
    // check if the input attribute is present in the input schema
    if (!inputSchema.containsField(predicate.getInputAttributeName())) {
        throw new RuntimeException(String.format("input attribute %s is not in the input schema %s", predicate.getInputAttributeName(), inputSchema.getAttributeNames()));
    }
    // check if attribute type is valid
    AttributeType inputAttributeType = inputSchema.getAttribute(predicate.getInputAttributeName()).getAttributeType();
    boolean isValidType = inputAttributeType.equals(AttributeType.STRING) || inputAttributeType.equals(AttributeType.TEXT);
    if (!isValidType) {
        throw new RuntimeException(String.format("input attribute %s must have type String or Text, its actual type is %s", predicate.getInputAttributeName(), inputAttributeType));
    }
    // generate output schema by transforming the input schema
    outputSchema = transformSchema(inputOperator.getOutputSchema());
    // setup NLP sentiment analysis pipeline
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
    sentimentPipeline = new StanfordCoreNLP(props);
    // Mark the operator as opened last: everything it needs is now in place.
    cursor = OPENED;
}
Also used : AttributeType(edu.uci.ics.textdb.api.schema.AttributeType) Schema(edu.uci.ics.textdb.api.schema.Schema) DataFlowException(edu.uci.ics.textdb.api.exception.DataFlowException) Properties(java.util.Properties) StanfordCoreNLP(edu.stanford.nlp.pipeline.StanfordCoreNLP)

Example 4 with Schema

use of edu.uci.ics.textdb.api.schema.Schema in project textdb by TextDB.

the class NlpSplitOperator method open.

/**
 * Opens this operator.
 *
 * Returns immediately unless the cursor is {@code CLOSED}. Otherwise the input operator is
 * opened, the predicate's input attribute is checked against the input schema (it must exist
 * and be of type STRING or TEXT), the output schema is computed with {@code transformSchema},
 * and the cursor is set to {@code OPENED}.
 *
 * @throws DataFlowException if no input operator has been set, if the input attribute is not
 *         in the input schema, or if its type is neither STRING nor TEXT
 * @throws TextDBException declared by the signature; NOTE(review): presumably propagated from
 *         the input operator - confirm
 */
@Override
public void open() throws TextDBException {
    // Only a closed operator has work to do here.
    if (cursor != CLOSED) {
        return;
    }
    if (inputOperator == null) {
        throw new DataFlowException(ErrorMessages.INPUT_OPERATOR_NOT_SPECIFIED);
    }
    inputOperator.open();

    Schema upstreamSchema = inputOperator.getOutputSchema();

    // The attribute to be processed has to exist in the upstream schema.
    boolean attributePresent = upstreamSchema.containsField(predicate.getInputAttributeName());
    if (!attributePresent) {
        String missingMessage = String.format("input attribute %s is not in the input schema %s",
                predicate.getInputAttributeName(), upstreamSchema.getAttributeNames());
        throw new DataFlowException(missingMessage);
    }

    // Only STRING and TEXT attributes are accepted.
    AttributeType actualType = upstreamSchema.getAttribute(predicate.getInputAttributeName()).getAttributeType();
    if (!actualType.equals(AttributeType.STRING) && !actualType.equals(AttributeType.TEXT)) {
        String wrongTypeMessage = String.format("input attribute %s must have type String or Text, its actual type is %s",
                predicate.getInputAttributeName(), actualType);
        throw new DataFlowException(wrongTypeMessage);
    }

    // Derive the output schema from the input schema; per the original note, the result
    // depends on which output format is chosen (OneToOne vs. OneToMany).
    outputSchema = transformSchema(inputOperator.getOutputSchema());
    cursor = OPENED;
}
Also used : AttributeType(edu.uci.ics.textdb.api.schema.AttributeType) Schema(edu.uci.ics.textdb.api.schema.Schema) DataFlowException(edu.uci.ics.textdb.api.exception.DataFlowException)

Example 5 with Schema

use of edu.uci.ics.textdb.api.schema.Schema in project textdb by TextDB.

the class ProjectionOperator method setUp.

/**
 * Computes the output schema of the projection.
 *
 * Reads the input operator's output schema and keeps every attribute whose lower-cased name is
 * contained in the predicate's projection fields. If the number of kept attributes differs
 * from the number of projection fields, the projection is rejected.
 *
 * @throws DataFlowException if the number of matching input attributes differs from the
 *         number of projection fields
 * @throws TextDBException declared by the signature
 */
@Override
protected void setUp() throws TextDBException {
    inputSchema = inputOperator.getOutputSchema();

    List<Attribute> projected = inputSchema.getAttributes()
            .stream()
            .filter(candidate -> {
                boolean requested = predicate.getProjectionFields()
                        .contains(candidate.getAttributeName().toLowerCase());
                return requested;
            })
            .collect(Collectors.toList());

    // Every projection field must have matched an attribute of the input schema.
    boolean countsAgree = projected.size() == predicate.getProjectionFields().size();
    if (!countsAgree) {
        throw new DataFlowException("input schema doesn't contain one of the attributes to be projected");
    }

    Attribute[] projectedArray = projected.toArray(new Attribute[0]);
    outputSchema = new Schema(projectedArray);
}
Also used : Schema(edu.uci.ics.textdb.api.schema.Schema) List(java.util.List) Attribute(edu.uci.ics.textdb.api.schema.Attribute) TextDBException(edu.uci.ics.textdb.api.exception.TextDBException) AbstractSingleInputOperator(edu.uci.ics.textdb.exp.common.AbstractSingleInputOperator) IField(edu.uci.ics.textdb.api.field.IField) DataFlowException(edu.uci.ics.textdb.api.exception.DataFlowException) Collectors(java.util.stream.Collectors) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) DataFlowException(edu.uci.ics.textdb.api.exception.DataFlowException)

Aggregations

Schema (edu.uci.ics.textdb.api.schema.Schema): 126, ArrayList (java.util.ArrayList): 98, Tuple (edu.uci.ics.textdb.api.tuple.Tuple): 95, IField (edu.uci.ics.textdb.api.field.IField): 94, Test (org.junit.Test): 94, Attribute (edu.uci.ics.textdb.api.schema.Attribute): 93, Span (edu.uci.ics.textdb.api.span.Span): 86, TextField (edu.uci.ics.textdb.api.field.TextField): 83, StringField (edu.uci.ics.textdb.api.field.StringField): 65, IntegerField (edu.uci.ics.textdb.api.field.IntegerField): 63, DoubleField (edu.uci.ics.textdb.api.field.DoubleField): 52, DateField (edu.uci.ics.textdb.api.field.DateField): 49, SimpleDateFormat (java.text.SimpleDateFormat): 48, Dictionary (edu.uci.ics.textdb.exp.dictionarymatcher.Dictionary): 24, ListField (edu.uci.ics.textdb.api.field.ListField): 14, DataFlowException (edu.uci.ics.textdb.api.exception.DataFlowException): 9, AttributeType (edu.uci.ics.textdb.api.schema.AttributeType): 9, JoinDistancePredicate (edu.uci.ics.textdb.exp.join.JoinDistancePredicate): 9, KeywordMatcherSourceOperator (edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator): 9, IDField (edu.uci.ics.textdb.api.field.IDField): 8