Use of edu.uci.ics.textdb.api.schema.Schema in project textdb by TextDB.
Class TupleJsonDeserializer, method deserialize().
@Override
public Tuple deserialize(JsonParser p, DeserializationContext ctxt) throws IOException, JsonProcessingException {
    // Read the whole tuple as a JSON tree, then deserialize the schema first:
    // the schema tells us which concrete IField class each field node maps to.
    JsonNode node = p.getCodec().readTree(p);
    JsonNode schemaNode = node.get(JsonConstants.SCHEMA);
    JsonNode fieldsNode = node.get(JsonConstants.FIELDS);

    // Fix: reuse one ObjectMapper instead of constructing a new one per field.
    // ObjectMapper creation is expensive; a single instance is reusable (and
    // thread-safe once configured).
    ObjectMapper mapper = new ObjectMapper();
    Schema schema = mapper.treeToValue(schemaNode, Schema.class);

    ArrayList<IField> fields = new ArrayList<>(schema.getAttributes().size());
    for (int i = 0; i < schema.getAttributes().size(); i++) {
        // The attribute type determines the concrete IField implementation
        // to deserialize this field node into.
        AttributeType attributeType = schema.getAttributes().get(i).getAttributeType();
        JsonNode fieldNode = fieldsNode.get(i);
        IField field = mapper.treeToValue(fieldNode, attributeType.getFieldClass());
        fields.add(field);
    }
    return new Tuple(schema, fields);
}
Use of edu.uci.ics.textdb.api.schema.Schema in project textdb by TextDB.
Class Utils, method removeFields().
/**
 * Removes one or more fields from a tuple, producing a new tuple whose
 * schema and field list exclude the named attributes. The input tuple is
 * not modified; attribute order of the remaining fields is preserved.
 *
 * @param tuple the source tuple
 * @param removeFields the attribute names to drop
 * @return a new Tuple without the removed attributes
 */
public static Tuple removeFields(Tuple tuple, String... removeFields) {
    List<String> removeFieldList = Arrays.asList(removeFields);
    // Hoist the schema lookup; it was fetched three times in the original.
    Schema schema = tuple.getSchema();
    // Fix: renamed misspelled local "removedFeidsIndex".
    List<Integer> removedFieldIndexes = removeFieldList.stream()
            .map(attributeName -> schema.getIndex(attributeName))
            .collect(Collectors.toList());
    Attribute[] newAttrs = schema.getAttributes().stream()
            .filter(attr -> !removeFieldList.contains(attr.getAttributeName()))
            .toArray(Attribute[]::new);
    Schema newSchema = new Schema(newAttrs);
    // Keep only the fields whose positions were not marked for removal.
    IField[] newFields = IntStream.range(0, schema.getAttributes().size())
            .filter(index -> !removedFieldIndexes.contains(index))
            .mapToObj(index -> tuple.getField(index))
            .toArray(IField[]::new);
    return new Tuple(newSchema, newFields);
}
Use of edu.uci.ics.textdb.api.schema.Schema in project textdb by TextDB.
Class NlpSentimentOperator, method open().
@Override
public void open() throws TextDBException {
    // Opening an already-open operator is a no-op.
    if (cursor != CLOSED) {
        return;
    }
    if (inputOperator == null) {
        throw new DataFlowException(ErrorMessages.INPUT_OPERATOR_NOT_SPECIFIED);
    }
    inputOperator.open();
    Schema inputSchema = inputOperator.getOutputSchema();
    // check if the input attribute is present in the input schema
    if (!inputSchema.containsField(predicate.getInputAttributeName())) {
        // Fix: throw DataFlowException (a TextDBException) instead of raw
        // RuntimeException, matching the declared throws clause and the
        // error handling of sibling operators (e.g. NlpSplitOperator.open).
        throw new DataFlowException(String.format("input attribute %s is not in the input schema %s", predicate.getInputAttributeName(), inputSchema.getAttributeNames()));
    }
    // check if attribute type is valid: sentiment analysis needs textual input
    AttributeType inputAttributeType = inputSchema.getAttribute(predicate.getInputAttributeName()).getAttributeType();
    boolean isValidType = inputAttributeType.equals(AttributeType.STRING) || inputAttributeType.equals(AttributeType.TEXT);
    if (!isValidType) {
        // Fix: DataFlowException for the same reason as above.
        throw new DataFlowException(String.format("input attribute %s must have type String or Text, its actual type is %s", predicate.getInputAttributeName(), inputAttributeType));
    }
    // generate output schema by transforming the input schema
    outputSchema = transformSchema(inputOperator.getOutputSchema());
    cursor = OPENED;
    // set up the Stanford CoreNLP sentiment analysis pipeline
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
    sentimentPipeline = new StanfordCoreNLP(props);
}
Use of edu.uci.ics.textdb.api.schema.Schema in project textdb by TextDB.
Class NlpSplitOperator, method open().
@Override
public void open() throws TextDBException {
    // Already open (any non-closed state): nothing to do.
    if (cursor != CLOSED) {
        return;
    }
    if (inputOperator == null) {
        throw new DataFlowException(ErrorMessages.INPUT_OPERATOR_NOT_SPECIFIED);
    }
    inputOperator.open();

    String attributeName = predicate.getInputAttributeName();
    Schema inputSchema = inputOperator.getOutputSchema();

    // The attribute to split on must exist in the upstream schema.
    if (!inputSchema.containsField(attributeName)) {
        throw new DataFlowException(String.format("input attribute %s is not in the input schema %s", attributeName, inputSchema.getAttributeNames()));
    }

    // Splitting only applies to String/Text attributes.
    AttributeType attributeType = inputSchema.getAttribute(attributeName).getAttributeType();
    if (!(attributeType.equals(AttributeType.STRING) || attributeType.equals(AttributeType.TEXT))) {
        throw new DataFlowException(String.format("input attribute %s must have type String or Text, its actual type is %s", attributeName, attributeType));
    }

    // generate output schema by transforming the input schema based on what output format
    // is chosen (OneToOne vs. OneToMany)
    outputSchema = transformSchema(inputOperator.getOutputSchema());
    cursor = OPENED;
}
Use of edu.uci.ics.textdb.api.schema.Schema in project textdb by TextDB.
Class ProjectionOperator, method setUp().
@Override
protected void setUp() throws TextDBException {
    inputSchema = inputOperator.getOutputSchema();
    // Keep only the input attributes whose lower-cased names appear in the
    // projection list; output order follows the input schema.
    // NOTE(review): the comparison lower-cases the attribute name but not the
    // projection fields — presumably the predicate stores lowercase names;
    // confirm against ProjectionPredicate.
    Attribute[] outputAttributes = inputSchema.getAttributes().stream()
            .filter(attr -> predicate.getProjectionFields().contains(attr.getAttributeName().toLowerCase()))
            .toArray(Attribute[]::new);
    // Every requested projection field must have matched exactly one attribute.
    if (outputAttributes.length != predicate.getProjectionFields().size()) {
        throw new DataFlowException("input schema doesn't contain one of the attributes to be projected");
    }
    outputSchema = new Schema(outputAttributes);
}
Aggregations