Use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.
Class LineChartSink, method open.
/**
 * Opens this sink. If the cursor is not CLOSED the call is a no-op.
 * Otherwise the input operator is opened, the configured name column and
 * data columns are looked up in the input operator's output schema and
 * type-checked, and the output schema is built from the collected attributes.
 *
 * @throws TexeraException if no input operator has been specified
 * @throws DataflowException if the name column is neither STRING nor TEXT,
 *         or if a data column is neither DOUBLE nor INTEGER
 */
@Override
public void open() throws TexeraException {
    if (cursor != CLOSED) {
        return;
    }
    if (inputOperator == null) {
        throw new TexeraException(ErrorMessages.INPUT_OPERATOR_NOT_SPECIFIED);
    }
    inputOperator.open();
    Schema inputSchema = inputOperator.getOutputSchema();

    // The name column labels the chart entries, so it has to be textual.
    Attribute nameAttribute = inputSchema.getAttribute(predicate.getNameColumn());
    AttributeType nameType = nameAttribute.getType();
    boolean nameIsTextual = nameType.equals(AttributeType.STRING) || nameType.equals(AttributeType.TEXT);
    if (!nameIsTextual) {
        throw new DataflowException("Type of name column should be string or text.");
    }
    attributes.add(nameAttribute);

    // Every data column has to be numeric.
    for (String dataColumnName : predicate.getDataColumn()) {
        Attribute dataAttribute = inputSchema.getAttribute(dataColumnName);
        AttributeType dataType = dataAttribute.getType();
        boolean dataIsNumeric = dataType.equals(AttributeType.DOUBLE) || dataType.equals(AttributeType.INTEGER);
        if (!dataIsNumeric) {
            throw new DataflowException("Type of data column should be integer or double.");
        }
        attributes.add(dataAttribute);
    }

    outputSchema = new Schema.Builder().add(attributes).build();
    cursor = OPENED;
}
Use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.
Class DictionaryMatcher, method appendConjunctionMatchingSpans4Dictionary.
/**
 * Collects the spans of {@code inputTuple} that match the dictionary under
 * conjunction matching, looking only at the given attributes.
 *
 * <ul>
 * <li>STRING attribute: matches when {@code queryList} contains the complete
 *     field value; one span covering the whole value is produced.</li>
 * <li>TEXT attribute: for each entry of the relevant-span map (built from the
 *     tuple's payload), the spans belonging to this attribute are added when
 *     they contain every token of the corresponding query token set.</li>
 * </ul>
 *
 * @param inputTuple        tuple to match; must carry a payload field
 * @param attributeNames    names of the attributes to inspect
 * @param queryTokenSetList token sets of the dictionary entries, indexed by the keys of the relevant-span map
 * @param queryList         dictionary entries, compared against whole STRING values
 * @return the matching spans; empty if nothing matches
 * @throws DataflowException if an attribute is neither STRING nor TEXT
 */
private List<Span> appendConjunctionMatchingSpans4Dictionary(Tuple inputTuple, List<String> attributeNames, List<Set<String>> queryTokenSetList, List<String> queryList) throws DataflowException {
    List<Span> matchingResults = new ArrayList<>();
    ListField<Span> payloadField = inputTuple.getField(SchemaConstants.PAYLOAD);
    List<Span> payload = payloadField.getValue();
    Map<Integer, List<Span>> relevantSpansMap = filterRelevantSpans(payload, queryTokenSetList);

    for (String attributeName : attributeNames) {
        AttributeType attributeType = inputTuple.getSchema().getAttribute(attributeName).getType();

        // Reject unsupported types before touching the field value.
        if (attributeType != AttributeType.STRING && attributeType != AttributeType.TEXT) {
            throw new DataflowException("DictionaryMatcher: Fields other than STRING and TEXT are not supported yet");
        }

        if (attributeType == AttributeType.STRING) {
            // STRING: the dictionary must contain the complete field value.
            // The value is only needed on this branch, so it is read here.
            String fieldValue = inputTuple.getField(attributeName).getValue().toString();
            if (queryList.contains(fieldValue)) {
                matchingResults.add(new Span(attributeName, 0, fieldValue.length(), fieldValue, fieldValue));
            }
        } else {
            // TEXT: every token of the query must be present among the spans of this attribute.
            for (Map.Entry<Integer, List<Span>> entry : relevantSpansMap.entrySet()) {
                List<Span> fieldSpanList = entry.getValue().stream()
                        .filter(span -> span.getAttributeName().equals(attributeName))
                        .collect(Collectors.toList());
                if (DataflowUtils.isAllQueryTokensPresent(fieldSpanList, queryTokenSetList.get(entry.getKey()))) {
                    matchingResults.addAll(fieldSpanList);
                }
            }
        }
    }
    return matchingResults;
}
Use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.
Class DataWriter, method getLuceneDocument.
/**
 * Converts a Texera tuple to a Lucene document.
 *
 * Fields and schema attributes are paired by position: the i-th field is
 * stored under the name and type of the i-th attribute of the tuple's schema.
 *
 * @param tuple the tuple to convert
 * @return a Lucene document holding one Lucene field per tuple field
 */
private static Document getLuceneDocument(Tuple tuple) {
    List<IField> tupleFields = tuple.getFields();
    List<Attribute> schemaAttributes = tuple.getSchema().getAttributes();
    Document luceneDocument = new Document();

    int position = 0;
    while (position < tupleFields.size()) {
        IField currentField = tupleFields.get(position);
        Attribute currentAttribute = schemaAttributes.get(position);
        AttributeType currentType = currentAttribute.getType();
        luceneDocument.add(
                StorageUtils.getLuceneField(currentType, currentAttribute.getName(), currentField.getValue()));
        position++;
    }
    return luceneDocument;
}
Use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.
Class NlpSplitOperator, method open.
/**
 * Opens this operator. If the cursor is not CLOSED the call is a no-op.
 * Otherwise the input operator is opened, the output schema is derived from
 * the input operator's output schema, and the configured input attribute is
 * checked to be of type STRING or TEXT.
 *
 * @throws DataflowException if no input operator has been specified, or if
 *         the input attribute is neither STRING nor TEXT
 */
@Override
public void open() throws TexeraException {
    if (cursor != CLOSED) {
        return;
    }
    if (inputOperator == null) {
        throw new DataflowException(ErrorMessages.INPUT_OPERATOR_NOT_SPECIFIED);
    }
    inputOperator.open();
    Schema inputSchema = inputOperator.getOutputSchema();

    // Derive the output schema from the input schema; the result depends on
    // the chosen output format (OneToOne vs. OneToMany).
    outputSchema = transformToOutputSchema(inputOperator.getOutputSchema());

    // Only textual attributes can be split.
    AttributeType inputAttributeType = inputSchema.getAttribute(predicate.getInputAttributeName()).getType();
    switch (inputAttributeType) {
        case STRING:
        case TEXT:
            break;
        default:
            throw new DataflowException(String.format("input attribute %s must have type String or Text, its actual type is %s", predicate.getInputAttributeName(), inputAttributeType));
    }
    cursor = OPENED;
}
Use of edu.uci.ics.texera.api.schema.AttributeType in project textdb by TextDB.
Class RegexMatcher, method computeMatchingResultsWithPattern.
/**
 * Runs {@code pattern} over every attribute named by {@code predicate} and
 * returns one span per regex match.
 *
 * Each span records the attribute name, the match's start and end offsets
 * within the field value, the predicate's regex as the key, and the matched
 * substring as the value.
 *
 * @param inputTuple tuple whose fields are searched
 * @param predicate  supplies the attribute names to search and the regex used as span key
 * @param pattern    compiled pattern to match against each field value
 * @return all match spans, in attribute order then match order; empty if nothing matches
 * @throws DataflowException if an attribute is neither STRING nor TEXT
 */
public static List<Span> computeMatchingResultsWithPattern(Tuple inputTuple, RegexPredicate predicate, Pattern pattern) {
    List<Span> matchingResults = new ArrayList<>();
    for (String attributeName : predicate.getAttributeNames()) {
        AttributeType attributeType = inputTuple.getSchema().getAttribute(attributeName).getType();

        // Reject unsupported types before reading and stringifying the field value.
        if (attributeType != AttributeType.STRING && attributeType != AttributeType.TEXT) {
            throw new DataflowException("RegexMatcher: Fields other than STRING and TEXT are not supported yet");
        }

        String fieldValue = inputTuple.getField(attributeName).getValue().toString();
        Matcher javaMatcher = pattern.matcher(fieldValue);
        while (javaMatcher.find()) {
            int start = javaMatcher.start();
            int end = javaMatcher.end();
            matchingResults.add(new Span(attributeName, start, end, predicate.getRegex(), fieldValue.substring(start, end)));
        }
    }
    return matchingResults;
}
Aggregations