Search in sources :

Example 26 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class NltkSentimentOperator method popupOneTuple.

/**
 * Takes the first tuple out of the buffer and returns a copy of it extended with one
 * extra integer field. The integer is looked up in idClassMap using the string form of
 * the tuple's _ID field value. When the buffer becomes empty it is reset to null.
 *
 * @return a new tuple built on outputSchema: the original fields followed by an IntegerField.
 */
private Tuple popupOneTuple() {
    // remove(0) hands back the element it removed, so a separate get(0) is unnecessary.
    Tuple head = tupleBuffer.remove(0);
    if (tupleBuffer.isEmpty()) {
        tupleBuffer = null;
    }
    String tupleId = head.getField(SchemaConstants._ID).getValue().toString();
    Integer label = idClassMap.get(tupleId);
    // Copy the existing fields, then append the looked-up value as the last field.
    List<IField> extendedFields = new ArrayList<>(head.getFields());
    extendedFields.add(new IntegerField(label));
    return new Tuple(outputSchema, extendedFields);
}
Also used : ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 27 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class PlanStore method updatePlanInternal.

/**
 * Updates the description and/or the logical plan JSON of the plan with the given name.
 * A null description keeps the stored description; a null logicalPlanJson keeps the
 * stored JSON. Nothing is written when no plan with that name exists, or when both
 * description and logicalPlanJson are null.
 *
 * @param planName, the name of the plan.
 * @param description, the new description of the plan, or null to keep the current one.
 * @param logicalPlanJson, the new plan json string, or null to keep the current one.
 * @throws TexeraException declared by this method; a StorageException is thrown when
 *         logicalPlanJson cannot be parsed as JSON.
 */
private void updatePlanInternal(String planName, String description, String logicalPlanJson) throws TexeraException {
    Tuple existingPlan = getPlan(planName);
    if (existingPlan == null) {
        return;
    }
    // Checking if an updated description or logical plan JSON string has been provided
    if (description == null && logicalPlanJson == null) {
        return;
    }
    // Checking if the logical plan JSON string needs to be updated
    if (logicalPlanJson != null) {
        // Parsing validates the string; writing it back stores the mapper's own serialization of it
        try {
            ObjectMapper objectMapper = new ObjectMapper();
            JsonNode jsonNode = objectMapper.readValue(logicalPlanJson, JsonNode.class);
            logicalPlanJson = objectMapper.writeValueAsString(jsonNode);
        } catch (IOException e) {
            throw new StorageException("logical plan json is an invalid json string: " + logicalPlanJson);
        }
    }
    // Getting the fields in order for performing the update; values not supplied are carried over
    IDField idField = (IDField) existingPlan.getField(SchemaConstants._ID);
    IField descriptionField = description != null ? new StringField(description) : existingPlan.getField(PlanStoreConstants.DESCRIPTION);
    IField logicalPlanJsonField = logicalPlanJson != null ? new StringField(logicalPlanJson) : existingPlan.getField(PlanStoreConstants.LOGICAL_PLAN_JSON);
    // Creating a tuple out of all the fields
    Tuple newTuple = new Tuple(PlanStoreConstants.SCHEMA_PLAN, new StringField(planName), descriptionField, logicalPlanJsonField);
    // Writing the updated tuple
    DataWriter dataWriter = relationManager.getTableDataWriter(PlanStoreConstants.TABLE_NAME);
    dataWriter.open();
    try {
        dataWriter.updateTuple(newTuple, idField);
    } finally {
        // Close the writer even when the update throws, so it is never left open.
        dataWriter.close();
    }
}
Also used : IDField(edu.uci.ics.texera.api.field.IDField) StringField(edu.uci.ics.texera.api.field.StringField) JsonNode(com.fasterxml.jackson.databind.JsonNode) IOException(java.io.IOException) IField(edu.uci.ics.texera.api.field.IField) StorageException(edu.uci.ics.texera.api.exception.StorageException) Tuple(edu.uci.ics.texera.api.tuple.Tuple) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DataWriter(edu.uci.ics.texera.storage.DataWriter)

Example 28 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class PlanStore method getPlan.

/**
 * Retrieves a plan by given name from plan store.
 * The table is queried with a term query on the name attribute, and each returned tuple
 * is additionally checked for an exact match of its name field against planName.
 *
 * @param planName, the name of the plan.
 * @return the tuple consisting of fields of the plan, or null if no tuple with exactly
 *         that name is found.
 * @throws TexeraException declared by this method; presumably raised by the table
 *         reader (confirm against DataReader).
 */
public Tuple getPlan(String planName) throws TexeraException {
    Query q = new TermQuery(new Term(PlanStoreConstants.NAME, planName));
    DataReader reader = relationManager.getTableDataReader(PlanStoreConstants.TABLE_NAME, q);
    reader.open();
    try {
        Tuple inputTuple;
        while ((inputTuple = reader.getNextTuple()) != null) {
            IField nameField = inputTuple.getField(PlanStoreConstants.NAME);
            if (nameField.getValue().toString().equals(planName)) {
                return inputTuple;
            }
        }
        return null;
    } finally {
        // Single close point: runs on a match, on no match, and when reading throws.
        reader.close();
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) DataReader(edu.uci.ics.texera.storage.DataReader) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) IField(edu.uci.ics.texera.api.field.IField) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 29 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class RegexSplitOperator method computeSentenceList.

/**
 * Splits the text of the predicate's input attribute into spans at the places where the
 * predicate's regex matches.
 *
 * Cut positions are collected as a flat list that is consumed in (start, end) pairs:
 * it begins with 0, every cut position is stored twice (closing one piece and opening
 * the next), and it ends with the text length. Where the cuts fall depends on the
 * split type:
 *   GROUP_RIGHT - cut at the match start, unless the match starts at offset 0;
 *   GROUP_LEFT  - cut at the match end;
 *   STANDALONE  - cut at the match start (unless 0) and at the match end (unless it is
 *                 the end of the text).
 *
 * @param inputTuple the tuple whose input attribute is split.
 * @return one Span per piece, carrying the attribute name, the start and end offsets,
 *         the REGEX_SPLIT_KEY key and the piece's text.
 */
private List<Span> computeSentenceList(Tuple inputTuple) {
    String attributeName = predicate.getInputAttributeName();
    String inputText = inputTuple.<IField>getField(attributeName).getValue().toString();
    int textLength = inputText.length();

    // Find every regex match in the text and record the cut positions it produces.
    Matcher regexMatcher = Pattern.compile(predicate.getRegex()).matcher(inputText);
    List<Integer> boundaries = new ArrayList<Integer>();
    boundaries.add(0);
    while (regexMatcher.find()) {
        RegexSplitPredicate.SplitType splitType = predicate.getSplitType();
        int matchStart = regexMatcher.start();
        int matchEnd = regexMatcher.end();
        if (splitType == RegexSplitPredicate.SplitType.GROUP_RIGHT) {
            if (matchStart != 0) {
                boundaries.add(matchStart);
                boundaries.add(matchStart);
            }
        } else if (splitType == RegexSplitPredicate.SplitType.GROUP_LEFT) {
            boundaries.add(matchEnd);
            boundaries.add(matchEnd);
        } else if (splitType == RegexSplitPredicate.SplitType.STANDALONE) {
            if (matchStart != 0) {
                boundaries.add(matchStart);
                boundaries.add(matchStart);
            }
            if (matchEnd < textLength) {
                boundaries.add(matchEnd);
                boundaries.add(matchEnd);
            }
        }
    }
    boundaries.add(textLength);

    // Turn the (start, end) pairs into spans.
    List<Span> pieces = new ArrayList<Span>();
    String key = PropertyNameConstants.REGEX_SPLIT_KEY;
    int cursor = 0;
    while (cursor < boundaries.size() - 1) {
        int from = boundaries.get(cursor);
        int to = boundaries.get(cursor + 1);
        if (from <= to) {
            String pieceText = inputText.substring(from, to);
            pieces.add(new Span(attributeName, from, from + pieceText.length(), key, pieceText));
            // A consumed pair moves the cursor past both of its entries.
            cursor += 2;
        } else {
            cursor++;
        }
    }
    return pieces;
}
Also used : Pattern(java.util.regex.Pattern) Matcher(java.util.regex.Matcher) ArrayList(java.util.ArrayList) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span)

Example 30 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class MysqlSink method processTuples.

/**
 * Insert tuples into mysql database using prepared statement. No output.
 *
 * The INSERT statement has one "?" placeholder per attribute of outputSchema. For each
 * tuple returned by getNextTuple(), the fields are read in schema attribute order,
 * handed to prepareField together with their zero-based position, and the statement
 * is then executed.
 *
 * @throws TexeraException declared by this method; a DataflowException is thrown when
 *         a SQLException occurs while preparing or executing the statement.
 */
@Override
public void processTuples() throws TexeraException {
    // Build the comma-separated placeholder list, e.g. "?,?,?" for three attributes.
    int columnCount = outputSchema.getAttributeNames().size();
    StringBuilder placeholders = new StringBuilder();
    for (int c = 0; c < columnCount; c++) {
        if (c > 0) {
            placeholders.append(",");
        }
        placeholders.append("?");
    }
    String insertSql = "INSERT INTO " + predicate.getTable() + " VALUES(" + placeholders + ");";
    try {
        prepStatement = connection.prepareStatement(insertSql);
        for (Tuple current = this.getNextTuple(); current != null; current = this.getNextTuple()) {
            // Collect all fields first, then pass them to prepareField one by one.
            List<IField> rowFields = new ArrayList<>();
            for (String attributeName : outputSchema.getAttributeNames()) {
                rowFields.add(current.getField(attributeName));
            }
            int position = 0;
            for (IField field : rowFields) {
                prepareField(position, field);
                position++;
            }
            prepStatement.executeUpdate();
        }
    } catch (SQLException e) {
        throw new DataflowException("MysqlSink processTuples fails to execute prepared statement. " + e.getMessage());
    }
}
Also used : SQLException(java.sql.SQLException) ArrayList(java.util.ArrayList) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) IField(edu.uci.ics.texera.api.field.IField) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Aggregations

IField (edu.uci.ics.texera.api.field.IField)145 ArrayList (java.util.ArrayList)113 Tuple (edu.uci.ics.texera.api.tuple.Tuple)106 TextField (edu.uci.ics.texera.api.field.TextField)100 Span (edu.uci.ics.texera.api.span.Span)99 Schema (edu.uci.ics.texera.api.schema.Schema)92 Test (org.junit.Test)84 StringField (edu.uci.ics.texera.api.field.StringField)79 IntegerField (edu.uci.ics.texera.api.field.IntegerField)78 DoubleField (edu.uci.ics.texera.api.field.DoubleField)63 DateField (edu.uci.ics.texera.api.field.DateField)58 Attribute (edu.uci.ics.texera.api.schema.Attribute)57 SimpleDateFormat (java.text.SimpleDateFormat)56 ListField (edu.uci.ics.texera.api.field.ListField)32 Dictionary (edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary)29 JoinDistancePredicate (edu.uci.ics.texera.dataflow.join.JoinDistancePredicate)9 KeywordMatcherSourceOperator (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator)9 IOperator (edu.uci.ics.texera.api.dataflow.IOperator)7 JsonNode (com.fasterxml.jackson.databind.JsonNode)4 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)4