Search in sources :

Example 1 with DataReader

use of edu.uci.ics.texera.storage.DataReader in project textdb by TextDB.

the class WordCountIndexSource method computeWordCount.

/**
 * Builds the word-count result for the attribute named by the predicate.
 *
 * <p>Opens a match-all reader over the predicate's table, walks the term vector of
 * every document for the predicate's attribute, and sums the per-term frequencies
 * into a map. The entries are then sorted by count in descending order and stored
 * in {@code sortedWordCountMap}; {@code wordCountIterator} is reset to iterate
 * over that sorted list.
 *
 * @throws TexeraException a {@link DataflowException} wrapping any
 *         {@link IOException} raised while reading the Lucene index; other
 *         exceptions from the reader calls propagate unchanged.
 */
private void computeWordCount() throws TexeraException {
    try {
        HashMap<String, Integer> wordCountMap = new HashMap<>();
        DataReader dataReader = RelationManager.getInstance().getTableDataReader(predicate.getTableName(), new MatchAllDocsQuery());
        dataReader.open();
        try {
            IndexReader luceneIndexReader = dataReader.getLuceneIndexReader();
            try {
                for (int i = 0; i < luceneIndexReader.numDocs(); i++) {
                    Terms termVector = luceneIndexReader.getTermVector(i, predicate.getAttribute());
                    // Lucene returns null when the document has no term vector
                    // stored for this field; such a document contributes nothing.
                    if (termVector == null) {
                        continue;
                    }
                    TermsEnum termsEnum = termVector.iterator();
                    while (termsEnum.next() != null) {
                        String key = termsEnum.term().utf8ToString();
                        // Insert the frequency for a new term, or add it to the running total.
                        wordCountMap.merge(key, (int) termsEnum.totalTermFreq(), Integer::sum);
                    }
                }
            } finally {
                // Same close order as before (index reader first, then data reader),
                // but now also executed when the scan fails part-way through.
                luceneIndexReader.close();
            }
        } finally {
            dataReader.close();
        }
        // Highest counts first.
        sortedWordCountMap = wordCountMap.entrySet().stream().sorted((e1, e2) -> e2.getValue().compareTo(e1.getValue())).collect(Collectors.toList());
        wordCountIterator = sortedWordCountMap.iterator();
    } catch (IOException e) {
        throw new DataflowException(e);
    }
}
Also used : DataReader(edu.uci.ics.texera.storage.DataReader) HashMap(java.util.HashMap) IndexReader(org.apache.lucene.index.IndexReader) Terms(org.apache.lucene.index.Terms) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) IOException(java.io.IOException) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermsEnum(org.apache.lucene.index.TermsEnum)

Example 2 with DataReader

use of edu.uci.ics.texera.storage.DataReader in project textdb by TextDB.

the class PlanStore method getPlan.

/**
 * Retrieves a plan by the given name from the plan store.
 *
 * <p>Runs a term query on the plan-name field and returns the first tuple whose
 * name field, converted to a string, equals {@code planName} exactly.
 *
 * @param planName the name of the plan.
 * @return the tuple consisting of the fields of the plan, or {@code null} if no
 *         tuple with that exact name is found.
 * @throws TexeraException propagated from the underlying reader operations.
 */
public Tuple getPlan(String planName) throws TexeraException {
    Query q = new TermQuery(new Term(PlanStoreConstants.NAME, planName));
    DataReader reader = relationManager.getTableDataReader(PlanStoreConstants.TABLE_NAME, q);
    reader.open();
    try {
        Tuple inputTuple;
        while ((inputTuple = reader.getNextTuple()) != null) {
            IField nameField = inputTuple.getField(PlanStoreConstants.NAME);
            // Re-check the name on the tuple itself rather than trusting the query hit alone.
            if (nameField.getValue().toString().equals(planName)) {
                return inputTuple;
            }
        }
        return null;
    } finally {
        // Single close point: runs on a match, on no match, and when iteration throws.
        reader.close();
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) DataReader(edu.uci.ics.texera.storage.DataReader) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Term(org.apache.lucene.index.Term) IField(edu.uci.ics.texera.api.field.IField) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 3 with DataReader

use of edu.uci.ics.texera.storage.DataReader in project textdb by TextDB.

the class DictionaryManager method getDictionary.

/**
 * Returns the content of the named dictionary as a single comma-separated string.
 *
 * <p>First checks that the dictionary table contains at least one tuple matching
 * {@code dictionaryName}, then reads the file of the same name under
 * {@code DictionaryManagerConstants.DICTIONARY_DIR_PATH} and joins its lines
 * with {@code ","}.
 *
 * @param dictionaryName the name of the dictionary, also used as its file name.
 * @return the lines of the dictionary file joined by commas.
 * @throws StorageException if no matching entry exists in the dictionary table,
 *         or if the dictionary file cannot be read.
 */
public String getDictionary(String dictionaryName) throws StorageException {
    DataReader dataReader = relationManager.getTableDataReader(DictionaryManagerConstants.TABLE_NAME, new TermQuery(new Term(DictionaryManagerConstants.NAME, dictionaryName)));
    dataReader.open();
    boolean dictionaryExists;
    try {
        dictionaryExists = dataReader.getNextTuple() != null;
    } finally {
        // Close before deciding what to do, so the "does not exist" path no longer leaks the reader.
        dataReader.close();
    }
    if (!dictionaryExists) {
        throw new StorageException("Dictionary " + dictionaryName + " does not exist");
    }
    try {
        // readAllLines closes the file itself; the previous Files.lines stream was never closed.
        return String.join(",", Files.readAllLines(DictionaryManagerConstants.DICTIONARY_DIR_PATH.resolve(dictionaryName)));
    } catch (IOException e) {
        throw new StorageException(e);
    }
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) DataReader(edu.uci.ics.texera.storage.DataReader) Term(org.apache.lucene.index.Term) IOException(java.io.IOException) StorageException(edu.uci.ics.texera.api.exception.StorageException)

Example 4 with DataReader

use of edu.uci.ics.texera.storage.DataReader in project textdb by TextDB.

the class DictionaryManager method getDictionaries.

/**
 * Lists the names of all dictionaries recorded in the dictionary table.
 *
 * <p>Reads every tuple of the table with a match-all query and collects the
 * string value of each tuple's name field, in the order the reader returns them.
 *
 * @return the dictionary names; empty if the table has no tuples.
 * @throws StorageException propagated from the underlying reader operations.
 */
public List<String> getDictionaries() throws StorageException {
    List<String> dictionaries = new ArrayList<>();
    DataReader dataReader = relationManager.getTableDataReader(DictionaryManagerConstants.TABLE_NAME, new MatchAllDocsQuery());
    dataReader.open();
    try {
        Tuple tuple;
        while ((tuple = dataReader.getNextTuple()) != null) {
            dictionaries.add(tuple.getField(DictionaryManagerConstants.NAME).getValue().toString());
        }
    } finally {
        // Release the reader even when reading a tuple fails part-way through.
        dataReader.close();
    }
    return dictionaries;
}
Also used : DataReader(edu.uci.ics.texera.storage.DataReader) ArrayList(java.util.ArrayList) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 5 with DataReader

use of edu.uci.ics.texera.storage.DataReader in project textdb by TextDB.

the class PlanStoreTest method testPlanIterator.

/**
 * Stores 100 plans that alternate between the two sample logical plans, reads
 * them all back through the plan iterator, and checks that each returned plan is
 * equivalent to the one stored under the same index.
 */
@Test
public void testPlanIterator() throws TexeraException {
    final int planCount = 100;
    final String namePrefix = "plan_";
    String[] samplePlans = { logicalPlanJson1, logicalPlanJson2 };

    // Populate the store; even indexes get the first sample plan, odd indexes the second.
    List<String> expectedPlans = new ArrayList<>();
    for (int idx = 0; idx < planCount; idx++) {
        String planJson = samplePlans[idx % 2];
        expectedPlans.add(planJson);
        planStore.addPlan(namePrefix + idx, "basic plan " + idx, planJson);
    }

    // Read everything back, slotting each plan by the index encoded in its name,
    // since the iterator's order is not assumed to match insertion order.
    String[] returnedPlans = new String[expectedPlans.size()];
    DataReader planReader = planStore.getPlanIterator();
    planReader.open();
    for (Tuple current = planReader.getNextTuple(); current != null; current = planReader.getNextTuple()) {
        String storedName = current.getField(PlanStoreConstants.NAME).getValue().toString();
        String storedJson = current.getField(PlanStoreConstants.LOGICAL_PLAN_JSON).getValue().toString();
        int slot = Integer.parseInt(storedName.split("_")[1]);
        returnedPlans[slot] = storedJson;
    }
    planReader.close();

    int position = 0;
    for (String expected : expectedPlans) {
        assertPlanEquivalence(expected, returnedPlans[position]);
        position++;
    }
}
Also used : DataReader(edu.uci.ics.texera.storage.DataReader) ArrayList(java.util.ArrayList) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Aggregations

DataReader (edu.uci.ics.texera.storage.DataReader)6 Tuple (edu.uci.ics.texera.api.tuple.Tuple)4 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery)3 Term (org.apache.lucene.index.Term)2 TermQuery (org.apache.lucene.search.TermQuery)2 DataflowException (edu.uci.ics.texera.api.exception.DataflowException)1 StorageException (edu.uci.ics.texera.api.exception.StorageException)1 TexeraException (edu.uci.ics.texera.api.exception.TexeraException)1 IField (edu.uci.ics.texera.api.field.IField)1 TexeraWebException (edu.uci.ics.texera.web.TexeraWebException)1 QueryPlanBean (edu.uci.ics.texera.web.response.planstore.QueryPlanBean)1 QueryPlanListBean (edu.uci.ics.texera.web.response.planstore.QueryPlanListBean)1 HashMap (java.util.HashMap)1 IndexReader (org.apache.lucene.index.IndexReader)1 Terms (org.apache.lucene.index.Terms)1 TermsEnum (org.apache.lucene.index.TermsEnum)1 Query (org.apache.lucene.search.Query)1 Test (org.junit.Test)1