Search in sources:

Example 1 with DataWriter

use of edu.uci.ics.texera.storage.DataWriter in project textdb by TextDB.

the class ScanBasedSourceOperatorTest method setUp.

/**
 * Creates the people table and fills it with the sample people tuples
 * before the tests of this class run.
 *
 * @throws TexeraException propagated from creating the table or writing to it
 */
@BeforeClass
public static void setUp() throws TexeraException {
    RelationManager relationManager = RelationManager.getInstance();
    // create the people table and write tuples
    relationManager.createTable(PEOPLE_TABLE, TestUtils.getDefaultTestIndex().resolve(PEOPLE_TABLE), TestConstants.SCHEMA_PEOPLE, LuceneAnalyzerConstants.standardAnalyzerString());
    DataWriter peopleDataWriter = relationManager.getTableDataWriter(PEOPLE_TABLE);
    peopleDataWriter.open();
    try {
        for (Tuple tuple : TestConstants.getSamplePeopleTuples()) {
            peopleDataWriter.insertTuple(tuple);
        }
    } finally {
        // close the writer even when an insert fails, so a broken setup does not leave it open
        peopleDataWriter.close();
    }
}
Also used : Tuple(edu.uci.ics.texera.api.tuple.Tuple) RelationManager(edu.uci.ics.texera.storage.RelationManager) DataWriter(edu.uci.ics.texera.storage.DataWriter) BeforeClass(org.junit.BeforeClass)

Example 2 with DataWriter

use of edu.uci.ics.texera.storage.DataWriter in project textdb by TextDB.

the class KeywordTestHelper method writeTestTables.

/**
 * Creates the people, medline and Chinese people test tables and writes the
 * corresponding sample tuples into each of them, one table after another.
 *
 * Every writer is closed in a {@code finally} block so that a failing insert
 * does not leave the writer open.
 *
 * @throws TexeraException propagated from creating a table or writing to it
 */
public static void writeTestTables() throws TexeraException {
    RelationManager relationManager = RelationManager.getInstance();
    // create the people table and write tuples
    relationManager.createTable(PEOPLE_TABLE, TestUtils.getDefaultTestIndex().resolve(PEOPLE_TABLE), TestConstants.SCHEMA_PEOPLE, LuceneAnalyzerConstants.standardAnalyzerString());
    DataWriter peopleDataWriter = relationManager.getTableDataWriter(PEOPLE_TABLE);
    peopleDataWriter.open();
    try {
        for (Tuple tuple : TestConstants.getSamplePeopleTuples()) {
            peopleDataWriter.insertTuple(tuple);
        }
    } finally {
        peopleDataWriter.close();
    }
    // create the medline table and write tuples
    relationManager.createTable(MEDLINE_TABLE, TestUtils.getDefaultTestIndex().resolve(MEDLINE_TABLE), keywordTestConstants.SCHEMA_MEDLINE, LuceneAnalyzerConstants.standardAnalyzerString());
    DataWriter medDataWriter = relationManager.getTableDataWriter(MEDLINE_TABLE);
    medDataWriter.open();
    try {
        for (Tuple tuple : keywordTestConstants.getSampleMedlineRecord()) {
            medDataWriter.insertTuple(tuple);
        }
    } finally {
        medDataWriter.close();
    }
    // create the people table and write tuples in Chinese
    relationManager.createTable(CHINESE_TABLE, TestUtils.getDefaultTestIndex().resolve(CHINESE_TABLE), TestConstantsChinese.SCHEMA_PEOPLE, LuceneAnalyzerConstants.chineseAnalyzerString());
    DataWriter chineseDataWriter = relationManager.getTableDataWriter(CHINESE_TABLE);
    chineseDataWriter.open();
    try {
        for (Tuple tuple : TestConstantsChinese.getSamplePeopleTuples()) {
            chineseDataWriter.insertTuple(tuple);
        }
    } finally {
        chineseDataWriter.close();
    }
}
Also used : Tuple(edu.uci.ics.texera.api.tuple.Tuple) RelationManager(edu.uci.ics.texera.storage.RelationManager) DataWriter(edu.uci.ics.texera.storage.DataWriter)

Example 3 with DataWriter

use of edu.uci.ics.texera.storage.DataWriter in project textdb by TextDB.

the class NlpEntityTest method deleteData.

/**
 * Clears the one-sentence and two-sentence tables after each test case.
 *
 * Each writer is closed in a {@code finally} block so that a failing
 * {@code clearData()} does not leave the writer open.
 *
 * @throws TexeraException propagated from opening, clearing or closing a table writer
 */
@After
public void deleteData() throws TexeraException {
    RelationManager relationManager = RelationManager.getInstance();
    DataWriter oneSentenceDataWriter = relationManager.getTableDataWriter(ONE_SENTENCE_TABLE);
    oneSentenceDataWriter.open();
    try {
        oneSentenceDataWriter.clearData();
    } finally {
        oneSentenceDataWriter.close();
    }
    DataWriter twoSentenceDataWriter = relationManager.getTableDataWriter(TWO_SENTENCE_TABLE);
    twoSentenceDataWriter.open();
    try {
        twoSentenceDataWriter.clearData();
    } finally {
        twoSentenceDataWriter.close();
    }
}
Also used : RelationManager(edu.uci.ics.texera.storage.RelationManager) DataWriter(edu.uci.ics.texera.storage.DataWriter) After(org.junit.After)

Example 4 with DataWriter

use of edu.uci.ics.texera.storage.DataWriter in project textdb by TextDB.

the class MedlineIndexWriter method writeMedlineIndex.

/**
 * Reads the given file line by line, converts each line to a tuple with
 * {@code recordToTuple} and inserts it into the given table.
 *
 * A line that fails to convert or insert is reported via its stack trace and
 * skipped; the remaining lines are still processed. The reader and the table
 * writer are both closed even when reading the file fails.
 *
 * @param medlineFilepath path of the file to read, one record per line
 * @param tableName name of the table the tuples are written to
 * @throws IOException if the file cannot be opened or read
 * @throws StorageException declared by the original signature; presumably raised by the table writer
 * @throws ParseException declared by the original signature
 */
public static void writeMedlineIndex(Path medlineFilepath, String tableName) throws IOException, StorageException, ParseException {
    RelationManager relationManager = RelationManager.getInstance();
    DataWriter dataWriter = relationManager.getTableDataWriter(tableName);
    dataWriter.open();
    // the reader is closed first (try-with-resources), then the writer (finally),
    // which keeps the original close order on the success path
    try (BufferedReader reader = Files.newBufferedReader(medlineFilepath)) {
        String line;
        while ((line = reader.readLine()) != null) {
            try {
                dataWriter.insertTuple(recordToTuple(line));
            } catch (Exception e) {
                // best effort: one bad record must not abort the whole import
                e.printStackTrace();
            }
        }
    } finally {
        dataWriter.close();
    }
}
Also used : BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) StorageException(edu.uci.ics.texera.api.exception.StorageException) ParseException(java.text.ParseException) RelationManager(edu.uci.ics.texera.storage.RelationManager) DataWriter(edu.uci.ics.texera.storage.DataWriter)

Example 5 with DataWriter

use of edu.uci.ics.texera.storage.DataWriter in project textdb by TextDB.

the class SampleExtraction method writeSampleIndex.

/**
 * Parses every file in {@code promedFilesDirectory} with {@code parsePromedHTML}
 * and writes the resulting tuples into a freshly re-created
 * {@code PROMED_SAMPLE_TABLE}.
 *
 * Files for which {@code parsePromedHTML} returns {@code null} are skipped.
 *
 * @throws IllegalStateException if the source directory cannot be listed
 * @throws Exception propagated from reading the files or from the table operations
 */
public static void writeSampleIndex() throws Exception {
    // parse the original file
    File sourceFileFolder = new File(promedFilesDirectory);
    File[] htmlFiles = sourceFileFolder.listFiles();
    if (htmlFiles == null) {
        // listFiles() returns null when the path is not a directory or an I/O error occurs
        throw new IllegalStateException("Cannot list files in directory: " + promedFilesDirectory);
    }
    ArrayList<Tuple> fileTuples = new ArrayList<>();
    for (File htmlFile : htmlFiles) {
        StringBuilder sb = new StringBuilder();
        // try-with-resources closes the scanner even if reading fails
        try (Scanner scanner = new Scanner(htmlFile)) {
            while (scanner.hasNext()) {
                sb.append(scanner.nextLine());
            }
        }
        Tuple tuple = parsePromedHTML(htmlFile.getName(), sb.toString());
        if (tuple != null) {
            fileTuples.add(tuple);
        }
    }
    // write tuples into the table
    RelationManager relationManager = RelationManager.getInstance();
    relationManager.deleteTable(PROMED_SAMPLE_TABLE);
    relationManager.createTable(PROMED_SAMPLE_TABLE, Paths.get(promedIndexDirectory), PromedSchema.PROMED_SCHEMA, LuceneAnalyzerConstants.standardAnalyzerString());
    DataWriter dataWriter = relationManager.getTableDataWriter(PROMED_SAMPLE_TABLE);
    dataWriter.open();
    try {
        for (Tuple tuple : fileTuples) {
            dataWriter.insertTuple(tuple);
        }
    } finally {
        // close the writer even when an insert fails
        dataWriter.close();
    }
}
Also used : Scanner(java.util.Scanner) ArrayList(java.util.ArrayList) File(java.io.File) Tuple(edu.uci.ics.texera.api.tuple.Tuple) RelationManager(edu.uci.ics.texera.storage.RelationManager) DataWriter(edu.uci.ics.texera.storage.DataWriter)

Aggregations

DataWriter (edu.uci.ics.texera.storage.DataWriter): 37; Tuple (edu.uci.ics.texera.api.tuple.Tuple): 33; RelationManager (edu.uci.ics.texera.storage.RelationManager): 19; Test (org.junit.Test): 11; ArrayList (java.util.ArrayList): 10; BeforeClass (org.junit.BeforeClass): 8; StorageException (edu.uci.ics.texera.api.exception.StorageException): 3; IDField (edu.uci.ics.texera.api.field.IDField): 3; StringField (edu.uci.ics.texera.api.field.StringField): 3; IOException (java.io.IOException): 3; JsonNode (com.fasterxml.jackson.databind.JsonNode): 2; ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 2; Term (org.apache.lucene.index.Term): 2; TermQuery (org.apache.lucene.search.TermQuery): 2; TexeraException (edu.uci.ics.texera.api.exception.TexeraException): 1; IField (edu.uci.ics.texera.api.field.IField): 1; TupleSink (edu.uci.ics.texera.dataflow.sink.tuple.TupleSink): 1; TupleSinkPredicate (edu.uci.ics.texera.dataflow.sink.tuple.TupleSinkPredicate): 1; TwitterJsonConverter (edu.uci.ics.texera.dataflow.twitter.TwitterJsonConverter): 1; TwitterJsonConverterPredicate (edu.uci.ics.texera.dataflow.twitter.TwitterJsonConverterPredicate): 1