Search in sources :

Example 16 with RelationManager

use of edu.uci.ics.texera.storage.RelationManager in project textdb by TextDB.

the class ScanBasedSourceOperatorTest method cleanUp.

/**
 * Deletes the table named by {@code PEOPLE_TABLE} through the shared
 * {@link RelationManager} instance. Runs as the JUnit class-level teardown.
 *
 * @throws Exception if obtaining the relation manager or deleting the table fails
 */
@AfterClass
public static void cleanUp() throws Exception {
    RelationManager.getInstance().deleteTable(PEOPLE_TABLE);
}
Also used : RelationManager(edu.uci.ics.texera.storage.RelationManager) AfterClass(org.junit.AfterClass)

Example 17 with RelationManager

use of edu.uci.ics.texera.storage.RelationManager in project textdb by TextDB.

the class MedlineIndexWriter method writeMedlineIndex.

/**
 * Reads the file at {@code medlineFilepath} line by line, converts every line with
 * {@code recordToTuple} and inserts the resulting tuple into the table {@code tableName}.
 *
 * <p>A line that cannot be converted or inserted is reported via its stack trace and
 * skipped; the remaining lines are still processed. The file reader and the data writer
 * are always closed, even when reading the file fails part-way through.
 *
 * @param medlineFilepath path of the file whose lines are written to the table
 * @param tableName       name of the table the tuples are inserted into
 * @throws IOException      if the file cannot be opened or read
 * @throws StorageException declared for the storage calls made through the data writer
 * @throws ParseException   declared by the original signature; kept for caller compatibility
 */
public static void writeMedlineIndex(Path medlineFilepath, String tableName) throws IOException, StorageException, ParseException {
    RelationManager relationManager = RelationManager.getInstance();
    DataWriter dataWriter = relationManager.getTableDataWriter(tableName);
    dataWriter.open();
    // try-with-resources closes the reader first; the finally block then closes the
    // writer, so neither resource leaks if opening or reading the file throws.
    try (BufferedReader reader = Files.newBufferedReader(medlineFilepath)) {
        String line;
        while ((line = reader.readLine()) != null) {
            try {
                dataWriter.insertTuple(recordToTuple(line));
            } catch (Exception e) {
                // Best effort: one bad record must not abort the whole import.
                e.printStackTrace();
            }
        }
    } finally {
        dataWriter.close();
    }
}
Also used : BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) StorageException(edu.uci.ics.texera.api.exception.StorageException) ParseException(java.text.ParseException) RelationManager(edu.uci.ics.texera.storage.RelationManager) DataWriter(edu.uci.ics.texera.storage.DataWriter)

Example 18 with RelationManager

use of edu.uci.ics.texera.storage.RelationManager in project textdb by TextDB.

the class SampleExtraction method writeSampleIndex.

/**
 * Parses every regular file in {@code promedFilesDirectory} with {@code parsePromedHTML}
 * and writes the resulting tuples into a freshly re-created {@code PROMED_SAMPLE_TABLE}.
 *
 * <p>The content of each file is concatenated line by line (without line separators)
 * before parsing. Files for which {@code parsePromedHTML} returns {@code null} are skipped.
 *
 * @throws IllegalStateException if the source directory cannot be listed
 * @throws Exception             if reading a file or any storage operation fails
 */
public static void writeSampleIndex() throws Exception {
    // parse the original files
    File sourceFileFolder = new File(promedFilesDirectory);
    // listFiles() returns null when the path is not a directory or an I/O error occurs
    File[] htmlFiles = sourceFileFolder.listFiles();
    if (htmlFiles == null) {
        throw new IllegalStateException("Cannot list files in directory: " + promedFilesDirectory);
    }
    ArrayList<Tuple> fileTuples = new ArrayList<>();
    for (File htmlFile : htmlFiles) {
        if (!htmlFile.isFile()) {
            // sub-directories cannot be opened by Scanner; skip them
            continue;
        }
        StringBuilder sb = new StringBuilder();
        // try-with-resources releases the file handle even if reading throws
        try (Scanner scanner = new Scanner(htmlFile)) {
            while (scanner.hasNext()) {
                sb.append(scanner.nextLine());
            }
        }
        Tuple tuple = parsePromedHTML(htmlFile.getName(), sb.toString());
        if (tuple != null) {
            fileTuples.add(tuple);
        }
    }
    // write tuples into the table
    RelationManager relationManager = RelationManager.getInstance();
    relationManager.deleteTable(PROMED_SAMPLE_TABLE);
    relationManager.createTable(PROMED_SAMPLE_TABLE, Paths.get(promedIndexDirectory), PromedSchema.PROMED_SCHEMA, LuceneAnalyzerConstants.standardAnalyzerString());
    DataWriter dataWriter = relationManager.getTableDataWriter(PROMED_SAMPLE_TABLE);
    dataWriter.open();
    try {
        for (Tuple tuple : fileTuples) {
            dataWriter.insertTuple(tuple);
        }
    } finally {
        // always close the writer so a failed insert does not leave it open
        dataWriter.close();
    }
}
Also used : Scanner(java.util.Scanner) ArrayList(java.util.ArrayList) File(java.io.File) Tuple(edu.uci.ics.texera.api.tuple.Tuple) RelationManager(edu.uci.ics.texera.storage.RelationManager) DataWriter(edu.uci.ics.texera.storage.DataWriter)

Example 19 with RelationManager

use of edu.uci.ics.texera.storage.RelationManager in project textdb by TextDB.

the class TwitterSample method writeTwitterIndex.

/**
 * Re-creates the table {@code twitterClimateTable} and fills it with one tuple per
 * element of the JSON document stored at {@code twitterFilePath}.
 *
 * <p>Each tweet is mapped to the fields of {@code TwitterSchema.TWITTER_SCHEMA}. A tweet
 * that cannot be converted or inserted (for example because an expected JSON field is
 * absent) is skipped silently. The data writer is closed even if reading the JSON file fails.
 *
 * @throws Exception if a table operation or reading the JSON file fails
 */
public static void writeTwitterIndex() throws Exception {
    RelationManager relationManager = RelationManager.getInstance();
    relationManager.deleteTable(twitterClimateTable);
    relationManager.createTable(twitterClimateTable, Utils.getDefaultIndexDirectory().resolve(twitterClimateTable), TwitterSchema.TWITTER_SCHEMA, LuceneAnalyzerConstants.standardAnalyzerString());
    DataWriter dataWriter = relationManager.getTableDataWriter(twitterClimateTable);
    dataWriter.open();
    try {
        JsonNode jsonNode = new ObjectMapper().readTree(new File(twitterFilePath));
        for (JsonNode tweet : jsonNode) {
            try {
                String text = tweet.get("text").asText();
                long id = tweet.get("id").asLong();
                String tweetLink = "https://twitter.com/statuses/" + id;
                JsonNode userNode = tweet.get("user");
                String userScreenName = userNode.get("screen_name").asText();
                String userLink = "https://twitter.com/" + userScreenName;
                String userName = userNode.get("name").asText();
                String userDescription = userNode.get("description").asText();
                Integer userFollowersCount = userNode.get("followers_count").asInt();
                Integer userFriendsCount = userNode.get("friends_count").asInt();
                JsonNode geoTagNode = tweet.get("geo_tag");
                String state = geoTagNode.get("stateName").asText();
                String county = geoTagNode.get("countyName").asText();
                String city = geoTagNode.get("cityName").asText();
                String createAt = tweet.get("create_at").asText();
                Tuple tuple = new Tuple(TwitterSchema.TWITTER_SCHEMA, new TextField(text), new StringField(tweetLink), new StringField(userLink), new TextField(userScreenName), new TextField(userName), new TextField(userDescription), new IntegerField(userFollowersCount), new IntegerField(userFriendsCount), new TextField(state), new TextField(county), new TextField(city), new StringField(createAt));
                dataWriter.insertTuple(tuple);
            } catch (Exception ignored) {
                // Deliberate best effort: move on to the next tweet if something goes wrong.
            }
        }
    } finally {
        // close the writer even when the JSON file cannot be read or parsed
        dataWriter.close();
    }
}
Also used : JsonNode(com.fasterxml.jackson.databind.JsonNode) IntegerField(edu.uci.ics.texera.api.field.IntegerField) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) File(java.io.File) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Tuple(edu.uci.ics.texera.api.tuple.Tuple) RelationManager(edu.uci.ics.texera.storage.RelationManager) DataWriter(edu.uci.ics.texera.storage.DataWriter)

Example 20 with RelationManager

use of edu.uci.ics.texera.storage.RelationManager in project textdb by TextDB.

the class ComparableMatcherTest method setUp.

/**
 * Creates the tables {@code PEOPLE_TABLE} and {@code PEOPLE_TABLE_2} under the default
 * test index directory and fills them with the sample tuples from {@code TestConstants}
 * and {@code TestConstantsRegexSplit} respectively.
 *
 * <p>Each data writer is closed in a {@code finally} block so that a failed insert does
 * not leave a writer open.
 *
 * @throws TexeraException if creating a table or writing a tuple fails
 */
@BeforeClass
public static void setUp() throws TexeraException {
    RelationManager relationManager = RelationManager.getInstance();
    // create the people table and write tuples
    relationManager.createTable(PEOPLE_TABLE, TestUtils.getDefaultTestIndex().resolve(PEOPLE_TABLE), TestConstants.SCHEMA_PEOPLE, LuceneAnalyzerConstants.standardAnalyzerString());
    DataWriter peopleDataWriter = relationManager.getTableDataWriter(PEOPLE_TABLE);
    peopleDataWriter.open();
    try {
        for (Tuple tuple : TestConstants.getSamplePeopleTuples()) {
            peopleDataWriter.insertTuple(tuple);
        }
    } finally {
        peopleDataWriter.close();
    }
    // create the people table 2 and write tuples
    relationManager.createTable(PEOPLE_TABLE_2, TestUtils.getDefaultTestIndex().resolve(PEOPLE_TABLE_2), TestConstantsRegexSplit.SCHEMA_PEOPLE, LuceneAnalyzerConstants.standardAnalyzerString());
    DataWriter people2DataWriter = relationManager.getTableDataWriter(PEOPLE_TABLE_2);
    people2DataWriter.open();
    try {
        for (Tuple tuple : TestConstantsRegexSplit.constructSamplePeopleTuples()) {
            people2DataWriter.insertTuple(tuple);
        }
    } finally {
        people2DataWriter.close();
    }
}
Also used : Tuple(edu.uci.ics.texera.api.tuple.Tuple) RelationManager(edu.uci.ics.texera.storage.RelationManager) DataWriter(edu.uci.ics.texera.storage.DataWriter) BeforeClass(org.junit.BeforeClass)

Aggregations

RelationManager (edu.uci.ics.texera.storage.RelationManager) 35, Tuple (edu.uci.ics.texera.api.tuple.Tuple) 19, DataWriter (edu.uci.ics.texera.storage.DataWriter) 18, BeforeClass (org.junit.BeforeClass) 8, ArrayList (java.util.ArrayList) 5, AfterClass (org.junit.AfterClass) 5, ScanBasedSourceOperator (edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) 2, ScanSourcePredicate (edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) 2, File (java.io.File) 2, JsonNode (com.fasterxml.jackson.databind.JsonNode) 1, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 1, StorageException (edu.uci.ics.texera.api.exception.StorageException) 1, IntegerField (edu.uci.ics.texera.api.field.IntegerField) 1, StringField (edu.uci.ics.texera.api.field.StringField) 1, TextField (edu.uci.ics.texera.api.field.TextField) 1, DictionaryPredicate (edu.uci.ics.texera.dataflow.dictionarymatcher.DictionaryPredicate) 1, BufferedReader (java.io.BufferedReader) 1, IOException (java.io.IOException) 1, ParseException (java.text.ParseException) 1, Scanner (java.util.Scanner) 1