Search in sources :

Example 51 with TextField

use of edu.uci.ics.textdb.api.field.TextField in project textdb by TextDB.

the class ComparableMatcherTest method testDoubleMatching5.

/**
 * Checks {@code ComparableMatcher<Double>} with comparison type EQUAL_TO:
 * querying the height attribute for 6.10 must return exactly the one
 * expected people tuple.
 *
 * @throws Exception if the query fails or the expected date cannot be parsed
 */
@Test
public void testDoubleMatching5() throws Exception {
    // Query: height EQUAL_TO 6.10
    final double heightThreshold = 6.10;
    String heightAttrName = TestConstants.HEIGHT_ATTR.getAttributeName();
    List<Tuple> actualTuples = getDoubleQueryResults(heightAttrName, ComparisonType.EQUAL_TO, heightThreshold);

    // The only tuple expected to match
    SimpleDateFormat dateFormat = new SimpleDateFormat("MM-dd-yyyy");
    IField[] matchingFields = {
        new StringField("brad lie angelina"),
        new StringField("pitt"),
        new IntegerField(44),
        new DoubleField(6.10),
        new DateField(dateFormat.parse("01-12-1972")),
        new TextField("White Angry")
    };
    List<Tuple> expectedTuples = new ArrayList<>();
    expectedTuples.add(new Tuple(TestConstants.SCHEMA_PEOPLE, matchingFields));

    // Compare count and content
    Assert.assertEquals(1, actualTuples.size());
    Assert.assertTrue(TestUtils.equals(expectedTuples, actualTuples));
}
Also used : Attribute(edu.uci.ics.textdb.api.schema.Attribute) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.textdb.api.field.IntegerField) IField(edu.uci.ics.textdb.api.field.IField) StringField(edu.uci.ics.textdb.api.field.StringField) TextField(edu.uci.ics.textdb.api.field.TextField) DateField(edu.uci.ics.textdb.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) DoubleField(edu.uci.ics.textdb.api.field.DoubleField) Test(org.junit.Test)

Example 52 with TextField

use of edu.uci.ics.textdb.api.field.TextField in project textdb by TextDB.

the class ComparableMatcherTest method testIntegerMatching2.

/**
 * Checks {@code ComparableMatcher<Integer>} with comparison type GREATER_THAN:
 * querying the age attribute with threshold 45 must return exactly the one
 * expected people tuple (age 46).
 *
 * @throws Exception if the query fails or the expected date cannot be parsed
 */
@Test
public void testIntegerMatching2() throws Exception {
    // Query: age GREATER_THAN 45
    final int ageThreshold = 45;
    String ageAttrName = TestConstants.AGE_ATTR.getAttributeName();
    List<Tuple> actualTuples = getIntegerQueryResults(ageAttrName, ComparisonType.GREATER_THAN, ageThreshold);

    // The only tuple expected to match
    SimpleDateFormat dateFormat = new SimpleDateFormat("MM-dd-yyyy");
    IField[] matchingFields = {
        new StringField("bruce"),
        new StringField("john Lee"),
        new IntegerField(46),
        new DoubleField(5.50),
        new DateField(dateFormat.parse("01-14-1970")),
        new TextField("Tall Angry")
    };
    List<Tuple> expectedTuples = new ArrayList<>();
    expectedTuples.add(new Tuple(TestConstants.SCHEMA_PEOPLE, matchingFields));

    // Compare count and content
    Assert.assertEquals(1, actualTuples.size());
    Assert.assertTrue(TestUtils.equals(expectedTuples, actualTuples));
}
Also used : Attribute(edu.uci.ics.textdb.api.schema.Attribute) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.textdb.api.field.IntegerField) IField(edu.uci.ics.textdb.api.field.IField) StringField(edu.uci.ics.textdb.api.field.StringField) TextField(edu.uci.ics.textdb.api.field.TextField) DateField(edu.uci.ics.textdb.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) DoubleField(edu.uci.ics.textdb.api.field.DoubleField) Test(org.junit.Test)

Example 53 with TextField

use of edu.uci.ics.textdb.api.field.TextField in project textdb by TextDB.

the class ComparableMatcherTest method testIntegerMatching4.

/**
 * Checks {@code ComparableMatcher<Integer>} with comparison type
 * LESS_THAN_OR_EQUAL_TO: querying the age attribute with threshold 43 must
 * return exactly the three expected people tuples (ages 43, 42 and 42).
 *
 * @throws Exception if the query fails or an expected date cannot be parsed
 */
@Test
public void testIntegerMatching4() throws Exception {
    // Query: age LESS_THAN_OR_EQUAL_TO 43
    final int ageThreshold = 43;
    String ageAttrName = TestConstants.AGE_ATTR.getAttributeName();
    List<Tuple> actualTuples = getIntegerQueryResults(ageAttrName, ComparisonType.LESS_THAN_OR_EQUAL_TO, ageThreshold);

    // The three tuples expected to match
    SimpleDateFormat dateFormat = new SimpleDateFormat("MM-dd-yyyy");
    IField[] georgeFields = {
        new StringField("george lin lin"),
        new StringField("lin clooney"),
        new IntegerField(43),
        new DoubleField(6.06),
        new DateField(dateFormat.parse("01-13-1973")),
        new TextField("Lin Clooney is Short and lin clooney is Angry")
    };
    IField[] christianFields = {
        new StringField("christian john wayne"),
        new StringField("rock bale"),
        new IntegerField(42),
        new DoubleField(5.99),
        new DateField(dateFormat.parse("01-13-1974")),
        new TextField("Tall Fair")
    };
    IField[] maryFields = {
        new StringField("Mary brown"),
        new StringField("Lake Forest"),
        new IntegerField(42),
        new DoubleField(5.99),
        new DateField(dateFormat.parse("01-13-1974")),
        new TextField("Short angry")
    };
    List<Tuple> expectedTuples = new ArrayList<>();
    for (IField[] fields : new IField[][] { georgeFields, christianFields, maryFields }) {
        expectedTuples.add(new Tuple(TestConstants.SCHEMA_PEOPLE, fields));
    }

    // Compare count and content
    Assert.assertEquals(3, actualTuples.size());
    Assert.assertTrue(TestUtils.equals(expectedTuples, actualTuples));
}
Also used : Attribute(edu.uci.ics.textdb.api.schema.Attribute) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.textdb.api.field.IntegerField) IField(edu.uci.ics.textdb.api.field.IField) StringField(edu.uci.ics.textdb.api.field.StringField) TextField(edu.uci.ics.textdb.api.field.TextField) DateField(edu.uci.ics.textdb.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) DoubleField(edu.uci.ics.textdb.api.field.DoubleField) Test(org.junit.Test)

Example 54 with TextField

use of edu.uci.ics.textdb.api.field.TextField in project textdb by TextDB.

the class SampleExtraction method parsePromedHTML.

/**
 * Builds a Promed tuple from one HTML document.
 *
 * <p>The text of the element with id {@code "preview"} becomes the tuple's
 * text field; {@code fileName} becomes its string field.
 *
 * @param fileName name stored in the tuple's string field
 * @param content  HTML to parse
 * @return the tuple, or {@code null} if any exception is raised while parsing
 *         or building it (presumably including a missing "preview" element,
 *         which would surface as a NullPointerException; confirm against jsoup)
 */
public static Tuple parsePromedHTML(String fileName, String content) {
    try {
        String previewText = Jsoup.parse(content).getElementById("preview").text();
        return new Tuple(PromedSchema.PROMED_SCHEMA, new StringField(fileName), new TextField(previewText));
    } catch (Exception ignored) {
        // Best-effort parsing: callers receive null for any document that cannot be converted.
        return null;
    }
}
Also used : StringField(edu.uci.ics.textdb.api.field.StringField) TextField(edu.uci.ics.textdb.api.field.TextField) Document(org.jsoup.nodes.Document) Tuple(edu.uci.ics.textdb.api.tuple.Tuple)

Example 55 with TextField

use of edu.uci.ics.textdb.api.field.TextField in project textdb by TextDB.

the class TwitterSample method writeTwitterIndex.

/**
 * Rebuilds the twitter climate table from the JSON file at {@code twitterFilePath}.
 *
 * <p>The table is deleted and re-created, then one tuple is inserted per
 * element of the parsed JSON tree. A tweet whose processing raises a
 * {@link RuntimeException} (for example a missing JSON field) has its stack
 * trace printed and is skipped. The data writer is closed even when reading
 * the file or inserting a tuple fails with another exception.
 *
 * @throws Exception if the table cannot be re-created, the JSON file cannot be
 *                   read, or the data writer fails
 */
public static void writeTwitterIndex() throws Exception {
    RelationManager relationManager = RelationManager.getRelationManager();
    relationManager.deleteTable(twitterClimateTable);
    relationManager.createTable(twitterClimateTable, "../index/twitter/", TwitterSchema.TWITTER_SCHEMA, LuceneAnalyzerConstants.standardAnalyzerString());

    DataWriter dataWriter = relationManager.getTableDataWriter(twitterClimateTable);
    dataWriter.open();
    int counter = 0;
    try {
        JsonNode jsonNode = new ObjectMapper().readTree(new File(twitterFilePath));
        for (JsonNode tweet : jsonNode) {
            try {
                String text = tweet.get("text").asText();
                long id = tweet.get("id").asLong();
                String tweetLink = "https://twitter.com/statuses/" + id;

                JsonNode userNode = tweet.get("user");
                String userScreenName = userNode.get("screen_name").asText();
                String userLink = "https://twitter.com/" + userScreenName;
                String userName = userNode.get("name").asText();
                String userDescription = userNode.get("description").asText();
                int userFollowersCount = userNode.get("followers_count").asInt();
                int userFriendsCount = userNode.get("friends_count").asInt();

                JsonNode geoTagNode = tweet.get("geo_tag");
                String state = geoTagNode.get("stateName").asText();
                String county = geoTagNode.get("countyName").asText();
                String city = geoTagNode.get("cityName").asText();

                String createAt = tweet.get("create_at").asText();

                Tuple tuple = new Tuple(TwitterSchema.TWITTER_SCHEMA, new TextField(text), new StringField(tweetLink), new StringField(userLink), new TextField(userScreenName), new TextField(userName), new TextField(userDescription), new IntegerField(userFollowersCount), new IntegerField(userFriendsCount), new TextField(state), new TextField(county), new TextField(city), new StringField(createAt));
                dataWriter.insertTuple(tuple);
                counter++;
            } catch (RuntimeException e) {
                // Best-effort import: report the malformed tweet and move on to the next one.
                e.printStackTrace();
            }
        }
    } finally {
        // Release the writer on every path, not only after a fully successful import.
        dataWriter.close();
    }
    System.out.println("write twitter data finished");
    System.out.println(counter + " tweets written");
}
Also used : JsonNode(com.fasterxml.jackson.databind.JsonNode) IntegerField(edu.uci.ics.textdb.api.field.IntegerField) StringField(edu.uci.ics.textdb.api.field.StringField) TextField(edu.uci.ics.textdb.api.field.TextField) File(java.io.File) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) RelationManager(edu.uci.ics.textdb.storage.RelationManager) DataWriter(edu.uci.ics.textdb.storage.DataWriter)

Aggregations

TextField (edu.uci.ics.textdb.api.field.TextField)117 IField (edu.uci.ics.textdb.api.field.IField)105 Tuple (edu.uci.ics.textdb.api.tuple.Tuple)92 ArrayList (java.util.ArrayList)91 Schema (edu.uci.ics.textdb.api.schema.Schema)83 Test (org.junit.Test)83 StringField (edu.uci.ics.textdb.api.field.StringField)81 IntegerField (edu.uci.ics.textdb.api.field.IntegerField)80 Attribute (edu.uci.ics.textdb.api.schema.Attribute)74 Span (edu.uci.ics.textdb.api.span.Span)71 DoubleField (edu.uci.ics.textdb.api.field.DoubleField)68 DateField (edu.uci.ics.textdb.api.field.DateField)64 SimpleDateFormat (java.text.SimpleDateFormat)63 Dictionary (edu.uci.ics.textdb.exp.dictionarymatcher.Dictionary)24 JoinDistancePredicate (edu.uci.ics.textdb.exp.join.JoinDistancePredicate)9 KeywordMatcherSourceOperator (edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator)9 ParseException (java.text.ParseException)4 IOperator (edu.uci.ics.textdb.api.dataflow.IOperator)3 ScanBasedSourceOperator (edu.uci.ics.textdb.exp.source.scan.ScanBasedSourceOperator)3 ScanSourcePredicate (edu.uci.ics.textdb.exp.source.scan.ScanSourcePredicate)3