Use of edu.uci.ics.texera.api.schema.Attribute in the textdb project (TextDB).
In class FileSourceOperatorTest, the method test2:
/*
 * Tests FileSourceOperator against a directory with every optional
 * parameter left at its default, i.e. only files sitting directly in
 * the folder are listed (no recursion into sub-directories).
 *
 * Expected result: the contents of test1.txt and test2.txt are produced.
 */
@Test
public void test2() throws Exception {
    String attrName = "content";
    Schema schema = new Schema(new Attribute(attrName, AttributeType.TEXT));

    FileSourcePredicate predicate = new FileSourcePredicate(tempFolderPath.toString(), attrName);
    FileSourceOperator fileSource = new FileSourceOperator(predicate);

    // Drain every tuple the operator emits into a list.
    ArrayList<Tuple> actualResults = new ArrayList<>();
    fileSource.open();
    for (Tuple t = fileSource.getNextTuple(); t != null; t = fileSource.getNextTuple()) {
        actualResults.add(t);
    }
    fileSource.close();

    List<Tuple> expectedResults = Arrays.asList(
            new Tuple(schema, new TextField(tempFile1String)),
            new Tuple(schema, new TextField(tempFile2String)));
    Assert.assertTrue(TestUtils.equals(expectedResults, actualResults));
}
Use of edu.uci.ics.texera.api.schema.Attribute in the textdb project (TextDB).
In class MedlineIndexWriter, the method recordToTuple:
/**
 * Parses one Medline JSON record string into a Tuple that follows
 * SCHEMA_MEDLINE, extracting one field per attribute in ATTRIBUTES_MEDLINE.
 *
 * @param record a single JSON-encoded Medline record
 * @return a Tuple whose fields are ordered as in SCHEMA_MEDLINE
 * @throws IOException    if the record cannot be parsed as JSON
 * @throws ParseException if a field value cannot be converted to its attribute type
 */
public static Tuple recordToTuple(String record) throws IOException, ParseException {
    JsonNode jsonNode = new ObjectMapper().readValue(record, JsonNode.class);
    List<IField> fields = new ArrayList<>();
    for (Attribute attribute : ATTRIBUTES_MEDLINE) {
        // NOTE(review): JsonNode.toString() keeps JSON quoting on text nodes
        // (e.g. "\"title\"" rather than "title"); presumably StorageUtils.getField
        // expects this raw serialized form — confirm. Also assumes every attribute
        // key is present in the record (get(...) would return null otherwise).
        String rawValue = jsonNode.get(attribute.getName()).toString();
        fields.add(StorageUtils.getField(attribute.getType(), rawValue));
    }
    return new Tuple(SCHEMA_MEDLINE, fields.toArray(new IField[0]));
}
Use of edu.uci.ics.texera.api.schema.Attribute in the textdb project (TextDB).
In class DataReader, the method documentToFields:
/**
 * Converts a stored Lucene document back into a list of IField values,
 * one per attribute of the input schema, in schema order.
 *
 * @param luceneDocument the document fetched from the Lucene index
 * @return the field values in the same order as inputSchema's attributes
 * @throws ParseException if a stored string cannot be converted to its attribute type
 */
private ArrayList<IField> documentToFields(Document luceneDocument) throws ParseException {
    ArrayList<IField> fieldList = new ArrayList<>();
    for (Attribute attribute : inputSchema.getAttributes()) {
        // Lucene stores everything as a string; re-type it via the schema.
        String storedValue = luceneDocument.get(attribute.getName());
        fieldList.add(StorageUtils.getField(attribute.getType(), storedValue));
    }
    return fieldList;
}
Use of edu.uci.ics.texera.api.schema.Attribute in the textdb project (TextDB).
In class DataReader, the method buildPayloadFromTermVector:
// Rebuilds the payload span list for one document from its Lucene term vectors.
// For every TEXT attribute, walks the attribute's term vector and emits one Span
// per term occurrence, carrying the analyzed term, the original substring of the
// field value, character offsets, and the token position.
private ArrayList<Span> buildPayloadFromTermVector(List<IField> fields, int docID) throws IOException {
ArrayList<Span> payloadSpanList = new ArrayList<>();
for (Attribute attr : inputSchema.getAttributes()) {
String attributeName = attr.getName();
AttributeType attributeType = attr.getType();
// payload.
// Only TEXT attributes are tokenized/indexed with term vectors; skip the rest.
if (attributeType != AttributeType.TEXT) {
continue;
}
// Original field text, used below to slice out each term's raw substring.
String fieldValue = fields.get(inputSchema.getIndex(attributeName)).getValue().toString();
Terms termVector = luceneIndexReader.getTermVector(docID, attributeName);
// A field may have no term vector stored (e.g. empty field); skip it.
if (termVector == null) {
continue;
}
TermsEnum termsEnum = termVector.iterator();
// Reused across terms; postings(...) recycles the enum when non-null.
PostingsEnum termPostings = null;
// go through document terms
while ((termsEnum.next()) != null) {
termPostings = termsEnum.postings(termPostings, PostingsEnum.ALL);
// Term vectors are per-document, so nextDoc() should position on this
// doc; NO_MORE_DOCS means no postings to read for this term.
if (termPostings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
continue;
}
// for each term, go through its postings
for (int i = 0; i < termPostings.freq(); i++) {
// nextPosition needs to be called first
// (Lucene contract: advance the position before reading offsets.)
int tokenPosition = termPostings.nextPosition();
int charStart = termPostings.startOffset();
int charEnd = termPostings.endOffset();
// Analyzed form (post-tokenizer/filters) vs. the exact original text.
String analyzedTermStr = termsEnum.term().utf8ToString();
String originalTermStr = fieldValue.substring(charStart, charEnd);
Span span = new Span(attributeName, charStart, charEnd, analyzedTermStr, originalTermStr, tokenPosition);
payloadSpanList.add(span);
}
}
}
return payloadSpanList;
}
Use of edu.uci.ics.texera.api.schema.Attribute in the textdb project (TextDB).
In class RelationManagerTest, the method test15:
/*
 * Two tables must not share one index directory: the second createTable
 * call on the same path is expected to throw a StorageException.
 */
@Test
public void test15() throws Exception {
    String tableName1 = "relation_manager_test_table_15_1";
    String tableName2 = "relation_manager_test_table_15_2";
    String indexDirectory = "./index/test_table/relation_manager_test_table_15";
    String luceneAnalyzerString = "standard";
    Schema schema = new Schema(new Attribute("content", AttributeType.TEXT));

    // Start from a clean slate in case earlier runs left these behind.
    relationManager.deleteTable(tableName1);
    relationManager.deleteTable(tableName2);

    relationManager.createTable(tableName1, Paths.get(indexDirectory), schema, luceneAnalyzerString);
    // create another table with the same directory should fail
    try {
        relationManager.createTable(tableName2, Paths.get(indexDirectory), schema, luceneAnalyzerString);
        Assert.fail("Storage exception should be thrown because of duplicate index directories");
    } catch (StorageException e) {
        // expected: duplicate index directory is rejected
    }
    relationManager.deleteTable(tableName1);
}
Aggregations