Use of edu.uci.ics.textdb.api.schema.Attribute in project textdb by TextDB.
From the class RelationManagerTest, method test10:
/*
 * Test inserting multiple tuples to a table, getting them by a query, then deleting them by a query
 */
@Test
public void test10() throws Exception {
    String tableName = "relation_manager_test_table";
    String tableDirectory = "./index/test_table";
    Schema tableSchema = new Schema(
            new Attribute("content", AttributeType.STRING),
            new Attribute("number", AttributeType.STRING));

    RelationManager relationManager = RelationManager.getRelationManager();
    // start from a clean slate, then create the table under test
    relationManager.deleteTable(tableName);
    relationManager.createTable(tableName, tableDirectory, tableSchema,
            LuceneAnalyzerConstants.standardAnalyzerString());

    // insert three tuples sharing the same "content" but distinct "number" values
    DataWriter dataWriter = relationManager.getTableDataWriter(tableName);
    dataWriter.open();
    Tuple firstTuple = new Tuple(tableSchema, new StringField("test"), new StringField("1"));
    dataWriter.insertTuple(firstTuple);
    Tuple secondTuple = new Tuple(tableSchema, new StringField("test"), new StringField("2"));
    IDField secondTupleID = dataWriter.insertTuple(secondTuple);
    Tuple thirdTuple = new Tuple(tableSchema, new StringField("test"), new StringField("3"));
    dataWriter.insertTuple(thirdTuple);
    dataWriter.close();

    // a query on the shared content value should match all 3 tuples
    Query allTupleQuery = new TermQuery(new Term("content", "test"));
    DataReader allTupleReader = relationManager.getTableDataReader(tableName, allTupleQuery);
    int matchedTupleCount = 0;
    allTupleReader.open();
    for (Tuple next = allTupleReader.getNextTuple(); next != null; next = allTupleReader.getNextTuple()) {
        matchedTupleCount++;
    }
    allTupleReader.close();
    Assert.assertEquals(3, matchedTupleCount);

    // delete only the tuple whose "number" field is "2"
    Query tuple2Query = new TermQuery(new Term("number", "2"));
    dataWriter.open();
    dataWriter.deleteTuple(tuple2Query);
    dataWriter.close();

    // the deleted tuple must no longer be retrievable by its ID
    Tuple deletedTuple = relationManager.getTupleByID(tableName, secondTupleID);
    Assert.assertNull(deletedTuple);

    // clean up the table created by this test
    relationManager.deleteTable(tableName);
}
Use of edu.uci.ics.textdb.api.schema.Attribute in project textdb by TextDB.
From the class DataReader, method buildPayloadFromTermVector:
/**
 * Builds the span "payload" for one Lucene document from its stored term vectors.
 * For every TEXT attribute in the input schema, each analyzed term in the
 * document's term vector is converted to a Span carrying the attribute name,
 * character offsets, the analyzed term, the original substring of the field
 * value, and the token position.
 *
 * @param fields the tuple's field values, aligned with inputSchema's attribute order
 * @param docID  the Lucene-internal document ID whose term vectors are read
 * @return a list of spans, one per term occurrence across all TEXT attributes
 * @throws IOException if reading the term vectors from the index fails
 */
private ArrayList<Span> buildPayloadFromTermVector(List<IField> fields, int docID) throws IOException {
ArrayList<Span> payloadSpanList = new ArrayList<>();
for (Attribute attr : inputSchema.getAttributes()) {
String attributeName = attr.getAttributeName();
AttributeType attributeType = attr.getAttributeType();
// only TEXT attributes are analyzed into terms, so only they contribute to the
// payload.
if (attributeType != AttributeType.TEXT) {
continue;
}
String fieldValue = fields.get(inputSchema.getIndex(attributeName)).getValue().toString();
Terms termVector = luceneIndexReader.getTermVector(docID, attributeName);
// term vectors may be absent for this field (e.g. not indexed with them); skip if so
if (termVector == null) {
continue;
}
TermsEnum termsEnum = termVector.iterator();
// reused across terms; postings() may recycle the previous instance
PostingsEnum termPostings = null;
// go through document terms
while ((termsEnum.next()) != null) {
termPostings = termsEnum.postings(termPostings, PostingsEnum.ALL);
// advance to the (single) document in this term vector; skip terms with no docs
if (termPostings.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
continue;
}
// for each term, go through its postings
for (int i = 0; i < termPostings.freq(); i++) {
// nextPosition needs to be called first
// (startOffset/endOffset are only valid after it, per the PostingsEnum contract)
int tokenPosition = termPostings.nextPosition();
int charStart = termPostings.startOffset();
int charEnd = termPostings.endOffset();
String analyzedTermStr = termsEnum.term().utf8ToString();
// recover the original (pre-analysis) text from the raw field value
String originalTermStr = fieldValue.substring(charStart, charEnd);
Span span = new Span(attributeName, charStart, charEnd, analyzedTermStr, originalTermStr, tokenPosition);
payloadSpanList.add(span);
}
}
}
return payloadSpanList;
}
Use of edu.uci.ics.textdb.api.schema.Attribute in project textdb by TextDB.
From the class DataReader, method documentToFields:
/**
 * Converts a Lucene document back into a list of TextDB fields, one per
 * attribute of the input schema, in schema order.
 *
 * @param luceneDocument the stored Lucene document to convert
 * @return the field values in the same order as inputSchema's attributes
 * @throws ParseException if a stored value cannot be parsed into its attribute type
 */
private ArrayList<IField> documentToFields(Document luceneDocument) throws ParseException {
    ArrayList<IField> resultFields = new ArrayList<>();
    for (Attribute attribute : inputSchema.getAttributes()) {
        // look up the stored string value by attribute name, then convert it
        // to the properly typed field
        String storedValue = luceneDocument.get(attribute.getAttributeName());
        resultFields.add(StorageUtils.getField(attribute.getAttributeType(), storedValue));
    }
    return resultFields;
}
Use of edu.uci.ics.textdb.api.schema.Attribute in project textdb by TextDB.
From the class DataWriter, method getLuceneDocument:
/*
 * Converts a TextDB tuple to a Lucene document
 */
private static Document getLuceneDocument(Tuple tuple) {
    Document luceneDoc = new Document();
    List<IField> tupleFields = tuple.getFields();
    List<Attribute> schemaAttributes = tuple.getSchema().getAttributes();
    // fields and attributes are parallel lists in schema order; pair them by index
    for (int i = 0; i < tupleFields.size(); i++) {
        Attribute attribute = schemaAttributes.get(i);
        luceneDoc.add(StorageUtils.getLuceneField(
                attribute.getAttributeType(),
                attribute.getAttributeName(),
                tupleFields.get(i).getValue()));
    }
    return luceneDoc;
}
Use of edu.uci.ics.textdb.api.schema.Attribute in project textdb by TextDB.
From the class FileSourceOperatorTest, method test3:
/*
 * Test FileSourceOperator with a Directory with recursive = true and maxDepth = null.
 *
 * All the files under the recursive sub-directories will be read.
 *
 * expected results: test1.txt, test2.txt, test4.txt and test5.txt will be included
 */
@Test
public void test3() throws Exception {
    String attrName = "content";
    Schema schema = new Schema(new Attribute(attrName, AttributeType.TEXT));

    // recursive = true with no depth limit: every file under tempFolderPath is read
    FileSourcePredicate predicate = new FileSourcePredicate(
            tempFolderPath.toString(), attrName, true, null, null);
    FileSourceOperator fileSource = new FileSourceOperator(predicate);

    // drain the operator, collecting one tuple per file
    ArrayList<Tuple> collectedTuples = new ArrayList<>();
    fileSource.open();
    Tuple nextTuple;
    while ((nextTuple = fileSource.getNextTuple()) != null) {
        collectedTuples.add(nextTuple);
    }
    fileSource.close();

    List<Tuple> expectedResults = Arrays.asList(
            new Tuple(schema, new TextField(tempFile1String)),
            new Tuple(schema, new TextField(tempFile2String)),
            new Tuple(schema, new TextField(tempFile4String)),
            new Tuple(schema, new TextField(tempFile5String)));
    Assert.assertTrue(TestUtils.equals(expectedResults, collectedTuples));
}
Aggregations