Use of org.apache.lucene.search.TermQuery in project textdb by TextDB.
The class RelationManager, method createTable.
/**
 * Creates a new table.
 * The table name must be unique (case-insensitive).
 * luceneAnalyzerString must name a valid analyzer.
 *
 * An "_id" attribute is added to the table schema.
 * The system automatically generates a unique ID for each tuple inserted into the table;
 * the generated ID is stored in the "_id" field.
 *
 * @param tableName the name of the table; must be unique (case-insensitive)
 * @param indexDirectory the directory to store the index and data; must not overlap with another table's directory
 * @param schema the schema of the table
 * @param luceneAnalyzerString the string representing the Lucene analyzer to use
 * @throws StorageException if the table already exists, the directory is taken by another table, or the analyzer string is invalid
 */
public void createTable(String tableName, String indexDirectory, Schema schema, String luceneAnalyzerString) throws StorageException {
    // convert the table name to lower case
    tableName = tableName.toLowerCase();
    // the table must not already exist
    if (checkTableExistence(tableName)) {
        throw new StorageException(String.format("Table %s already exists.", tableName));
    }
    // create the index directory if it does not exist, and convert it to its absolute path
    try {
        Path indexPath = Paths.get(indexDirectory);
        if (Files.notExists(indexPath)) {
            Files.createDirectories(indexPath);
        }
        indexDirectory = indexPath.toRealPath().toString();
    } catch (IOException e) {
        throw new StorageException(e);
    }
    // check if the indexDirectory overlaps with another table's index directory
    Query indexDirectoryQuery = new TermQuery(new Term(CatalogConstants.TABLE_DIRECTORY, indexDirectory));
    DataReader tableCatalogDataReader = new DataReader(CatalogConstants.TABLE_CATALOG_DATASTORE, indexDirectoryQuery);
    tableCatalogDataReader.setPayloadAdded(false);
    tableCatalogDataReader.open();
    Tuple nextTuple = tableCatalogDataReader.getNextTuple();
    tableCatalogDataReader.close();
    // if the index directory is already taken by another table, throw an exception
    if (nextTuple != null) {
        String overlapTableName = nextTuple.getField(CatalogConstants.TABLE_NAME).getValue().toString();
        throw new StorageException(String.format("Table %s already takes the index directory %s. Please choose another directory.", overlapTableName, indexDirectory));
    }
    // check if the lucene analyzer string is valid
    Analyzer luceneAnalyzer = null;
    try {
        luceneAnalyzer = LuceneAnalyzerConstants.getLuceneAnalyzer(luceneAnalyzerString);
    } catch (DataFlowException e) {
        throw new StorageException("Lucene Analyzer String is not valid.");
    }
    // initialize the index, clearing any leftover data in the index directory
    Schema tableSchema = Utils.getSchemaWithID(schema);
    DataStore tableDataStore = new DataStore(indexDirectory, tableSchema);
    DataWriter dataWriter = new DataWriter(tableDataStore, luceneAnalyzer);
    dataWriter.open();
    dataWriter.clearData();
    dataWriter.close();
    // write the table information to the catalog
    writeTableInfoToCatalog(tableName, indexDirectory, schema, luceneAnalyzerString);
}
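For orientation, a hedged sketch of a call site. The singleton accessor getRelationManager(), the Attribute/AttributeType constructor, and standardAnalyzerString() are assumptions based on textdb's surrounding storage API, not verified signatures:

// Hypothetical call-site sketch; names follow textdb's storage API by assumption.
RelationManager relationManager = RelationManager.getRelationManager();
Schema schema = new Schema(new Attribute("content", AttributeType.TEXT));
relationManager.createTable(
        "promed",                                           // stored lower-cased; must not exist yet
        "./index/promed",                                   // created if absent, resolved to an absolute path
        schema,                                             // the "_id" attribute is appended automatically
        LuceneAnalyzerConstants.standardAnalyzerString());  // must resolve to a valid Lucene analyzer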
Use of org.apache.lucene.search.TermQuery in project textdb by TextDB.
The class RelationManager, method getSchemaCatalogTuples.
/*
 * Gets the tuples of a table from the schema catalog.
 */
private static List<Tuple> getSchemaCatalogTuples(String tableName) throws StorageException {
    tableName = tableName.toLowerCase();
    Query tableNameQuery = new TermQuery(new Term(CatalogConstants.TABLE_NAME, tableName));
    DataReader schemaCatalogDataReader = new DataReader(CatalogConstants.SCHEMA_CATALOG_DATASTORE, tableNameQuery);
    // read the tuples into a list
    schemaCatalogDataReader.open();
    List<Tuple> tupleList = new ArrayList<>();
    Tuple nextTuple;
    while ((nextTuple = schemaCatalogDataReader.getNextTuple()) != null) {
        tupleList.add(nextTuple);
    }
    schemaCatalogDataReader.close();
    return tupleList;
}
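The catalog lookup is simply an exact-match TermQuery against an untokenized key field, which is why the table name is lower-cased before querying. A self-contained plain-Lucene sketch of the same pattern, bypassing textdb's DataReader entirely (the index path, field names, and values are illustrative):

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CatalogLookup {
    public static void main(String[] args) throws IOException {
        try (Directory dir = FSDirectory.open(Paths.get("./catalog-index"))) {
            // index one "catalog" document; StringField keeps the value as a single untokenized term
            try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
                Document doc = new Document();
                doc.add(new StringField("tableName", "promed", Field.Store.YES));
                doc.add(new StringField("tableDirectory", "/data/promed", Field.Store.YES));
                writer.addDocument(doc);
            }
            // exact-match lookup: the query term must equal the indexed term byte-for-byte,
            // so callers normalize (e.g. lower-case) the key the same way on both sides
            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                TermQuery query = new TermQuery(new Term("tableName", "promed"));
                for (ScoreDoc hit : searcher.search(query, 10).scoreDocs) {
                    System.out.println(searcher.doc(hit.doc).get("tableDirectory"));
                }
            }
        }
    }
}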
Use of org.apache.lucene.search.TermQuery in project textdb by TextDB.
The class RelationManager, method getTableCatalogTuple.
/*
 * Gets the tuple of a table from the table catalog.
 */
private static Tuple getTableCatalogTuple(String tableName) throws StorageException {
    tableName = tableName.toLowerCase();
    Query tableNameQuery = new TermQuery(new Term(CatalogConstants.TABLE_NAME, tableName));
    DataReader tableCatalogDataReader = new DataReader(CatalogConstants.TABLE_CATALOG_DATASTORE, tableNameQuery);
    tableCatalogDataReader.setPayloadAdded(false);
    tableCatalogDataReader.open();
    List<Tuple> tupleList = new ArrayList<>();
    Tuple nextTuple;
    while ((nextTuple = tableCatalogDataReader.getNextTuple()) != null) {
        tupleList.add(nextTuple);
    }
    tableCatalogDataReader.close();
    if (tupleList.size() == 0) {
        return null;
    } else if (tupleList.size() == 1) {
        return tupleList.get(0);
    } else {
        throw new StorageException("Catalog corrupted: duplicate table name found in catalog.");
    }
}
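Callers typically layer convenience accessors over this lookup. A hedged sketch of one such helper; the helper itself is hypothetical, and CatalogConstants.TABLE_DIRECTORY is borrowed from createTable above:

// Hypothetical convenience accessor built on getTableCatalogTuple;
// CatalogConstants.TABLE_DIRECTORY is the field name used in createTable above.
public static String getTableDirectory(String tableName) throws StorageException {
    Tuple tableTuple = getTableCatalogTuple(tableName);
    if (tableTuple == null) {
        throw new StorageException(String.format("Table %s does not exist.", tableName));
    }
    return tableTuple.getField(CatalogConstants.TABLE_DIRECTORY).getValue().toString();
}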
Use of org.apache.lucene.search.TermQuery in project lucene-solr by apache.
The class TestHierarchicalDocBuilder, method createBlockJoinQuery.
private Query createBlockJoinQuery(Hierarchy hierarchy) {
    List<Hierarchy> elements = hierarchy.elements;
    if (elements.isEmpty()) {
        // leaf level: match the document itself by its id
        BooleanQuery.Builder childQuery = new BooleanQuery.Builder();
        childQuery.add(new TermQuery(new Term(FIELD_ID, (String) hierarchy.elementData.get(FIELD_ID))), Occur.MUST);
        return childQuery.build();
    }
    // recurse into a random child branch, then wrap the result so it matches this parent level
    Query childQuery = createBlockJoinQuery(elements.get(random().nextInt(elements.size())));
    return createToParentQuery(hierarchy.elementType, childQuery);
}
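createToParentQuery is defined elsewhere in the test. A sketch of the standard Lucene block-join wrapper it presumably builds; QueryBitSetProducer, ToParentBlockJoinQuery, and ScoreMode are real classes in org.apache.lucene.search.join, while the "type_s" parent-type field is an assumption for illustration:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.search.join.QueryBitSetProducer;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;

static Query toParentQuery(String parentType, Query childQuery) {
    // the parent filter identifies which documents in each block are parents;
    // the "type_s" field name is hypothetical
    BitSetProducer parentFilter = new QueryBitSetProducer(new TermQuery(new Term("type_s", parentType)));
    // matches parent documents whose child documents satisfy childQuery
    return new ToParentBlockJoinQuery(childQuery, parentFilter, ScoreMode.None);
}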
Use of org.apache.lucene.search.TermQuery in project lucene-solr by apache.
The class CloudMLTQParser, method parse.
public Query parse() {
    String id = localParams.get(QueryParsing.V);
    // Do a Real Time Get for the document
    SolrDocument doc = getDocument(id);
    if (doc == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch document with id [" + id + "]");
    }
    String[] qf = localParams.getParams("qf");
    Map<String, Float> boostFields = new HashMap<>();
    MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
    mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
    mlt.setMinDocFreq(localParams.getInt("mindf", 0));
    mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
    mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
    mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
    mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
    mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
    boolean boost = localParams.getBool("boost", MoreLikeThis.DEFAULT_BOOST);
    mlt.setBoost(boost);
    mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    String[] fieldNames;
    if (qf != null) {
        ArrayList<String> fields = new ArrayList<>();
        for (String fieldName : qf) {
            if (!StringUtils.isEmpty(fieldName)) {
                String[] strings = splitList.split(fieldName);
                for (String string : strings) {
                    if (!StringUtils.isEmpty(string)) {
                        fields.add(string);
                    }
                }
            }
        }
        // Parse field names and boosts from the fields
        boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
        fieldNames = boostFields.keySet().toArray(new String[0]);
    } else {
        ArrayList<String> fields = new ArrayList<>();
        for (String field : doc.getFieldNames()) {
            // Only use fields that are stored and have an explicit analyzer.
            // This makes sense as the query uses tf/idf/.. for query construction.
            // We might want to relook and change this in the future though.
            SchemaField f = req.getSchema().getFieldOrNull(field);
            if (f != null && f.stored() && f.getType().isExplicitAnalyzer()) {
                fields.add(field);
            }
        }
        fieldNames = fields.toArray(new String[0]);
    }
    if (fieldNames.length < 1) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "MoreLikeThis requires at least one similarity field: qf");
    }
    mlt.setFieldNames(fieldNames);
    // copy the stored field values out of the fetched document
    for (String field : fieldNames) {
        Collection<Object> fieldValues = doc.getFieldValues(field);
        if (fieldValues != null) {
            Collection<Object> values = new ArrayList<>();
            for (Object val : fieldValues) {
                if (val instanceof IndexableField) {
                    values.add(((IndexableField) val).stringValue());
                } else {
                    values.add(val);
                }
            }
            filteredDocument.put(field, values);
        }
    }
    try {
        Query rawMLTQuery = mlt.like(filteredDocument);
        BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
        if (boost && boostFields.size() > 0) {
            BooleanQuery.Builder newQ = new BooleanQuery.Builder();
            newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
            for (BooleanClause clause : boostedMLTQuery) {
                Query q = clause.getQuery();
                float originalBoost = 1f;
                if (q instanceof BoostQuery) {
                    BoostQuery bq = (BoostQuery) q;
                    q = bq.getQuery();
                    originalBoost = bq.getBoost();
                }
                // re-apply the per-field boost from qf on top of any boost MoreLikeThis assigned
                Float fieldBoost = boostFields.get(((TermQuery) q).getTerm().field());
                q = ((fieldBoost != null) ? new BoostQuery(q, fieldBoost * originalBoost) : clause.getQuery());
                newQ.add(q, clause.getOccur());
            }
            boostedMLTQuery = newQ.build();
        }
        // exclude current document from results
        BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
        realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
        realMLTQuery.add(createIdQuery(req.getSchema().getUniqueKeyField().getName(), id), BooleanClause.Occur.MUST_NOT);
        return realMLTQuery.build();
    } catch (IOException e) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad Request", e);
    }
}
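In practice this parser is reached through Solr's mlt query parser in cloud mode. A representative request, where the collection, field names, and document id are illustrative but the parameter names match the local params read above:

http://localhost:8983/solr/techproducts/select?q={!mlt qf=name,features mintf=1 mindf=1 boost=true}SP2514N

The value after the closing brace is the id handed to the real-time get; qf lists the similarity fields with optional per-field boosts (e.g. name^2), and boost=true enables the clause-rewriting loop that wraps each TermQuery in a BoostQuery scaled by its field's boost.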