Use of org.apache.lucene.analysis.Analyzer in project titan by thinkaurelius.
The class LuceneExample, method example1:
@Test
public void example1() throws Exception {
    Directory dir = FSDirectory.open(path);
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_4, analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(dir, iwc);
    indexDocs(writer, "doc1", ImmutableMap.of("name", "The laborious work of John Doe as we know it", "city", "Blumenkamp", "location", Geoshape.point(51.687882, 6.612053), "time", 1000342034));
    indexDocs(writer, "doc2", ImmutableMap.of("name", "Life as we know it or not", "city", "Essen", "location", Geoshape.point(51.787882, 6.712053), "time", 1000342034 - 500));
    indexDocs(writer, "doc3", ImmutableMap.of("name", "Berlin - poor but sexy and a display of the extraordinary", "city", "Berlin", "location", Geoshape.circle(52.509535, 13.425293, 50), "time", 1000342034 + 2000));
    writer.close();
    // Search
    IndexReader reader = DirectoryReader.open(FSDirectory.open(path));
    IndexSearcher searcher = new IndexSearcher(reader);
    analyzer = new StandardAnalyzer();
    // Build a conjunction of filters; the spatial circle below is centered near the Auesee
    BooleanFilter filter = new BooleanFilter();
    //filter.add(new TermsFilter(new Term("name_txt", "know")), BooleanClause.Occur.MUST);
    SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects, Geoshape.circle(51.666167, 6.58905, 450).convert2Spatial4j());
    //filter.add(getSpatialStrategy("location").makeFilter(args), BooleanClause.Occur.MUST);
    filter.add(NumericRangeFilter.newLongRange("time", (long) 1000342034, (long) 1000342034, true, true), BooleanClause.Occur.MUST);
    //filter.add(NumericRangeFilter.newLongRange("time", (long) 1000342034 - 100, Long.MAX_VALUE, true, true), BooleanClause.Occur.MUST);
    //filter.add(NumericRangeFilter.newLongRange("time", Long.MIN_VALUE, (long) 1000342034 + 300, true, true), BooleanClause.Occur.MUST);
    filter.add(new PrefixFilter(new Term("city_str", "B")), BooleanClause.Occur.MUST);
    TopDocs docs = searcher.search(new MatchAllDocsQuery(), filter, MAX_RESULT);
    if (docs.totalHits >= MAX_RESULT) {
        throw new RuntimeException("Max results exceeded: " + MAX_RESULT);
    }
    Set<String> result = getResults(searcher, docs);
    System.out.println(result);
}
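The test calls an indexDocs helper that this excerpt does not show. Below is a minimal sketch of what such a helper could look like under Lucene 4.10.4, assuming the field-naming convention the filters imply (an analyzed "_txt" copy and a verbatim "_str" copy per string value, plus a long field for numbers); the Geoshape handling is omitted.

private static void indexDocs(IndexWriter writer, String docId, Map<String, Object> docMap) throws IOException {
    Document doc = new Document();
    doc.add(new StringField("docid", docId, Field.Store.YES));
    for (Map.Entry<String, Object> entry : docMap.entrySet()) {
        String key = entry.getKey();
        Object value = entry.getValue();
        if (value instanceof Number) {
            // long field backing NumericRangeFilter.newLongRange("time", ...)
            doc.add(new LongField(key, ((Number) value).longValue(), Field.Store.NO));
        } else if (value instanceof String) {
            // analyzed copy for text queries, verbatim copy for PrefixFilter on "city_str"
            doc.add(new TextField(key + "_txt", (String) value, Field.Store.NO));
            doc.add(new StringField(key + "_str", (String) value, Field.Store.NO));
        }
        // Geoshape values would be indexed through a SpatialStrategy (omitted here)
    }
    writer.updateDocument(new Term("docid", docId), doc);
}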
Use of org.apache.lucene.analysis.Analyzer in project stargate-core by tuplejump.
The class CassandraUtils, method getOptions:
public static Options getOptions(Properties mapping, ColumnFamilyStore baseCfs, String colName) {
    Map<String, NumericConfig> numericFieldOptions = new HashMap<>();
    Map<String, FieldType> fieldDocValueTypes = new TreeMap<>();
    Map<String, FieldType> collectionFieldDocValueTypes = new TreeMap<>();
    Map<String, FieldType> fieldTypes = new TreeMap<>();
    Map<String, FieldType[]> collectionFieldTypes = new TreeMap<>();
    Map<String, ColumnDefinition> validators = new TreeMap<>();
    Map<String, ColumnDefinition> clusteringKeysIndexed = new LinkedHashMap<>();
    Map<String, ColumnDefinition> partitionKeysIndexed = new LinkedHashMap<>();
    // gather the options of all mapped fields
    Set<String> indexedColumnNames = new TreeSet<>();
    indexedColumnNames.addAll(mapping.getFields().keySet());
    Set<String> added = new HashSet<>(indexedColumnNames.size());
    List<ColumnDefinition> partitionKeys = baseCfs.metadata.partitionKeyColumns();
    List<ColumnDefinition> clusteringKeys = baseCfs.metadata.clusteringColumns();
    for (ColumnDefinition colDef : partitionKeys) {
        String columnName = colDef.name.toString();
        if (Options.logger.isDebugEnabled()) {
            Options.logger.debug("Partition key name is {} and index is {}", columnName, colDef.position());
        }
        validators.put(columnName, colDef);
        if (indexedColumnNames.contains(columnName)) {
            partitionKeysIndexed.put(columnName, colDef);
            addPropertiesAndFieldType(mapping, numericFieldOptions, fieldDocValueTypes, collectionFieldDocValueTypes, fieldTypes, collectionFieldTypes, added, colDef, columnName);
        }
    }
    for (ColumnDefinition colDef : clusteringKeys) {
        String columnName = colDef.name.toString();
        if (Options.logger.isDebugEnabled()) {
            Options.logger.debug("Clustering key name is {} and index is {}", columnName, colDef.position() + 1);
        }
        validators.put(columnName, colDef);
        if (indexedColumnNames.contains(columnName)) {
            clusteringKeysIndexed.put(columnName, colDef);
            addPropertiesAndFieldType(mapping, numericFieldOptions, fieldDocValueTypes, collectionFieldDocValueTypes, fieldTypes, collectionFieldTypes, added, colDef, columnName);
        }
    }
    for (String columnName : indexedColumnNames) {
        if (added.add(columnName.toLowerCase())) {
            Properties options = mapping.getFields().get(columnName);
            ColumnDefinition colDef = getColumnDefinition(baseCfs, columnName);
            if (colDef != null) {
                validators.put(columnName, colDef);
                addFieldType(columnName, colDef.type, options, numericFieldOptions, fieldDocValueTypes, collectionFieldDocValueTypes, fieldTypes, collectionFieldTypes);
            } else {
                throw new IllegalArgumentException(String.format("Column Definition for %s not found", columnName));
            }
            if (options.getType() == Type.object) {
                mapping.getFields().putAll(options.getFields());
            }
        }
    }
    Set<ColumnDefinition> otherColumns = baseCfs.metadata.regularColumns();
    for (ColumnDefinition colDef : otherColumns) {
        String columnName = UTF8Type.instance.getString(colDef.name.bytes);
        validators.put(columnName, colDef);
    }
    numericFieldOptions.putAll(mapping.getDynamicNumericConfig());
    Analyzer defaultAnalyzer = mapping.getLuceneAnalyzer();
    Analyzer analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer, mapping.perFieldAnalyzers());
    Map<String, Type> types = new TreeMap<>();
    Set<String> nestedFields = new TreeSet<>();
    for (Map.Entry<String, ColumnDefinition> entry : validators.entrySet()) {
        CQL3Type cql3Type = entry.getValue().type.asCQL3Type();
        AbstractType inner = getValueValidator(cql3Type.getType());
        if (cql3Type.isCollection()) {
            types.put(entry.getKey(), fromAbstractType(inner.asCQL3Type()));
            nestedFields.add(entry.getKey());
        } else {
            types.put(entry.getKey(), fromAbstractType(cql3Type));
        }
    }
    return new Options(mapping, numericFieldOptions, fieldDocValueTypes, collectionFieldDocValueTypes, fieldTypes, collectionFieldTypes, types, nestedFields, clusteringKeysIndexed, partitionKeysIndexed, indexedColumnNames, analyzer, colName);
}
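The interesting part for Analyzer users is the last step: PerFieldAnalyzerWrapper routes each field to its own analyzer and falls back to the default for every other field. A minimal illustration of that wiring (the field names and analyzers here are invented, not taken from stargate-core):

Analyzer defaultAnalyzer = new StandardAnalyzer();
Map<String, Analyzer> perField = new HashMap<>();
perField.put("city", new KeywordAnalyzer());   // index city values verbatim
perField.put("name", new EnglishAnalyzer());   // stem English text in name
Analyzer analyzer = new PerFieldAnalyzerWrapper(defaultAnalyzer, perField);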
Use of org.apache.lucene.analysis.Analyzer in project stargate-core by tuplejump.
The class PhraseCondition, method query:
/**
 * {@inheritDoc}
 */
@Override
public Query query(Options schema) {
    if (field == null || field.trim().isEmpty()) {
        throw new IllegalArgumentException("Field name required");
    }
    if (values == null) {
        throw new IllegalArgumentException("Field values required");
    }
    if (slop == null) {
        throw new IllegalArgumentException("Slop required");
    }
    if (slop < 0) {
        throw new IllegalArgumentException("Slop must be non-negative");
    }
    Properties properties = schema.getProperties(field);
    Type fieldType = properties != null ? properties.getType() : Type.text;
    if (fieldType.isCharSeq()) {
        Analyzer analyzer = schema.analyzer;
        PhraseQuery.Builder query = new PhraseQuery.Builder();
        query.setSlop(slop);
        int count = 0;
        for (String value : values) {
            if (value != null) {
                String analyzedValue = analyze(field, value, analyzer);
                if (analyzedValue != null) {
                    Term term = new Term(field, analyzedValue);
                    query.add(term, count);
                }
            }
            count++;
        }
        return query.build();
    }
    String message = String.format("Phrase queries are not supported on field '%s' until a mapping is defined", field);
    throw new UnsupportedOperationException(message);
}
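The analyze(field, value, analyzer) helper is not part of this excerpt. A hypothetical sketch of what it plausibly does: run the value through the analyzer's token stream and return the first token, or null when the value analyzes to nothing (for example, a stop word), which is why the caller checks for null before adding the term.

private static String analyze(String field, String value, Analyzer analyzer) {
    try (TokenStream stream = analyzer.tokenStream(field, value)) {
        CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        String token = stream.incrementToken() ? termAtt.toString() : null;
        stream.end();
        return token;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}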
Use of org.apache.lucene.analysis.Analyzer in project textdb by TextDB.
The class RelationManager, method createTable:
/**
 * Creates a new table.
 * The table name must be unique (case-insensitive), and luceneAnalyzerString
 * must be a valid analyzer string.
 *
 * The "_id" attribute is added to the table schema: the system automatically
 * generates a unique ID for each tuple inserted into the table and stores it
 * in the "_id" field.
 *
 * @param tableName the name of the table; must be unique (case-insensitive)
 * @param indexDirectory the directory to store the index and data; must not overlap with another table's directory
 * @param schema the schema of the table
 * @param luceneAnalyzerString the string representing the Lucene analyzer to use
 * @throws StorageException
 */
public void createTable(String tableName, String indexDirectory, Schema schema, String luceneAnalyzerString) throws StorageException {
    // convert the table name to lower case
    tableName = tableName.toLowerCase();
    // the table must not already exist
    if (checkTableExistence(tableName)) {
        throw new StorageException(String.format("Table %s already exists.", tableName));
    }
    // create the index directory if it doesn't exist, and convert it to its absolute (real) path
    try {
        Path indexPath = Paths.get(indexDirectory);
        if (Files.notExists(indexPath)) {
            Files.createDirectories(indexPath);
        }
        indexDirectory = indexPath.toRealPath().toString();
    } catch (IOException e) {
        throw new StorageException(e);
    }
    // check if the indexDirectory overlaps with another table's index directory
    Query indexDirectoryQuery = new TermQuery(new Term(CatalogConstants.TABLE_DIRECTORY, indexDirectory));
    DataReader tableCatalogDataReader = new DataReader(CatalogConstants.TABLE_CATALOG_DATASTORE, indexDirectoryQuery);
    tableCatalogDataReader.setPayloadAdded(false);
    tableCatalogDataReader.open();
    Tuple nextTuple = tableCatalogDataReader.getNextTuple();
    tableCatalogDataReader.close();
    // if the index directory is already taken by another table, throw an exception
    if (nextTuple != null) {
        String overlapTableName = nextTuple.getField(CatalogConstants.TABLE_NAME).getValue().toString();
        throw new StorageException(String.format("Table %s already takes the index directory %s. Please choose another directory.", overlapTableName, indexDirectory));
    }
    // check that the lucene analyzer string is valid
    Analyzer luceneAnalyzer = null;
    try {
        luceneAnalyzer = LuceneAnalyzerConstants.getLuceneAnalyzer(luceneAnalyzerString);
    } catch (DataFlowException e) {
        throw new StorageException("Lucene Analyzer String is not valid.");
    }
    // clear any existing data in the index directory
    Schema tableSchema = Utils.getSchemaWithID(schema);
    DataStore tableDataStore = new DataStore(indexDirectory, tableSchema);
    DataWriter dataWriter = new DataWriter(tableDataStore, luceneAnalyzer);
    dataWriter.open();
    dataWriter.clearData();
    dataWriter.close();
    // write table info to the catalog
    writeTableInfoToCatalog(tableName, indexDirectory, schema, luceneAnalyzerString);
}
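A hedged usage sketch of the contract above; the table name, directory, and analyzer string are illustrative, and getPersonSchema() is a hypothetical helper standing in for a Schema built with textdb's Schema/Attribute API.

Schema personSchema = getPersonSchema();    // hypothetical helper
relationManager.createTable(
        "People",                           // stored lower-cased as "people"
        "./index/people",                   // created if absent, resolved to a real path
        personSchema,
        "standard");                        // assumed to be a valid analyzer string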
Use of org.apache.lucene.analysis.Analyzer in project textdb by TextDB.
The class RelationManagerTest, method test1:
/*
 * Tests that the information about the "table catalog" itself is stored properly.
 */
@Test
public void test1() throws Exception {
    String tableCatalogDirectory = relationManager.getTableDirectory(CatalogConstants.TABLE_CATALOG);
    Analyzer tableCatalogLuceneAnalyzer = relationManager.getTableAnalyzer(CatalogConstants.TABLE_CATALOG);
    Schema tableCatalogSchema = relationManager.getTableSchema(CatalogConstants.TABLE_CATALOG);
    Assert.assertEquals(tableCatalogDirectory, new File(CatalogConstants.TABLE_CATALOG_DIRECTORY).getCanonicalPath());
    Assert.assertTrue(tableCatalogLuceneAnalyzer instanceof StandardAnalyzer);
    Assert.assertEquals(tableCatalogSchema, Utils.getSchemaWithID(CatalogConstants.TABLE_CATALOG_SCHEMA));
}
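By the same contract, a user-created table should round-trip its metadata. A hedged follow-up sketch reusing the illustrative names from the createTable example above:

relationManager.createTable("people", "./index/people", personSchema, "standard");
Assert.assertEquals(new File("./index/people").getCanonicalPath(),
        relationManager.getTableDirectory("people"));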