Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
From the class AnalysisRequestHandlerBase, method convertTokensToNamedLists:
/**
 * Converts the list of Tokens to a list of NamedLists representing the tokens.
 *
 * @param tokenList Tokens to convert
 * @param context   The analysis context
 *
 * @return List of NamedLists containing the relevant information taken from the tokens
 */
private List<NamedList> convertTokensToNamedLists(final List<AttributeSource> tokenList, AnalysisContext context) {
  final List<NamedList> tokensNamedLists = new ArrayList<>();
  final FieldType fieldType = context.getFieldType();
  final AttributeSource[] tokens = tokenList.toArray(new AttributeSource[tokenList.size()]);
  // sort the tokens by absolute position
  ArrayUtil.timSort(tokens, new Comparator<AttributeSource>() {

    @Override
    public int compare(AttributeSource a, AttributeSource b) {
      return arrayCompare(a.getAttribute(TokenTrackingAttribute.class).getPositions(),
                          b.getAttribute(TokenTrackingAttribute.class).getPositions());
    }

    private int arrayCompare(int[] a, int[] b) {
      int p = 0;
      final int stop = Math.min(a.length, b.length);
      while (p < stop) {
        int diff = a[p] - b[p];
        if (diff != 0) return diff;
        p++;
      }
      // One is a prefix of the other, or, they are equal:
      return a.length - b.length;
    }
  });
  for (int i = 0; i < tokens.length; i++) {
    AttributeSource token = tokens[i];
    final NamedList<Object> tokenNamedList = new SimpleOrderedMap<>();
    final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
    BytesRef rawBytes = termAtt.getBytesRef();
    final String text = fieldType.indexedToReadable(rawBytes, new CharsRefBuilder()).toString();
    tokenNamedList.add("text", text);
    if (token.hasAttribute(CharTermAttribute.class)) {
      final String rawText = token.getAttribute(CharTermAttribute.class).toString();
      if (!rawText.equals(text)) {
        tokenNamedList.add("raw_text", rawText);
      }
    }
    tokenNamedList.add("raw_bytes", rawBytes.toString());
    if (context.getTermsToMatch().contains(rawBytes)) {
      tokenNamedList.add("match", true);
    }
    token.reflectWith(new AttributeReflector() {

      @Override
      public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
        // leave out position and bytes term
        if (TermToBytesRefAttribute.class.isAssignableFrom(attClass)) return;
        if (CharTermAttribute.class.isAssignableFrom(attClass)) return;
        if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) return;
        String k = attClass.getName() + '#' + key;
        // map keys for "standard attributes":
        if (ATTRIBUTE_MAPPING.containsKey(k)) {
          k = ATTRIBUTE_MAPPING.get(k);
        }
        if (value instanceof BytesRef) {
          final BytesRef p = (BytesRef) value;
          value = p.toString();
        }
        tokenNamedList.add(k, value);
      }
    });
    tokensNamedLists.add(tokenNamedList);
  }
  return tokensNamedLists;
}
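The comparator passed to ArrayUtil.timSort orders tokens lexicographically by the position arrays recorded in TokenTrackingAttribute, so a token whose position path is a prefix of another's sorts first. A minimal standalone sketch of that comparison on plain int[] arrays; the class name and sample data are made up for illustration:

import java.util.Arrays;

public class PositionSortSketch {

  // Mirrors arrayCompare above: lexicographic comparison of position arrays.
  static int arrayCompare(int[] a, int[] b) {
    final int stop = Math.min(a.length, b.length);
    for (int p = 0; p < stop; p++) {
      int diff = a[p] - b[p];
      if (diff != 0) return diff;
    }
    // One is a prefix of the other, or they are equal.
    return a.length - b.length;
  }

  public static void main(String[] args) {
    int[][] positions = { {2, 1}, {1}, {1, 3}, {2} };
    Arrays.sort(positions, PositionSortSketch::arrayCompare);
    for (int[] p : positions) {
      System.out.println(Arrays.toString(p)); // prints [1], [1, 3], [2], [2, 1]
    }
  }
}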
Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
From the class DocumentAnalysisRequestHandler, method handleAnalysisRequest:
/**
 * Handles the resolved {@link DocumentAnalysisRequest} and returns the analysis response as a named list.
 *
 * @param request The {@link DocumentAnalysisRequest} to be handled.
 * @param schema  The index schema.
 *
 * @return The analysis response as a named list.
 */
NamedList<Object> handleAnalysisRequest(DocumentAnalysisRequest request, IndexSchema schema) {
  SchemaField uniqueKeyField = schema.getUniqueKeyField();
  NamedList<Object> result = new SimpleOrderedMap<>();
  for (SolrInputDocument document : request.getDocuments()) {
    NamedList<NamedList> theTokens = new SimpleOrderedMap<>();
    result.add(document.getFieldValue(uniqueKeyField.getName()).toString(), theTokens);
    for (String name : document.getFieldNames()) {
      // there's no point in providing analysis for unindexed fields.
      SchemaField field = schema.getField(name);
      if (!field.indexed()) {
        continue;
      }
      NamedList<Object> fieldTokens = new SimpleOrderedMap<>();
      theTokens.add(name, fieldTokens);
      FieldType fieldType = schema.getFieldType(name);
      final String queryValue = request.getQuery();
      Set<BytesRef> termsToMatch;
      try {
        termsToMatch = (queryValue != null && request.isShowMatch())
            ? getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer())
            : EMPTY_BYTES_SET;
      } catch (Exception e) {
        // ignore analysis exceptions since we are applying arbitrary text to all fields
        termsToMatch = EMPTY_BYTES_SET;
      }
      if (request.getQuery() != null) {
        try {
          AnalysisContext analysisContext = new AnalysisContext(fieldType, fieldType.getQueryAnalyzer(), EMPTY_BYTES_SET);
          fieldTokens.add("query", analyzeValue(request.getQuery(), analysisContext));
        } catch (Exception e) {
          // ignore analysis exceptions since we are applying arbitrary text to all fields
        }
      }
      Analyzer analyzer = fieldType.getIndexAnalyzer();
      AnalysisContext analysisContext = new AnalysisContext(fieldType, analyzer, termsToMatch);
      Collection<Object> fieldValues = document.getFieldValues(name);
      NamedList<NamedList<? extends Object>> indexTokens = new SimpleOrderedMap<>();
      for (Object fieldValue : fieldValues) {
        indexTokens.add(String.valueOf(fieldValue), analyzeValue(fieldValue.toString(), analysisContext));
      }
      fieldTokens.add("index", indexTokens);
    }
  }
  return result;
}
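This method backs Solr's document analysis handler, registered at /analysis/document. A hedged SolrJ sketch of a client call that would exercise it; the core URL, field names, and query value are placeholders:

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.DocumentAnalysisRequest;
import org.apache.solr.client.solrj.response.DocumentAnalysisResponse;
import org.apache.solr.common.SolrInputDocument;

public class DocumentAnalysisSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/mycore").build()) {
      SolrInputDocument doc = new SolrInputDocument();
      doc.addField("id", "1");
      doc.addField("title", "The Quick Brown Fox");

      DocumentAnalysisRequest request = new DocumentAnalysisRequest()
          .addDocument(doc)
          .setQuery("quick")    // analyzed per field so matching tokens get the "match" flag
          .setShowMatch(true);

      DocumentAnalysisResponse response = request.process(client);
      System.out.println(response);
    }
  }
}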
Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
From the class BlobHandler, method handleRequestBody:
@Override
public void handleRequestBody(final SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
  String httpMethod = req.getHttpMethod();
  String path = (String) req.getContext().get("path");
  SolrConfigHandler.setWt(req, JSON);
  List<String> pieces = StrUtils.splitSmart(path, '/');
  String blobName = null;
  if (pieces.size() >= 3) blobName = pieces.get(2);
  if ("POST".equals(httpMethod)) {
    if (blobName == null || blobName.isEmpty()) {
      rsp.add("error", "Name not found");
      return;
    }
    String err = SolrConfigHandler.validateName(blobName);
    if (err != null) {
      log.warn("invalid blob name");
      rsp.add("error", err);
      return;
    }
    if (req.getContentStreams() == null) {
      log.warn("no content stream");
      rsp.add("error", "No stream");
      return;
    }
    for (ContentStream stream : req.getContentStreams()) {
      ByteBuffer payload = SimplePostTool.inputStreamToByteArray(stream.getStream(), maxSize);
      MessageDigest m = MessageDigest.getInstance("MD5");
      m.update(payload.array(), payload.position(), payload.limit());
      String md5 = new BigInteger(1, m.digest()).toString(16);
      TopDocs duplicate = req.getSearcher().search(new TermQuery(new Term("md5", md5)), 1);
      if (duplicate.totalHits > 0) {
        rsp.add("error", "duplicate entry");
        forward(req, null, new MapSolrParams((Map) makeMap(
            "q", "md5:" + md5,
            "fl", "id,size,version,timestamp,blobName")), rsp);
        log.warn("duplicate entry for blob: " + blobName);
        return;
      }
      TopFieldDocs docs = req.getSearcher().search(new TermQuery(new Term("blobName", blobName)), 1,
          new Sort(new SortField("version", SortField.Type.LONG, true)));
      long version = 0;
      if (docs.totalHits > 0) {
        Document doc = req.getSearcher().doc(docs.scoreDocs[0].doc);
        Number n = doc.getField("version").numericValue();
        version = n.longValue();
      }
      version++;
      String id = blobName + "/" + version;
      Map<String, Object> doc = makeMap(
          ID, id,
          "md5", md5,
          "blobName", blobName,
          VERSION, version,
          "timestamp", new Date(),
          "size", payload.limit(),
          "blob", payload);
      verifyWithRealtimeGet(blobName, version, req, doc);
      log.info(StrUtils.formatString("inserting new blob {0}, size {1}, md5 {2}", doc.get(ID), String.valueOf(payload.limit()), md5));
      indexMap(req, rsp, doc);
      log.info("Successfully added and committed a blob with id {} and size {}", id, payload.limit());
      break;
    }
  } else {
    int version = -1;
    if (pieces.size() > 3) {
      try {
        version = Integer.parseInt(pieces.get(3));
      } catch (NumberFormatException e) {
        rsp.add("error", "Invalid version: " + pieces.get(3));
        return;
      }
    }
    if (ReplicationHandler.FILE_STREAM.equals(req.getParams().get(CommonParams.WT))) {
      if (blobName == null) {
        throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Please send the request in the format /blob/<blobName>/<version>");
      } else {
        String q = "blobName:{0}";
        if (version != -1) q = "id:{0}/{1}";
        QParser qparser = QParser.getParser(StrUtils.formatString(q, blobName, version), req);
        final TopDocs docs = req.getSearcher().search(qparser.parse(), 1,
            new Sort(new SortField("version", SortField.Type.LONG, true)));
        if (docs.totalHits > 0) {
          rsp.add(ReplicationHandler.FILE_STREAM, new SolrCore.RawWriter() {

            @Override
            public void write(OutputStream os) throws IOException {
              Document doc = req.getSearcher().doc(docs.scoreDocs[0].doc);
              IndexableField sf = doc.getField("blob");
              FieldType fieldType = req.getSchema().getField("blob").getType();
              ByteBuffer buf = (ByteBuffer) fieldType.toObject(sf);
              if (buf == null) {
                // should never happen unless a user wrote this document directly
                throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Invalid document. No field called blob");
              } else {
                os.write(buf.array(), 0, buf.limit());
              }
            }
          });
        } else {
          throw new SolrException(SolrException.ErrorCode.NOT_FOUND, StrUtils.formatString("Invalid combination of blobName {0} and version {1}", blobName, version));
        }
      }
    } else {
      String q = "*:*";
      if (blobName != null) {
        q = "blobName:{0}";
        if (version != -1) {
          q = "id:{0}/{1}";
        }
      }
      forward(req, null, new MapSolrParams((Map) makeMap(
          "q", StrUtils.formatString(q, blobName, version),
          "fl", "id,size,version,timestamp,blobName,md5",
          SORT, "version desc")), rsp);
    }
  }
}
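On the POST path, the dedup key is an MD5 over the raw payload bytes, rendered as hex via BigInteger, which drops leading zero digits (so the string can be shorter than 32 characters). A small standalone sketch of that same computation, with a made-up payload:

import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;

public class BlobMd5Sketch {
  public static void main(String[] args) throws Exception {
    // Made-up payload; the handler reads this from the request's content stream.
    ByteBuffer payload = ByteBuffer.wrap("example blob payload".getBytes(StandardCharsets.UTF_8));

    MessageDigest m = MessageDigest.getInstance("MD5");
    m.update(payload.array(), payload.position(), payload.limit());
    // Matches the handler: BigInteger strips leading zeros from the hex digest.
    String md5 = new BigInteger(1, m.digest()).toString(16);

    System.out.println("md5 = " + md5);
  }
}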
Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
From the class TermsComponent, method fetchTerms:
private static void fetchTerms(SolrIndexSearcher indexSearcher, String[] fields, String termList, boolean includeTotalTermFreq, NamedList<Object> result) throws IOException {
  String[] splitTerms = termList.split(",");
  for (int i = 0; i < splitTerms.length; i++) {
    splitTerms[i] = splitTerms[i].trim();
  }
  // Sort the terms once
  Arrays.sort(splitTerms);
  IndexReaderContext topReaderContext = indexSearcher.getTopReaderContext();
  for (String field : fields) {
    FieldType fieldType = indexSearcher.getSchema().getField(field).getType();
    // Since splitTerms is already sorted, this array will also be sorted
    Term[] terms = new Term[splitTerms.length];
    for (int i = 0; i < splitTerms.length; i++) {
      terms[i] = new Term(field, fieldType.readableToIndexed(splitTerms[i]));
    }
    TermContext[] termContexts = new TermContext[terms.length];
    collectTermContext(topReaderContext, termContexts, terms);
    NamedList<Object> termsMap = new SimpleOrderedMap<>();
    for (int i = 0; i < terms.length; i++) {
      if (termContexts[i] != null) {
        String outTerm = fieldType.indexedToReadable(terms[i].bytes().utf8ToString());
        int docFreq = termContexts[i].docFreq();
        if (!includeTotalTermFreq) {
          termsMap.add(outTerm, docFreq);
        } else {
          long totalTermFreq = termContexts[i].totalTermFreq();
          NamedList<Long> termStats = new SimpleOrderedMap<>();
          termStats.add("df", (long) docFreq);
          termStats.add("ttf", totalTermFreq);
          termsMap.add(outTerm, termStats);
        }
      }
    }
    result.add(field, termsMap);
  }
}
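fetchTerms serves the TermsComponent's terms.list mode: readableToIndexed converts each user-supplied value to its indexed form before the lookup, and indexedToReadable converts it back for the response. A hedged SolrJ sketch of a request that would route through it; the core URL, field, and term values are placeholders:

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class TermsListSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/mycore").build()) {
      SolrQuery query = new SolrQuery();
      query.setRequestHandler("/terms");
      query.set("terms", true);
      query.set("terms.fl", "title");
      // Comma-separated values; fetchTerms above trims and sorts them.
      query.set("terms.list", "fox,quick");
      // With terms.ttf=true each term gets the "df"/"ttf" stats shown above.
      query.set("terms.ttf", true);
      QueryResponse response = client.query(query);
      System.out.println(response.getResponse().get("terms"));
    }
  }
}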
Use of org.apache.solr.schema.FieldType in project lucene-solr by apache.
From the class SimpleFacets, method getListedTermCounts:
/**
 * Computes the term->count counts for the specified term values relative to the given docset.
 *
 * @param field  the name of the field to compute term counts against
 * @param parsed contains the docset to compute term counts relative to
 * @param terms  a list of term values (in the specified field) to compute the counts for
 */
protected NamedList<Integer> getListedTermCounts(String field, final ParsedParams parsed, List<String> terms) throws IOException {
  SchemaField sf = searcher.getSchema().getField(field);
  FieldType ft = sf.getType();
  NamedList<Integer> res = new NamedList<>();
  for (String term : terms) {
    int count = searcher.numDocs(ft.getFieldQuery(null, sf, term), parsed.docs);
    res.add(term, count);
  }
  return res;
}
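getListedTermCounts is reached when a facet.field carries the {!terms=...} local parameter, which restricts counting to exactly the listed values. A hedged SolrJ sketch of such a request; the core URL, field name, and values are placeholders:

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class ListedTermCountsSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/mycore").build()) {
      SolrQuery query = new SolrQuery("*:*");
      query.setFacet(true);
      // The {!terms=...} local param limits faceting to the listed values,
      // each counted with FieldType.getFieldQuery as in the method above.
      query.addFacetField("{!terms=red,green,blue}color");
      QueryResponse response = client.query(query);
      System.out.println(response.getFacetFields());
    }
  }
}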