use of gate.creole.annic.apache.lucene.index.Term in project gate-core by GateNLP.
the class FieldCacheImpl method getStringIndex.
/**
 * Builds (or fetches from cache) a {@link StringIndex} for {@code field}:
 * {@code retArray[doc]} holds the ordinal of the document's term in this
 * field, and {@code mterms[ordinal]} holds the term text itself.  Ordinal 0
 * is a null sentinel for documents that have no term in the field.
 * Assumes at most one term per document (standard sort-field semantics).
 */
@Override
public StringIndex getStringIndex(IndexReader reader, String field) throws IOException {
    // intern() so the identity comparison (term.field() != field) below is valid
    field = field.intern();
    Object ret = lookup(reader, field, STRING_INDEX);
    if (ret == null) {
        final int[] retArray = new int[reader.maxDoc()];
        // one extra slot: index 0 is reserved for "document has no term"
        String[] mterms = new String[reader.maxDoc() + 1];
        if (retArray.length > 0) {
            TermDocs termDocs = reader.termDocs();
            // positions the enumerator at the first term of this field
            TermEnum termEnum = reader.terms(new Term(field, ""));
            // current term number
            int t = 0;
            // an entry for documents that have no terms in this field
            // should a document with no terms be at top or bottom?
            // this puts them at the top - if it is changed, FieldDocSortedHitQueue
            // needs to change as well.
            mterms[t++] = null;
            try {
                if (termEnum.term() == null) {
                    throw new RuntimeException("no terms in field " + field);
                }
                do {
                    Term term = termEnum.term();
                    // identity compare is safe: both strings are interned
                    if (term.field() != field)
                        break;
                    // we expect that there is at most one term per document
                    if (t >= mterms.length)
                        throw new RuntimeException("there are more terms than documents in field \"" + field + "\"");
                    mterms[t] = term.text();
                    // record ordinal t for every document containing this term
                    termDocs.seek(termEnum);
                    while (termDocs.next()) {
                        retArray[termDocs.doc()] = t;
                    }
                    t++;
                } while (termEnum.next());
            } finally {
                termDocs.close();
                termEnum.close();
            }
            if (t == 0) {
                // if there are no terms, make the term array
                // have a single null entry
                // NOTE(review): t is always >= 1 here (null sentinel stored above),
                // so this branch looks unreachable — kept as in the original code
                mterms = new String[1];
            } else if (t < mterms.length) {
                // if there are less terms than documents,
                // trim off the dead array space
                String[] terms = new String[t];
                System.arraycopy(mterms, 0, terms, 0, t);
                mterms = terms;
            }
        }
        StringIndex value = new StringIndex(retArray, mterms);
        store(reader, field, STRING_INDEX, value);
        return value;
    }
    return (StringIndex) ret;
}
use of gate.creole.annic.apache.lucene.index.Term in project gate-core by GateNLP.
the class FieldCacheImpl method getInts.
/**
 * Returns a per-document array of the int values stored in {@code field},
 * parsing each term's text with {@link Integer#parseInt(String)}.  The
 * array is cached per reader/field under the {@code SortField.INT} key.
 */
@Override
public int[] getInts(IndexReader reader, String field) throws IOException {
    // interned so the field-name identity check below is valid
    field = field.intern();
    Object cached = lookup(reader, field, SortField.INT);
    if (cached != null) {
        return (int[]) cached;
    }
    final int[] values = new int[reader.maxDoc()];
    if (values.length > 0) {
        TermDocs docs = reader.termDocs();
        TermEnum enumerator = reader.terms(new Term(field, ""));
        try {
            if (enumerator.term() == null) {
                throw new RuntimeException("no terms in field " + field);
            }
            do {
                Term current = enumerator.term();
                // identity compare works because both strings are interned
                if (current.field() != field) {
                    break;
                }
                int parsed = Integer.parseInt(current.text());
                // assign this value to every document containing the term
                docs.seek(enumerator);
                while (docs.next()) {
                    values[docs.doc()] = parsed;
                }
            } while (enumerator.next());
        } finally {
            docs.close();
            enumerator.close();
        }
    }
    store(reader, field, SortField.INT, values);
    return values;
}
use of gate.creole.annic.apache.lucene.index.Term in project gate-core by GateNLP.
the class FieldCacheImpl method getAuto.
// The regex patterns once used to detect integer/float field values were
// removed for Java 1.3 compatibility:
//   protected static final Pattern pIntegers = Pattern.compile ("[0-9\\-]+");
//   protected static final Object pFloats = Pattern.compile ("[0-9+\\-\\.eEfFdD]+");

/**
 * Guesses the sort type of {@code field} by probing its first term: if the
 * text parses as an int the field is loaded via {@link #getInts}, else if it
 * parses as a float via {@code getFloats}, otherwise as a string index.  The
 * chosen result is cached under the {@code SortField.AUTO} key.
 */
@Override
public Object getAuto(IndexReader reader, String field) throws IOException {
    // interned so the field-name identity check below is valid
    field = field.intern();
    Object cached = lookup(reader, field, SortField.AUTO);
    if (cached != null) {
        return cached;
    }
    Object result = null;
    TermEnum enumerator = reader.terms(new Term(field, ""));
    try {
        Term first = enumerator.term();
        if (first == null) {
            throw new RuntimeException("no terms in field " + field + " - cannot determine sort type");
        }
        if (first.field() != field) {
            // enumerator skipped straight past this field: nothing indexed in it
            throw new RuntimeException("field \"" + field + "\" does not appear to be indexed");
        }
        String probe = first.text().trim();
        // Java 1.3 level code: detect the type with parse attempts, not regexes
        try {
            Integer.parseInt(probe);
            result = getInts(reader, field);
        } catch (NumberFormatException notAnInt) {
            try {
                Float.parseFloat(probe);
                result = getFloats(reader, field);
            } catch (NumberFormatException notAFloat) {
                result = getStringIndex(reader, field);
            }
        }
        if (result != null) {
            store(reader, field, SortField.AUTO, result);
        }
    } finally {
        enumerator.close();
    }
    return result;
}
use of gate.creole.annic.apache.lucene.index.Term in project gate-core by GateNLP.
the class FieldCacheImpl method getCustom.
/**
 * Returns a per-document array of {@link Comparable} sort values for
 * {@code field}, produced by {@code comparator.getComparable(termText)}.
 * The array is cached per reader/field, keyed by the comparator.
 *
 * <p>Fixes two cache bugs in the previous version: the result was stored
 * under {@code SortField.CUSTOM} but looked up under {@code comparator}
 * (so the cache could never hit its own entry), and a cache hit was cast
 * to {@code String[]} although a {@code Comparable[]} is stored, which
 * would throw {@link ClassCastException}.  Upstream Lucene 1.4 stores
 * under the comparator key.
 */
@Override
public Comparable[] getCustom(IndexReader reader, String field, SortComparator comparator) throws IOException {
    // interned so the field-name identity check below is valid
    field = field.intern();
    Object ret = lookup(reader, field, comparator);
    if (ret == null) {
        final Comparable[] retArray = new Comparable[reader.maxDoc()];
        if (retArray.length > 0) {
            TermDocs termDocs = reader.termDocs();
            TermEnum termEnum = reader.terms(new Term(field, ""));
            try {
                if (termEnum.term() == null) {
                    throw new RuntimeException("no terms in field " + field);
                }
                do {
                    Term term = termEnum.term();
                    // identity compare works because both strings are interned
                    if (term.field() != field)
                        break;
                    Comparable termval = comparator.getComparable(term.text());
                    // assign this value to every document containing the term
                    termDocs.seek(termEnum);
                    while (termDocs.next()) {
                        retArray[termDocs.doc()] = termval;
                    }
                } while (termEnum.next());
            } finally {
                termDocs.close();
                termEnum.close();
            }
        }
        // store under the same key used by the lookup above (was SortField.CUSTOM)
        store(reader, field, comparator, retArray);
        return retArray;
    }
    // was (String[]) — a ClassCastException for any non-String Comparable[]
    return (Comparable[]) ret;
}
use of gate.creole.annic.apache.lucene.index.Term in project gate-core by GateNLP.
the class QueryParser method createTerms.
/**
 * Parses one element of an ANNIC query into Lucene {@link Term}s.
 *
 * Supported shapes (based on the branches below):
 * <ul>
 *   <li>{@code {AnnotationType, AnnotationType...}} — types only</li>
 *   <li>{@code {AnnotationType==String}} / {@code {AnnotationType=="String"}}</li>
 *   <li>{@code {AT, AT.feature==value, AT=="String"}} — mixed list</li>
 *   <li>a bare (possibly quoted) string, split into one term per token</li>
 * </ul>
 *
 * Side effects: advances the instance field {@code position} (one step per
 * query slot) and clears {@code areAllTermsTokens} as soon as any term is not
 * the base token annotation type.
 *
 * @param elem one trimmed query element, with or without surrounding braces
 * @return a three-element array: the List of Terms, a parallel List of
 *         Integer positions, and a parallel List of Booleans where only the
 *         first term of each brace group is marked {@code true}
 * @throws gate.creole.ir.SearchException if a '.' feature access appears
 *         without a matching '==' operator
 */
public List<?>[] createTerms(String elem) throws gate.creole.ir.SearchException {
    areAllTermsTokens = true;
    List<Term> terms = new ArrayList<Term>();
    List<Integer> pos = new ArrayList<Integer>();
    List<Boolean> consider = new ArrayList<Boolean>();
    elem = elem.trim();
    if (elem.charAt(0) == '{' && elem.charAt(elem.length() - 1) == '}') {
        // possible
        // strip the surrounding braces, then dispatch on whether the content
        // contains "==" (comparison) and/or '.' (feature access)
        elem = elem.substring(1, elem.length() - 1);
        int index = elem.indexOf("==");
        int index1 = findIndexOf(elem, '.');
        if (index == -1 && index1 == -1) {
            // 3. {AnnotationType}
            // this can be {AnnotationType, AnnotationType...}
            ArrayList<String> fields = splitString(elem, ',', true);
            for (int p = 0; p < fields.size(); p++) {
                if (areAllTermsTokens && !fields.get(p).equals(baseTokenAnnotationType))
                    areAllTermsTokens = false;
                // "*" matches any text for this annotation type
                terms.add(new Term(field, norm(fields.get(p)), "*"));
                pos.add(position);
                // only the first term of the group is "considered" for length
                consider.add(p == 0);
            }
            position++;
        } else if (index != -1 && index1 == -1) {
            // 4. {AnnotationType==String}
            // 5. {AnnotationType=="String"}
            ArrayList<String> fields = splitString(elem, ',', false);
            for (int p = 0; p < fields.size(); p++) {
                index = fields.get(p).indexOf("==");
                // {AnnotationType, AnnotationType=="String"}
                if (index != -1) {
                    String annotType = norm(fields.get(p).substring(0, index).trim());
                    String annotText = norm(fields.get(p).substring(index + 2, fields.get(p).length()).trim());
                    // drop surrounding double quotes, if any
                    if (annotText.length() > 2 && annotText.charAt(0) == '\"' && annotText.charAt(annotText.length() - 1) == '\"') {
                        annotText = annotText.substring(1, annotText.length() - 1);
                    }
                    if (!annotType.trim().equals(baseTokenAnnotationType))
                        areAllTermsTokens = false;
                    // AT=="S" is indexed as the AT.string feature
                    terms.add(new Term(field, annotText, annotType + ".string"));
                    pos.add(position);
                    consider.add(p == 0);
                } else {
                    // plain {AnnotationType} entry inside the list
                    if (!(norm(fields.get(p))).equals(baseTokenAnnotationType))
                        areAllTermsTokens = false;
                    terms.add(new Term(field, norm(fields.get(p)), "*"));
                    pos.add(position);
                    consider.add(p == 0);
                }
            }
            position++;
        } else if (index == -1 && index1 != -1) {
            // a '.' with no "==" is malformed, e.g. {AT.feature}
            throw new SearchException("missing operator", "an equal operator (==) is missing", elem, (elem.indexOf("=", index1) != -1) ? elem.indexOf("=", index1) : elem.length());
        } else if (index != -1 && index1 != -1) {
            // it can be {AT, AT.f==S, AT=="S"}
            int index2 = findIndexOf(elem, ',');
            String[] subElems = null;
            if (index2 == -1) {
                subElems = new String[] { elem };
            } else {
                ArrayList<String> list = splitString(elem, ',', false);
                subElems = new String[list.size()];
                for (int k = 0; k < list.size(); k++) {
                    subElems[k] = list.get(k);
                }
            }
            // offset into elem, used only for error reporting below
            int lengthTravelledSoFar = 0;
            for (int j = 0; j < subElems.length; j++) {
                // 7. {AnnotationType.feature==string}
                // 8. {AnnotationType.feature=="string"}
                index = subElems[j].indexOf("==");
                index1 = findIndexOf(subElems[j], '.');
                if (index == -1 && index1 == -1) {
                    // this is {AT}
                    if (!norm(subElems[j].trim()).equals(baseTokenAnnotationType))
                        areAllTermsTokens = false;
                    terms.add(new Term(field, norm(subElems[j].trim()), "*"));
                    pos.add(position);
                    consider.add(j == 0);
                } else if (index != -1 && index1 == -1) {
                    // this is {AT=="String"}
                    String annotType = norm(subElems[j].substring(0, index).trim());
                    String annotText = norm(subElems[j].substring(index + 2, subElems[j].length()).trim());
                    // NOTE(review): unlike the branch above, this one does not guard
                    // with length() > 2 before stripping quotes — kept as-is
                    if (annotText.charAt(0) == '\"' && annotText.charAt(annotText.length() - 1) == '\"') {
                        annotText = annotText.substring(1, annotText.length() - 1);
                    }
                    if (!annotType.trim().equals(baseTokenAnnotationType))
                        areAllTermsTokens = false;
                    terms.add(new Term(field, annotText, annotType + ".string"));
                    pos.add(position);
                    consider.add(j == 0);
                } else if (index == -1 && index1 != -1) {
                    // '.' without "==" inside a sub-element is malformed
                    throw new SearchException("missing operator", "an equal operator (==) is missing", elem, (elem.indexOf("=", lengthTravelledSoFar) != -1) ? elem.indexOf("=", lengthTravelledSoFar) : elem.length());
                } else {
                    // this is {AT.f == "s"}
                    String annotType = norm(subElems[j].substring(0, index1).trim());
                    String featureType = norm(subElems[j].substring(index1 + 1, index).trim());
                    String featureText = norm(subElems[j].substring(index + 2, subElems[j].length()).trim());
                    if (featureText.length() > 2 && featureText.charAt(0) == '\"' && featureText.charAt(featureText.length() - 1) == '\"')
                        featureText = featureText.substring(1, featureText.length() - 1);
                    if (!annotType.trim().equals(baseTokenAnnotationType))
                        areAllTermsTokens = false;
                    terms.add(new Term(field, featureText, annotType + "." + featureType));
                    pos.add(position);
                    consider.add(j == 0);
                }
                // +1 accounts for the ',' separator consumed by the split
                lengthTravelledSoFar += subElems[j].length() + 1;
            }
            position++;
        }
    } else {
        // possible
        // remove all the inverted commas
        // (unescaped double quotes are dropped; \" sequences are preserved)
        StringBuilder newString = new StringBuilder();
        char prev = ' ', ch = ' ';
        for (int i = 0; i < elem.length(); i++) {
            prev = ch;
            ch = elem.charAt(i);
            if (ch == '\"' && prev != '\\') {
                continue;
            } else {
                newString.append(ch);
            }
        }
        // there can be many tokens
        // each whitespace-separated token becomes its own term at its own position
        String[] subTokens = norm(newString.toString()).split("( )+");
        for (int k = 0; k < subTokens.length; k++) {
            if (subTokens[k].trim().length() > 0) {
                terms.add(new Term(field, norm(subTokens[k]), baseTokenAnnotationType + ".string"));
                pos.add(position);
                consider.add(Boolean.TRUE);
                position++;
            }
        }
    }
    return new List<?>[] { terms, pos, consider };
}
Aggregations