use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.
the class BooleanPerceptronClassifier method updateFST.
/**
 * Rebuilds the {@code fst} field from the supplied term weights.
 *
 * <p>Entries are added in the iteration order of {@code weights}, i.e. ascending key order of
 * the sorted map. Each weight is narrowed to a {@code long} with {@link Double#longValue()},
 * so any fractional part is discarded before it is stored as the output for its key.
 *
 * @param weights term-to-weight mapping to encode
 * @throws IOException declared because the FST builder calls may throw it
 */
private void updateFST(SortedMap<String, Double> weights) throws IOException {
  final Builder<Long> builder =
      new Builder<>(FST.INPUT_TYPE.BYTE1, PositiveIntOutputs.getSingleton());

  // Both scratch buffers are reused across entries; each iteration overwrites them.
  final BytesRefBuilder keyBytes = new BytesRefBuilder();
  final IntsRefBuilder keyInts = new IntsRefBuilder();

  for (Map.Entry<String, Double> weight : weights.entrySet()) {
    keyBytes.copyChars(weight.getKey());
    builder.add(Util.toIntsRef(keyBytes.get(), keyInts), weight.getValue().longValue());
  }

  fst = builder.finish();
}
use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.
the class TestAutomaton method toIntsRef.
/**
 * Converts a string into an {@link IntsRef} holding one int per Unicode code point.
 *
 * @param s the string to convert
 * @return the ints view obtained from the builder after appending every code point of {@code s}
 */
private static IntsRef toIntsRef(String s) {
  final IntsRefBuilder codePoints = new IntsRefBuilder();
  int offset = 0;
  while (offset < s.length()) {
    final int codePoint = s.codePointAt(offset);
    codePoints.append(codePoint);
    // Advance by one or two chars depending on whether this was a surrogate pair.
    offset += Character.charCount(codePoint);
  }
  return codePoints.get();
}
use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.
the class TestAutomaton method assertSame.
/**
 * Asserts that automaton {@code a} accepts exactly the given terms.
 *
 * <p>The checks performed, in order:
 * <ol>
 *   <li>{@code a} is finite and not total;</li>
 *   <li>the determinized automaton accepts every term (run on the term's decoded string);</li>
 *   <li>the finite strings of {@code a} equal the terms converted with {@code Util.toUTF32};</li>
 *   <li>{@code a} has the same language as the union of the terms;</li>
 *   <li>after {@code UTF32ToUTF8} conversion, the finite strings equal the terms converted
 *       with {@code Util.toIntsRef}.</li>
 * </ol>
 *
 * <p>If any check throws an {@link AssertionError}, the terms and the automaton (in dot form)
 * are printed to standard output and the error is rethrown.
 *
 * @param terms the terms the automaton is expected to accept, and nothing else
 * @param a the automaton under test
 */
private void assertSame(Collection<BytesRef> terms, Automaton a) {
  try {
    assertTrue(Operations.isFinite(a));
    assertFalse(Operations.isTotal(a));

    Automaton detA = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);

    // Make sure all terms are accepted:
    for (BytesRef term : terms) {
      // Decode once; the same string is used for both the message and the run.
      String termString = term.utf8ToString();
      assertTrue("failed to accept term=" + termString, Operations.run(detA, termString));
    }

    // Use getFiniteStrings:
    Set<IntsRef> expected = new HashSet<>();
    for (BytesRef term : terms) {
      IntsRefBuilder intsRef = new IntsRefBuilder();
      Util.toUTF32(term.utf8ToString(), intsRef);
      expected.add(intsRef.toIntsRef());
    }
    Set<IntsRef> actual = TestOperations.getFiniteStrings(a);

    if (expected.equals(actual) == false) {
      // Report both directions of the difference before failing.
      System.out.println("FAILED:");
      for (IntsRef term : expected) {
        if (actual.contains(term) == false) {
          System.out.println(" term=" + term + " should be accepted but isn't");
        }
      }
      for (IntsRef term : actual) {
        if (expected.contains(term) == false) {
          System.out.println(" term=" + term + " is accepted but should not be");
        }
      }
      throw new AssertionError("mismatch");
    }

    // Use sameLanguage:
    Automaton a2 = Operations.removeDeadStates(Operations.determinize(unionTerms(terms), Integer.MAX_VALUE));
    assertTrue(Operations.sameLanguage(a2, Operations.removeDeadStates(Operations.determinize(a, Integer.MAX_VALUE))));

    // Do same check, in UTF8 space
    Automaton utf8 = randomNoOp(new UTF32ToUTF8().convert(a));
    Set<IntsRef> expected2 = new HashSet<>();
    for (BytesRef term : terms) {
      IntsRefBuilder intsRef = new IntsRefBuilder();
      Util.toIntsRef(term, intsRef);
      expected2.add(intsRef.toIntsRef());
    }
    assertEquals(expected2, TestOperations.getFiniteStrings(utf8));
  } catch (AssertionError ae) {
    // Dump the inputs to make the failure reproducible, then propagate the original error.
    System.out.println("TEST: FAILED: not same");
    System.out.println(" terms (count=" + terms.size() + "):");
    for (BytesRef term : terms) {
      System.out.println(" " + term);
    }
    System.out.println(" automaton:");
    System.out.println(a.toDot());
    //a.writeDot("fail");
    throw ae;
  }
}
use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.
the class TestAutomaton method assertMatches.
/**
 * Asserts that the finite strings of the determinized form of {@code a} are exactly
 * {@code strings}.
 *
 * @param a the automaton under test
 * @param strings the complete set of strings the automaton is expected to accept
 */
private void assertMatches(Automaton a, String... strings) {
  final Set<IntsRef> wanted = new HashSet<>();
  for (int i = 0; i < strings.length; i++) {
    // A fresh builder per string: the returned IntsRef is stored in the set, so it must not
    // be overwritten by a later conversion.
    wanted.add(Util.toUTF32(strings[i], new IntsRefBuilder()));
  }

  final Automaton deterministic = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
  assertEquals(wanted, TestOperations.getFiniteStrings(deterministic));
}
use of org.apache.lucene.util.IntsRefBuilder in project lucene-solr by apache.
the class TestAutomaton method accepts.
/**
 * Reports whether automaton {@code a} accepts the content of {@code b}.
 *
 * @param a the automaton to run
 * @param b the term to test, converted to ints with {@code Util.toIntsRef} before running
 * @return the result of {@code Operations.run} on the converted input
 */
private boolean accepts(Automaton a, BytesRef b) {
  final IntsRefBuilder converted = new IntsRefBuilder();
  Util.toIntsRef(b, converted);
  final IntsRef input = converted.toIntsRef();
  return Operations.run(a, input);
}
Aggregations