use of org.apache.commons.codec.language.Metaphone in project lucene-solr by apache.
the class TestPhoneticFilter method testRandomStrings.
/**
 * Blasts random strings through a whitespace-tokenized {@link PhoneticFilter} analyzer,
 * once per encoder in the list below.
 *
 * <p>Each analyzer is opened in a try-with-resources block so that it is closed even when
 * {@code checkRandomData} fails; previously a failing check skipped the {@code close()} call.
 *
 * @throws IOException declared for the random-data check
 */
public void testRandomStrings() throws IOException {
  Encoder[] encoders = new Encoder[] { new Metaphone(), new DoubleMetaphone(), new Soundex(), new RefinedSoundex(), new Caverphone2() };
  for (final Encoder e : encoders) {
    try (Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, false));
      }
    }) {
      checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
    }
    // NOTE(review): this second analyzer is configured exactly like the first one
    // (third PhoneticFilter argument is false in both). Possibly the other value was
    // intended for this pass -- confirm before changing; behavior is kept as-is here.
    try (Analyzer b = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, false));
      }
    }) {
      checkRandomData(random(), b, 1000 * RANDOM_MULTIPLIER);
    }
  }
}
use of org.apache.commons.codec.language.Metaphone in project lucene-solr by apache.
the class TestPhoneticFilterFactory method testFactoryDefaults.
/**
 * Case: default.
 *
 * <p>Builds a {@link PhoneticFilterFactory} with only the encoder argument set to
 * {@code "Metaphone"} and checks that the resolved encoder is a {@link Metaphone}
 * and that the factory's {@code inject} flag is {@code true}.
 */
public void testFactoryDefaults() throws IOException {
  // Kept as a mutable HashMap: the factory constructor receives the map itself.
  final Map<String, String> params = new HashMap<>();
  params.put(PhoneticFilterFactory.ENCODER, "Metaphone");

  final PhoneticFilterFactory phoneticFactory = new PhoneticFilterFactory(params);
  final ClasspathResourceLoader loader = new ClasspathResourceLoader(phoneticFactory.getClass());
  phoneticFactory.inform(loader);

  final boolean encoderIsMetaphone = phoneticFactory.getEncoder() instanceof Metaphone;
  assertTrue(encoderIsMetaphone);
  // inject was never configured, so this asserts its default
  assertTrue(phoneticFactory.inject);
}
use of org.apache.commons.codec.language.Metaphone in project Saiy-PS by brandall76.
the class MetaphoneHelper method executeGeneric.
/**
 * Walks the generic data and, for each entry, compares it against every input string.
 * A pair is accepted when the {@link Metaphone} encodings are equal, the
 * {@code Algorithm.checkLength} test passes and the Jaro-Winkler score is strictly above
 * the lower threshold read from {@code SPH}. The search stops at the first accepted pair.
 *
 * @return an {@link AlgorithmicContainer} describing the first accepted pair, or null if
 * no pair satisfies the thresholds
 */
public AlgorithmicContainer executeGeneric() {
    final long startNanos = System.nanoTime();
    final double lowerBound = SPH.getJaroWinklerLower(mContext);
    final ArrayList<AlgorithmicContainer> toKeep = new ArrayList<>();
    final Metaphone encoder = new Metaphone();
    final JaroWinklerDistance jaroWinkler = new JaroWinklerDistance();
    final int count = genericData.size();

    search:
    for (int index = 0; index < count; index++) {
        final String original = (String) genericData.get(index);
        final String candidate = original.toLowerCase(loc).trim();

        for (final String raw : inputData) {
            final String utterance = raw.toLowerCase(loc).trim();

            // Same short-circuit order as before: the length check only runs on a phonetic match.
            if (!encoder.isMetaphoneEqual(candidate, utterance)
                    || !Algorithm.checkLength(candidate, utterance)) {
                continue;
            }

            final double similarity = jaroWinkler.apply(candidate, utterance);
            if (!(similarity > lowerBound)) {
                if (DEBUG) {
                    MyLog.i(CLS_NAME, "Matches: double check JW: rejected");
                }
                continue;
            }

            final AlgorithmicContainer accepted = new AlgorithmicContainer();
            accepted.setInput(utterance);
            accepted.setGenericMatch(original);
            accepted.setScore(similarity);
            accepted.setAlgorithm(Algorithm.METAPHONE);
            accepted.setParentPosition(index);
            accepted.setExactMatch(true);
            toKeep.add(accepted);
            // first accepted pair wins
            break search;
        }
    }

    AlgorithmicContainer result = null;
    final boolean haveMatch = UtilsList.notNaked(toKeep);
    if (DEBUG) {
        MyLog.i(CLS_NAME, haveMatch ? "Have a match" : "no matches");
    }
    if (haveMatch) {
        result = toKeep.get(0);
    }

    if (DEBUG) {
        MyLog.getElapsed(CLS_NAME, startNanos);
    }
    return result;
}
use of org.apache.commons.codec.language.Metaphone in project Saiy-PS by brandall76.
the class MetaphoneHelper method executeCustomCommand.
/**
 * Walks the stored custom command containers and compares each keyphrase against every
 * voice data string. A pair is accepted when the {@link Metaphone} encodings are equal,
 * the {@code Algorithm.checkLength} test passes and the Jaro-Winkler score is strictly
 * above the lower threshold read from {@code SPH}. The search stops at the first accepted
 * pair; its container is cloned and deserialised into the returned command.
 *
 * @return the {@link CustomCommand} of the first accepted pair, or null if no pair
 * satisfies the thresholds
 */
public CustomCommand executeCustomCommand() {
    final long startNanos = System.nanoTime();
    final double lowerBound = SPH.getJaroWinklerLower(mContext);
    final ArrayList<CustomCommandContainer> toKeep = new ArrayList<>();
    final Metaphone encoder = new Metaphone();
    final JaroWinklerDistance jaroWinkler = new JaroWinklerDistance();
    final int count = genericData.size();

    search:
    for (int index = 0; index < count; index++) {
        final CustomCommandContainer candidate = (CustomCommandContainer) genericData.get(index);
        final String keyphrase = candidate.getKeyphrase().toLowerCase(loc).trim();

        for (final String raw : inputData) {
            final String utterance = raw.toLowerCase(loc).trim();

            // Same short-circuit order as before: the length check only runs on a phonetic match.
            if (!encoder.isMetaphoneEqual(keyphrase, utterance)
                    || !Algorithm.checkLength(keyphrase, utterance)) {
                continue;
            }

            final double similarity = jaroWinkler.apply(keyphrase, utterance);
            if (!(similarity > lowerBound)) {
                if (DEBUG) {
                    MyLog.i(CLS_NAME, "Matches: double check JW: rejected");
                }
                continue;
            }

            // The stored container is updated first, then a clone of it is kept.
            candidate.setScore(similarity);
            candidate.setUtterance(utterance);
            candidate.setExactMatch(true);
            toKeep.add(SerializationUtils.clone(candidate));
            // first accepted pair wins
            break search;
        }
    }

    CustomCommand matched = null;
    if (!UtilsList.notNaked(toKeep)) {
        if (DEBUG) {
            MyLog.i(CLS_NAME, "no custom phrases matched");
        }
    } else {
        if (DEBUG) {
            MyLog.i(CLS_NAME, "Have a match");
        }
        final CustomCommandContainer winner = toKeep.get(0);
        final Gson gson = new GsonBuilder().disableHtmlEscaping().create();
        matched = gson.fromJson(winner.getSerialised(), CustomCommand.class);
        matched.setExactMatch(winner.isExactMatch());
        matched.setUtterance(winner.getUtterance());
        matched.setAlgorithm(Algorithm.METAPHONE);
    }

    if (DEBUG) {
        MyLog.getElapsed(CLS_NAME, startNanos);
    }
    return matched;
}
use of org.apache.commons.codec.language.Metaphone in project lucene-solr by apache.
the class TestPhoneticFilter method testAlgorithms.
/**
 * Runs {@code assertAlgorithm} for each encoder twice, once with the boolean flag set to
 * {@code true} and once with {@code false}, against fixed input text and the expected
 * token arrays listed below.
 */
public void testAlgorithms() throws Exception {
  final String words = "aaa bbb ccc easgasg";
  final String names = "Darda Karleen Datha Carlene";

  // Metaphone
  assertAlgorithm(new Metaphone(), true, words,
      new String[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" });
  assertAlgorithm(new Metaphone(), false, words,
      new String[] { "A", "B", "KKK", "ESKS" });

  // DoubleMetaphone
  assertAlgorithm(new DoubleMetaphone(), true, words,
      new String[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" });
  assertAlgorithm(new DoubleMetaphone(), false, words,
      new String[] { "A", "PP", "KK", "ASKS" });

  // Soundex
  assertAlgorithm(new Soundex(), true, words,
      new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" });
  assertAlgorithm(new Soundex(), false, words,
      new String[] { "A000", "B000", "C000", "E220" });

  // RefinedSoundex
  assertAlgorithm(new RefinedSoundex(), true, words,
      new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" });
  assertAlgorithm(new RefinedSoundex(), false, words,
      new String[] { "A0", "B1", "C3", "E034034" });

  // Caverphone2 uses name-like input
  assertAlgorithm(new Caverphone2(), true, names,
      new String[] { "TTA1111111", "Darda", "KLN1111111", "Karleen", "TTA1111111", "Datha", "KLN1111111", "Carlene" });
  assertAlgorithm(new Caverphone2(), false, names,
      new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });

  // Nysiis
  assertAlgorithm(new Nysiis(), true, words,
      new String[] { "A", "aaa", "B", "bbb", "C", "ccc", "EASGAS", "easgasg" });
  assertAlgorithm(new Nysiis(), false, words,
      new String[] { "A", "B", "C", "EASGAS" });
}
Aggregations