Use of com.joliciel.talismane.machineLearning.features.RuntimeEnvironment in project talismane by joliciel-informatique.
Class CombinedLexicalAttributesTest, method testCheckInternalMultipleAttributes.
/**
 * Runs a {@link CombinedLexicalAttributesFeature} over the Person and Number
 * attributes of the token "blah" tagged as "V", and expects the single
 * combined outcome {@code "1;3|p;s"}.
 *
 * @throws Exception if any step of the fixture set-up or the feature check fails
 */
@Test
public void testCheckInternalMultipleAttributes() throws Exception {
  System.setProperty("config.file", "src/test/resources/testWithLex.conf");
  ConfigFactory.invalidateCaches();
  // The returned Config is not referenced anywhere else in this test.
  ConfigFactory.load();

  final String sessionId = "test";
  final String text = "blah";

  // One-token sentence: the token spans the whole text.
  final Sentence blahSentence = new Sentence(text, sessionId);
  final TokenSequence tokens = new TokenSequence(blahSentence, sessionId);
  final int tokenStart = "".length();
  final Token blahToken = new Token(text, tokens, 1, tokenStart, text.length(), sessionId);
  final PosTaggedToken taggedToken = new PosTaggedToken(blahToken, new Decision("V", 1.0), sessionId);

  // Address function which always resolves to the token built above.
  PosTaggedTokenAddressFunction<PosTaggerContext> fixedAddress = new AbstractPosTaggedTokenAddressFunction() {
    @Override
    protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext ctx, RuntimeEnvironment runtime) {
      return generateResult(taggedToken);
    }
  };

  StringLiteralFeature<PosTaggedTokenWrapper> personAttribute = new StringLiteralFeature<>(LexicalAttribute.Person.name());
  StringLiteralFeature<PosTaggedTokenWrapper> numberAttribute = new StringLiteralFeature<>(LexicalAttribute.Number.name());
  CombinedLexicalAttributesFeature<PosTaggerContext> combined = new CombinedLexicalAttributesFeature<>(fixedAddress, personAttribute,
      numberAttribute);

  PosTaggerContext taggerContext = new PosTaggerContextImpl(blahToken, new PosTagSequence(tokens));
  FeatureResult<String> result = combined.checkInternal(taggerContext, new RuntimeEnvironment());

  String combinedOutcome = result.getOutcome();
  System.out.println(combinedOutcome);
  assertEquals("1;3|p;s", combinedOutcome);
}
Use of com.joliciel.talismane.machineLearning.features.RuntimeEnvironment in project talismane by joliciel-informatique.
Class LexicalAttributeFeatureTest, method testCheckInternalMultipleAttributes.
/**
 * Runs a {@link LexicalAttributeFeature} over the Person and Number attributes
 * of the token "blah" tagged as "V". Every returned outcome must be one of
 * {@code "3|p"}, {@code "1|s"} or {@code "3|s"}, and exactly three outcomes
 * are expected.
 *
 * @throws Exception if any step of the fixture set-up or the feature check fails
 */
@Test
public void testCheckInternalMultipleAttributes() throws Exception {
  System.setProperty("config.file", "src/test/resources/testWithLex.conf");
  ConfigFactory.invalidateCaches();
  // The returned Config is not referenced anywhere else in this test.
  ConfigFactory.load();

  final String sessionId = "test";
  final String text = "blah";

  // One-token sentence: the token spans the whole text.
  final Sentence blahSentence = new Sentence(text, sessionId);
  final TokenSequence tokens = new TokenSequence(blahSentence, sessionId);
  final int tokenStart = "".length();
  final Token blahToken = new Token(text, tokens, 1, tokenStart, text.length(), sessionId);
  final PosTaggedToken taggedToken = new PosTaggedToken(blahToken, new Decision("V", 1.0), sessionId);

  // Address function which always resolves to the token built above.
  PosTaggedTokenAddressFunction<PosTaggerContext> fixedAddress = new AbstractPosTaggedTokenAddressFunction() {
    @Override
    protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext ctx, RuntimeEnvironment runtime) {
      return generateResult(taggedToken);
    }
  };

  StringLiteralFeature<PosTaggedTokenWrapper> personAttribute = new StringLiteralFeature<>(LexicalAttribute.Person.name());
  StringLiteralFeature<PosTaggedTokenWrapper> numberAttribute = new StringLiteralFeature<>(LexicalAttribute.Number.name());
  LexicalAttributeFeature<PosTaggerContext> attributeFeature = new LexicalAttributeFeature<>(fixedAddress, personAttribute,
      numberAttribute);

  PosTaggerContext taggerContext = new PosTaggerContextImpl(blahToken, new PosTagSequence(tokens));
  FeatureResult<List<WeightedOutcome<String>>> result = attributeFeature.checkInternal(taggerContext, new RuntimeEnvironment());

  List<WeightedOutcome<String>> weightedOutcomes = result.getOutcome();
  System.out.println(weightedOutcomes);

  // Each outcome has to be one of the three accepted person|number pairs.
  for (WeightedOutcome<String> weighted : weightedOutcomes) {
    final String value = weighted.getOutcome();
    final boolean accepted = "3|p".equals(value) || "1|s".equals(value) || "3|s".equals(value);
    assertTrue(accepted);
  }
  assertEquals(3, weightedOutcomes.size());
}
Use of com.joliciel.talismane.machineLearning.features.RuntimeEnvironment in project talismane by joliciel-informatique.
Class LexicalAttributeFeatureTest, method testCheckInternal.
/**
 * Runs a {@link LexicalAttributeFeature} over the Gender attribute of the
 * token "dame" (from the sentence "une dame") tagged as "NC", and expects
 * exactly one outcome, {@code "f"}.
 *
 * @throws Exception if any step of the fixture set-up or the feature check fails
 */
@Test
public void testCheckInternal() throws Exception {
  System.setProperty("config.file", "src/test/resources/testWithLex.conf");
  ConfigFactory.invalidateCaches();
  // The returned Config is not referenced anywhere else in this test,
  // so it is no longer bound to a local variable.
  ConfigFactory.load();

  final String sessionId = "test";

  Sentence sentence = new Sentence("une dame", sessionId);
  TokenSequence tokenSequence = new TokenSequence(sentence, sessionId);
  // Offsets are derived from the literal text so they stay in step with it.
  Token token = new Token("dame", tokenSequence, 1, "une ".length(), "une dame".length(), sessionId);
  Decision decision = new Decision("NC", 1.0);
  final PosTaggedToken posTaggedToken = new PosTaggedToken(token, decision, sessionId);

  // Address function which always resolves to the token built above.
  PosTaggedTokenAddressFunction<PosTaggerContext> addressFunction = new AbstractPosTaggedTokenAddressFunction() {
    @Override
    protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext context, RuntimeEnvironment env) {
      return this.generateResult(posTaggedToken);
    }
  };

  StringLiteralFeature<PosTaggedTokenWrapper> gender = new StringLiteralFeature<>(LexicalAttribute.Gender.name());
  LexicalAttributeFeature<PosTaggerContext> feature = new LexicalAttributeFeature<>(addressFunction, gender);

  PosTagSequence history = new PosTagSequence(tokenSequence);
  PosTaggerContext context = new PosTaggerContextImpl(token, history);
  RuntimeEnvironment env = new RuntimeEnvironment();

  FeatureResult<List<WeightedOutcome<String>>> featureResult = feature.checkInternal(context, env);
  List<WeightedOutcome<String>> outcomes = featureResult.getOutcome();
  System.out.println(outcomes);

  // Check the size before indexing: an empty list then fails with a clear
  // assertion message rather than an IndexOutOfBoundsException.
  assertEquals(1, outcomes.size());
  assertEquals("f", outcomes.get(0).getOutcome());
}
Use of com.joliciel.talismane.machineLearning.features.RuntimeEnvironment in project talismane by joliciel-informatique.
Class LexicalAttributeFeatureTest, method testCheckInternalMultipleEntries.
/**
 * Runs a {@link LexicalAttributeFeature} over the Person attribute of the
 * token "demande" (from the sentence "je demande") tagged as "V". Every
 * returned outcome must be {@code "1"} or {@code "3"}, and exactly two
 * outcomes are expected.
 *
 * @throws Exception if any step of the fixture set-up or the feature check fails
 */
@Test
public void testCheckInternalMultipleEntries() throws Exception {
  System.setProperty("config.file", "src/test/resources/testWithLex.conf");
  ConfigFactory.invalidateCaches();
  // The returned Config is not referenced anywhere else in this test.
  ConfigFactory.load();

  final String sessionId = "test";

  final Sentence jeDemande = new Sentence("je demande", sessionId);
  final TokenSequence tokens = new TokenSequence(jeDemande, sessionId);
  // Offsets are derived from the literal text so they stay in step with it.
  final int verbStart = "je ".length();
  final int verbEnd = "je demande".length();
  final Token verbToken = new Token("demande", tokens, 1, verbStart, verbEnd, sessionId);
  final PosTaggedToken taggedVerb = new PosTaggedToken(verbToken, new Decision("V", 1.0), sessionId);

  // Address function which always resolves to the token built above.
  PosTaggedTokenAddressFunction<PosTaggerContext> fixedAddress = new AbstractPosTaggedTokenAddressFunction() {
    @Override
    protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext ctx, RuntimeEnvironment runtime) {
      return generateResult(taggedVerb);
    }
  };

  StringLiteralFeature<PosTaggedTokenWrapper> personAttribute = new StringLiteralFeature<>(LexicalAttribute.Person.name());
  LexicalAttributeFeature<PosTaggerContext> attributeFeature = new LexicalAttributeFeature<>(fixedAddress, personAttribute);

  PosTaggerContext taggerContext = new PosTaggerContextImpl(verbToken, new PosTagSequence(tokens));
  FeatureResult<List<WeightedOutcome<String>>> result = attributeFeature.checkInternal(taggerContext, new RuntimeEnvironment());

  List<WeightedOutcome<String>> weightedOutcomes = result.getOutcome();
  System.out.println(weightedOutcomes);

  // Each outcome has to be one of the two accepted person values.
  for (WeightedOutcome<String> weighted : weightedOutcomes) {
    final String value = weighted.getOutcome();
    final boolean accepted = "1".equals(value) || "3".equals(value);
    assertTrue(accepted);
  }
  assertEquals(2, weightedOutcomes.size());
}
Use of com.joliciel.talismane.machineLearning.features.RuntimeEnvironment in project jochre by urieli.
Class SegmentationTest, method testSegmentation.
/**
 * Segments a sample image loaded from the classpath and checks the resulting
 * structure.
 *
 * <p>For the selected image the test:
 * <ol>
 * <li>reads the image and wraps it in a new document/page/source image;</li>
 * <li>runs the {@link Segmenter};</li>
 * <li>assigns a letter from the expected text to each shape, group by group;</li>
 * <li>runs a fixed list of shape features on every shape (results are only logged);</li>
 * <li>asserts the number of rows, the number of groups and shapes in the
 * first rows, and the number of shapes in the first group of the first two rows.</li>
 * </ol>
 *
 * <p>Settings for four images are defined, but only image 1 is currently
 * exercised: all other image numbers are skipped at the top of the loop.
 *
 * @throws Exception if the image cannot be read or segmentation fails
 */
@Test
public void testSegmentation() throws Exception {
  // TODO: Note currently this requires high thresholds to work
  // Need to decide if this is valid in general, or only for these samples
  System.setProperty("config.file", "src/test/resources/testHighThresholds.conf");
  ConfigFactory.invalidateCaches();
  Config config = ConfigFactory.load();
  JochreSession jochreSession = new JochreSession(config);
  boolean writePixelsToLog = true;

  for (int imageNumber = 1; imageNumber <= 4; imageNumber++) {
    // Only the first sample image is run; the others are skipped.
    if (imageNumber != 1) {
      continue;
    }

    String imageName = "";
    String suffix = "";
    String text = "";
    String fileName = "";
    String userFileName;
    int rowCount = 2;
    int shapeCountRow1;
    int shapeCountRow2;
    int groupCountRow1;
    int groupCountRow2;
    int groupCountRow3 = 0;
    int shapeCountRow1Group1;
    int shapeCountRow2Group1;

    // Per-image fixture: resource name, expected text and expected counts.
    if (imageNumber == 1) {
      imageName = "MotlPeysiDemKhazns2RowsShort2";
      suffix = "jpg";
      text = "איך געה מיט אייך קיינער אין דער וועלט";
      fileName = "MotlPeysiDemKhazns2RowsShort2.pdf";
      userFileName = "Motl Peysi Dem Khazns";
      shapeCountRow1 = 13;
      shapeCountRow2 = 17;
      groupCountRow1 = 4;
      groupCountRow2 = 4;
      shapeCountRow1Group1 = 3;
      shapeCountRow2Group1 = 6;
    } else if (imageNumber == 2) {
      imageName = "MegileLiderZeresh";
      suffix = "png";
      text = "זרש, די מכשפה, װאָס שעלט ווי אַ מגפה";
      fileName = "MegileLiderManger.pdf";
      userFileName = "Megile Lider";
      shapeCountRow1 = 12;
      shapeCountRow2 = 17;
      groupCountRow1 = 3;
      groupCountRow2 = 5;
      shapeCountRow1Group1 = 4;
      shapeCountRow2Group1 = 4;
    } else if (imageNumber == 3) {
      imageName = "MendeleMoykherSforimVol1_41_0Excerpt";
      suffix = "png";
      text = "ער הייסט יאַנקיל, בעריל,";
      fileName = "MendeleMoykherSforimVol1_41_0.png";
      userFileName = "MendeleMoykherSforimVol1_41_0";
      shapeCountRow1 = 20;
      shapeCountRow2 = 0;
      groupCountRow1 = 4;
      groupCountRow2 = 0;
      shapeCountRow1Group1 = 2;
      shapeCountRow2Group1 = 0;
    } else {
      imageName = "JoinedLetterTest";
      suffix = "png";
      text = "Joined Letter Test";
      fileName = "JoinedLetterTest.png";
      userFileName = "JoinedLetterTest";
      rowCount = 2;
      shapeCountRow1 = 23;
      shapeCountRow2 = 23;
      groupCountRow1 = 4;
      groupCountRow2 = 4;
      groupCountRow3 = 5;
      shapeCountRow1Group1 = 6;
      shapeCountRow2Group1 = 5;
    }

    LOG.debug("######### imageName: " + imageName);
    // String fileName = "data/Zelmenyaners3Words.gif";

    // ImageIO.read does not close the stream it is given, so close it here
    // once the image has been decoded.
    BufferedImage image;
    try (InputStream imageFileStream = getClass().getResourceAsStream("/com/joliciel/jochre/test/resources/" + imageName + "." + suffix)) {
      assertNotNull(imageFileStream);
      image = ImageIO.read(imageFileStream);
    }

    JochreDocument doc = new JochreDocument(jochreSession);
    doc.setFileName(fileName);
    doc.setName(userFileName);
    JochrePage page = doc.newPage();
    SourceImage sourceImage = page.newJochreImage(image, imageName);
    sourceImage.setWhiteGapFillFactor(5);
    sourceImage.setImageStatus(ImageStatus.AUTO_NEW);

    if (writePixelsToLog) {
      // Dump the whole source image, one log line per pixel row.
      LOG.debug("i012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789");
      for (int y = 0; y < sourceImage.getHeight(); y++) {
        StringBuilder line = new StringBuilder();
        line.append(y);
        for (int x = 0; x < sourceImage.getWidth(); x++) {
          if (sourceImage.isPixelBlack(x, y, sourceImage.getBlackThreshold())) {
            line.append("x");
          } else {
            line.append("o");
          }
        }
        LOG.debug(line.toString());
      }
    }

    Segmenter segmenter = new Segmenter(sourceImage, jochreSession);
    segmenter.segment();
    if (segmenter.isDrawSegmentation()) {
      BufferedImage segmentedImage = segmenter.getSegmentedImage();
      File tempDir = new File(System.getProperty("java.io.tmpdir"));
      ImageIO.write(segmentedImage, "PNG", new File(tempDir, imageName + "_seg.png"));
    }

    JochreImage jochreImage = sourceImage;

    // Pass 1: log every shape row by row, and remember pixel (3,3) of the
    // very first shape so it can be compared again in pass 3.
    int i = 0;
    boolean firstShape = true;
    int midPixelFirstShape = 0;
    int midPixelFirstShapeRaw = 0;
    for (Paragraph paragraph : jochreImage.getParagraphs()) {
      for (RowOfShapes row : paragraph.getRows()) {
        int j = 0;
        LOG.debug("============= Row " + i + " ================");
        for (Shape shape : row.getShapes()) {
          LOG.debug("Shape (" + i + "," + j + "). Left = " + shape.getLeft() + ". Top = " + shape.getTop() + ". Right = " + shape.getRight() + ". Bottom = " + shape.getBottom() + ". Group: " + shape.getGroup().getIndex());
          if (firstShape) {
            midPixelFirstShape = shape.getPixel(3, 3);
            midPixelFirstShapeRaw = shape.getRawPixel(3, 3);
            firstShape = false;
          }
          if (writePixelsToLog) {
            logShapePixels(shape, sourceImage);
          }
          j++;
        } // next shape
        i++;
      } // next row
    } // next paragraph

    // Pass 2: walk the shapes group by group and assign each one the next
    // letter of the expected text; i is the current index into the text.
    i = 0;
    for (Paragraph paragraph : jochreImage.getParagraphs()) {
      for (RowOfShapes row : paragraph.getRows()) {
        for (GroupOfShapes group : row.getGroups()) {
          for (Shape shape : group.getShapes()) {
            if (i < text.length()) {
              String letter = text.substring(i, i + 1);
              String nextLetter = "";
              if (i + 1 < text.length()) {
                nextLetter = text.substring(i + 1, i + 2);
              }
              // A following combining mark is attached to the current letter.
              // NOTE(review): nextLetter is at most one char long here, so a
              // comparison literal longer than one char can never match -
              // confirm both literals below are single characters.
              if (nextLetter.equals("ָֹ") || nextLetter.equals("ַ")) {
                letter += nextLetter;
                i++;
              }
              LOG.debug("Letter: " + letter);
              shape.setLetter(letter);
            }
            i++;
          } // next shape
          // to skip the space
          i++;
          LOG.debug("Space");
        } // next group
      } // next row
    } // next paragraph

    List<ShapeFeature<?>> features = new ArrayList<ShapeFeature<?>>();
    features.add(new VerticalElongationFeature());
    features.add(new VerticalSizeFeature());
    features.add(new TouchesBaseLineFeature());
    features.add(new TouchesMeanLineFeature());
    features.add(new EmptyCentreFeature());

    // Pass 3: log each shape with its letter, brightness grid and feature
    // results, and assert the expected counts. Here i is the row index and
    // j is the index of the group within the row.
    i = 0;
    DecimalFormat df = new DecimalFormat("0.00");
    firstShape = true;
    int totalRowCount = 0;
    for (Paragraph paragraph : jochreImage.getParagraphs()) {
      for (RowOfShapes row : paragraph.getRows()) {
        totalRowCount++;
        LOG.debug("============= Row " + i + " ================");
        int j = 0;
        for (GroupOfShapes group : row.getGroups()) {
          for (Shape shape : group.getShapes()) {
            LOG.debug("============= Shape (" + i + "," + j + ") ================");
            LOG.debug("Left = " + shape.getLeft() + ". Top = " + shape.getTop() + ". Right = " + shape.getRight() + ". Bottom = " + shape.getBottom());
            LOG.debug("Letter " + shape.getLetter());
            if (firstShape) {
              // The first shape must still report the pixel values recorded in pass 1.
              LOG.debug("mid pixel: " + midPixelFirstShape);
              assertEquals(midPixelFirstShape, shape.getPixel(3, 3));
              LOG.debug("mid pixel raw: " + midPixelFirstShapeRaw);
              assertEquals(midPixelFirstShapeRaw, shape.getRawPixel(3, 3));
              firstShape = false;
            }
            if (writePixelsToLog) {
              logShapePixels(shape, sourceImage);
            }

            double[][] totals = shape.getBrightnessBySection(5, 5, 1, SectionBrightnessMeasurementMethod.RAW);
            LOG.debug("Brightness counts");
            // totals is indexed [x][y]; print it one y per log line.
            for (int y = 0; y < totals[0].length; y++) {
              StringBuilder line = new StringBuilder();
              for (int x = 0; x < totals.length; x++) {
                line.append(df.format(totals[x][y])).append("\t");
              }
              LOG.debug(line.toString());
            }

            for (ShapeFeature<?> feature : features) {
              RuntimeEnvironment env = new RuntimeEnvironment();
              FeatureResult<?> outcome = feature.check(shape, env);
              LOG.debug(outcome.toString());
            }
          } // next shape

          // Only the first group of the first two rows has an expected shape count.
          if (i == 0) {
            if (j == 0) {
              assertEquals(shapeCountRow1Group1, group.getShapes().size());
            }
          } else if (i == 1) {
            if (j == 0) {
              assertEquals(shapeCountRow2Group1, group.getShapes().size());
            }
          }
          j++;
        } // next group

        if (i == 0) {
          assertEquals(groupCountRow1, row.getGroups().size());
        } else if (i == 1) {
          assertEquals(groupCountRow2, row.getGroups().size());
        } else if (i == 2) {
          assertEquals(groupCountRow3, row.getGroups().size());
        }

        if (i == 0) {
          assertEquals(shapeCountRow1, row.getShapes().size());
        } else if (i == 1) {
          assertEquals(shapeCountRow2, row.getShapes().size());
        }
        i++;
      } // next row
    } // next paragraph

    assertEquals(rowCount, totalRowCount);
  } // next test image
  LOG.debug("************** Finished ***********");
}

/**
 * Writes the pixels of a shape to the debug log, one line per pixel row.
 * Each line starts with "M" for the shape's mean-line row, "B" for its
 * base-line row, or the row number otherwise, followed by "x" for every
 * black pixel and "o" for every other pixel.
 *
 * @param shape the shape whose pixels are logged
 * @param sourceImage the image whose black threshold is used to decide
 *        whether a pixel is black
 */
private void logShapePixels(Shape shape, SourceImage sourceImage) {
  for (int y = 0; y < shape.getHeight(); y++) {
    StringBuilder line = new StringBuilder();
    if (y == shape.getMeanLine()) {
      line.append("M");
    } else if (y == shape.getBaseLine()) {
      line.append("B");
    } else {
      line.append(y);
    }
    for (int x = 0; x < shape.getWidth(); x++) {
      if (shape.isPixelBlack(x, y, sourceImage.getBlackThreshold())) {
        line.append("x");
      } else {
        line.append("o");
      }
    }
    LOG.debug(line.toString());
  }
}
Aggregations