use of org.omegat.filters3.Element in project omegat by omegat-org.
the class RelaxNGFilter method isFileSupported.
/**
 * Returns whether the file is supported by the filter by checking
 * RELAX NG element and namespace constraints.
 * <p>
 * When the dialect declares no constraints, every file is accepted without
 * reading anything. Otherwise a single read of at most
 * <code>OConsts.READ_AHEAD_LIMIT</code> characters is taken from the reader
 * and the file is accepted only if both the RELAX NG root tag pattern and
 * the RELAX NG namespace pattern are found in that text.
 *
 * @param reader
 *            reader supplying the beginning of the file to inspect
 * @return <code>true</code> or <code>false</code>; <code>false</code> is also
 *         returned when nothing can be read or when reading fails
 */
public boolean isFileSupported(BufferedReader reader) {
    XMLDialect dialect = getDialect();
    if (dialect.getConstraints() == null || dialect.getConstraints().isEmpty()) {
        // nothing to check against: accept the file
        return true;
    }
    try {
        char[] cbuf = new char[OConsts.READ_AHEAD_LIMIT];
        int cbufLen = reader.read(cbuf);
        if (cbufLen <= 0) {
            // End of stream (read() returned -1) or nothing read: an empty
            // input cannot contain the root tag. Handled explicitly instead
            // of relying on String's constructor throwing for a negative length.
            return false;
        }
        String buf = new String(cbuf, 0, cbufLen);
        return RelaxNGDialect.RELAXNG_ROOT_TAG.matcher(buf).find()
                && RelaxNGDialect.RELAXNG_XMLNS.matcher(buf).find();
    } catch (Exception e) {
        // Detection is best-effort: any failure while probing the content
        // simply means this filter does not claim the file.
        return false;
    }
}
use of org.omegat.filters3.Element in project omegat by omegat-org.
the class XLIFFDialect method constructShortcuts.
/**
 * Builds the shortcut representation of the given elements and records one
 * {@link ProtectedPart} per tag in <code>protectedParts</code> (which is
 * cleared first).
 *
 * @param elements
 *            elements to render as shortcut text
 * @param protectedParts
 *            output list; receives the protected parts describing each tag
 * @return concatenated shortcut text of all elements
 */
@Override
public String constructShortcuts(List<Element> elements, List<ProtectedPart> protectedParts) {
    protectedParts.clear();
    InlineTagHandler inlineTags = new InlineTagHandler();
    StringBuilder out = new StringBuilder();
    for (Element element : elements) {
        if (element instanceof XMLContentBasedTag) {
            XMLContentBasedTag xmlTag = (XMLContentBasedTag) element;
            String name = xmlTag.getTag();
            String shortcut = null;
            int letter;
            int index;
            // only <mrk> carries protected text together with its tags
            boolean protectedContent = false;
            if ("bpt".equals(name)) {
                // XLIFF specification requires 'rid' and 'id' attributes,
                // but some tools use the 'i' attribute like for TMX
                inlineTags.startBPT(xmlTag.getAttribute("rid"), xmlTag.getAttribute("id"),
                        xmlTag.getAttribute("i"));
                letter = calcTagShortcutLetter(xmlTag, ignoreTypeForBptTags);
                inlineTags.setTagShortcutLetter(letter);
                index = inlineTags.endBPT();
                shortcut = "<" + shortcutLetterText(letter) + index + '>';
            } else if ("ept".equals(name)) {
                inlineTags.startEPT(xmlTag.getAttribute("rid"), xmlTag.getAttribute("id"),
                        xmlTag.getAttribute("i"));
                index = inlineTags.endEPT();
                letter = inlineTags.getTagShortcutLetter();
                shortcut = "</" + shortcutLetterText(letter) + index + '>';
            } else if ("it".equals(name)) {
                inlineTags.startOTHER();
                inlineTags.setCurrentPos(xmlTag.getAttribute("pos"));
                index = inlineTags.endOTHER();
                letter = calcTagShortcutLetter(xmlTag);
                // XLIFF specification requires 'open/close' values,
                // but some tools may use 'begin/end' values like for TMX
                boolean closing = "close".equals(inlineTags.getCurrentPos())
                        || "end".equals(inlineTags.getCurrentPos());
                if (closing && forceShortCutToF) {
                    // for better compatibility with corresponding TMX files
                    letter = 'f';
                }
                shortcut = (closing ? "</" : "<") + shortcutLetterText(letter) + index + '>';
            } else if ("ph".equals(name)) {
                inlineTags.startOTHER();
                index = inlineTags.endOTHER();
                letter = calcTagShortcutLetter(xmlTag, ignoreTypeForPhTags);
                shortcut = "<" + shortcutLetterText(letter) + index + "/>";
            } else if ("mrk".equals(name)) {
                inlineTags.startOTHER();
                index = inlineTags.endOTHER();
                letter = 'm';
                shortcut = "<m" + index + ">" + xmlTag.getIntactContents().sourceToOriginal() + "</m" + index
                        + ">";
                protectedContent = true;
            } else {
                // NOTE(review): for any other content-based tag the shortcut
                // stays null, so the text "null" is appended below - confirm
                // that this branch is unreachable for XLIFF content.
                letter = 'f';
                index = -1;
            }
            xmlTag.setShortcutLetter(letter);
            xmlTag.setShortcutIndex(index);
            xmlTag.setShortcut(shortcut);
            out.append(shortcut);

            ProtectedPart part = new ProtectedPart();
            part.setTextInSourceSegment(shortcut);
            part.setDetailsFromSourceFile(xmlTag.toOriginal());
            if (protectedContent) {
                // protected text with related tags, like <m0>Acme</m0>
                if (StatisticsSettings.isCountingProtectedText()) {
                    // the protected text is counted, the surrounding tags are not
                    part.setReplacementWordsCountCalculation(StaticUtils.TAG_REPLACEMENT
                            + xmlTag.getIntactContents().sourceToOriginal() + StaticUtils.TAG_REPLACEMENT);
                } else {
                    // default: nothing of the protected part is counted
                    part.setReplacementWordsCountCalculation(StaticUtils.TAG_REPLACEMENT);
                }
                part.setReplacementUniquenessCalculation(StaticUtils.TAG_REPLACEMENT);
                part.setReplacementMatchCalculation(xmlTag.getIntactContents().sourceToOriginal());
            } else {
                // simple tag, like <i0>
                if (StatisticsSettings.isCountingStandardTags()) {
                    part.setReplacementWordsCountCalculation(xmlTag.toSafeCalcShortcut());
                } else {
                    part.setReplacementWordsCountCalculation(StaticUtils.TAG_REPLACEMENT);
                }
                part.setReplacementUniquenessCalculation(StaticUtils.TAG_REPLACEMENT);
                part.setReplacementMatchCalculation(StaticUtils.TAG_REPLACEMENT);
            }
            protectedParts.add(part);
        } else if (element instanceof Tag) {
            Tag plainTag = (Tag) element;
            plainTag.setIndex(inlineTags.paired(plainTag.getTag(), plainTag.getType()));
            String shortcut = plainTag.toShortcut();
            out.append(shortcut);

            ProtectedPart part = new ProtectedPart();
            part.setTextInSourceSegment(shortcut);
            part.setDetailsFromSourceFile(plainTag.toOriginal());
            if (StatisticsSettings.isCountingStandardTags()) {
                part.setReplacementWordsCountCalculation(plainTag.toSafeCalcShortcut());
            } else {
                part.setReplacementWordsCountCalculation(StaticUtils.TAG_REPLACEMENT);
            }
            part.setReplacementUniquenessCalculation(StaticUtils.TAG_REPLACEMENT);
            part.setReplacementMatchCalculation(StaticUtils.TAG_REPLACEMENT);
            protectedParts.add(part);
        } else {
            // neither kind of tag: use the element's own shortcut text
            out.append(element.toShortcut());
        }
    }
    return out.toString();
}

/**
 * Renders a shortcut letter code point as text, falling back to "f" when
 * no letter (code point 0) has been assigned.
 */
private static String shortcutLetterText(int shortcutLetter) {
    if (shortcutLetter == 0) {
        return "f";
    }
    return String.valueOf(Character.toChars(shortcutLetter));
}
use of org.omegat.filters3.Element in project omegat by omegat-org.
the class Entry method detectTags.
/**
* Detects the first starting tag that has its ending in the paragraph
* ("first good") and the last ending tag that has its beginning in
* the paragraph ("last good").
* <p>
* The results are stored in <code>firstGood</code>, <code>lastGood</code> and
* <code>textInstance</code>. When the entry contains neither meaningful text
* nor a content-based tag, <code>firstGood</code> is set to -1,
* <code>lastGood</code> to -2 and <code>textInstance</code> to
* <code>null</code>.
*/
private void detectTags() {
// first, detect whether we have any text and where it starts:
// the scan stops at the first meaningful text; content-based tags seen
// before that point are remembered only as a fallback position
int textStart = -1;
for (int i = 0; i < size(); i++) {
Element elem = get(i);
if ((elem instanceof Text) && ((Text) elem).isMeaningful()) {
textStart = i;
break;
}
if (elem instanceof XMLContentBasedTag) {
textStart = i;
}
}
// remember the first meaningful text element itself
for (int i = 0; i < size(); i++) {
Element elem = get(i);
if ((elem instanceof Text) && ((Text) elem).isMeaningful()) {
textInstance = (Text) elem;
break;
}
}
if (textStart < 0) {
// we have no translatable text in the whole entry
// NOTE(review): -1/-2 presumably describe an empty range
// (lastGood < firstGood) - confirm against the callers
firstGood = -1;
lastGood = -2;
textInstance = null;
return;
}
// index of the last meaningful text; stays equal to textStart when the
// backward scan finds no meaningful text
int textEnd = textStart;
for (int i = size() - 1; i >= 0; i--) {
Element elem = get(i);
if ((elem instanceof Text) && ((Text) elem).isMeaningful()) {
textEnd = i;
break;
}
}
// if a content-based tag is inside the text, expand the text range to
// include its paired content-based tag; textStart/textEnd may change
// inside this loop and the loop bound is re-read on every iteration
for (int i = textStart; i <= textEnd; i++) {
Element elem = get(i);
if (elem instanceof XMLContentBasedTag) {
XMLContentBasedTag tag = (XMLContentBasedTag) elem;
if (tag.getTag().equals("bpt") || tag.getTag().equals("ept")) {
// id used for pairing: taken from the 'rid', 'id' and 'i' attributes
String id = StringUtil.nvl(tag.getAttribute("rid"), tag.getAttribute("id"), tag.getAttribute("i"));
if (id == null) {
// no id at all: this tag cannot be paired
continue;
}
// look for the paired tag before the text; the scan does not stop at
// the first match, so the match with the lowest index wins
for (int j = textStart - 1; j >= 0; j--) {
if (get(j) instanceof XMLContentBasedTag) {
XMLContentBasedTag tag2 = (XMLContentBasedTag) get(j);
if (tag2.getTag().equals("bpt") || tag2.getTag().equals("ept")) {
// id of the candidate tag
String id2 = StringUtil.nvl(tag2.getAttribute("rid"), tag2.getAttribute("id"), tag2.getAttribute("i"));
if (id.equals(id2)) {
textStart = j;
}
}
}
}
// look for the paired tag after the text; the scan does not stop at
// the first match, so the match with the highest index wins
for (int j = textEnd + 1; j < size(); j++) {
if (get(j) instanceof XMLContentBasedTag) {
XMLContentBasedTag tag2 = (XMLContentBasedTag) get(j);
if (tag2.getTag().equals("bpt") || tag2.getTag().equals("ept")) {
// id of the candidate tag
String id2 = StringUtil.nvl(tag2.getAttribute("rid"), tag2.getAttribute("id"), tag2.getAttribute("i"));
if (id.equals(id2)) {
textEnd = j;
}
}
}
}
}
}
}
// //////////////////////////////////////////////////////////////////////
// "first good"
// detecting the first starting tag that has its ending in the paragraph
boolean found = false;
for (firstGood = 0; firstGood < textStart; firstGood++) {
Element goodElem = get(firstGood);
if (!(goodElem instanceof Tag)) {
continue;
}
Tag good = (Tag) goodElem;
// only BEGIN tags located before the text are candidates
if (Tag.Type.BEGIN != good.getType()) {
continue;
}
// look for the matching END tag; 'recursion' counts the tags with the
// same name that are currently open, so nested pairs are skipped
int recursion = 1;
for (int i = firstGood + 1; i < textEnd; i++) {
Element candElement = get(i);
if (candElement instanceof Tag) {
Tag cand = (Tag) candElement;
if (cand.getTag().equals(good.getTag())) {
if (Tag.Type.BEGIN == cand.getType()) {
recursion++;
} else if (Tag.Type.END == cand.getType()) {
recursion--;
if (recursion == 0) {
// the pair only counts when it closes after the start of the text
if (i > textStart) {
found = true;
}
break;
}
}
}
}
}
// if we could find an ending, this is a "good one"
if (found) {
break;
}
}
if (!found) {
// no enclosing start tag: the range begins at the text itself
firstGood = textStart;
}
// //////////////////////////////////////////////////////////////////////
// "last good"
// detecting the last ending tag that has its starting in the paragraph
found = false;
for (lastGood = size() - 1; lastGood > textEnd; lastGood--) {
Element goodElem = get(lastGood);
if (!(goodElem instanceof Tag)) {
continue;
}
Tag good = (Tag) goodElem;
// only END tags located after the text are candidates
if (Tag.Type.END != good.getType()) {
continue;
}
// look backwards for the matching BEGIN tag; 'recursion' counts the
// same-named END tags that still need a BEGIN
int recursion = 1;
for (int i = lastGood - 1; i > textStart; i--) {
Element candElement = get(i);
if (candElement instanceof Tag) {
Tag cand = (Tag) candElement;
if (cand.getTag().equals(good.getTag())) {
if (Tag.Type.END == cand.getType()) {
recursion++;
} else if (Tag.Type.BEGIN == cand.getType()) {
recursion--;
if (recursion == 0) {
// the pair only counts when it opens before the end of the text
if (i < textEnd) {
found = true;
}
break;
}
}
}
}
}
// if we could find a starting tag, this is a "good one"
if (found) {
break;
}
}
if (!found) {
// no enclosing end tag: the range ends at the text itself
lastGood = textEnd;
}
boolean removeTags;
if (handler.getContext().isRemoveAllTags()) {
// If "Remove Tags" is on,
// "Remove leading and trailing tags" must be on as well
removeTags = true;
} else {
removeTags = Core.getFilterMaster().getConfig().isRemoveTags();
}
// leading/trailing tags are outside the range at this point - pull them
// back in when they must be kept, stopping at the first paragraph tag
if (!removeTags) {
for (int i = firstGood - 1; i >= 0; i--) {
Element elem = get(i);
if (elem instanceof Tag) {
if (handler.isParagraphTag((Tag) elem)) {
break;
}
firstGood = i;
}
}
for (int i = lastGood + 1; i < size(); i++) {
Element elem = get(i);
if (elem instanceof Tag) {
if (handler.isParagraphTag((Tag) elem)) {
break;
}
lastGood = i;
}
}
}
boolean removeSpacesAround = Core.getFilterMaster().getConfig().isRemoveSpacesNonseg();
// surrounding non-meaningful text is outside the range at this point -
// pull it back in when it must be kept, stopping at the first paragraph tag
if (!removeSpacesAround) {
for (int i = firstGood - 1; i >= 0; i--) {
Element elem = get(i);
if (elem instanceof Tag) {
if (handler.isParagraphTag((Tag) elem)) {
break;
}
}
if ((elem instanceof Text) && !((Text) elem).isMeaningful()) {
firstGood = i;
}
}
for (int i = lastGood + 1; i < size(); i++) {
Element elem = get(i);
if (elem instanceof Tag) {
if (handler.isParagraphTag((Tag) elem)) {
break;
}
}
if ((elem instanceof Text) && !((Text) elem).isMeaningful()) {
lastGood = i;
}
}
}
}
use of org.omegat.filters3.Element in project omegat by omegat-org.
the class Handler method translateButDontFlash.
/**
 * One of the main methods of the XML filter: it renders the current entry
 * as shortcut text, passes it to the translator when the current tag is
 * translatable, and stores the result back into the entry.
 * <p>
 * Does nothing when the current entry is empty.
 *
 * @throws TranslationException
 *             declared for the translation and entry calls made here
 * @see #translateAndFlush()
 */
private void translateButDontFlash() throws TranslationException {
    if (currEntry().isEmpty()) {
        return;
    }

    List<ProtectedPart> protectedParts = new ArrayList<ProtectedPart>();
    boolean aggregateTags = isTagsAggregationEnabled();
    String source = currEntry().sourceToShortcut(aggregateTags, dialect, protectedParts);
    Element first = currEntry().get(0);

    // the source is translated as-is when the entry starts with a
    // preformatting or space-preserving tag
    boolean keepWhitespace = (first instanceof Tag)
            && (isPreformattingTag(((Tag) first).getTag(), ((Tag) first).getAttributes())
                    || isSpacePreservingTag())
            && isTranslatableTag()
            && !StringUtil.isEmpty(source);

    String translation;
    if (keepWhitespace) {
        resetSpacePreservingTag();
        translation = translator.translate(source, protectedParts);
    } else {
        String toTranslate = Core.getFilterMaster().getConfig().isRemoveSpacesNonseg()
                ? StringUtil.compressSpaces(source)
                : source;
        translation = isTranslatableTag() ? translator.translate(toTranslate, protectedParts) : source;
        // untranslated text is written out uncompressed
        if (toTranslate.equals(translation)) {
            translation = source;
        }
    }
    currEntry().setTranslation(translation, dialect, new ArrayList<ProtectedPart>());
}
use of org.omegat.filters3.Element in project gocd by gocd.
the class HgModificationSplitter method parseDOMTree.
/**
 * Converts every "changeset" child of the document's root element into a
 * {@link Modification}, in document order.
 *
 * @param document parsed log document
 * @return one modification per "changeset" element
 * @throws ParseException declared for the per-changeset parsing done here
 */
private List<Modification> parseDOMTree(Document document) throws ParseException {
    List<?> changesets = document.getRootElement().getChildren("changeset");
    List<Modification> parsed = new ArrayList<>();
    for (Object entry : changesets) {
        parsed.add(parseChangeset((Element) entry));
    }
    return parsed;
}
Aggregations