Use of org.htmlcleaner.PrettyXmlSerializer in project Ebselen by Ardesco.
From the class IDEToEbselen, method convertToXML.
/**
 * Cleans the relevant HTML file and generates a valid XML file ready for processing to a Sel 2 java File.
 * <p>
 * The XML is written to the directory named by the {@code java.io.tmpdir} system property,
 * using the source file name with {@code .xml} appended and the "utf-8" charset.
 *
 * @param absoluteFilename - name of the file to convert.
 * @return String - location of the converted file, or {@code null} when
 *         {@code absoluteFilename} refers to a directory.
 * @throws Exception propagated from the file handling, cleaning or serialization calls.
 */
public String convertToXML(String absoluteFilename) throws Exception {
    FileHandler fromSelIDE = new FileHandler(absoluteFilename);
    // Reject directories first, before any output handler is set up for an input we cannot convert.
    if (fromSelIDE.getFile().isDirectory()) {
        LOGGER.error("Cannot convert directory {} into a Selenium Test!", fromSelIDE.getFileName());
        return null;
    }

    //Clean up html so that we can read it as XML properly
    HtmlCleaner cleaner = new HtmlCleaner();
    CleanerProperties xmlPrefs = cleaner.getProperties();
    xmlPrefs.setUseEmptyElementTags(true);
    xmlPrefs.setTranslateSpecialEntities(true);
    xmlPrefs.setTransResCharsToNCR(true);
    xmlPrefs.setOmitComments(true);
    xmlPrefs.setOmitDoctypeDeclaration(true);
    xmlPrefs.setNamespacesAware(false);
    // xmlPrefs is the cleaner's own properties object, so the same instance can do the cleaning.
    TagNode tagNode = cleaner.clean(fromSelIDE.getFile());

    // Only create the output handler once there is something to write.
    FileHandler toXML = new FileHandler(System.getProperty("java.io.tmpdir") + File.separator + fromSelIDE.getFileName() + ".xml", true);
    try {
        new PrettyXmlSerializer(xmlPrefs).writeToStream(tagNode, toXML.getWritableFileOutputStream(), "utf-8");
    } finally {
        // Always release the output handle, even when serialization fails.
        toXML.close();
    }
    return toXML.getAbsoluteFile();
}
Aggregations