Use of com.helger.css.parser.ParserCSSCharsetDetectorTokenManager in project ph-css by phax.
From the class CSSReader, method getCharsetDeclaredInCSS.
/**
 * Work out which charset a CSS source declares for itself.
 * <ol>
 * <li>The stream is first inspected for a BOM. If a BOM with a matching
 * Charset is found, that charset is used to read the <code>@charset</code>
 * rule; otherwise the content is initially read as ISO-8859-1.</li>
 * <li>If the content contains a valid <code>@charset</code> rule, the charset
 * named there is returned, even if the BOM indicates a different one (a
 * warning is logged in that case).</li>
 * <li>If the content does not contain a valid <code>@charset</code> rule, then
 * the charset of the BOM is returned (if any).</li>
 * <li>Otherwise <code>null</code> is returned.</li>
 * </ol>
 *
 * @param aISP
 *        The input stream provider to read from. May not be
 *        <code>null</code>.
 * @return <code>null</code> if the input stream could not be opened or if
 *         neither a BOM nor a charset is specified. Otherwise a
 *         non-<code>null</code> Charset is returned.
 * @throws IllegalStateException
 *         if reading or resolving the charset declaration fails, e.g.
 *         because an invalid charset is supplied. The original problem is
 *         attached as the cause.
 */
@Nullable
public static Charset getCharsetDeclaredInCSS(@Nonnull final IHasInputStream aISP) {
  ValueEnforcer.notNull(aISP, "InputStreamProvider");

  // Nothing to inspect if the provider cannot deliver a stream
  final InputStream aInputStream = aISP.getInputStream();
  if (aInputStream == null)
    return null;

  // Look for a BOM first; the returned stream is the one to continue reading from
  final InputStreamAndCharset aBOMResult = CharsetHelper.getInputStreamAndCharsetFromBOM(aInputStream);
  final Charset aBOMCharset = aBOMResult.getCharset();

  // Without a BOM fall back to ISO-8859-1, which can represent everything
  // that may occur inside a CSS charset declaration.
  // Known limitation: UTF-16, UTF-16BE, UTF-16LE etc. encoded files can only
  // be read correctly if a BOM is present!
  final Charset aInitialCharset = aBOMCharset != null ? aBOMCharset : StandardCharsets.ISO_8859_1;

  final Reader aReader = StreamHelper.createReader(aBOMResult.getInputStream(), aInitialCharset);
  try {
    // Run the dedicated charset detector grammar over the content
    final ParserCSSCharsetDetector aParser =
        new ParserCSSCharsetDetector(new ParserCSSCharsetDetectorTokenManager(new CSSCharStream(aReader)));
    final String sQuotedCharset = aParser.styleSheetCharset().getText();

    // No charset declared in the content - the BOM charset (may be null) decides
    if (sQuotedCharset == null)
      return aBOMCharset;

    // Strip the surrounding quotes before resolving the name
    final Charset aDeclaredCharset =
        CharsetHelper.getCharsetFromName(CSSParseHelper.extractStringValue(sQuotedCharset));

    // The declaration wins over the BOM, but a mismatch is worth a warning
    if (aBOMCharset != null && !aBOMCharset.equals(aDeclaredCharset)) {
      s_aLogger.warn("The charset found in the CSS data (" + aDeclaredCharset.name() + ") differs from the charset determined by the BOM (" + aBOMCharset.name() + ") -> Using the read charset");
    }
    return aDeclaredCharset;
  } catch (final Throwable ex) {
    // Intentionally broad: grammar errors (ParseException) and any other
    // failure are reported the same way.
    // As e.g. indicated by https://github.com/phax/ph-css/issues/9
    throw new IllegalStateException("Failed to parse CSS charset definition", ex);
  } finally {
    // Always release the reader, on success as well as on failure
    StreamHelper.close(aReader);
  }
}
Aggregations