Use of com.helger.commons.charset.CharsetHelper.InputStreamAndCharset in project ph-css by phax.
The class CSSReader, method getCharsetDeclaredInCSS.
/**
 * Find out which charset a CSS source declares for itself. The detection
 * works in these steps:
 * <ol>
 * <li>The stream is inspected for a BOM. If the BOM maps to a charset, that
 * charset is used to read the <code>@charset</code> rule; otherwise the
 * content is read as ISO-8859-1 for detection purposes only.</li>
 * <li>If the content contains a <code>@charset</code> rule, the charset named
 * there is returned - even when the BOM indicated a different charset (a
 * warning is logged in that case).</li>
 * <li>If the content does not contain a <code>@charset</code> rule, then the
 * charset of the BOM is returned (if any).</li>
 * <li>Otherwise <code>null</code> is returned.</li>
 * </ol>
 * The stream obtained from the provider is always closed before this method
 * returns or throws.
 *
 * @param aISP
 *        The input stream provider to read from. May not be
 *        <code>null</code>.
 * @return <code>null</code> if the input stream could not be opened or if
 *         neither a BOM nor a charset rule is present. Otherwise a
 *         non-<code>null</code> Charset is returned.
 * @throws IllegalStateException
 *         if the charset declaration cannot be parsed or evaluated. Every
 *         {@link Throwable} raised during detection is wrapped into this
 *         exception.
 */
@Nullable
public static Charset getCharsetDeclaredInCSS(@Nonnull final IHasInputStream aISP) {
  ValueEnforcer.notNull(aISP, "InputStreamProvider");

  final InputStream aRawIS = aISP.getInputStream();
  if (aRawIS == null) {
    // Nothing to read from
    return null;
  }

  // Check for a BOM first - it may already tell us how to decode the bytes
  final InputStreamAndCharset aBOMResult = CharsetHelper.getInputStreamAndCharsetFromBOM(aRawIS);
  final Charset aCharsetFromBOM = aBOMResult.getCharset();

  // Without a BOM, ISO-8859-1 is sufficient to decode the charset
  // declaration itself. Known limitation: UTF-16 encoded files (UTF-16,
  // UTF-16BE, UTF-16LE, ...) need a BOM to be read correctly!
  final Charset aDetectionCharset = aCharsetFromBOM != null ? aCharsetFromBOM : StandardCharsets.ISO_8859_1;

  final Reader aDetectionReader = StreamHelper.createReader(aBOMResult.getInputStream(), aDetectionCharset);
  try {
    // Run the dedicated charset detection grammar on the decoded content
    final ParserCSSCharsetDetector aDetector =
        new ParserCSSCharsetDetector(new ParserCSSCharsetDetectorTokenManager(new CSSCharStream(aDetectionReader)));
    final String sQuotedCharsetName = aDetector.styleSheetCharset().getText();
    if (sQuotedCharsetName == null) {
      // No @charset rule - the BOM charset (possibly null) is all we have
      return aCharsetFromBOM;
    }

    // Strip the surrounding quotes and resolve the name to a Charset
    final Charset aDeclaredCharset =
        CharsetHelper.getCharsetFromName(CSSParseHelper.extractStringValue(sQuotedCharsetName));

    final boolean bBOMAgrees = aCharsetFromBOM == null || aCharsetFromBOM.equals(aDeclaredCharset);
    if (!bBOMAgrees) {
      // The declaration inside the CSS wins over the BOM
      s_aLogger.warn("The charset found in the CSS data (" + aDeclaredCharset.name() + ") differs from the charset determined by the BOM (" + aCharsetFromBOM.name() + ") -> Using the read charset");
    }
    return aDeclaredCharset;
  } catch (final Throwable ex) {
    // Covers grammar errors (ParseException) as well as lower level
    // failures, as e.g. indicated by https://github.com/phax/ph-css/issues/9
    throw new IllegalStateException("Failed to parse CSS charset definition", ex);
  } finally {
    StreamHelper.close(aDetectionReader);
  }
}
Use of com.helger.commons.charset.CharsetHelper.InputStreamAndCharset in project ph-css by phax.
The class CSSReader, method readFromStream.
/**
 * Read the CSS from the passed {@link IHasInputStream}. The provider is asked
 * for an input stream twice: once to detect the charset (BOM and/or
 * <code>@charset</code> rule) and a second time to parse the whole CSS with
 * the charset that was determined. The passed {@link IHasInputStream} must
 * therefore be able to create a new input stream on every invocation!
 * <p>
 * If neither a BOM nor a <code>@charset</code> rule is found, the fallback
 * charset from the settings is used. The reader created for parsing is always
 * closed before this method returns.
 *
 * @param aISP
 *        The input stream provider to use. Must be able to create new input
 *        streams on every invocation. May not be <code>null</code>.
 * @param aSettings
 *        The settings to be used for reading the CSS. May not be
 *        <code>null</code>.
 * @return <code>null</code> if the charset detection failed, if the input
 *         stream could not be opened or if the content could not be parsed
 *         as CSS; the CSS declarations otherwise.
 * @since 3.8.2
 */
@Nullable
public static CascadingStyleSheet readFromStream(@Nonnull final IHasInputStream aISP, @Nonnull final CSSReaderSettings aSettings) {
  ValueEnforcer.notNull(aISP, "InputStreamProvider");
  ValueEnforcer.notNull(aSettings, "Settings");

  // Check if the CSS contains a declared charset or as an alternative use the
  // Charset from the BOM
  final Charset aDeclaredCharset;
  try {
    aDeclaredCharset = getCharsetDeclaredInCSS(aISP);
  } catch (final IllegalStateException ex) {
    // Failed to parse CSS at a very low level. Still a "soft" failure for the
    // caller (null result), but leave a trace so that it can be diagnosed.
    s_aLogger.warn("Failed to determine the charset declared in the CSS", ex);
    return null;
  }

  final Charset aCharsetToUse;
  if (aDeclaredCharset != null) {
    if (s_aLogger.isDebugEnabled())
      s_aLogger.debug("Reading CSS definition again with explicit charset '" + aDeclaredCharset.name() + "'");
    aCharsetToUse = aDeclaredCharset;
  } else {
    // No charset declared - use fallback
    aCharsetToUse = aSettings.getFallbackCharset();
  }

  // Open a fresh input stream for the real parsing run
  final InputStream aISOrig = aISP.getInputStream();
  if (aISOrig == null)
    return null;

  // Run the stream through the BOM handling again; only the resulting stream
  // is used here, the charset was already decided above
  final InputStreamAndCharset aISAndBOM = CharsetHelper.getInputStreamAndCharsetFromBOM(aISOrig);
  final InputStream aIS = aISAndBOM.getInputStream();
  final Reader aReader = StreamHelper.createReader(aIS, aCharsetToUse);
  final ECSSVersion eVersion = aSettings.getCSSVersion();
  try {
    final CSSCharStream aCharStream = new CSSCharStream(aReader);
    aCharStream.setTabSize(aSettings.getTabSize());

    // Use the default CSS parse error handler if none is provided
    ICSSParseErrorHandler aRealParseErrorHandler = aSettings.getCustomErrorHandler();
    if (aRealParseErrorHandler == null)
      aRealParseErrorHandler = getDefaultParseErrorHandler();

    // Use the default CSS exception handler if none is provided
    ICSSParseExceptionCallback aRealParseExceptionHandler = aSettings.getCustomExceptionHandler();
    if (aRealParseExceptionHandler == null)
      aRealParseExceptionHandler = getDefaultParseExceptionHandler();

    final boolean bBrowserCompliantMode = aSettings.isBrowserCompliantMode();
    final CSSNode aNode = _readStyleSheet(aCharStream, eVersion, aRealParseErrorHandler, aRealParseExceptionHandler, bBrowserCompliantMode);

    // Failed to parse content as CSS?
    if (aNode == null)
      return null;

    // Use the default interpret error handler if none is provided
    ICSSInterpretErrorHandler aRealInterpretErrorHandler = aSettings.getInterpretErrorHandler();
    if (aRealInterpretErrorHandler == null)
      aRealInterpretErrorHandler = getDefaultInterpretErrorHandler();

    // Convert the AST to a domain object
    return CSSHandler.readCascadingStyleSheetFromNode(eVersion, aNode, aRealInterpretErrorHandler);
  } finally {
    StreamHelper.close(aReader);
  }
}
Aggregations