Search in sources :

Example 1 with HtmlBuilderDriver

Use of com.intellij.psi.impl.source.parsing.xml.HtmlBuilderDriver in project intellij-community by JetBrains.

Class HtmlUtil, method detectCharsetFromMetaTag.

/**
 * Detects the charset declared by a {@code <meta>} tag of an HTML document.
 *
 * <p>Two declaration forms are recognised:
 * <ul>
 *   <li>{@code <meta http-equiv="Content-Type" content="...; charset=CharsetName">}</li>
 *   <li>{@code <meta charset="CharsetName">}</li>
 * </ul>
 * The content is parsed lightly with {@link HtmlBuilderDriver}; parsing stops as soon as a
 * charset is found, the {@code head} element is closed, or a tag other than {@code html} is
 * opened while no {@code head} has been seen.
 *
 * @param content the HTML text to inspect
 * @return the result of {@code CharsetToolkit.forName} for the declared name, or {@code null}
 *         when the quick pre-check finds no {@code CHARSET} token followed by {@code '='}
 */
public static Charset detectCharsetFromMetaTag(@NotNull CharSequence content) {
    // An explicit charset is rare, so before parsing do a cheap scan for the CHARSET token
    // followed (after optional whitespace) by '='. If there is none, there is nothing to detect.
    // NOTE(review): this scan is case-sensitive on CHARSET (presumably "charset") - confirm intended.
    int charPrefix = StringUtil.indexOf(content, CHARSET);
    while (true) {
        if (charPrefix == -1) {
            return null;
        }
        int charsetPrefixEnd = charPrefix + CHARSET.length();
        while (charsetPrefixEnd < content.length() && Character.isWhitespace(content.charAt(charsetPrefixEnd))) {
            ++charsetPrefixEnd;
        }
        if (charsetPrefixEnd < content.length() && content.charAt(charsetPrefixEnd) == '=') {
            break;
        }
        charPrefix = StringUtil.indexOf(content, CHARSET, charsetPrefixEnd);
    }

    final Ref<String> charsetNameRef = new Ref<>();
    try {
        new HtmlBuilderDriver(content).build(new XmlBuilder() {

            // Lower-cased names of the tags that are currently open.
            @NonNls
            final Set<String> inTag = new THashSet<>();

            // True once the current tag carried http-equiv="content-type".
            boolean metHttpEquiv = false;

            // True once the current tag carried a CHARSET attribute (HTML5 form).
            boolean metHtml5Charset = false;

            // Lower-cased value of the last "content" or CHARSET attribute of the current tag.
            private String contentAttributeValue;

            @Override
            public void doctype(@Nullable final CharSequence publicId, @Nullable final CharSequence systemId, final int startOffset, final int endOffset) {
            }

            @Override
            public ProcessingOrder startTag(final CharSequence localName, final String namespace, final int startoffset, final int endoffset, final int headerEndOffset) {
                // Locale.ROOT keeps the comparison stable under locales with special casing rules
                // (e.g. Turkish dotless i).
                @NonNls String name = localName.toString().toLowerCase(java.util.Locale.ROOT);
                inTag.add(name);
                // Anything other than <html> opened outside <head> means the head is over (or absent).
                if (!inTag.contains("head") && !"html".equals(name)) {
                    terminate();
                }
                return ProcessingOrder.TAGS_AND_ATTRIBUTES;
            }

            // Aborts parsing; the exception is caught by the enclosing method.
            private void terminate() {
                throw TerminateException.INSTANCE;
            }

            @Override
            public void endTag(final CharSequence localName, final String namespace, final int startoffset, final int endoffset) {
                @NonNls final String name = localName.toString().toLowerCase(java.util.Locale.ROOT);
                if ("meta".equals(name) && (metHttpEquiv || metHtml5Charset) && contentAttributeValue != null) {
                    if (metHttpEquiv) {
                        int start = contentAttributeValue.indexOf(CHARSET_PREFIX);
                        if (start != -1) {
                            start += CHARSET_PREFIX.length();
                            int end = contentAttributeValue.indexOf(';', start);
                            if (end == -1) {
                                end = contentAttributeValue.length();
                            }
                            // Trim so that padding around the name does not make the lookup fail.
                            charsetNameRef.set(contentAttributeValue.substring(start, end).trim());
                            terminate();
                        }
                        // No charset in this http-equiv meta: fall through to the state reset below
                        // instead of returning early, otherwise the stale flags leak into later tags.
                    } else {
                        // HTML5 form: the attribute value itself is the charset name.
                        charsetNameRef.set(StringUtil.stripQuotesAroundValue(contentAttributeValue));
                        terminate();
                    }
                }
                if ("head".equals(name)) {
                    terminate();
                }
                // Reset per-tag state before the next tag is processed.
                inTag.remove(name);
                metHttpEquiv = false;
                metHtml5Charset = false;
                contentAttributeValue = null;
            }

            @Override
            public void attribute(final CharSequence localName, final CharSequence v, final int startoffset, final int endoffset) {
                @NonNls final String name = localName.toString().toLowerCase(java.util.Locale.ROOT);
                if (inTag.contains("meta")) {
                    @NonNls String value = v.toString().toLowerCase(java.util.Locale.ROOT);
                    if (name.equals("http-equiv")) {
                        metHttpEquiv |= value.equals("content-type");
                    } else if (name.equals(CHARSET)) {
                        metHtml5Charset = true;
                        contentAttributeValue = value;
                    }
                    if (name.equals("content")) {
                        contentAttributeValue = value;
                    }
                }
            }

            @Override
            public void textElement(final CharSequence display, final CharSequence physical, final int startoffset, final int endoffset) {
            }

            @Override
            public void entityRef(final CharSequence ref, final int startOffset, final int endOffset) {
            }

            @Override
            public void error(String message, int startOffset, int endOffset) {
            }
        });
    } catch (TerminateException ignored) {
        // Expected: thrown by terminate() to stop parsing early.
    } catch (Exception ignored) {
        // Best effort: malformed input (e.g. an unbalanced tree) must not break detection.
    }
    String name = charsetNameRef.get();
    return CharsetToolkit.forName(name);
}
Also used : THashSet(gnu.trove.THashSet) HtmlBuilderDriver(com.intellij.psi.impl.source.parsing.xml.HtmlBuilderDriver) Ref(com.intellij.openapi.util.Ref) XmlBuilder(com.intellij.psi.impl.source.parsing.xml.XmlBuilder)

Aggregations

Ref (com.intellij.openapi.util.Ref)1 HtmlBuilderDriver (com.intellij.psi.impl.source.parsing.xml.HtmlBuilderDriver)1 XmlBuilder (com.intellij.psi.impl.source.parsing.xml.XmlBuilder)1 THashSet (gnu.trove.THashSet)1