Search in sources :

Example 21 with BasedSequence

use of com.vladsch.flexmark.util.sequence.BasedSequence in project flexmark-java by vsch.

the class InlineParserImpl method parseReference.

/**
 * Tries to read a link reference definition from the start of the given sequence. On success the
 * reference is stored in the reference repository and a {@code Reference} node is inserted before
 * {@code block}.
 *
 * @param block the block whose text is being parsed for references
 * @param s     the characters of the block
 * @return the number of characters that were consumed as a reference definition, counted from the
 *         start of the sequence, or {@code 0} if no reference definition was recognized
 */
protected int parseReference(Block block, BasedSequence s) {
    this.input = s;
    this.index = 0;
    final int startIndex = index;

    // label, which must be directly followed by a colon
    final int labelLength = parseLinkLabel();
    if (labelLength == 0 || peek() != ':') {
        return 0;
    }

    // the raw label spans the label characters plus the colon that follows them
    final BasedSequence rawLabel = input.subSequence(0, labelLength + 1);
    index++;

    // link url
    spnl();
    final BasedSequence dest = parseLinkDestination();
    if (dest == null || dest.length() == 0) {
        return 0;
    }

    // remember where the destination ended so the title attempt can be undone
    final int afterDestination = index;
    spnl();
    BasedSequence title = parseLinkTitle();
    if (title == null) {
        // no title: rewind before spaces
        index = afterDestination;
    }

    if (index != input.length() && match(myParsing.LINE_END) == null) {
        if (title == null) {
            // destination is followed by something other than a line end
            return 0;
        }

        // the potential title we found is not at the line end, but it could still be a legal
        // link reference if we discard the title, rewind before the spaces and find the
        // destination itself at the line end
        title = null;
        index = afterDestination;
        if (match(myParsing.LINE_END) == null) {
            return 0;
        }
    }

    final String normalizedLabel = Escaping.normalizeReferenceChars(rawLabel, true);
    if (normalizedLabel.isEmpty()) {
        return 0;
    }

    final Reference reference = new Reference(rawLabel, dest, title);
    // NOTE: whether first or last reference is kept is defined by the repository modify behavior setting
    // for CommonMark this is set in the initializeDocument() function of the inline parser
    referenceRepository.put(normalizedLabel, reference);
    block.insertBefore(reference);
    return index - startIndex;
}
Also used : BasedSequence(com.vladsch.flexmark.util.sequence.BasedSequence)

Example 22 with BasedSequence

use of com.vladsch.flexmark.util.sequence.BasedSequence in project flexmark-java by vsch.

the class InlineParserImpl method matchWithGroups.

/**
 * Searches the unconsumed input (from the current index to the end of the input) for {@code re}
 * using {@link Matcher#find()}. When a match is found, the index is advanced to the end of the
 * match and the match is returned together with its capture groups.
 * <p>
 * NOTE(review): callers presumably pass patterns anchored to the region start so that the match
 * begins at the current index; this method does not enforce that.
 *
 * @param re pattern to match
 * @return array whose element {@code 0} is the whole match and element {@code i} is capture group
 *         {@code i} ({@code null} for a group that did not take part in the match), or
 *         {@code null} if the input is exhausted or the pattern was not found
 */
@Override
public BasedSequence[] matchWithGroups(Pattern re) {
    final int inputLength = input.length();
    if (index >= inputLength) {
        return null;
    }

    final Matcher matcher = re.matcher(input);
    matcher.region(index, inputLength);
    if (!matcher.find()) {
        return null;
    }

    index = matcher.end();
    final MatchResult result = matcher.toMatchResult();
    final BasedSequence[] groups = new BasedSequence[result.groupCount() + 1];
    groups[0] = input.subSequence(result.start(), result.end());
    for (int g = 1; g < groups.length; g++) {
        // slots of groups that did not participate keep the array's default null
        if (result.group(g) != null) {
            groups[g] = input.subSequence(result.start(g), result.end(g));
        }
    }
    return groups;
}
Also used : Matcher(java.util.regex.Matcher) BasedSequence(com.vladsch.flexmark.util.sequence.BasedSequence) MatchResult(java.util.regex.MatchResult)

Example 23 with BasedSequence

use of com.vladsch.flexmark.util.sequence.BasedSequence in project flexmark-java by vsch.

the class InlineParserImpl method parseEntity.

/**
 * Attempt to parse an entity at the current position; on success an {@code HtmlEntity} node
 * wrapping the matched characters is appended.
 *
 * @return true if an entity was matched and a node appended, false otherwise
 */
@Override
public boolean parseEntity() {
    final BasedSequence entity = match(myParsing.ENTITY_HERE);
    if (entity == null) {
        return false;
    }
    appendNode(new HtmlEntity(entity));
    return true;
}
Also used : BasedSequence(com.vladsch.flexmark.util.sequence.BasedSequence)

Example 24 with BasedSequence

use of com.vladsch.flexmark.util.sequence.BasedSequence in project flexmark-java by vsch.

the class InlineParserImpl method parseNewline.

/**
 * Parse a newline. If the preceding text node ends in two or more spaces, append a hard line break;
 * otherwise append a soft line break. Trailing spaces (and a trailing {@code \r}, when the newline
 * is part of a CR LF pair) are removed from the preceding text node, and leading spaces of the next
 * line are skipped.
 *
 * @return true
 */
@Override
public boolean parseNewline() {
    final boolean crLf = index > 0 && input.charAt(index - 1) == '\r';
    final int crLfDelta = crLf ? 1 : 0;

    // assume we're at a \n
    index++;

    // We're gonna add a new node in any case and we need to check the last text node, so flush outstanding text.
    flushTextNode();
    final Node lastChild = block.getLastChild();
    final boolean lastIsText = lastChild instanceof Text;

    // The "endsWith" is an optimization to avoid an RE match in the common case.
    final boolean textEndsWithSpace = lastIsText
            && (lastChild.getChars().endsWith(" ") || (crLf && lastChild.getChars().endsWith(" \r")));

    if (textEndsWithSpace) {
        final BasedSequence literal = lastChild.getChars();
        final Matcher matcher = myParsing.FINAL_SPACE.matcher(literal);
        // length of the FINAL_SPACE match, less one when the newline is part of a CR LF pair
        final int spaces = matcher.find() ? matcher.end() - matcher.start() - crLfDelta : 0;

        if (spaces >= 2) {
            // number of input characters, counted back from the index, covered by the break node
            final int breakLength = options.hardLineBreakLimit ? 3 + crLfDelta : spaces + 1 + crLfDelta;
            appendNode(new HardLineBreak(input.subSequence(index - breakLength, index)));
        } else {
            appendNode(new SoftLineBreak(input.subSequence(index - 1 - crLfDelta, index)));
        }

        if (spaces + crLfDelta > 0) {
            if (literal.length() > spaces) {
                lastChild.setChars(literal.subSequence(0, literal.length() - spaces - crLfDelta).trimEnd());
            } else {
                lastChild.unlink();
            }
        }
    } else {
        if (crLf && lastIsText) {
            // shorten the preceding text by one character for the CR LF pair, or drop the node
            final BasedSequence literal = lastChild.getChars();
            if (literal.length() > 1) {
                lastChild.setChars(literal.subSequence(0, literal.length() - crLfDelta).trimEnd());
            } else {
                lastChild.unlink();
            }
        }
        appendNode(new SoftLineBreak(input.subSequence(index - 1 - crLfDelta, index)));
    }

    // gobble leading spaces in next line
    while (peek() == ' ') {
        index++;
    }
    return true;
}
Also used : Matcher(java.util.regex.Matcher) BasedSequence(com.vladsch.flexmark.util.sequence.BasedSequence)

Example 25 with BasedSequence

use of com.vladsch.flexmark.util.sequence.BasedSequence in project flexmark-java by vsch.

the class ListBlockParser method parseListMarker.

/**
 * Parse a list marker and return data on the marker or null.
 * <p>
 * The marker is searched for in the part of the current line that starts at the line's next
 * non-space index. After the marker, the run of spaces/tabs is measured to determine the content
 * offset, and an optional marker suffix (taken from {@code options.getItemMarkerSuffixes()}) is
 * recognized.
 *
 * @param options           list options; read here for the marker suffix settings
 * @param newItemCodeIndent threshold for the whitespace width after the marker (or after the marker
 *                          suffix); when the measured width exceeds it, the content offset falls
 *                          back to one column
 * @param state             parser state providing the parsing patterns, current line, next non-space
 *                          index, column and indent
 * @return data describing the list marker, or {@code null} if {@code LIST_ITEM_MARKER} is not found
 */
static ListData parseListMarker(ListOptions options, int newItemCodeIndent, ParserState state) {
    Parsing parsing = state.getParsing();
    BasedSequence line = state.getLine();
    int markerIndex = state.getNextNonSpaceIndex();
    int markerColumn = state.getColumn() + state.getIndent();
    int markerIndent = state.getIndent();
    // only the part of the line from the first non-space character on is searched for a marker
    BasedSequence rest = line.subSequence(markerIndex, line.length());
    Matcher matcher = parsing.LIST_ITEM_MARKER.matcher(rest);
    if (!matcher.find()) {
        return null;
    }
    ListBlock listBlock = createListBlock(matcher);
    int markerLength = matcher.end() - matcher.start();
    // any matched marker text that is not found inside "+-*" is treated as a numbered-list marker
    boolean isNumberedList = !"+-*".contains(matcher.group());
    int indexAfterMarker = markerIndex + markerLength;
    // marker doesn't include tabs, so counting them as columns directly is ok
    int columnAfterMarker = markerColumn + markerLength;
    // the column within the line where the content starts
    int contentOffset = 0;
    // See at which column the content starts if there is content
    boolean hasContent = false;
    int contentIndex = indexAfterMarker;
    // first scan: walk the spaces/tabs after the marker, tracking both columns (contentOffset)
    // and characters (contentIndex), until a non-whitespace character or the end of the line
    for (int i = indexAfterMarker; i < line.length(); i++) {
        char c = line.charAt(i);
        if (c == '\t') {
            contentOffset += Parsing.columnsToNextTabStop(columnAfterMarker + contentOffset);
            contentIndex++;
        } else if (c == ' ') {
            contentOffset++;
            contentIndex++;
        } else {
            hasContent = true;
            break;
        }
    }
    BasedSequence markerSuffix = BasedSequence.NULL;
    // captured before any suffix handling below changes contentOffset
    int markerSuffixOffset = contentOffset;
    if (!hasContent || contentOffset > newItemCodeIndent) {
        // If this line is blank or has a code block, default to 1 space after marker
        markerSuffixOffset = contentOffset = 1;
    } else if (!isNumberedList || options.isNumberedItemMarkerSuffixed()) {
        // see if we have optional suffix strings on the marker
        String[] markerSuffixes = options.getItemMarkerSuffixes();
        for (String suffix : markerSuffixes) {
            int suffixLength = suffix.length();
            if (suffixLength > 0 && line.matchChars(suffix, contentIndex)) {
                if (options.isItemMarkerSpace()) {
                    // with this option the suffix only counts when a space or tab follows it
                    final char c = line.midCharAt(contentIndex + suffixLength);
                    if (c != ' ' && c != '\t') {
                        // no space after, no match
                        continue;
                    }
                }
                // suffix accepted: content now starts after it, so shift offset, index and column
                markerSuffix = line.subSequence(contentIndex, contentIndex + suffixLength);
                contentOffset += suffixLength;
                contentIndex += suffixLength;
                columnAfterMarker += suffixLength;
                hasContent = false;
                int suffixContentOffset = contentOffset;
                // second scan: measure the spaces/tabs that follow the suffix (columns only;
                // contentIndex is not advanced here and is not read again afterwards)
                for (int i = contentIndex; i < line.length(); i++) {
                    char c = line.charAt(i);
                    if (c == '\t') {
                        contentOffset += Parsing.columnsToNextTabStop(columnAfterMarker + contentOffset);
                    } else if (c == ' ') {
                        contentOffset++;
                    } else {
                        hasContent = true;
                        break;
                    }
                }
                if (!hasContent || contentOffset - suffixContentOffset > newItemCodeIndent) {
                    // If this line is blank or has a code block, default to 1 space after marker suffix
                    contentOffset = suffixContentOffset + 1;
                }
                // only the first matching suffix is used
                break;
            }
        }
    }
    // the second argument is true when no non-whitespace content was found after the marker (or its suffix)
    return new ListData(listBlock, !hasContent, markerIndex, markerColumn, markerIndent, contentOffset, rest.subSequence(matcher.start(), matcher.end()), isNumberedList, markerSuffix, markerSuffixOffset);
}
Also used : Parsing(com.vladsch.flexmark.ast.util.Parsing) Matcher(java.util.regex.Matcher) BasedSequence(com.vladsch.flexmark.util.sequence.BasedSequence)

Aggregations

BasedSequence (com.vladsch.flexmark.util.sequence.BasedSequence): 91, Matcher (java.util.regex.Matcher): 13, Node (com.vladsch.flexmark.ast.Node): 6, ArrayList (java.util.ArrayList): 5, MacroClose (com.vladsch.flexmark.ext.xwiki.macros.MacroClose): 3, ReplacedTextMapper (com.vladsch.flexmark.util.sequence.ReplacedTextMapper): 3, Text (com.vladsch.flexmark.ast.Text): 2, AttributesNode (com.vladsch.flexmark.ext.attributes.AttributesNode): 2, FootnoteBlock (com.vladsch.flexmark.ext.footnotes.FootnoteBlock): 2, Macro (com.vladsch.flexmark.ext.xwiki.macros.Macro): 2, Pair (com.vladsch.flexmark.util.Pair): 2, RepeatedCharSequence (com.vladsch.flexmark.util.sequence.RepeatedCharSequence): 2, Block (com.vladsch.flexmark.ast.Block): 1, BulletListItem (com.vladsch.flexmark.ast.BulletListItem): 1, Link (com.vladsch.flexmark.ast.Link): 1, ListItem (com.vladsch.flexmark.ast.ListItem): 1, NodeIterator (com.vladsch.flexmark.ast.NodeIterator): 1, OrderedListItem (com.vladsch.flexmark.ast.OrderedListItem): 1, Parsing (com.vladsch.flexmark.ast.util.Parsing): 1, TextCollectingVisitor (com.vladsch.flexmark.ast.util.TextCollectingVisitor): 1