
Example 1 with TokensLine

Use of org.sonar.api.batch.sensor.cpd.internal.TokensLine in project sonarqube by SonarSource.

Class TokenizerBridge, method addNewTokensLine.

private static void addNewTokensLine(List<TokensLine> result, int startUnit, int endUnit, int startLine, StringBuilder sb) {
    // Flush only when at least one token value has been accumulated for the current line
    // (the buffer is empty on the very first call, before any token has been read).
    if (sb.length() != 0) {
        result.add(new TokensLine(startUnit, endUnit, startLine, sb.toString()));
        sb.setLength(0);
    }
}
Also used: TokensLine (org.sonar.api.batch.sensor.cpd.internal.TokensLine)
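
As a quick, conceptual illustration (the helper is private and is only driven by convert below), the buffer is flushed into a new TokensLine only when it is non-empty:

// Hypothetical call sequence, mirroring how convert() drives the helper:
List<TokensLine> result = new ArrayList<>();
StringBuilder sb = new StringBuilder();
addNewTokensLine(result, 0, 0, Integer.MIN_VALUE, sb); // buffer empty: nothing is added (the very first flush)
sb.append("int").append("x");                          // token values accumulated for one physical line
addNewTokensLine(result, 1, 2, 1, sb);                 // adds TokensLine(1, 2, 1, "intx") and clears the buffer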

Example 2 with TokensLine

Use of org.sonar.api.batch.sensor.cpd.internal.TokensLine in project sonarqube by SonarSource.

Class TokenizerBridge, method convert.

/**
 * We expect the {@link Tokenizer} implementation to be correct:
 * tokens are ordered by their occurrence in the source code and the last token is EOF.
 */
public static List<TokensLine> convert(List<TokenEntry> tokens) {
    List<TokensLine> result = new ArrayList<>();
    StringBuilder sb = new StringBuilder();
    int startLine = Integer.MIN_VALUE;
    int startIndex = 0;
    int currentIndex = 0;
    for (TokenEntry token : tokens) {
        if (token != TokenEntry.EOF) {
            String value = token.getValue();
            int line = token.getBeginLine();
            if (line != startLine) {
                addNewTokensLine(result, startIndex, currentIndex, startLine, sb);
                startIndex = currentIndex + 1;
                startLine = line;
            }
            currentIndex++;
            sb.append(value);
        }
    }
    addNewTokensLine(result, startIndex, currentIndex, startLine, sb);
    return result;
}
Also used: TokenEntry (net.sourceforge.pmd.cpd.TokenEntry), TokensLine (org.sonar.api.batch.sensor.cpd.internal.TokensLine), ArrayList (java.util.ArrayList)
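
For context, a minimal sketch of feeding convert a hand-built token list. It assumes the classic PMD CPD TokenEntry(image, sourceId, beginLine) constructor, which is not shown in this listing; convert itself only relies on getValue(), getBeginLine() and the trailing EOF sentinel.

import java.util.Arrays;
import java.util.List;
import net.sourceforge.pmd.cpd.TokenEntry;
import org.sonar.api.batch.sensor.cpd.internal.TokensLine;

// TokenizerBridge is imported from its package in the scanner's CPD code (not shown in this listing).
public class TokenizerBridgeConvertSketch {
    public static void main(String[] args) {
        // Two tokens on line 1, one token on line 3; the trailing EOF entry is skipped by convert().
        List<TokenEntry> tokens = Arrays.asList(
            // assumed (image, sourceId, beginLine) constructor
            new TokenEntry("int", "Foo.java", 1),
            new TokenEntry("x", "Foo.java", 1),
            new TokenEntry("return", "Foo.java", 3),
            TokenEntry.EOF);
        List<TokensLine> lines = TokenizerBridge.convert(tokens);
        System.out.println(lines.size());            // 2
        System.out.println(lines.get(0).getValue()); // "intx"   (units 1..2, line 1)
        System.out.println(lines.get(1).getValue()); // "return" (units 3..3, line 3)
    }
}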

Example 3 with TokensLine

Use of org.sonar.api.batch.sensor.cpd.internal.TokensLine in project sonarqube by SonarSource.

Class PmdBlockChunkerTest, method shouldBuildBlocks.

@Test
public void shouldBuildBlocks() {
    TokensLine line1 = new TokensLine(0, 9, 1, Character.toString((char) 1));
    TokensLine line2 = new TokensLine(10, 19, 2, Character.toString((char) 2));
    TokensLine line3 = new TokensLine(20, 29, 3, Character.toString((char) 3));
    List<Block> blocks = new PmdBlockChunker(2).chunk("resourceId", Arrays.asList(line1, line2, line3));
    assertThat(blocks.size(), is(2));
    Block block = blocks.get(0);
    // assertThat(block.getLengthInUnits(), is(11));
    assertThat(block.getStartLine(), is(1));
    assertThat(block.getEndLine(), is(2));
    assertThat(block.getBlockHash(), is(new ByteArray(1L * 31 + 2)));
    block = blocks.get(1);
    // assertThat(block.getLengthInUnits(), is(33));
    assertThat(block.getStartLine(), is(2));
    assertThat(block.getEndLine(), is(3));
    assertThat(block.getBlockHash(), is(new ByteArray(2L * 31 + 3)));
}
Also used: TokensLine (org.sonar.api.batch.sensor.cpd.internal.TokensLine), Block (org.sonar.duplications.block.Block), ByteArray (org.sonar.duplications.block.ByteArray), Test (org.junit.Test)
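
A short note on where the expected hash values come from. This is a sketch assuming the chunker's PRIME_BASE is 31, which is what the 1L * 31 + 2 expectation implies; the constant itself is not shown in this listing.

public class BlockHashSketch {
    public static void main(String[] args) {
        // TokensLine.getHashCode() is the hash of the line's value (see TokenizerBridgeTest below),
        // and the hashCode() of a one-character String is simply its char value.
        int h1 = Character.toString((char) 1).hashCode(); // 1
        int h2 = Character.toString((char) 2).hashCode(); // 2
        int h3 = Character.toString((char) 3).hashCode(); // 3
        // With blockSize = 2, a block hash is hash(firstLine) * PRIME_BASE + hash(lastLine):
        System.out.println((long) h1 * 31 + h2); // 33 -> new ByteArray(1L * 31 + 2)
        System.out.println((long) h2 * 31 + h3); // 65 -> new ByteArray(2L * 31 + 3)
    }
}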

Example 4 with TokensLine

Use of org.sonar.api.batch.sensor.cpd.internal.TokensLine in project sonarqube by SonarSource.

Class TokenizerBridgeTest, method test.

@Test
public void test() {
    // To be sure that token indexes are relative to the file, run the chunker twice:
    bridge.chunk("file.txt", new InputStreamReader(new ByteArrayInputStream(new byte[0]), StandardCharsets.UTF_8));
    List<TokensLine> lines = bridge.chunk("file.txt", new InputStreamReader(new ByteArrayInputStream(new byte[0]), StandardCharsets.UTF_8));
    assertThat(lines.size(), is(3));
    TokensLine line = lines.get(0);
    // 2 tokens on line 1
    assertThat(line.getStartUnit(), is(1));
    assertThat(line.getEndUnit(), is(2));
    assertThat(line.getStartLine(), is(1));
    assertThat(line.getEndLine(), is(1));
    assertThat(line.getHashCode(), is("t1t2".hashCode()));
    line = lines.get(1);
    // 1 token on line 2
    assertThat(line.getStartUnit(), is(3));
    assertThat(line.getEndUnit(), is(3));
    assertThat(line.getStartLine(), is(2));
    assertThat(line.getEndLine(), is(2));
    assertThat(line.getHashCode(), is("t3".hashCode()));
    line = lines.get(2);
    // 3 tokens on line 4
    assertThat(line.getStartUnit(), is(4));
    assertThat(line.getEndUnit(), is(6));
    assertThat(line.getStartLine(), is(4));
    assertThat(line.getEndLine(), is(4));
    assertThat(line.getHashCode(), is("t1t3t3".hashCode()));
}
Also used: InputStreamReader (java.io.InputStreamReader), ByteArrayInputStream (java.io.ByteArrayInputStream), TokensLine (org.sonar.api.batch.sensor.cpd.internal.TokensLine), Test (org.junit.Test)
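
The bridge field and its fake Tokenizer are set up outside this snippet and are not part of the listing. As a hedged sketch, a stand-in fixture that ignores its input and always emits the token stream below would produce exactly the three TokensLine entries asserted above; the TokenEntry(image, sourceId, beginLine) constructor and the Tokenizer, Tokens and SourceCode types from PMD's CPD API are assumptions here.

import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.TokenEntry;
import net.sourceforge.pmd.cpd.Tokenizer;
import net.sourceforge.pmd.cpd.Tokens;

class FakeTokenizerSketch {
    // Hypothetical stand-in for the test fixture: emits the same tokens regardless of the (empty) input.
    static final Tokenizer FAKE_TOKENIZER = new Tokenizer() {
        @Override
        public void tokenize(SourceCode source, Tokens tokenEntries) {
            String id = "file.txt"; // hypothetical source id; line grouping depends only on the line numbers
            tokenEntries.add(new TokenEntry("t1", id, 1)); // assumed (image, sourceId, beginLine) constructor
            tokenEntries.add(new TokenEntry("t2", id, 1)); // -> "t1t2" on line 1 (units 1..2)
            tokenEntries.add(new TokenEntry("t3", id, 2)); // -> "t3" on line 2 (unit 3)
            tokenEntries.add(new TokenEntry("t1", id, 4));
            tokenEntries.add(new TokenEntry("t3", id, 4));
            tokenEntries.add(new TokenEntry("t3", id, 4)); // -> "t1t3t3" on line 4 (units 4..6); no tokens on line 3
            tokenEntries.add(TokenEntry.EOF);              // convert() expects EOF as the last token
        }
    };
}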

Example 5 with TokensLine

Use of org.sonar.api.batch.sensor.cpd.internal.TokensLine in project sonarqube by SonarSource.

Class PmdBlockChunker, method chunk.

/**
 * @return ArrayList as we need a serializable object
 */
public List<Block> chunk(String resourceId, List<TokensLine> fragments) {
    // Collapse each run of consecutive lines with identical content down to at most its
    // first and last occurrence before chunking.
    List<TokensLine> filtered = new ArrayList<>();
    int i = 0;
    while (i < fragments.size()) {
        TokensLine first = fragments.get(i);
        int j = i + 1;
        while (j < fragments.size() && fragments.get(j).getValue().equals(first.getValue())) {
            j++;
        }
        filtered.add(fragments.get(i));
        if (i < j - 1) {
            filtered.add(fragments.get(j - 1));
        }
        i = j;
    }
    fragments = filtered;
    if (fragments.size() < blockSize) {
        return new ArrayList<>();
    }
    TokensLine[] fragmentsArr = fragments.toArray(new TokensLine[fragments.size()]);
    List<Block> blocks = new ArrayList<>(fragmentsArr.length - blockSize + 1);
    long hash = 0;
    int first = 0;
    int last = 0;
    // Seed the rolling hash with the first (blockSize - 1) lines
    for (; last < blockSize - 1; last++) {
        hash = hash * PRIME_BASE + fragmentsArr[last].getHashCode();
    }
    Block.Builder blockBuilder = Block.builder().setResourceId(resourceId);
    for (; last < fragmentsArr.length; last++, first++) {
        TokensLine firstFragment = fragmentsArr[first];
        TokensLine lastFragment = fragmentsArr[last];
        // add last statement to hash
        hash = hash * PRIME_BASE + lastFragment.getHashCode();
        // create block
        Block block = blockBuilder
            .setBlockHash(new ByteArray(hash))
            .setIndexInFile(first)
            .setLines(firstFragment.getStartLine(), lastFragment.getEndLine())
            .setUnit(firstFragment.getStartUnit(), lastFragment.getEndUnit())
            .build();
        blocks.add(block);
        // remove first statement from hash
        hash -= power * firstFragment.getHashCode();
    }
    return blocks;
}
Also used: TokensLine (org.sonar.api.batch.sensor.cpd.internal.TokensLine), ArrayList (java.util.ArrayList), Block (org.sonar.duplications.block.Block), ByteArray (org.sonar.duplications.block.ByteArray)
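
The hash maintenance in the second loop is a standard rolling (polynomial) hash. A minimal sketch, assuming PRIME_BASE is 31 and power is PRIME_BASE raised to blockSize - 1; neither constant is shown in this listing, but the values match the PmdBlockChunkerTest expectations above.

public class RollingHashSketch {
    public static void main(String[] args) {
        long primeBase = 31;             // the chunker's PRIME_BASE (assumed; matches the test above)
        long power = 31;                 // assumed to be PRIME_BASE^(blockSize - 1), here with blockSize = 2
        long h1 = 1, h2 = 2, h3 = 3;     // per-line hash codes, as in PmdBlockChunkerTest

        long hash = h1;                  // seed loop over the first blockSize - 1 lines
        hash = hash * primeBase + h2;    // window [line1, line2]
        System.out.println(hash);        // 33, i.e. 1L * 31 + 2 (first block hash)
        hash -= power * h1;              // drop line1 before sliding the window
        hash = hash * primeBase + h3;    // window [line2, line3]
        System.out.println(hash);        // 65, i.e. 2L * 31 + 3 (second block hash)
    }
}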

Aggregations

TokensLine (org.sonar.api.batch.sensor.cpd.internal.TokensLine): 5
ArrayList (java.util.ArrayList): 2
Test (org.junit.Test): 2
Block (org.sonar.duplications.block.Block): 2
ByteArray (org.sonar.duplications.block.ByteArray): 2
ByteArrayInputStream (java.io.ByteArrayInputStream): 1
InputStreamReader (java.io.InputStreamReader): 1
TokenEntry (net.sourceforge.pmd.cpd.TokenEntry): 1