use of org.sonar.api.batch.sensor.cpd.internal.TokensLine in project sonarqube by SonarSource.
The following example shows the method addNewTokensLine of the class TokenizerBridge.
/**
 * Flushes the text accumulated in {@code sb} into a new {@link TokensLine}
 * appended to {@code result}, then clears the buffer. Does nothing when the
 * buffer is empty (i.e. no tokens were collected for the current line).
 */
private static void addNewTokensLine(List<TokensLine> result, int startUnit, int endUnit, int startLine, StringBuilder sb) {
  boolean hasContent = sb.length() > 0;
  if (hasContent) {
    String lineText = sb.toString();
    result.add(new TokensLine(startUnit, endUnit, startLine, lineText));
    // Reset the buffer so the caller can reuse it for the next source line.
    sb.setLength(0);
  }
}
use of org.sonar.api.batch.sensor.cpd.internal.TokensLine in project sonarqube by SonarSource.
The following example shows the method convert of the class TokenizerBridge.
/**
 * Groups the given tokens into {@link TokensLine}s, one per physical source line.
 * <p>
 * We expect that the implementation of {@link Tokenizer} is correct:
 * tokens are ordered by occurrence in the source code and the last token is EOF.
 */
public static List<TokensLine> convert(List<TokenEntry> tokens) {
  List<TokensLine> result = new ArrayList<>();
  StringBuilder buffer = new StringBuilder();
  // Sentinel meaning "no line started yet"; the first real token always differs.
  int currentLine = Integer.MIN_VALUE;
  int firstUnit = 0;
  int unitCount = 0;
  for (TokenEntry token : tokens) {
    if (token == TokenEntry.EOF) {
      continue;
    }
    int tokenLine = token.getBeginLine();
    if (tokenLine != currentLine) {
      // A new physical line begins: flush whatever was accumulated so far.
      addNewTokensLine(result, firstUnit, unitCount, currentLine, buffer);
      firstUnit = unitCount + 1;
      currentLine = tokenLine;
    }
    unitCount++;
    buffer.append(token.getValue());
  }
  // Flush the trailing line, if any tokens were collected for it.
  addNewTokensLine(result, firstUnit, unitCount, currentLine, buffer);
  return result;
}
use of org.sonar.api.batch.sensor.cpd.internal.TokensLine in project sonarqube by SonarSource.
The following example shows the method shouldBuildBlocks of the class PmdBlockChunkerTest.
@Test
public void shouldBuildBlocks() {
  // Three single-character lines; with a block size of 2 we expect two
  // overlapping blocks: (line1, line2) and (line2, line3).
  TokensLine line1 = new TokensLine(0, 9, 1, Character.toString((char) 1));
  TokensLine line2 = new TokensLine(10, 19, 2, Character.toString((char) 2));
  TokensLine line3 = new TokensLine(20, 29, 3, Character.toString((char) 3));
  List<Block> blocks = new PmdBlockChunker(2).chunk("resourceId", Arrays.asList(line1, line2, line3));
  assertThat(blocks.size(), is(2));

  Block block = blocks.get(0);
  assertThat(block.getStartLine(), is(1));
  assertThat(block.getEndLine(), is(2));
  // Rolling hash with base 31 over the per-line hash codes (here 1 and 2).
  assertThat(block.getBlockHash(), is(new ByteArray(1L * 31 + 2)));

  block = blocks.get(1);
  assertThat(block.getStartLine(), is(2));
  assertThat(block.getEndLine(), is(3));
  assertThat(block.getBlockHash(), is(new ByteArray(2L * 31 + 3)));
}
use of org.sonar.api.batch.sensor.cpd.internal.TokensLine in project sonarqube by SonarSource.
The following example shows the method test of the class TokenizerBridgeTest.
@Test
public void test() {
  // Chunk an empty file twice to verify that token indices are relative to the
  // file rather than accumulated across invocations of the bridge.
  bridge.chunk("file.txt", new InputStreamReader(new ByteArrayInputStream(new byte[0]), StandardCharsets.UTF_8));
  List<TokensLine> lines = bridge.chunk("file.txt", new InputStreamReader(new ByteArrayInputStream(new byte[0]), StandardCharsets.UTF_8));

  assertThat(lines.size(), is(3));

  // First result: two tokens on source line 1.
  TokensLine tokensLine = lines.get(0);
  assertThat(tokensLine.getStartUnit(), is(1));
  assertThat(tokensLine.getEndUnit(), is(2));
  assertThat(tokensLine.getStartLine(), is(1));
  assertThat(tokensLine.getEndLine(), is(1));
  assertThat(tokensLine.getHashCode(), is("t1t2".hashCode()));

  // Second result: a single token on source line 2.
  tokensLine = lines.get(1);
  assertThat(tokensLine.getStartUnit(), is(3));
  assertThat(tokensLine.getEndUnit(), is(3));
  assertThat(tokensLine.getStartLine(), is(2));
  assertThat(tokensLine.getEndLine(), is(2));
  assertThat(tokensLine.getHashCode(), is("t3".hashCode()));

  // Third result: three tokens on source line 4.
  tokensLine = lines.get(2);
  assertThat(tokensLine.getStartUnit(), is(4));
  assertThat(tokensLine.getEndUnit(), is(6));
  assertThat(tokensLine.getStartLine(), is(4));
  assertThat(tokensLine.getEndLine(), is(4));
  assertThat(tokensLine.getHashCode(), is("t1t3t3".hashCode()));
}
use of org.sonar.api.batch.sensor.cpd.internal.TokensLine in project sonarqube by SonarSource.
The following example shows the method chunk of the class PmdBlockChunker.
/**
 * Splits the given fragments into overlapping windows of {@code blockSize}
 * consecutive fragments, each hashed with a Rabin-Karp style rolling hash.
 *
 * @return ArrayList as we need a serializable object
 */
public List<Block> chunk(String resourceId, List<TokensLine> fragments) {
// Collapse each run of consecutive fragments with identical values down to at
// most two entries (the run's first and last fragment).
// NOTE(review): presumably this limits the impact of long runs of repeated
// lines on duplication detection — confirm against callers.
List<TokensLine> filtered = new ArrayList<>();
int i = 0;
while (i < fragments.size()) {
TokensLine first = fragments.get(i);
int j = i + 1;
// Advance j past every fragment whose value equals the run's first value.
while (j < fragments.size() && fragments.get(j).getValue().equals(first.getValue())) {
j++;
}
filtered.add(fragments.get(i));
if (i < j - 1) {
// The run has more than one fragment: also keep its last element.
filtered.add(fragments.get(j - 1));
}
i = j;
}
fragments = filtered;
// Too few fragments to form even a single block.
if (fragments.size() < blockSize) {
return new ArrayList<>();
}
TokensLine[] fragmentsArr = fragments.toArray(new TokensLine[fragments.size()]);
// Exactly (length - blockSize + 1) windows will be produced.
List<Block> blocks = new ArrayList<>(fragmentsArr.length - blockSize + 1);
long hash = 0;
int first = 0;
int last = 0;
// Seed the rolling hash with the first (blockSize - 1) fragments.
for (; last < blockSize - 1; last++) {
hash = hash * PRIME_BASE + fragmentsArr[last].getHashCode();
}
Block.Builder blockBuilder = Block.builder().setResourceId(resourceId);
// Slide a window of blockSize fragments over the array: [first, last] inclusive.
for (; last < fragmentsArr.length; last++, first++) {
TokensLine firstFragment = fragmentsArr[first];
TokensLine lastFragment = fragmentsArr[last];
// add last statement to hash
hash = hash * PRIME_BASE + lastFragment.getHashCode();
// create block
Block block = blockBuilder.setBlockHash(new ByteArray(hash)).setIndexInFile(first).setLines(firstFragment.getStartLine(), lastFragment.getEndLine()).setUnit(firstFragment.getStartUnit(), lastFragment.getEndUnit()).build();
blocks.add(block);
// remove first statement from hash
// NOTE(review): assumes the `power` field equals PRIME_BASE^(blockSize - 1),
// the weight of the fragment leaving the window — confirm its initializer.
hash -= power * firstFragment.getHashCode();
}
return blocks;
}
Aggregations