You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

TokenizerBridge.java 3.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. /*
  2. * SonarQube
  3. * Copyright (C) 2009-2022 SonarSource SA
  4. * mailto:info AT sonarsource DOT com
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 3 of the License, or (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public License
  17. * along with this program; if not, write to the Free Software Foundation,
  18. * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  19. */
  20. package org.sonar.duplications.internal.pmd;
  21. import java.io.Reader;
  22. import java.util.ArrayList;
  23. import java.util.List;
  24. import net.sourceforge.pmd.cpd.SourceCode;
  25. import net.sourceforge.pmd.cpd.TokenEntry;
  26. import net.sourceforge.pmd.cpd.Tokenizer;
  27. import net.sourceforge.pmd.cpd.Tokens;
  28. import org.sonar.api.batch.sensor.cpd.internal.TokensLine;
  29. import org.sonar.duplications.block.Block;
  30. import org.sonar.duplications.cpd.FileCodeLoaderWithoutCache;
  31. /**
  32. * Bridge, which allows to convert list of {@link TokenEntry} produced by {@link Tokenizer} into list of {@link TokensLine}s.
  33. */
  34. public class TokenizerBridge {
  35. private final Tokenizer tokenizer;
  36. private final PmdBlockChunker blockBuilder;
  37. public TokenizerBridge(Tokenizer tokenizer, int blockSize) {
  38. this.tokenizer = tokenizer;
  39. this.blockBuilder = new PmdBlockChunker(blockSize);
  40. }
  41. public List<Block> chunk(String resourceId, String fileName, Reader fileReader) {
  42. return blockBuilder.chunk(resourceId, chunk(fileName, fileReader));
  43. }
  44. public List<TokensLine> chunk(String fileName, Reader fileReader) {
  45. SourceCode sourceCode = new SourceCode(new FileCodeLoaderWithoutCache(fileName, fileReader));
  46. Tokens tokens = new Tokens();
  47. TokenEntry.clearImages();
  48. try {
  49. tokenizer.tokenize(sourceCode, tokens);
  50. } catch (RuntimeException e) {
  51. throw e;
  52. } catch (Exception e) {
  53. throw new RuntimeException(e);
  54. }
  55. TokenEntry.clearImages();
  56. return convert(tokens.getTokens());
  57. }
  58. /**
  59. * We expect that implementation of {@link Tokenizer} is correct:
  60. * tokens ordered by occurrence in source code and last token is EOF.
  61. */
  62. public static List<TokensLine> convert(List<TokenEntry> tokens) {
  63. List<TokensLine> result = new ArrayList<>();
  64. StringBuilder sb = new StringBuilder();
  65. int startLine = Integer.MIN_VALUE;
  66. int startIndex = 0;
  67. int currentIndex = 0;
  68. for (TokenEntry token : tokens) {
  69. if (token != TokenEntry.EOF) {
  70. String value = token.getValue();
  71. int line = token.getBeginLine();
  72. if (line != startLine) {
  73. addNewTokensLine(result, startIndex, currentIndex, startLine, sb);
  74. startIndex = currentIndex + 1;
  75. startLine = line;
  76. }
  77. currentIndex++;
  78. sb.append(value);
  79. }
  80. }
  81. addNewTokensLine(result, startIndex, currentIndex, startLine, sb);
  82. return result;
  83. }
  84. private static void addNewTokensLine(List<TokensLine> result, int startUnit, int endUnit, int startLine, StringBuilder sb) {
  85. if (sb.length() != 0) {
  86. result.add(new TokensLine(startUnit, endUnit, startLine, sb.toString()));
  87. sb.setLength(0);
  88. }
  89. }
  90. }