aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java136
1 files changed, 0 insertions, 136 deletions
diff --git a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java b/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java
deleted file mode 100644
index be76b12b139..00000000000
--- a/sonar-duplications/src/main/java/net/sourceforge/pmd/cpd/AbstractTokenizer.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/**
- * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
- * @author Zev Blut zb@ubit.com
- * @author Romain PELISSE belaran@gmail.com
- */
-package net.sourceforge.pmd.cpd;
-
-import java.util.List;
-
-public abstract class AbstractTokenizer implements Tokenizer
-{
-
- protected List<String> stringToken; // List<String>, should be setted by children classes
- protected List<String> ignorableCharacter; // List<String>, should be setted by children classes
- // FIXME:Maybe an array of 'char' would be better for perfomance ?
- protected List<String> ignorableStmt; // List<String>, should be setted by children classes
- protected char ONE_LINE_COMMENT_CHAR = '#'; // Most script language ( shell, ruby, python,...) use this symbol for comment line
-
- private List<String> code;
- private int lineNumber = 0;
- private String currentLine;
-
- protected boolean spanMultipleLinesString = true; // Most language does, so default is true
-
- private boolean downcaseString = true;
-
- public void tokenize(SourceCode tokens, Tokens tokenEntries) {
- this.code = tokens.getCode();
-
- for ( this.lineNumber = 0; lineNumber < this.code.size(); lineNumber++ ) {
- this.currentLine = this.code.get(this.lineNumber);
- int loc = 0;
- while ( loc < currentLine.length() ) {
- StringBuffer token = new StringBuffer();
- loc = getTokenFromLine(token,loc);
- if (token.length() > 0 && !isIgnorableString(token.toString())) {
- if (downcaseString) {
- token = new StringBuffer(token.toString().toLowerCase());
- }
- }
- }
- }
- tokenEntries.add(TokenEntry.getEOF());
- }
-
- private int getTokenFromLine(StringBuffer token, int loc) {
- for (int j = loc; j < this.currentLine.length(); j++) {
- char tok = this.currentLine.charAt(j);
- if (!Character.isWhitespace(tok) && !ignoreCharacter(tok)) {
- if (isComment(tok)) {
- if (token.length() > 0) {
- return j;
- } else {
- return getCommentToken(token, loc);
- }
- } else if (isString(tok)) {
- if (token.length() > 0) {
- return j; // we need to now parse the string as a seperate token.
- } else {
- // we are at the start of a string
- return parseString(token, j, tok);
- }
- } else {
- token.append(tok);
- }
- } else {
- if (token.length() > 0) {
- return j;
- }
- }
- loc = j;
- }
- return loc + 1;
- }
-
- private int parseString(StringBuffer token, int loc, char stringDelimiter) {
- boolean escaped = false;
- boolean done = false;
- char tok = ' '; // this will be replaced.
- while ((loc < currentLine.length()) && ! done) {
- tok = currentLine.charAt(loc);
- if (escaped && tok == stringDelimiter) // Found an escaped string
- escaped = false;
- else if (tok == stringDelimiter && (token.length() > 0)) // We are done, we found the end of the string...
- done = true;
- else if (tok == '\\') // Found an escaped char
- escaped = true;
- else // Adding char...
- escaped = false;
- //Adding char to String:" + token.toString());
- token.append(tok);
- loc++;
- }
- // Handling multiple lines string
- if ( ! done && // ... we didn't find the end of the string
- loc >= currentLine.length() && // ... we have reach the end of the line ( the String is incomplete, for the moment at least)
- this.spanMultipleLinesString && // ... the language allow multiple line span Strings
- ++this.lineNumber < this.code.size() // ... there is still more lines to parse
- ) {
- // parsing new line
- this.currentLine = this.code.get(this.lineNumber);
- // Warning : recursive call !
- loc = this.parseString(token, loc, stringDelimiter);
- }
- return loc + 1;
- }
-
- private boolean ignoreCharacter(char tok)
- {
- return this.ignorableCharacter.contains("" + tok);
- }
-
- private boolean isString(char tok)
- {
- return this.stringToken.contains("" + tok);
- }
-
- private boolean isComment(char tok)
- {
- return tok == ONE_LINE_COMMENT_CHAR;
- }
-
- private int getCommentToken(StringBuffer token, int loc)
- {
- while (loc < this.currentLine.length())
- {
- token.append(this.currentLine.charAt(loc++));
- }
- return loc;
- }
-
- private boolean isIgnorableString(String token)
- {
- return this.ignorableStmt.contains(token);
- }
-}