From 691ae00bf9e8cdfe5bd5528f2c58f8c5415601e2 Mon Sep 17 00:00:00 2001 From: Evgeny Mandrikov Date: Thu, 8 Sep 2011 12:32:06 +0400 Subject: SONAR-1091 Sonar-CPD: support Java 7 (JSR334) * Binary integer literals * Underscores in numeric literals --- .../sonar/duplications/java/JavaTokenProducer.java | 20 +++++----- .../duplications/java/JavaTokenProducerTest.java | 44 +++++++++++++++++++--- 2 files changed, 49 insertions(+), 15 deletions(-) (limited to 'sonar-duplications/src') diff --git a/sonar-duplications/src/main/java/org/sonar/duplications/java/JavaTokenProducer.java b/sonar-duplications/src/main/java/org/sonar/duplications/java/JavaTokenProducer.java index d9d922fcb27..41a25d38be5 100644 --- a/sonar-duplications/src/main/java/org/sonar/duplications/java/JavaTokenProducer.java +++ b/sonar-duplications/src/main/java/org/sonar/duplications/java/JavaTokenProducer.java @@ -23,6 +23,7 @@ import org.sonar.duplications.token.TokenChunker; /** * See The Java Language Specification, Third Edition: Lexical Structure + * and JSR334 (Java 7 - binary integral literals and underscores in numeric literals). * *

* We decided to use dollar sign as a prefix for normalization, even if it can be a part of an identifier, @@ -38,8 +39,8 @@ public final class JavaTokenProducer { private static final String NORMALIZED_CHARACTER_LITERAL = "$CHARS"; private static final String NORMALIZED_NUMERIC_LITERAL = "$NUMBER"; - private static final String EXP = "([Ee][+-]?+[0-9]++)"; - private static final String BINARY_EXP = "([Pp][+-]?+[0-9]++)"; + private static final String EXP = "([Ee][+-]?+[0-9_]++)"; + private static final String BINARY_EXP = "([Pp][+-]?+[0-9_]++)"; private static final String FLOAT_SUFFIX = "[fFdD]"; private static final String INT_SUFFIX = "[lL]"; @@ -58,14 +59,15 @@ public final class JavaTokenProducer { // Identifiers, Keywords, Boolean Literals, The Null Literal .token("\\p{javaJavaIdentifierStart}++\\p{javaJavaIdentifierPart}*+") // Floating-Point Literals - .token("[0-9]++\\.([0-9]++)?+" + EXP + "?+" + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) - .token("\\.[0-9]++" + EXP + "?+" + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) - .token("[0-9]++" + EXP + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) - .token("0[xX][0-9a-fA-F]++\\.[0-9a-fA-F]*+" + BINARY_EXP + "?+" + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) - .token("0[xX][0-9a-fA-F]++" + BINARY_EXP + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) + .token("[0-9_]++\\.([0-9_]++)?+" + EXP + "?+" + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) // Decimal + .token("\\.[0-9_]++" + EXP + "?+" + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) // Decimal + .token("[0-9_]++" + EXP + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) // Decimal + .token("0[xX][0-9a-fA-F_]++\\.[0-9a-fA-F_]*+" + BINARY_EXP + "?+" + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) // Hexadecimal + .token("0[xX][0-9a-fA-F_]++" + BINARY_EXP + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) // Hexadecimal // Integer Literals - .token("0[xX][0-9a-fA-F]++" + INT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) - .token("[0-9]++" + INT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) + .token("0[xX][0-9a-fA-F_]++" + INT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) // Hexadecimal + .token("0[bB][01_]++" + INT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) // Binary (Java 7) + .token("[0-9_]++" + INT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) // Decimal and Octal // Any other character .token(".") .build(); diff --git a/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaTokenProducerTest.java b/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaTokenProducerTest.java index 8a2add96005..e792c9a9878 100644 --- a/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaTokenProducerTest.java +++ b/sonar-duplications/src/test/java/org/sonar/duplications/java/JavaTokenProducerTest.java @@ -42,12 +42,6 @@ import org.sonar.duplications.token.TokenQueue; import com.google.common.collect.Lists; -/** - * See The Java Language Specification, Third Edition: Lexical Structure - * - * TODO Java 7 features: Binary Integer Literals, Using Underscore Characters in Numeric Literals - * TODO add more complex example - */ public class JavaTokenProducerTest { private TokenChunker chunker = JavaTokenProducer.build(); @@ -130,6 +124,24 @@ public class JavaTokenProducerTest { assertThat(chunk("0xFF"), isNumericLiteral()); assertThat(chunk("0xFFl"), isNumericLiteral()); assertThat(chunk("0xFFL"), isNumericLiteral()); + + assertThat(chunk("0XFF"), isNumericLiteral()); + assertThat(chunk("0XFFl"), isNumericLiteral()); + assertThat(chunk("0XFFL"), isNumericLiteral()); + } + + /** + * New in Java 7. + */ + @Test + public void shouldNormalizeBinaryIntegerLiteral() { + assertThat(chunk("0b10"), isNumericLiteral()); + assertThat(chunk("0b10l"), isNumericLiteral()); + assertThat(chunk("0b10L"), isNumericLiteral()); + + assertThat(chunk("0B10"), isNumericLiteral()); + assertThat(chunk("0B10l"), isNumericLiteral()); + assertThat(chunk("0B10L"), isNumericLiteral()); } /** @@ -206,6 +218,26 @@ public class JavaTokenProducerTest { assertThat(chunk("0XAFp-1D"), isNumericLiteral()); } + /** + * New in Java 7. + */ + @Test + public void shouldNormalizeNumericLiteralsWithUnderscores() { + assertThat(chunk("54_3L"), isNumericLiteral()); + assertThat(chunk("07_7L"), isNumericLiteral()); + assertThat(chunk("0b1_0L"), isNumericLiteral()); + assertThat(chunk("0xF_FL"), isNumericLiteral()); + + assertThat(chunk("1_234."), isNumericLiteral()); + assertThat(chunk("1_2.3_4"), isNumericLiteral()); + assertThat(chunk(".1_234"), isNumericLiteral()); + assertThat(chunk("1_234e1_0"), isNumericLiteral()); + + assertThat(chunk("0xA_F."), isNumericLiteral()); + assertThat(chunk("0xA_F.B_C"), isNumericLiteral()); + assertThat(chunk("0x1.ffff_ffff_ffff_fP1_023"), isNumericLiteral()); + } + /** * Boolean Literals */ -- cgit v1.2.3