From 63a7fdf83df3f43343ba4fdb878bdfc8ebf52204 Mon Sep 17 00:00:00 2001 From: Haijian Wang Date: Fri, 4 Jan 2013 14:13:02 +0200 Subject: [PATCH] Preserve UTF-8 in imported files and output @charset in generated css (#10505) Change-Id: I53f46611ef39124d532b118bb8ccb34f31cf8a6a --- .../vaadin/sass/internal/ScssStylesheet.java | 44 ++++- .../vaadin/sass/internal/parser/Parser.java | 185 +++++++++--------- .../com/vaadin/sass/internal/parser/Parser.jj | 135 ++++++------- .../internal/visitor/ImportNodeHandler.java | 5 +- .../tests/resources/automatic/css/utf8.css | 5 + ...o-be-imported-scss-file-contains-utf8.scss | 3 + .../tests/resources/automatic/scss/utf8.scss | 4 + 7 files changed, 210 insertions(+), 171 deletions(-) create mode 100644 theme-compiler/tests/resources/automatic/css/utf8.css create mode 100644 theme-compiler/tests/resources/automatic/scss/utf8-imported/to-be-imported-scss-file-contains-utf8.scss create mode 100644 theme-compiler/tests/resources/automatic/scss/utf8.scss diff --git a/theme-compiler/src/com/vaadin/sass/internal/ScssStylesheet.java b/theme-compiler/src/com/vaadin/sass/internal/ScssStylesheet.java index e915bdca7e..fd00dbff2c 100644 --- a/theme-compiler/src/com/vaadin/sass/internal/ScssStylesheet.java +++ b/theme-compiler/src/com/vaadin/sass/internal/ScssStylesheet.java @@ -58,6 +58,8 @@ public class ScssStylesheet extends Node { private String fileName; + private String charset; + /** * Read in a file SCSS and parse it into a ScssStylesheet * @@ -69,7 +71,7 @@ public class ScssStylesheet extends Node { } /** - * Main entry point for the SASS compiler. Takes in a file and builds upp a + * Main entry point for the SASS compiler. Takes in a file and builds up a * ScssStylesheet tree out of it. Calling compile() on it will transform * SASS into CSS. Calling toString() will print out the SCSS/CSS. 
* @@ -80,6 +82,29 @@ public class ScssStylesheet extends Node { */ public static ScssStylesheet get(String identifier) throws CSSException, IOException { + return get(identifier, null); + } + + /** + * Main entry point for the SASS compiler. Takes in a file and encoding then + * builds up a ScssStylesheet tree out of it. Calling compile() on it will + * transform SASS into CSS. Calling toString() will print out the SCSS/CSS. + * + * @param identifier + * @param encoding + * @return + * @throws CSSException + * @throws IOException + */ + public static ScssStylesheet get(String identifier, String encoding) + throws CSSException, IOException { + /* + * The encoding to be used is passed through "encoding" parameter. The + * imported children scss node will have the same encoding as their + * parent, ultimately the root scss file. The root scss node has this + * "encoding" parameter to be null. Its encoding is determined by the + * @charset declaration, the default one is ASCII. + */ File file = new File(identifier); file = file.getCanonicalFile(); @@ -90,12 +115,14 @@ public class ScssStylesheet extends Node { if (source == null) { return null; } + source.setEncoding(encoding); Parser parser = new Parser(); parser.setErrorHandler(new SCSSErrorHandler()); parser.setDocumentHandler(handler); parser.parseStyleSheet(source); + stylesheet.setCharset(parser.getInputSource().getEncoding()); return stylesheet; } @@ -169,10 +196,15 @@ public class ScssStylesheet extends Node { @Override public String toString() { StringBuilder string = new StringBuilder(""); + String delimeter = "\n\n"; + // add charset declaration, if it is not default "ASCII".
+ if (!"ASCII".equals(getCharset())) { + string.append("@charset \"").append(getCharset()).append("\";") + .append(delimeter); + } if (children.size() > 0) { string.append(children.get(0).toString()); } - String delimeter = "\n\n"; if (children.size() > 1) { for (int i = 1; i < children.size(); i++) { String childString = children.get(i).toString(); @@ -318,4 +350,12 @@ public class ScssStylesheet extends Node { public static final void warning(String msg) { Logger.getLogger(ScssStylesheet.class.getName()).warning(msg); } + + public String getCharset() { + return charset; + } + + public void setCharset(String charset) { + this.charset = charset; + } } diff --git a/theme-compiler/src/com/vaadin/sass/internal/parser/Parser.java b/theme-compiler/src/com/vaadin/sass/internal/parser/Parser.java index 0188926636..70fab6413a 100644 --- a/theme-compiler/src/com/vaadin/sass/internal/parser/Parser.java +++ b/theme-compiler/src/com/vaadin/sass/internal/parser/Parser.java @@ -70,6 +70,10 @@ public class Parser implements org.w3c.css.sac.Parser, ParserConstants { throw new CSSException(CSSException.SAC_NOT_SUPPORTED_ERR); } + public InputSource getInputSource(){ + return source; + } + /** * Set the document handler for this parser */ @@ -278,65 +282,70 @@ public class Parser implements org.w3c.css.sac.Parser, ParserConstants { } } } - String encoding = "ASCII"; + //use the encoding preset on the source, if any; when null it is detected below (default ASCII). + String encoding = source.getEncoding(); InputStream input = source.getByteStream(); - char c = ' '; - if (!input.markSupported()) { input = new BufferedInputStream(input); source.setByteStream(input); + input.mark(100); } - input.mark(100); - c = (char) input.read(); - - if (c == '@') { - // hum, is it a charset ? - int size = 100; - byte[] buf = new byte[size]; - input.read(buf, 0, 7); - String keyword = new String(buf, 0, 7); - if (keyword.equals("charset")) { - // Yes, this is the charset declaration ! - - // here I don't use the right declaration : white space are ' '.
- while ((c = (char) input.read()) == ' ') { - // find the first quote - } - char endChar = c; - int i = 0; + if(encoding == null){ + encoding = "ASCII"; - if ((endChar != '"') && (endChar != '\u005c'')) { - // hum this is not a quote. - throw new CSSException("invalid charset declaration"); - } + char c = ' '; + + c = (char) input.read(); - while ((c = (char) input.read()) != endChar) { - buf[i++] = (byte) c; - if (i == size) { - byte[] old = buf; - buf = new byte[size + 100]; - System.arraycopy(old, 0, buf, 0, size); - size += 100; + if (c == '@') { + // hum, is it a charset ? + int size = 100; + byte[] buf = new byte[size]; + input.read(buf, 0, 7); + String keyword = new String(buf, 0, 7); + if (keyword.equals("charset")) { + // Yes, this is the charset declaration ! + + // here I don't use the right declaration : white space are ' '. + while ((c = (char) input.read()) == ' ') { + // find the first quote } - } - while ((c = (char) input.read()) == ' ') { - // find the next relevant character - } - if (c != ';') { - // no semi colon at the end ? - throw new CSSException("invalid charset declaration: " + char endChar = c; + int i = 0; + + if ((endChar != '"') && (endChar != '\u005c'')) { + // hum this is not a quote. + throw new CSSException("invalid charset declaration"); + } + + while ((c = (char) input.read()) != endChar) { + buf[i++] = (byte) c; + if (i == size) { + byte[] old = buf; + buf = new byte[size + 100]; + System.arraycopy(old, 0, buf, 0, size); + size += 100; + } + } + while ((c = (char) input.read()) == ' ') { + // find the next relevant character + } + if (c != ';') { + // no semi colon at the end ? + throw new CSSException("invalid charset declaration: " + "missing semi colon"); - } - encoding = new String(buf, 0, i); - if (source.getEncoding() != null) { - // compare the two encoding informations. - // For example, I don't accept to have ASCII and after UTF-8. - // Is it really good ? That is the question. 
- if (!encoding.equals(source.getEncoding())) { - throw new CSSException("invalid encoding information."); } - } - } // else no charset declaration available + encoding = new String(buf, 0, i); + if (source.getEncoding() != null) { + // compare the two encoding informations. + // For example, I don't accept to have ASCII and after UTF-8. + // Is it really good ? That is the question. + if (!encoding.equals(source.getEncoding())) { + throw new CSSException("invalid encoding information."); + } + } + } // else no charset declaration available + } } // ok set the real encoding of this source. source.setEncoding(encoding); @@ -5571,26 +5580,10 @@ LexicalUnitImpl result = null; case '5': case '6': case '7': case '8': case '9': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - int numValue = Character.digit(c, 16); - int count = 0; - int p = 16; - - while (index + 1 < len && count < 6) { - c = s.charAt(index+1); - - if (Character.digit(c, 16) != -1) { - numValue = (numValue * 16) + Character.digit(c, 16); - p *= 16; - index++; - } else { - if (c == ' ') { - // skip the latest white space - index++; - } - break; - } + buf.append('\u005c\u005c'); + while (index < len) { + buf.append(s.charAt(index++)); } - buf.append((char) numValue); break; case '\u005cn': case '\u005cf': @@ -5920,34 +5913,6 @@ LexicalUnitImpl result = null; finally { jj_save(12, xla); } } - private boolean jj_3R_196() { - Token xsp; - xsp = jj_scanpos; - if (jj_3_3()) { - jj_scanpos = xsp; - if (jj_3R_239()) { - jj_scanpos = xsp; - if (jj_3R_240()) return true; - } - } - return false; - } - - private boolean jj_3_3() { - if (jj_3R_166()) return true; - return false; - } - - private boolean jj_3_6() { - if (jj_3R_169()) return true; - return false; - } - - private boolean jj_3_1() { - if (jj_3R_165()) return true; - return false; - } - private boolean jj_3R_368() { Token xsp; xsp = jj_scanpos; @@ -7939,6 +7904,34 @@ LexicalUnitImpl 
result = null; return false; } + private boolean jj_3R_196() { + Token xsp; + xsp = jj_scanpos; + if (jj_3_3()) { + jj_scanpos = xsp; + if (jj_3R_239()) { + jj_scanpos = xsp; + if (jj_3R_240()) return true; + } + } + return false; + } + + private boolean jj_3_3() { + if (jj_3R_166()) return true; + return false; + } + + private boolean jj_3_6() { + if (jj_3R_169()) return true; + return false; + } + + private boolean jj_3_1() { + if (jj_3R_165()) return true; + return false; + } + /** Generated Token Manager. */ public ParserTokenManager token_source; /** Current token. */ diff --git a/theme-compiler/src/com/vaadin/sass/internal/parser/Parser.jj b/theme-compiler/src/com/vaadin/sass/internal/parser/Parser.jj index c26407c196..4e76022ae1 100644 --- a/theme-compiler/src/com/vaadin/sass/internal/parser/Parser.jj +++ b/theme-compiler/src/com/vaadin/sass/internal/parser/Parser.jj @@ -87,6 +87,10 @@ public class Parser implements org.w3c.css.sac.Parser { public void setLocale(Locale locale) throws CSSException { throw new CSSException(CSSException.SAC_NOT_SUPPORTED_ERR); } + + public InputSource getInputSource(){ + return source; + } /** * Set the document handler for this parser @@ -296,65 +300,70 @@ public class Parser implements org.w3c.css.sac.Parser { } } } - String encoding = "ASCII"; + //use the encoding preset on the source, if any; when null it is detected below (default ASCII). + String encoding = source.getEncoding(); InputStream input = source.getByteStream(); - char c = ' '; - if (!input.markSupported()) { - input = new BufferedInputStream(input); - source.setByteStream(input); - } - input.mark(100); - c = (char) input.read(); - - if (c == '@') { - // hum, is it a charset ? - int size = 100; - byte[] buf = new byte[size]; - input.read(buf, 0, 7); - String keyword = new String(buf, 0, 7); - if (keyword.equals("charset")) { - // Yes, this is the charset declaration ! - - // here I don't use the right declaration : white space are ' '.
- while ((c = (char) input.read()) == ' ') { - // find the first quote - } - char endChar = c; - int i = 0; + input = new BufferedInputStream(input); + source.setByteStream(input); + input.mark(100); + } + if(encoding == null){ + encoding = "ASCII"; + + char c = ' '; + + c = (char) input.read(); + + if (c == '@') { + // hum, is it a charset ? + int size = 100; + byte[] buf = new byte[size]; + input.read(buf, 0, 7); + String keyword = new String(buf, 0, 7); + if (keyword.equals("charset")) { + // Yes, this is the charset declaration ! + + // here I don't use the right declaration : white space are ' '. + while ((c = (char) input.read()) == ' ') { + // find the first quote + } + char endChar = c; + int i = 0; - if ((endChar != '"') && (endChar != '\'')) { - // hum this is not a quote. - throw new CSSException("invalid charset declaration"); - } + if ((endChar != '"') && (endChar != '\'')) { + // hum this is not a quote. + throw new CSSException("invalid charset declaration"); + } - while ((c = (char) input.read()) != endChar) { - buf[i++] = (byte) c; - if (i == size) { - byte[] old = buf; - buf = new byte[size + 100]; - System.arraycopy(old, 0, buf, 0, size); - size += 100; + while ((c = (char) input.read()) != endChar) { + buf[i++] = (byte) c; + if (i == size) { + byte[] old = buf; + buf = new byte[size + 100]; + System.arraycopy(old, 0, buf, 0, size); + size += 100; + } } - } - while ((c = (char) input.read()) == ' ') { - // find the next relevant character - } - if (c != ';') { - // no semi colon at the end ? - throw new CSSException("invalid charset declaration: " + while ((c = (char) input.read()) == ' ') { + // find the next relevant character + } + if (c != ';') { + // no semi colon at the end ? + throw new CSSException("invalid charset declaration: " + "missing semi colon"); - } - encoding = new String(buf, 0, i); - if (source.getEncoding() != null) { - // compare the two encoding informations. - // For example, I don't accept to have ASCII and after UTF-8. 
- // Is it really good ? That is the question. - if (!encoding.equals(source.getEncoding())) { - throw new CSSException("invalid encoding information."); } - } - } // else no charset declaration available + encoding = new String(buf, 0, i); + if (source.getEncoding() != null) { + // compare the two encoding informations. + // For example, I don't accept to have ASCII and after UTF-8. + // Is it really good ? That is the question. + if (!encoding.equals(source.getEncoding())) { + throw new CSSException("invalid encoding information."); + } + } + } // else no charset declaration available + } } // ok set the real encoding of this source. source.setEncoding(encoding); @@ -2710,26 +2719,10 @@ String convertStringIndex(String s, int start, int len) { case '5': case '6': case '7': case '8': case '9': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - int numValue = Character.digit(c, 16); - int count = 0; - int p = 16; - - while (index + 1 < len && count < 6) { - c = s.charAt(index+1); - - if (Character.digit(c, 16) != -1) { - numValue = (numValue * 16) + Character.digit(c, 16); - p *= 16; - index++; - } else { - if (c == ' ') { - // skip the latest white space - index++; - } - break; - } + buf.append('\\'); + while (index < len) { + buf.append(s.charAt(index++)); } - buf.append((char) numValue); break; case '\n': case '\f': diff --git a/theme-compiler/src/com/vaadin/sass/internal/visitor/ImportNodeHandler.java b/theme-compiler/src/com/vaadin/sass/internal/visitor/ImportNodeHandler.java index 946d56ba89..5593241297 100644 --- a/theme-compiler/src/com/vaadin/sass/internal/visitor/ImportNodeHandler.java +++ b/theme-compiler/src/com/vaadin/sass/internal/visitor/ImportNodeHandler.java @@ -48,8 +48,9 @@ public class ImportNodeHandler { filePathBuilder.append(".scss"); } - ScssStylesheet imported = ScssStylesheet - .get(filePathBuilder.toString()); + // set parent's charset to imported node. 
+ ScssStylesheet imported = ScssStylesheet.get( + filePathBuilder.toString(), node.getCharset()); if (imported == null) { imported = ScssStylesheet.get(importNode.getUri()); } diff --git a/theme-compiler/tests/resources/automatic/css/utf8.css b/theme-compiler/tests/resources/automatic/css/utf8.css new file mode 100644 index 0000000000..b27d6cedf9 --- /dev/null +++ b/theme-compiler/tests/resources/automatic/css/utf8.css @@ -0,0 +1,5 @@ +@charset "UTF-8"; +.imported { content: "\1f4c5"; } +.imported_raw_utf { content: "♥"; } +.bar { content: "\1f4c5"; } +.raw_utf { content: "📈"; } \ No newline at end of file diff --git a/theme-compiler/tests/resources/automatic/scss/utf8-imported/to-be-imported-scss-file-contains-utf8.scss b/theme-compiler/tests/resources/automatic/scss/utf8-imported/to-be-imported-scss-file-contains-utf8.scss new file mode 100644 index 0000000000..f8a08a4a96 --- /dev/null +++ b/theme-compiler/tests/resources/automatic/scss/utf8-imported/to-be-imported-scss-file-contains-utf8.scss @@ -0,0 +1,3 @@ +@charset "abc"; +.imported{content: '\1f4c5';} +.imported_raw_utf{content: "♥";} diff --git a/theme-compiler/tests/resources/automatic/scss/utf8.scss b/theme-compiler/tests/resources/automatic/scss/utf8.scss new file mode 100644 index 0000000000..b568674073 --- /dev/null +++ b/theme-compiler/tests/resources/automatic/scss/utf8.scss @@ -0,0 +1,4 @@ +@charset "UTF-8"; +@import "utf8-imported/to-be-imported-scss-file-contains-utf8"; +.bar {content: "\1f4c5";} +.raw_utf {content: "📈";} \ No newline at end of file -- 2.39.5