]> source.dussan.org Git - poi.git/commitdiff
XLSX2CSV: Do not double-encode if the value is already enclosed in quotes and escape doubl...
authorDominik Stadler <centic@apache.org>
Tue, 6 Apr 2021 06:11:01 +0000 (06:11 +0000)
committerDominik Stadler <centic@apache.org>
Tue, 6 Apr 2021 06:11:01 +0000 (06:11 +0000)
Most CSV formats use "" (two quotes) to escape a "-character; we should do this in this
example as well so that the produced files can be parsed correctly by other CSV processors.

Also, cases where the value is already enclosed in quotes should not lead to additional quotes.

Add a simple initial test to module "examples" to verify basic functionality of XLSX2CSV,
as I often rely on it for converting some very large xlsx-files to csv

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1888418 13f79535-47bb-0310-9956-ffa450edef68

examples/build.gradle
examples/src/main/java/org/apache/poi/examples/xssf/eventusermodel/XLSX2CSV.java
ooxml/build.gradle

index 559dd449cced10a017ad8f494ed50cc8e4686c09..15ab280aa14167646e48b3c202c1bbfc01195ed9 100644 (file)
@@ -20,4 +20,7 @@ dependencies {
     implementation project(':scratchpad')
 
     implementation "org.apache.logging.log4j:log4j-core:${log4jVersion}"
+
+       testImplementation project(path: ':ooxml', configuration: 'tests')
+       testImplementation project(path: ':main', configuration: 'tests')
 }
index 627e0f7fb51dc37edbe719c9b7a35f333ebda2df..81235f0ef543ad311dd9370a45348cd9cb17840d 100644 (file)
@@ -128,6 +128,12 @@ public class XLSX2CSV {
             for (int i=0; i<missedCols; i++) {
                 output.append(',');
             }
+
+            // no need to append anything if we do not have a value
+            if (formattedValue == null) {
+                return;
+            }
+
             currentCol = thisCol;
 
             // Number or string?
@@ -136,8 +142,14 @@ public class XLSX2CSV {
                 Double.parseDouble(formattedValue);
                 output.append(formattedValue);
             } catch (Exception e) {
+                // let's remove quotes if they are already there
+                if (formattedValue.startsWith("\"") && formattedValue.endsWith("\"")) {
+                    formattedValue = formattedValue.substring(1, formattedValue.length()-1);
+                }
+
                 output.append('"');
-                output.append(formattedValue);
+                // encode double-quote with two double-quotes to produce a valid CSV format
+                output.append(formattedValue.replace("\"", "\"\""));
                 output.append('"');
             }
         }
index 2937e482bf9a195f57ac37b9ed85c6af3140c054..3fac94efbe19e147617c0ca6f7e5183fd63db9e4 100644 (file)
@@ -50,7 +50,17 @@ jar {
     }
 }
 
-test {
-    // for some reason catching the OOM does not work when run from Gradle
-    exclude '**/MemoryUsage.class'
-}
\ No newline at end of file
+// Create a separate jar for test-code to depend on it in other projects
+// See http://stackoverflow.com/questions/5144325/gradle-test-dependency
+task testJar(type: Jar, dependsOn: testClasses) {
+       baseName = "test-${project.archivesBaseName}"
+       from sourceSets.test.output
+}
+
+configurations {
+       tests
+}
+
+artifacts {
+       tests testJar
+}