Fixes #16559 - Do not trim leading spaces for tab delimited (#17442)

* Fixes #16559 - Do not trim leading spaces for tab delimited
* Adds back semicolon delimited test
* Fixes linting
* Adds nolint directive to test because it uses strings starting with spaces

Co-authored-by: zeripath <art27@cantab.net>
author: Richard Mahn <richmahn@users.noreply.github.com> 2021-10-26 15:46:56 -0600
committer: GitHub <noreply@github.com> 2021-10-26 16:46:56 -0500
commit: 8f9ac439cae544a7bad3acfba2fdd08f84c75ba8 (patch)
tree: c448c14c36f76c1a6d9228f95f19dcb519921f70 /modules
parent: 6e2c64f27adffbb0ffd8a8fc8d7315a66fc90b49 (diff)
download: gitea-8f9ac439cae544a7bad3acfba2fdd08f84c75ba8.tar.gz
gitea-8f9ac439cae544a7bad3acfba2fdd08f84c75ba8.zip
2 files changed, 53 insertions, 8 deletions
diff --git a/modules/csv/csv.go b/modules/csv/csv.go
index c239c7f647..cba23ec8d9 100644
--- a/modules/csv/csv.go
+++ b/modules/csv/csv.go
@@ -22,7 +22,11 @@ var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`)
 func CreateReader(input io.Reader, delimiter rune) *stdcsv.Reader {
 	rd := stdcsv.NewReader(input)
 	rd.Comma = delimiter
-	rd.TrimLeadingSpace = true
+	if delimiter != '\t' && delimiter != ' ' {
+		// TrimLeadingSpace can't be true when delimiter is a tab or a space as the value for a column might be empty,
+		// thus would change `\t\t` to just `\t` or `  ` (two spaces) to just ` ` (single space)
+		rd.TrimLeadingSpace = true
+	}
 	return rd
 }
 
diff --git a/modules/csv/csv_test.go b/modules/csv/csv_test.go
index 3cc09c40aa..9b7fa1f4fa 100644
--- a/modules/csv/csv_test.go
+++ b/modules/csv/csv_test.go
@@ -17,16 +17,57 @@ func TestCreateReader(t *testing.T) {
 	assert.Equal(t, ',', rd.Comma)
 }
 
+//nolint
 func TestCreateReaderAndGuessDelimiter(t *testing.T) {
-	input := "a;b;c\n1;2;3\n4;5;6"
+	var csvToRowsMap = map[string][][]string{
+		`a;b;c
+1;2;3
+4;5;6`: {{"a", "b", "c"}, {"1", "2", "3"}, {"4", "5", "6"}},
+		`col1	col2	col3
+a	b	c
+	e	f
+g	h	i
+j		l
+m	n	
+p	q	r
+		u
+v	w	x
+y		
+		`: {{"col1", "col2", "col3"},
+			{"a", "b", "c"},
+			{"", "e", "f"},
+			{"g", "h", "i"},
+			{"j", "", "l"},
+			{"m", "n", ""},
+			{"p", "q", "r"},
+			{"", "", "u"},
+			{"v", "w", "x"},
+			{"y", "", ""},
+			{"", "", ""}},
+		` col1,col2,col3
+ a, b, c
+d,e,f
+ ,h, i
+j, , 
+ , , `: {{"col1", "col2", "col3"},
+			{"a", "b", "c"},
+			{"d", "e", "f"},
+			{"", "h", "i"},
+			{"j", "", ""},
+			{"", "", ""}},
+	}
 
-	rd, err := CreateReaderAndGuessDelimiter(strings.NewReader(input))
-	assert.NoError(t, err)
-	assert.Equal(t, ';', rd.Comma)
+	for csv, expectedRows := range csvToRowsMap {
+		rd, err := CreateReaderAndGuessDelimiter(strings.NewReader(csv))
+		assert.NoError(t, err)
+		rows, err := rd.ReadAll()
+		assert.NoError(t, err)
+		assert.EqualValues(t, rows, expectedRows)
+	}
 }
 
 func TestGuessDelimiter(t *testing.T) {
-	var kases = map[string]rune{
+	var csvToDelimiterMap = map[string]rune{
 		"a":                         ',',
 		"1,2":                       ',',
 		"1;2":                       ';',
@@ -37,7 +78,7 @@ func TestGuessDelimiter(t *testing.T) {
 		"<br/>":                     ',',
 	}
 
-	for k, v := range kases {
-		assert.EqualValues(t, guessDelimiter([]byte(k)), v)
+	for csv, expectedDelimiter := range csvToDelimiterMap {
+		assert.EqualValues(t, guessDelimiter([]byte(csv)), expectedDelimiter)
 	}
 }
author	Richard Mahn <richmahn@users.noreply.github.com>	2021-10-26 15:46:56 -0600
committer	GitHub <noreply@github.com>	2021-10-26 16:46:56 -0500
commit	8f9ac439cae544a7bad3acfba2fdd08f84c75ba8 (patch)
tree	c448c14c36f76c1a6d9228f95f19dcb519921f70 /modules
parent	6e2c64f27adffbb0ffd8a8fc8d7315a66fc90b49 (diff)
download	gitea-8f9ac439cae544a7bad3acfba2fdd08f84c75ba8.tar.gz gitea-8f9ac439cae544a7bad3acfba2fdd08f84c75ba8.zip