You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

typesniffer_test.go 5.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. // Copyright 2021 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package typesniffer
  4. import (
  5. "bytes"
  6. "encoding/base64"
  7. "encoding/hex"
  8. "strings"
  9. "testing"
  10. "github.com/stretchr/testify/assert"
  11. )
  12. func TestDetectContentTypeLongerThanSniffLen(t *testing.T) {
  13. // Pre-condition: Shorter than sniffLen detects SVG.
  14. assert.Equal(t, "image/svg+xml", DetectContentType([]byte(`<!-- Comment --><svg></svg>`)).contentType)
  15. // Longer than sniffLen detects something else.
  16. assert.NotEqual(t, "image/svg+xml", DetectContentType([]byte(`<!-- `+strings.Repeat("x", sniffLen)+` --><svg></svg>`)).contentType)
  17. }
  18. func TestIsTextFile(t *testing.T) {
  19. assert.True(t, DetectContentType([]byte{}).IsText())
  20. assert.True(t, DetectContentType([]byte("lorem ipsum")).IsText())
  21. }
  22. func TestIsSvgImage(t *testing.T) {
  23. assert.True(t, DetectContentType([]byte("<svg></svg>")).IsSvgImage())
  24. assert.True(t, DetectContentType([]byte(" <svg></svg>")).IsSvgImage())
  25. assert.True(t, DetectContentType([]byte(`<svg width="100"></svg>`)).IsSvgImage())
  26. assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?><svg></svg>`)).IsSvgImage())
  27. assert.True(t, DetectContentType([]byte(`<!-- Comment -->
  28. <svg></svg>`)).IsSvgImage())
  29. assert.True(t, DetectContentType([]byte(`<!-- Multiple -->
  30. <!-- Comments -->
  31. <svg></svg>`)).IsSvgImage())
  32. assert.True(t, DetectContentType([]byte(`<!-- Multiline
  33. Comment -->
  34. <svg></svg>`)).IsSvgImage())
  35. assert.True(t, DetectContentType([]byte(`<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1 Basic//EN"
  36. "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd">
  37. <svg></svg>`)).IsSvgImage())
  38. assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
  39. <!-- Comment -->
  40. <svg></svg>`)).IsSvgImage())
  41. assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
  42. <!-- Multiple -->
  43. <!-- Comments -->
  44. <svg></svg>`)).IsSvgImage())
  45. assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
  46. <!-- Multline
  47. Comment -->
  48. <svg></svg>`)).IsSvgImage())
  49. assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
  50. <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
  51. <!-- Multline
  52. Comment -->
  53. <svg></svg>`)).IsSvgImage())
  54. // the DetectContentType should work for incomplete data, because only beginning bytes are used for detection
  55. assert.True(t, DetectContentType([]byte(`<svg>....`)).IsSvgImage())
  56. assert.False(t, DetectContentType([]byte{}).IsSvgImage())
  57. assert.False(t, DetectContentType([]byte("svg")).IsSvgImage())
  58. assert.False(t, DetectContentType([]byte("<svgfoo></svgfoo>")).IsSvgImage())
  59. assert.False(t, DetectContentType([]byte("text<svg></svg>")).IsSvgImage())
  60. assert.False(t, DetectContentType([]byte("<html><body><svg></svg></body></html>")).IsSvgImage())
  61. assert.False(t, DetectContentType([]byte(`<script>"<svg></svg>"</script>`)).IsSvgImage())
  62. assert.False(t, DetectContentType([]byte(`<!-- <svg></svg> inside comment -->
  63. <foo></foo>`)).IsSvgImage())
  64. assert.False(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
  65. <!-- <svg></svg> inside comment -->
  66. <foo></foo>`)).IsSvgImage())
  67. assert.False(t, DetectContentType([]byte(`
  68. <!-- comment1 -->
  69. <div>
  70. <!-- comment2 -->
  71. <svg></svg>
  72. </div>
  73. `)).IsSvgImage())
  74. assert.False(t, DetectContentType([]byte(`
  75. <!-- comment1
  76. -->
  77. <div>
  78. <!-- comment2
  79. -->
  80. <svg></svg>
  81. </div>
  82. `)).IsSvgImage())
  83. assert.False(t, DetectContentType([]byte(`<html><body><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg></svg></body></html>`)).IsSvgImage())
  84. assert.False(t, DetectContentType([]byte(`<html><body><?xml version="1.0" encoding="UTF-8"?><svg></svg></body></html>`)).IsSvgImage())
  85. }
  86. func TestIsPDF(t *testing.T) {
  87. pdf, _ := base64.StdEncoding.DecodeString("JVBERi0xLjYKJcOkw7zDtsOfCjIgMCBvYmoKPDwvTGVuZ3RoIDMgMCBSL0ZpbHRlci9GbGF0ZURlY29kZT4+CnN0cmVhbQp4nF3NPwsCMQwF8D2f4s2CNYk1baF0EHRwOwg4iJt/NsFb/PpevUE4Mjwe")
  88. assert.True(t, DetectContentType(pdf).IsPDF())
  89. assert.False(t, DetectContentType([]byte("plain text")).IsPDF())
  90. }
  91. func TestIsVideo(t *testing.T) {
  92. mp4, _ := base64.StdEncoding.DecodeString("AAAAGGZ0eXBtcDQyAAAAAGlzb21tcDQyAAEI721vb3YAAABsbXZoZAAAAADaBlwX2gZcFwAAA+gA")
  93. assert.True(t, DetectContentType(mp4).IsVideo())
  94. assert.False(t, DetectContentType([]byte("plain text")).IsVideo())
  95. }
  96. func TestIsAudio(t *testing.T) {
  97. mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
  98. assert.True(t, DetectContentType(mp3).IsAudio())
  99. assert.False(t, DetectContentType([]byte("plain text")).IsAudio())
  100. assert.True(t, DetectContentType([]byte("ID3Toy\000")).IsAudio())
  101. assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ...")).IsText()) // test ID3 tag for plain text
  102. assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ..."+"🌛"[0:2])).IsText()) // test ID3 tag with incomplete UTF8 char
  103. }
  104. func TestDetectContentTypeFromReader(t *testing.T) {
  105. mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
  106. st, err := DetectContentTypeFromReader(bytes.NewReader(mp3))
  107. assert.NoError(t, err)
  108. assert.True(t, st.IsAudio())
  109. }
  110. func TestDetectContentTypeOgg(t *testing.T) {
  111. oggAudio, _ := hex.DecodeString("4f67675300020000000000000000352f0000000000007dc39163011e01766f72626973000000000244ac0000000000000071020000000000b8014f6767530000")
  112. st, err := DetectContentTypeFromReader(bytes.NewReader(oggAudio))
  113. assert.NoError(t, err)
  114. assert.True(t, st.IsAudio())
  115. oggVideo, _ := hex.DecodeString("4f676753000200000000000000007d9747ef000000009b59daf3012a807468656f7261030201001e00110001e000010e00020000001e00000001000001000001")
  116. st, err = DetectContentTypeFromReader(bytes.NewReader(oggVideo))
  117. assert.NoError(t, err)
  118. assert.True(t, st.IsVideo())
  119. }