aboutsummaryrefslogtreecommitdiffstats
path: root/modules/sitemap
diff options
context:
space:
mode:
authorJason Song <i@wolfogre.com>2022-12-30 23:31:00 +0800
committerGitHub <noreply@github.com>2022-12-30 23:31:00 +0800
commitd34f3a22137cd628f275407c495b7fbe5d88ec27 (patch)
treee463d8eae929a59555010f07bf4d99a8e331eb7d /modules/sitemap
parent9dcaf14a148fdf748c41afc7e0aa2e6b3b273fd8 (diff)
downloadgitea-d34f3a22137cd628f275407c495b7fbe5d88ec27.tar.gz
gitea-d34f3a22137cd628f275407c495b7fbe5d88ec27.zip
Fix sitemap (#22272)
Fix #22270. Related to #18407. The old code treated both sitemap and sitemap index as the format like: ```xml ... <url> <loc>http://localhost:3000/explore/users/sitemap-1.xml</loc> </url> ... ``` Actually, it's incorrect for sitemap index, it should be: ```xml ... <sitemap> <loc>http://localhost:3000/explore/users/sitemap-1.xml</loc> </sitemap> ... ``` See https://www.sitemaps.org/protocol.html Co-authored-by: Lauris BH <lauris@nix.lv> Co-authored-by: delvh <dev.lh@web.de>
Diffstat (limited to 'modules/sitemap')
-rw-r--r--modules/sitemap/sitemap.go44
-rw-r--r--modules/sitemap/sitemap_test.go192
2 files changed, 170 insertions, 66 deletions
diff --git a/modules/sitemap/sitemap.go b/modules/sitemap/sitemap.go
index ceb65c1c8d..280ca1d710 100644
--- a/modules/sitemap/sitemap.go
+++ b/modules/sitemap/sitemap.go
@@ -11,48 +11,62 @@ import (
"time"
)
-// sitemapFileLimit contains the maximum size of a sitemap file
-const sitemapFileLimit = 50 * 1024 * 1024
+const (
+ sitemapFileLimit = 50 * 1024 * 1024 // the maximum size of a sitemap file
+ urlsLimit = 50000
-// Url represents a single sitemap entry
+ schemaURL = "http://www.sitemaps.org/schemas/sitemap/0.9"
+ urlsetName = "urlset"
+ sitemapindexName = "sitemapindex"
+)
+
+// URL represents a single sitemap entry
type URL struct {
URL string `xml:"loc"`
LastMod *time.Time `xml:"lastmod,omitempty"`
}
-// SitemapUrl represents a sitemap
+// Sitemap represents a sitemap
type Sitemap struct {
XMLName xml.Name
Namespace string `xml:"xmlns,attr"`
- URLs []URL `xml:"url"`
+ URLs []URL `xml:"url"`
+ Sitemaps []URL `xml:"sitemap"`
}
// NewSitemap creates a sitemap
func NewSitemap() *Sitemap {
return &Sitemap{
- XMLName: xml.Name{Local: "urlset"},
- Namespace: "http://www.sitemaps.org/schemas/sitemap/0.9",
+ XMLName: xml.Name{Local: urlsetName},
+ Namespace: schemaURL,
}
}
-// NewSitemap creates a sitemap index.
+// NewSitemapIndex creates a sitemap index.
func NewSitemapIndex() *Sitemap {
return &Sitemap{
- XMLName: xml.Name{Local: "sitemapindex"},
- Namespace: "http://www.sitemaps.org/schemas/sitemap/0.9",
+ XMLName: xml.Name{Local: sitemapindexName},
+ Namespace: schemaURL,
}
}
// Add adds a URL to the sitemap
func (s *Sitemap) Add(u URL) {
- s.URLs = append(s.URLs, u)
+ if s.XMLName.Local == sitemapindexName {
+ s.Sitemaps = append(s.Sitemaps, u)
+ } else {
+ s.URLs = append(s.URLs, u)
+ }
}
-// Write writes the sitemap to a response
+// WriteTo writes the sitemap to a response
func (s *Sitemap) WriteTo(w io.Writer) (int64, error) {
- if len(s.URLs) > 50000 {
- return 0, fmt.Errorf("The sitemap contains too many URLs: %d", len(s.URLs))
+ if l := len(s.URLs); l > urlsLimit {
+ return 0, fmt.Errorf("The sitemap contains %d URLs, but only %d are allowed", l, urlsLimit)
+ }
+ if l := len(s.Sitemaps); l > urlsLimit {
+ return 0, fmt.Errorf("The sitemap contains %d sub-sitemaps, but only %d are allowed", l, urlsLimit)
}
buf := bytes.NewBufferString(xml.Header)
if err := xml.NewEncoder(buf).Encode(s); err != nil {
@@ -62,7 +76,7 @@ func (s *Sitemap) WriteTo(w io.Writer) (int64, error) {
return 0, err
}
if buf.Len() > sitemapFileLimit {
- return 0, fmt.Errorf("The sitemap is too big: %d", buf.Len())
+ return 0, fmt.Errorf("The sitemap has %d bytes, but only %d are allowed", buf.Len(), sitemapFileLimit)
}
return buf.WriteTo(w)
}
diff --git a/modules/sitemap/sitemap_test.go b/modules/sitemap/sitemap_test.go
index ab879b272e..1180463cd7 100644
--- a/modules/sitemap/sitemap_test.go
+++ b/modules/sitemap/sitemap_test.go
@@ -6,7 +6,6 @@ package sitemap
import (
"bytes"
"encoding/xml"
- "fmt"
"strings"
"testing"
"time"
@@ -14,63 +13,154 @@ import (
"github.com/stretchr/testify/assert"
)
-func TestOk(t *testing.T) {
- testReal := func(s *Sitemap, name string, urls []URL, expected string) {
- for _, url := range urls {
- s.Add(url)
- }
- buf := &bytes.Buffer{}
- _, err := s.WriteTo(buf)
- assert.NoError(t, nil, err)
- assert.Equal(t, xml.Header+"<"+name+" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"+expected+"</"+name+">\n", buf.String())
+func TestNewSitemap(t *testing.T) {
+ ts := time.Unix(1651322008, 0).UTC()
+
+ tests := []struct {
+ name string
+ urls []URL
+ want string
+ wantErr string
+ }{
+ {
+ name: "empty",
+ urls: []URL{},
+ want: xml.Header + `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
+ "" +
+ "</urlset>\n",
+ },
+ {
+ name: "regular",
+ urls: []URL{
+ {URL: "https://gitea.io/test1", LastMod: &ts},
+ },
+ want: xml.Header + `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
+ "<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>" +
+ "</urlset>\n",
+ },
+ {
+ name: "without lastmod",
+ urls: []URL{
+ {URL: "https://gitea.io/test1"},
+ },
+ want: xml.Header + `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
+ "<url><loc>https://gitea.io/test1</loc></url>" +
+ "</urlset>\n",
+ },
+ {
+ name: "multiple",
+ urls: []URL{
+ {URL: "https://gitea.io/test1", LastMod: &ts},
+ {URL: "https://gitea.io/test2", LastMod: nil},
+ },
+ want: xml.Header + `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
+ "<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>" +
+ "<url><loc>https://gitea.io/test2</loc></url>" +
+ "</urlset>\n",
+ },
+ {
+ name: "too many urls",
+ urls: make([]URL, 50001),
+ wantErr: "The sitemap contains 50001 URLs, but only 50000 are allowed",
+ },
+ {
+ name: "too big file",
+ urls: []URL{
+ {URL: strings.Repeat("b", 50*1024*1024+1)},
+ },
+ wantErr: "The sitemap has 52428932 bytes, but only 52428800 are allowed",
+ },
}
- test := func(urls []URL, expected string) {
- testReal(NewSitemap(), "urlset", urls, expected)
- testReal(NewSitemapIndex(), "sitemapindex", urls, expected)
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ s := NewSitemap()
+ for _, url := range tt.urls {
+ s.Add(url)
+ }
+ buf := &bytes.Buffer{}
+ _, err := s.WriteTo(buf)
+ if tt.wantErr != "" {
+ assert.EqualError(t, err, tt.wantErr)
+ } else {
+ assert.NoError(t, err)
+ assert.Equalf(t, tt.want, buf.String(), "NewSitemap()")
+ }
+ })
}
+}
+func TestNewSitemapIndex(t *testing.T) {
ts := time.Unix(1651322008, 0).UTC()
- test(
- []URL{},
- "",
- )
- test(
- []URL{
- {URL: "https://gitea.io/test1", LastMod: &ts},
+ tests := []struct {
+ name string
+ urls []URL
+ want string
+ wantErr string
+ }{
+ {
+ name: "empty",
+ urls: []URL{},
+ want: xml.Header + `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
+ "" +
+ "</sitemapindex>\n",
},
- "<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>",
- )
- test(
- []URL{
- {URL: "https://gitea.io/test2", LastMod: nil},
+ {
+ name: "regular",
+ urls: []URL{
+ {URL: "https://gitea.io/test1", LastMod: &ts},
+ },
+ want: xml.Header + `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
+ "<sitemap><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></sitemap>" +
+ "</sitemapindex>\n",
},
- "<url><loc>https://gitea.io/test2</loc></url>",
- )
- test(
- []URL{
- {URL: "https://gitea.io/test1", LastMod: &ts},
- {URL: "https://gitea.io/test2", LastMod: nil},
+ {
+ name: "without lastmod",
+ urls: []URL{
+ {URL: "https://gitea.io/test1"},
+ },
+ want: xml.Header + `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
+ "<sitemap><loc>https://gitea.io/test1</loc></sitemap>" +
+ "</sitemapindex>\n",
+ },
+ {
+ name: "multiple",
+ urls: []URL{
+ {URL: "https://gitea.io/test1", LastMod: &ts},
+ {URL: "https://gitea.io/test2", LastMod: nil},
+ },
+ want: xml.Header + `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` +
+ "<sitemap><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></sitemap>" +
+ "<sitemap><loc>https://gitea.io/test2</loc></sitemap>" +
+ "</sitemapindex>\n",
+ },
+ {
+ name: "too many sitemaps",
+ urls: make([]URL, 50001),
+ wantErr: "The sitemap contains 50001 sub-sitemaps, but only 50000 are allowed",
+ },
+ {
+ name: "too big file",
+ urls: []URL{
+ {URL: strings.Repeat("b", 50*1024*1024+1)},
+ },
+ wantErr: "The sitemap has 52428952 bytes, but only 52428800 are allowed",
},
- "<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>"+
- "<url><loc>https://gitea.io/test2</loc></url>",
- )
-}
-
-func TestTooManyURLs(t *testing.T) {
- s := NewSitemap()
- for i := 0; i < 50001; i++ {
- s.Add(URL{URL: fmt.Sprintf("https://gitea.io/test%d", i)})
}
- buf := &bytes.Buffer{}
- _, err := s.WriteTo(buf)
- assert.EqualError(t, err, "The sitemap contains too many URLs: 50001")
-}
-
-func TestSitemapTooBig(t *testing.T) {
- s := NewSitemap()
- s.Add(URL{URL: strings.Repeat("b", sitemapFileLimit)})
- buf := &bytes.Buffer{}
- _, err := s.WriteTo(buf)
- assert.EqualError(t, err, "The sitemap is too big: 52428931")
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ s := NewSitemapIndex()
+ for _, url := range tt.urls {
+ s.Add(url)
+ }
+ buf := &bytes.Buffer{}
+ _, err := s.WriteTo(buf)
+ if tt.wantErr != "" {
+ assert.EqualError(t, err, tt.wantErr)
+ } else {
+ assert.NoError(t, err)
+ assert.Equalf(t, tt.want, buf.String(), "NewSitemapIndex()")
+ }
+ })
+ }
}