diff options
author | Jason Song <i@wolfogre.com> | 2022-12-30 23:31:00 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-12-30 23:31:00 +0800 |
commit | d34f3a22137cd628f275407c495b7fbe5d88ec27 (patch) | |
tree | e463d8eae929a59555010f07bf4d99a8e331eb7d /modules | |
parent | 9dcaf14a148fdf748c41afc7e0aa2e6b3b273fd8 (diff) | |
download | gitea-d34f3a22137cd628f275407c495b7fbe5d88ec27.tar.gz gitea-d34f3a22137cd628f275407c495b7fbe5d88ec27.zip |
Fix sitemap (#22272)
Fix #22270.
Related to #18407.
The old code treated both sitemap and sitemap index as the format like:
```xml
...
<url>
<loc>http://localhost:3000/explore/users/sitemap-1.xml</loc>
</url>
...
```
Actually, it's incorrect for sitemap index, it should be:
```xml
...
<sitemap>
<loc>http://localhost:3000/explore/users/sitemap-1.xml</loc>
</sitemap>
...
```
See https://www.sitemaps.org/protocol.html
Co-authored-by: Lauris BH <lauris@nix.lv>
Co-authored-by: delvh <dev.lh@web.de>
Diffstat (limited to 'modules')
-rw-r--r-- | modules/sitemap/sitemap.go | 44 | ||||
-rw-r--r-- | modules/sitemap/sitemap_test.go | 192 |
2 files changed, 170 insertions, 66 deletions
diff --git a/modules/sitemap/sitemap.go b/modules/sitemap/sitemap.go index ceb65c1c8d..280ca1d710 100644 --- a/modules/sitemap/sitemap.go +++ b/modules/sitemap/sitemap.go @@ -11,48 +11,62 @@ import ( "time" ) -// sitemapFileLimit contains the maximum size of a sitemap file -const sitemapFileLimit = 50 * 1024 * 1024 +const ( + sitemapFileLimit = 50 * 1024 * 1024 // the maximum size of a sitemap file + urlsLimit = 50000 -// Url represents a single sitemap entry + schemaURL = "http://www.sitemaps.org/schemas/sitemap/0.9" + urlsetName = "urlset" + sitemapindexName = "sitemapindex" +) + +// URL represents a single sitemap entry type URL struct { URL string `xml:"loc"` LastMod *time.Time `xml:"lastmod,omitempty"` } -// SitemapUrl represents a sitemap +// Sitemap represents a sitemap type Sitemap struct { XMLName xml.Name Namespace string `xml:"xmlns,attr"` - URLs []URL `xml:"url"` + URLs []URL `xml:"url"` + Sitemaps []URL `xml:"sitemap"` } // NewSitemap creates a sitemap func NewSitemap() *Sitemap { return &Sitemap{ - XMLName: xml.Name{Local: "urlset"}, - Namespace: "http://www.sitemaps.org/schemas/sitemap/0.9", + XMLName: xml.Name{Local: urlsetName}, + Namespace: schemaURL, } } -// NewSitemap creates a sitemap index. +// NewSitemapIndex creates a sitemap index. func NewSitemapIndex() *Sitemap { return &Sitemap{ - XMLName: xml.Name{Local: "sitemapindex"}, - Namespace: "http://www.sitemaps.org/schemas/sitemap/0.9", + XMLName: xml.Name{Local: sitemapindexName}, + Namespace: schemaURL, } } // Add adds a URL to the sitemap func (s *Sitemap) Add(u URL) { - s.URLs = append(s.URLs, u) + if s.XMLName.Local == sitemapindexName { + s.Sitemaps = append(s.Sitemaps, u) + } else { + s.URLs = append(s.URLs, u) + } } -// Write writes the sitemap to a response +// WriteTo writes the sitemap to a response func (s *Sitemap) WriteTo(w io.Writer) (int64, error) { - if len(s.URLs) > 50000 { - return 0, fmt.Errorf("The sitemap contains too many URLs: %d", len(s.URLs)) + if l := len(s.URLs); l > urlsLimit { + return 0, fmt.Errorf("The sitemap contains %d URLs, but only %d are allowed", l, urlsLimit) + } + if l := len(s.Sitemaps); l > urlsLimit { + return 0, fmt.Errorf("The sitemap contains %d sub-sitemaps, but only %d are allowed", l, urlsLimit) } buf := bytes.NewBufferString(xml.Header) if err := xml.NewEncoder(buf).Encode(s); err != nil { @@ -62,7 +76,7 @@ func (s *Sitemap) WriteTo(w io.Writer) (int64, error) { return 0, err } if buf.Len() > sitemapFileLimit { - return 0, fmt.Errorf("The sitemap is too big: %d", buf.Len()) + return 0, fmt.Errorf("The sitemap has %d bytes, but only %d are allowed", buf.Len(), sitemapFileLimit) } return buf.WriteTo(w) } diff --git a/modules/sitemap/sitemap_test.go b/modules/sitemap/sitemap_test.go index ab879b272e..1180463cd7 100644 --- a/modules/sitemap/sitemap_test.go +++ b/modules/sitemap/sitemap_test.go @@ -6,7 +6,6 @@ package sitemap import ( "bytes" "encoding/xml" - "fmt" "strings" "testing" "time" @@ -14,63 +13,154 @@ import ( "github.com/stretchr/testify/assert" ) -func TestOk(t *testing.T) { - testReal := func(s *Sitemap, name string, urls []URL, expected string) { - for _, url := range urls { - s.Add(url) - } - buf := &bytes.Buffer{} - _, err := s.WriteTo(buf) - assert.NoError(t, nil, err) - assert.Equal(t, xml.Header+"<"+name+" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">"+expected+"</"+name+">\n", buf.String()) +func TestNewSitemap(t *testing.T) { + ts := time.Unix(1651322008, 0).UTC() + + tests := []struct { + name string + urls []URL + want string + wantErr string + }{ + { + name: "empty", + urls: []URL{}, + want: xml.Header + `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` + + "" + + "</urlset>\n", + }, + { + name: "regular", + urls: []URL{ + {URL: "https://gitea.io/test1", LastMod: &ts}, + }, + want: xml.Header + `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` + + "<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>" + + "</urlset>\n", + }, + { + name: "without lastmod", + urls: []URL{ + {URL: "https://gitea.io/test1"}, + }, + want: xml.Header + `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` + + "<url><loc>https://gitea.io/test1</loc></url>" + + "</urlset>\n", + }, + { + name: "multiple", + urls: []URL{ + {URL: "https://gitea.io/test1", LastMod: &ts}, + {URL: "https://gitea.io/test2", LastMod: nil}, + }, + want: xml.Header + `<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` + + "<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>" + + "<url><loc>https://gitea.io/test2</loc></url>" + + "</urlset>\n", + }, + { + name: "too many urls", + urls: make([]URL, 50001), + wantErr: "The sitemap contains 50001 URLs, but only 50000 are allowed", + }, + { + name: "too big file", + urls: []URL{ + {URL: strings.Repeat("b", 50*1024*1024+1)}, + }, + wantErr: "The sitemap has 52428932 bytes, but only 52428800 are allowed", + }, } - test := func(urls []URL, expected string) { - testReal(NewSitemap(), "urlset", urls, expected) - testReal(NewSitemapIndex(), "sitemapindex", urls, expected) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := NewSitemap() + for _, url := range tt.urls { + s.Add(url) + } + buf := &bytes.Buffer{} + _, err := s.WriteTo(buf) + if tt.wantErr != "" { + assert.EqualError(t, err, tt.wantErr) + } else { + assert.NoError(t, err) + assert.Equalf(t, tt.want, buf.String(), "NewSitemap()") + } + }) } +} +func TestNewSitemapIndex(t *testing.T) { ts := time.Unix(1651322008, 0).UTC() - test( - []URL{}, - "", - ) - test( - []URL{ - {URL: "https://gitea.io/test1", LastMod: &ts}, + tests := []struct { + name string + urls []URL + want string + wantErr string + }{ + { + name: "empty", + urls: []URL{}, + want: xml.Header + `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` + + "" + + "</sitemapindex>\n", }, - "<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>", - ) - test( - []URL{ - {URL: "https://gitea.io/test2", LastMod: nil}, + { + name: "regular", + urls: []URL{ + {URL: "https://gitea.io/test1", LastMod: &ts}, + }, + want: xml.Header + `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` + + "<sitemap><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></sitemap>" + + "</sitemapindex>\n", }, - "<url><loc>https://gitea.io/test2</loc></url>", - ) - test( - []URL{ - {URL: "https://gitea.io/test1", LastMod: &ts}, - {URL: "https://gitea.io/test2", LastMod: nil}, + { + name: "without lastmod", + urls: []URL{ + {URL: "https://gitea.io/test1"}, + }, + want: xml.Header + `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` + + "<sitemap><loc>https://gitea.io/test1</loc></sitemap>" + + "</sitemapindex>\n", + }, + { + name: "multiple", + urls: []URL{ + {URL: "https://gitea.io/test1", LastMod: &ts}, + {URL: "https://gitea.io/test2", LastMod: nil}, + }, + want: xml.Header + `<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">` + + "<sitemap><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></sitemap>" + + "<sitemap><loc>https://gitea.io/test2</loc></sitemap>" + + "</sitemapindex>\n", + }, + { + name: "too many sitemaps", + urls: make([]URL, 50001), + wantErr: "The sitemap contains 50001 sub-sitemaps, but only 50000 are allowed", + }, + { + name: "too big file", + urls: []URL{ + {URL: strings.Repeat("b", 50*1024*1024+1)}, + }, + wantErr: "The sitemap has 52428952 bytes, but only 52428800 are allowed", }, - "<url><loc>https://gitea.io/test1</loc><lastmod>2022-04-30T12:33:28Z</lastmod></url>"+ - "<url><loc>https://gitea.io/test2</loc></url>", - ) -} - -func TestTooManyURLs(t *testing.T) { - s := NewSitemap() - for i := 0; i < 50001; i++ { - s.Add(URL{URL: fmt.Sprintf("https://gitea.io/test%d", i)}) } - buf := &bytes.Buffer{} - _, err := s.WriteTo(buf) - assert.EqualError(t, err, "The sitemap contains too many URLs: 50001") -} - -func TestSitemapTooBig(t *testing.T) { - s := NewSitemap() - s.Add(URL{URL: strings.Repeat("b", sitemapFileLimit)}) - buf := &bytes.Buffer{} - _, err := s.WriteTo(buf) - assert.EqualError(t, err, "The sitemap is too big: 52428931") + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := NewSitemapIndex() + for _, url := range tt.urls { + s.Add(url) + } + buf := &bytes.Buffer{} + _, err := s.WriteTo(buf) + if tt.wantErr != "" { + assert.EqualError(t, err, tt.wantErr) + } else { + assert.NoError(t, err) + assert.Equalf(t, tt.want, buf.String(), "NewSitemapIndex()") + } + }) + } } |