diff options
author | yp05327 <576951401@qq.com> | 2024-10-02 04:25:08 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-10-01 15:25:08 -0400 |
commit | 70b7df0e5e5cce5a3561fa5a6a8abd4ebc902e68 (patch) | |
tree | 6532a12e349fe59e4c9eb66161246360aa6350d2 /services | |
parent | f4b8f6fc40ce2869135372a5c6ec6418d27ebfba (diff) | |
download | gitea-70b7df0e5e5cce5a3561fa5a6a8abd4ebc902e68.tar.gz gitea-70b7df0e5e5cce5a3561fa5a6a8abd4ebc902e68.zip |
Support repo license (#24872)
Close #278
Close #24076
## Solutions:
- Use
[google/licenseclassifier](https://github.com/google/licenseclassifier/)
Test result between
[google/licensecheck](https://github.com/google/licensecheck) and
[go-license-detector](https://github.com/go-enry/go-license-detector):
https://github.com/go-gitea/gitea/pull/24872#issuecomment-1560361167
Test result between
[google/licensecheck](https://github.com/google/licensecheck) and
[google/licenseclassifier](https://github.com/google/licenseclassifier/):
https://github.com/go-gitea/gitea/pull/24872#issuecomment-1576092178
- Generate License Convert Name List to avoid import license templates
with same contents
Gitea automatically get latest license data from[
spdx/license-list-data](https://github.com/spdx/license-list-data).
But unfortunately, some license templates have same contents. #20915
[click here to see the
list](https://github.com/go-gitea/gitea/pull/24872#issuecomment-1584141684)
So we will generate a list of these license templates with same contents
and create a new file to save the result when using `make
generate-license`. (Need to decide the save path)
- Save License info into a new table `repo_license`
Can easily support searching repo by license in the future.
## Screen shot
Single License:

Multiple Licenses:

Triggers:
- [x] Push commit to default branch
- [x] Create repo
- [x] Mirror repo
- [x] When Default Branch is changed, licenses should be updated
Todo:
- [x] Save Licenses info in to DB when there's a change to license file
in the commit
- [x] DB Migration
- [x] A nominal test?
- [x] Select which library to
use(https://github.com/go-gitea/gitea/pull/24872#issuecomment-1560361167)
- [x] API Support
- [x] Add repo license table
- ~Select license in settings if there are several licenses(Not
recommended)~
- License board(later, not in this PR)

---------
Co-authored-by: silverwind <me@silverwind.io>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
Co-authored-by: Denys Konovalov <kontakt@denyskon.de>
Co-authored-by: delvh <dev.lh@web.de>
Co-authored-by: KN4CK3R <admin@oldschoolhack.me>
Co-authored-by: 6543 <6543@obermui.de>
Co-authored-by: 6543 <m.huber@kithara.com>
Co-authored-by: a1012112796 <1012112796@qq.com>
Co-authored-by: techknowlogick <techknowlogick@gitea.com>
Diffstat (limited to 'services')
-rw-r--r-- | services/context/repo.go | 7 | ||||
-rw-r--r-- | services/convert/repository.go | 6 | ||||
-rw-r--r-- | services/cron/tasks_basic.go | 11 | ||||
-rw-r--r-- | services/migrations/gitea_uploader_test.go | 3 | ||||
-rw-r--r-- | services/mirror/mirror_pull.go | 9 | ||||
-rw-r--r-- | services/repository/branch.go | 8 | ||||
-rw-r--r-- | services/repository/create.go | 19 | ||||
-rw-r--r-- | services/repository/delete.go | 1 | ||||
-rw-r--r-- | services/repository/fork.go | 3 | ||||
-rw-r--r-- | services/repository/license.go | 205 | ||||
-rw-r--r-- | services/repository/license_test.go | 73 | ||||
-rw-r--r-- | services/repository/migrate.go | 5 | ||||
-rw-r--r-- | services/repository/repository.go | 7 |
13 files changed, 357 insertions, 0 deletions
diff --git a/services/context/repo.go b/services/context/repo.go index e0d3a0bfd3..c001255283 100644 --- a/services/context/repo.go +++ b/services/context/repo.go @@ -404,6 +404,13 @@ func repoAssignment(ctx *Context, repo *repo_model.Repository) { ctx.Data["PushMirrors"] = pushMirrors ctx.Data["RepoName"] = ctx.Repo.Repository.Name ctx.Data["IsEmptyRepo"] = ctx.Repo.Repository.IsEmpty + + repoLicenses, err := repo_model.GetRepoLicenses(ctx, ctx.Repo.Repository) + if err != nil { + ctx.ServerError("GetRepoLicenses", err) + return + } + ctx.Data["DetectedRepoLicenses"] = repoLicenses.StringList() } // RepoAssignment returns a middleware to handle repository assignment diff --git a/services/convert/repository.go b/services/convert/repository.go index 751260a45d..e026d0f440 100644 --- a/services/convert/repository.go +++ b/services/convert/repository.go @@ -175,6 +175,11 @@ func innerToRepo(ctx context.Context, repo *repo_model.Repository, permissionInR language = repo.PrimaryLanguage.Language } + repoLicenses, err := repo_model.GetRepoLicenses(ctx, repo) + if err != nil { + return nil + } + repoAPIURL := repo.APIURL() return &api.Repository{ @@ -238,6 +243,7 @@ func innerToRepo(ctx context.Context, repo *repo_model.Repository, permissionInR RepoTransfer: transfer, Topics: repo.Topics, ObjectFormatName: repo.ObjectFormatName, + Licenses: repoLicenses.StringList(), } } diff --git a/services/cron/tasks_basic.go b/services/cron/tasks_basic.go index 2a213ae515..fb5938745e 100644 --- a/services/cron/tasks_basic.go +++ b/services/cron/tasks_basic.go @@ -156,6 +156,16 @@ func registerCleanupPackages() { }) } +func registerSyncRepoLicenses() { + RegisterTaskFatal("sync_repo_licenses", &BaseConfig{ + Enabled: false, + RunAtStart: false, + Schedule: "@annually", + }, func(ctx context.Context, _ *user_model.User, config Config) error { + return repo_service.SyncRepoLicenses(ctx) + }) +} + func initBasicTasks() { if setting.Mirror.Enabled { registerUpdateMirrorTask() @@ -172,4 +182,5 @@ func initBasicTasks() { if setting.Packages.Enabled { registerCleanupPackages() } + registerSyncRepoLicenses() } diff --git a/services/migrations/gitea_uploader_test.go b/services/migrations/gitea_uploader_test.go index c9b9248098..f2379dadf8 100644 --- a/services/migrations/gitea_uploader_test.go +++ b/services/migrations/gitea_uploader_test.go @@ -26,6 +26,7 @@ import ( "code.gitea.io/gitea/modules/optional" "code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/modules/test" + repo_service "code.gitea.io/gitea/services/repository" "github.com/stretchr/testify/assert" ) @@ -302,6 +303,8 @@ func TestGiteaUploadUpdateGitForPullRequest(t *testing.T) { toRepoName := "migrated" uploader := NewGiteaLocalUploader(context.Background(), fromRepoOwner, fromRepoOwner.Name, toRepoName) uploader.gitServiceType = structs.GiteaService + + assert.NoError(t, repo_service.Init(context.Background())) assert.NoError(t, uploader.CreateRepo(&base.Repository{ Description: "description", OriginalURL: fromRepo.RepoPath(), diff --git a/services/mirror/mirror_pull.go b/services/mirror/mirror_pull.go index 9f7ffb29c9..654a50d11e 100644 --- a/services/mirror/mirror_pull.go +++ b/services/mirror/mirror_pull.go @@ -24,6 +24,7 @@ import ( "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/util" notify_service "code.gitea.io/gitea/services/notify" + repo_service "code.gitea.io/gitea/services/repository" ) // gitShortEmptySha Git short empty SHA @@ -559,6 +560,14 @@ func SyncPullMirror(ctx context.Context, repoID int64) bool { } } + // Update License + if err = repo_service.AddRepoToLicenseUpdaterQueue(&repo_service.LicenseUpdaterOptions{ + RepoID: m.Repo.ID, + }); err != nil { + log.Error("SyncMirrors [repo: %-v]: unable to add repo to license updater queue: %v", m.Repo, err) + return false + } + log.Trace("SyncMirrors [repo: %-v]: Successfully updated", m.Repo) return true diff --git a/services/repository/branch.go b/services/repository/branch.go index f5cdb72a7b..67df4363e4 100644 --- a/services/repository/branch.go +++ b/services/repository/branch.go @@ -612,6 +612,14 @@ func SetRepoDefaultBranch(ctx context.Context, repo *repo_model.Repository, gitR return err } + if !repo.IsEmpty { + if err := AddRepoToLicenseUpdaterQueue(&LicenseUpdaterOptions{ + RepoID: repo.ID, + }); err != nil { + log.Error("AddRepoToLicenseUpdaterQueue: %v", err) + } + } + notify_service.ChangeDefaultBranch(ctx, repo) return nil diff --git a/services/repository/create.go b/services/repository/create.go index 971793bcc6..282b2d3e58 100644 --- a/services/repository/create.go +++ b/services/repository/create.go @@ -303,6 +303,25 @@ func CreateRepositoryDirectly(ctx context.Context, doer, u *user_model.User, opt rollbackRepo.OwnerID = u.ID return fmt.Errorf("CreateRepository(git update-server-info): %w", err) } + + // update licenses + var licenses []string + if len(opts.License) > 0 { + licenses = append(licenses, ConvertLicenseName(opts.License)) + + stdout, _, err := git.NewCommand(ctx, "rev-parse", "HEAD"). + SetDescription(fmt.Sprintf("CreateRepository(git rev-parse HEAD): %s", repoPath)). + RunStdString(&git.RunOpts{Dir: repoPath}) + if err != nil { + log.Error("CreateRepository(git rev-parse HEAD) in %v: Stdout: %s\nError: %v", repo, stdout, err) + rollbackRepo = repo + rollbackRepo.OwnerID = u.ID + return fmt.Errorf("CreateRepository(git rev-parse HEAD): %w", err) + } + if err := repo_model.UpdateRepoLicenses(ctx, repo, stdout, licenses); err != nil { + return err + } + } return nil }); err != nil { if rollbackRepo != nil { diff --git a/services/repository/delete.go b/services/repository/delete.go index cd779b05c3..e580833140 100644 --- a/services/repository/delete.go +++ b/services/repository/delete.go @@ -140,6 +140,7 @@ func DeleteRepositoryDirectly(ctx context.Context, doer *user_model.User, repoID &git_model.Branch{RepoID: repoID}, &git_model.LFSLock{RepoID: repoID}, &repo_model.LanguageStat{RepoID: repoID}, + &repo_model.RepoLicense{RepoID: repoID}, &issues_model.Milestone{RepoID: repoID}, &repo_model.Mirror{RepoID: repoID}, &activities_model.Notification{RepoID: repoID}, diff --git a/services/repository/fork.go b/services/repository/fork.go index f074fd1082..e114555679 100644 --- a/services/repository/fork.go +++ b/services/repository/fork.go @@ -198,6 +198,9 @@ func ForkRepository(ctx context.Context, doer, owner *user_model.User, opts Fork if err := repo_model.CopyLanguageStat(ctx, opts.BaseRepo, repo); err != nil { log.Error("Copy language stat from oldRepo failed: %v", err) } + if err := repo_model.CopyLicense(ctx, opts.BaseRepo, repo); err != nil { + return nil, err + } gitRepo, err := gitrepo.OpenRepository(ctx, repo) if err != nil { diff --git a/services/repository/license.go b/services/repository/license.go new file mode 100644 index 0000000000..2453be3c87 --- /dev/null +++ b/services/repository/license.go @@ -0,0 +1,205 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package repository + +import ( + "context" + "fmt" + "io" + + "code.gitea.io/gitea/models/db" + repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/modules/container" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/json" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/options" + "code.gitea.io/gitea/modules/queue" + + licenseclassifier "github.com/google/licenseclassifier/v2" +) + +var ( + classifier *licenseclassifier.Classifier + LicenseFileName = "LICENSE" + licenseAliases map[string]string + + // licenseUpdaterQueue represents a queue to handle update repo licenses + licenseUpdaterQueue *queue.WorkerPoolQueue[*LicenseUpdaterOptions] +) + +func AddRepoToLicenseUpdaterQueue(opts *LicenseUpdaterOptions) error { + if opts == nil { + return nil + } + return licenseUpdaterQueue.Push(opts) +} + +func loadLicenseAliases() error { + if licenseAliases != nil { + return nil + } + + data, err := options.AssetFS().ReadFile("license", "etc", "license-aliases.json") + if err != nil { + return err + } + err = json.Unmarshal(data, &licenseAliases) + if err != nil { + return err + } + return nil +} + +func ConvertLicenseName(name string) string { + if err := loadLicenseAliases(); err != nil { + return name + } + + v, ok := licenseAliases[name] + if ok { + return v + } + return name +} + +func InitLicenseClassifier() error { + // threshold should be 0.84~0.86 or the test will be failed + classifier = licenseclassifier.NewClassifier(.85) + licenseFiles, err := options.AssetFS().ListFiles("license", true) + if err != nil { + return err + } + + existLicense := make(container.Set[string]) + if len(licenseFiles) > 0 { + for _, licenseFile := range licenseFiles { + licenseName := ConvertLicenseName(licenseFile) + if existLicense.Contains(licenseName) { + continue + } + existLicense.Add(licenseName) + data, err := options.License(licenseFile) + if err != nil { + return err + } + classifier.AddContent("License", licenseFile, licenseName, data) + } + } + return nil +} + +type LicenseUpdaterOptions struct { + RepoID int64 +} + +func repoLicenseUpdater(items ...*LicenseUpdaterOptions) []*LicenseUpdaterOptions { + ctx := graceful.GetManager().ShutdownContext() + + for _, opts := range items { + repo, err := repo_model.GetRepositoryByID(ctx, opts.RepoID) + if err != nil { + log.Error("repoLicenseUpdater [%d] failed: GetRepositoryByID: %v", opts.RepoID, err) + continue + } + if repo.IsEmpty { + continue + } + + gitRepo, err := gitrepo.OpenRepository(ctx, repo) + if err != nil { + log.Error("repoLicenseUpdater [%d] failed: OpenRepository: %v", opts.RepoID, err) + continue + } + defer gitRepo.Close() + + commit, err := gitRepo.GetBranchCommit(repo.DefaultBranch) + if err != nil { + log.Error("repoLicenseUpdater [%d] failed: GetBranchCommit: %v", opts.RepoID, err) + continue + } + if err = UpdateRepoLicenses(ctx, repo, commit); err != nil { + log.Error("repoLicenseUpdater [%d] failed: updateRepoLicenses: %v", opts.RepoID, err) + } + } + return nil +} + +func SyncRepoLicenses(ctx context.Context) error { + log.Trace("Doing: SyncRepoLicenses") + + if err := db.Iterate( + ctx, + nil, + func(ctx context.Context, repo *repo_model.Repository) error { + select { + case <-ctx.Done(): + return db.ErrCancelledf("before sync repo licenses for %s", repo.FullName()) + default: + } + return AddRepoToLicenseUpdaterQueue(&LicenseUpdaterOptions{RepoID: repo.ID}) + }, + ); err != nil { + log.Trace("Error: SyncRepoLicenses: %v", err) + return err + } + + log.Trace("Finished: SyncReposLicenses") + return nil +} + +// UpdateRepoLicenses will update repository licenses col if license file exists +func UpdateRepoLicenses(ctx context.Context, repo *repo_model.Repository, commit *git.Commit) error { + if commit == nil { + return nil + } + + b, err := commit.GetBlobByPath(LicenseFileName) + if err != nil && !git.IsErrNotExist(err) { + return fmt.Errorf("GetBlobByPath: %w", err) + } + + if git.IsErrNotExist(err) { + return repo_model.CleanRepoLicenses(ctx, repo) + } + + licenses := make([]string, 0) + if b != nil { + r, err := b.DataAsync() + if err != nil { + return err + } + defer r.Close() + + licenses, err = detectLicense(r) + if err != nil { + return fmt.Errorf("detectLicense: %w", err) + } + } + return repo_model.UpdateRepoLicenses(ctx, repo, commit.ID.String(), licenses) +} + +// detectLicense returns the licenses detected by the given content buff +func detectLicense(r io.Reader) ([]string, error) { + if r == nil { + return nil, nil + } + + matches, err := classifier.MatchFrom(r) + if err != nil { + return nil, err + } + if len(matches.Matches) > 0 { + results := make(container.Set[string], len(matches.Matches)) + for _, r := range matches.Matches { + if r.MatchType == "License" && !results.Contains(r.Variant) { + results.Add(r.Variant) + } + } + return results.Values(), nil + } + return nil, nil +} diff --git a/services/repository/license_test.go b/services/repository/license_test.go new file mode 100644 index 0000000000..39e9738145 --- /dev/null +++ b/services/repository/license_test.go @@ -0,0 +1,73 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package repository + +import ( + "fmt" + "strings" + "testing" + + repo_module "code.gitea.io/gitea/modules/repository" + + "github.com/stretchr/testify/assert" +) + +func Test_detectLicense(t *testing.T) { + type DetectLicenseTest struct { + name string + arg string + want []string + } + + tests := []DetectLicenseTest{ + { + name: "empty", + arg: "", + want: nil, + }, + { + name: "no detected license", + arg: "Copyright (c) 2023 Gitea", + want: nil, + }, + } + + repo_module.LoadRepoConfig() + err := loadLicenseAliases() + assert.NoError(t, err) + for _, licenseName := range repo_module.Licenses { + license, err := repo_module.GetLicense(licenseName, &repo_module.LicenseValues{ + Owner: "Gitea", + Email: "teabot@gitea.io", + Repo: "gitea", + Year: "2024", + }) + assert.NoError(t, err) + + tests = append(tests, DetectLicenseTest{ + name: fmt.Sprintf("single license test: %s", licenseName), + arg: string(license), + want: []string{ConvertLicenseName(licenseName)}, + }) + } + + err = InitLicenseClassifier() + assert.NoError(t, err) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + license, err := detectLicense(strings.NewReader(tt.arg)) + assert.NoError(t, err) + assert.Equal(t, tt.want, license) + }) + } + + result, err := detectLicense(strings.NewReader(tests[2].arg + tests[3].arg + tests[4].arg)) + assert.NoError(t, err) + t.Run("multiple licenses test", func(t *testing.T) { + assert.Equal(t, 3, len(result)) + assert.Contains(t, result, tests[2].want[0]) + assert.Contains(t, result, tests[3].want[0]) + assert.Contains(t, result, tests[4].want[0]) + }) +} diff --git a/services/repository/migrate.go b/services/repository/migrate.go index 2e901791b4..c627b46fab 100644 --- a/services/repository/migrate.go +++ b/services/repository/migrate.go @@ -172,6 +172,11 @@ func MigrateRepositoryGitData(ctx context.Context, u *user_model.User, return repo, fmt.Errorf("StoreMissingLfsObjectsInRepository: %w", err) } } + + // Update repo license + if err := AddRepoToLicenseUpdaterQueue(&LicenseUpdaterOptions{RepoID: repo.ID}); err != nil { + log.Error("Failed to add repo to license updater queue: %v", err) + } } ctx, committer, err := db.TxContext(ctx) diff --git a/services/repository/repository.go b/services/repository/repository.go index 5306e7d45c..59b4491132 100644 --- a/services/repository/repository.go +++ b/services/repository/repository.go @@ -18,6 +18,7 @@ import ( user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/queue" repo_module "code.gitea.io/gitea/modules/repository" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/structs" @@ -96,6 +97,12 @@ func PushCreateRepo(ctx context.Context, authUser, owner *user_model.User, repoN // Init start repository service func Init(ctx context.Context) error { + licenseUpdaterQueue = queue.CreateUniqueQueue(graceful.GetManager().ShutdownContext(), "repo_license_updater", repoLicenseUpdater) + if licenseUpdaterQueue == nil { + return fmt.Errorf("unable to create repo_license_updater queue") + } + go graceful.GetManager().RunWithCancel(licenseUpdaterQueue) + if err := repo_module.LoadRepoConfig(); err != nil { return err } |