diff --git a/assets/urls.yaml b/assets/urls.yaml new file mode 100644 index 00000000..c520a03f --- /dev/null +++ b/assets/urls.yaml @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +Apache-2.0: + - apache.org/licenses/license-2.0 + - apache.org/licenses/license-2.0.txt + - apache.org/license/license-2.0 + - apache.org/license/license-2.0.txt + - www.apache.org/licenses/license-2.0.txt + - opensource.org/licenses/apache-2.0 + - www.apache.org/license/LICENSE-2.0.txt + +MIT: + - bouncycastle.org/licence + - opensource.org/licenses/mit-license.php + - opensource.org/licenses/mit + - www.bouncycastle.org/licence.html + - www.opensource.org/licenses/mit-license.php + +LGPL-2.1: + - gnu.org/licenses/old-licenses/lgpl-2.1 + - www.gnu.org/licenses/old-licenses/lgpl-2.1.html + - www.gnu.org/licenses/lgpl-2.1.html + +GPL-2.0-with-classpath-exception: + - gnu.org/software/classpath/license + - www.gnu.org/software/classpath/license.html + +EPL-1.0: + - eclipse.org/legal/epl-v10 + - www.eclipse.org/legal/epl-v10.html + +BSD-2-Clause: + - opensource.org/licenses/BSD-2-Clause + +BSD-3-Clause: + - opensource.org/licenses/BSD-3-Clause + +CC0-1.0: + - creativecommons.org/publicdomain/zero/1.0/ + +MPL-1.1: + - 
www.mozilla.org/MPL/MPL-1.1.html diff --git a/pkg/deps/golang.go b/pkg/deps/golang.go index 7483005f..842be063 100644 --- a/pkg/deps/golang.go +++ b/pkg/deps/golang.go @@ -27,6 +27,7 @@ import ( "os/exec" "path/filepath" "regexp" + "strings" "github.com/apache/skywalking-eyes/internal/logger" "github.com/apache/skywalking-eyes/pkg/license" @@ -86,13 +87,15 @@ func (resolver *GoModResolver) ResolvePackages(modules []*packages.Module, confi for _, module := range modules { func() { for _, l := range config.Licenses { - if l.Name == module.Path && l.Version == module.Version { - report.Resolve(&Result{ - Dependency: module.Path, - LicenseSpdxID: l.License, - Version: module.Version, - }) - return + for _, version := range strings.Split(l.Version, ",") { + if l.Name == module.Path && version == module.Version { + report.Resolve(&Result{ + Dependency: module.Path, + LicenseSpdxID: l.License, + Version: module.Version, + }) + return + } } } err := resolver.ResolvePackageLicense(config, module, report) diff --git a/pkg/deps/jar.go b/pkg/deps/jar.go index 3c56188f..2e816ad5 100644 --- a/pkg/deps/jar.go +++ b/pkg/deps/jar.go @@ -29,31 +29,40 @@ import ( "github.com/apache/skywalking-eyes/internal/logger" "github.com/apache/skywalking-eyes/pkg/license" + + "github.com/bmatcuk/doublestar/v2" ) type JarResolver struct{} -func (resolver *JarResolver) CanResolve(jarFile string) bool { - return filepath.Ext(jarFile) == ".jar" +func (resolver *JarResolver) CanResolve(jarFiles string) bool { + return filepath.Ext(jarFiles) == ".jar" } -func (resolver *JarResolver) Resolve(jarFile string, config *ConfigDeps, report *Report) error { - state := NotFound - if err := resolver.ResolveJar(config, &state, jarFile, Unknown, report); err != nil { - dep := filepath.Base(jarFile) - logger.Log.Warnf("Failed to resolve the license of <%s>: %v\n", dep, state.String()) - report.Skip(&Result{ - Dependency: dep, - LicenseSpdxID: Unknown, - }) +func (resolver *JarResolver) Resolve(jarFiles string, 
config *ConfigDeps, report *Report) error { + fs, err := doublestar.Glob(jarFiles) + if err != nil { + return err + } + for _, jarFile := range fs { + state := NotFound + if err := resolver.ResolveJar(config, &state, jarFile, Unknown, report); err != nil { + dep := filepath.Base(jarFile) + logger.Log.Warnf("Failed to resolve the license of <%s>: %v\n", dep, state.String()) + report.Skip(&Result{ + Dependency: dep, + LicenseSpdxID: Unknown, + }) + } } - return nil } func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State, jarFile, version string, report *Report) error { dep := filepath.Base(jarFile) + logger.Log.Debugf("Resolving the license of <%s> from jar\n", dep) + compressedJar, err := zip.OpenReader(jarFile) if err != nil { return err @@ -66,10 +75,7 @@ func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State, jarFil for _, compressedFile := range compressedJar.File { archiveFile := compressedFile.Name switch { - case reHaveManifestFile.MatchString(archiveFile): - manifestFile = compressedFile - - case possibleLicenseFileName.MatchString(archiveFile): + case reMaybeLicense.MatchString(archiveFile): *state |= FoundLicenseInJarLicenseFile buf, err := resolver.ReadFileFromZip(compressedFile) if err != nil { @@ -77,6 +83,8 @@ func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State, jarFil } return resolver.IdentifyLicense(config, jarFile, dep, buf.String(), version, report) + case reHaveManifestFile.MatchString(archiveFile): + manifestFile = compressedFile } } @@ -90,11 +98,15 @@ func (resolver *JarResolver) ResolveJar(config *ConfigDeps, state *State, jarFil r := reSearchLicenseInManifestFile.FindStringSubmatch(content) if len(r) != 0 { + lcs := strings.TrimSpace(r[1]) + if l, err := license.Identify(lcs, config.Threshold); err == nil { + lcs = l + } report.Resolve(&Result{ Dependency: dep, LicenseFilePath: jarFile, LicenseContent: strings.TrimSpace(r[1]), - LicenseSpdxID: strings.TrimSpace(r[1]), + LicenseSpdxID: 
lcs, Version: version, }) return nil @@ -123,16 +135,21 @@ func (resolver *JarResolver) ReadFileFromZip(archiveFile *zip.File) (*bytes.Buff } func (resolver *JarResolver) IdentifyLicense(config *ConfigDeps, path, dep, content, version string, report *Report) error { - identifier, err := license.Identify(content, config.Threshold) - if err != nil { - return err + contents := strings.Split(content, "[, \\s]+") + identifiers := make([]string, 0, len(contents)) + for _, c := range contents { + identifier, err := license.Identify(c, config.Threshold) + if err != nil { + return err + } + identifiers = append(identifiers, identifier) } report.Resolve(&Result{ Dependency: dep, LicenseFilePath: path, LicenseContent: content, - LicenseSpdxID: identifier, + LicenseSpdxID: strings.Join(identifiers, " and "), Version: version, }) return nil diff --git a/pkg/deps/maven.go b/pkg/deps/maven.go index 23c6324b..d76ce365 100644 --- a/pkg/deps/maven.go +++ b/pkg/deps/maven.go @@ -32,6 +32,7 @@ import ( "golang.org/x/net/html/charset" "github.com/apache/skywalking-eyes/internal/logger" + "github.com/apache/skywalking-eyes/pkg/license" ) type MavenPomResolver struct { @@ -131,7 +132,7 @@ func (resolver *MavenPomResolver) LoadDependencies() ([]*Dependency, error) { cmd.Stdout = bufio.NewWriter(buf) cmd.Stderr = os.Stderr - logger.Log.Debugf("Run command: 「%v」, please wait", cmd.String()) + logger.Log.Debugf("Running command: [%v], please wait", cmd.String()) err := cmd.Run() if err != nil { return nil, err @@ -146,13 +147,15 @@ func (resolver *MavenPomResolver) ResolveDependencies(deps []*Dependency, config for _, dep := range deps { func() { for _, l := range config.Licenses { - if l.Name == fmt.Sprintf("%s:%s", dep.GroupID, dep.ArtifactID) && l.Version == dep.Version { - report.Resolve(&Result{ - Dependency: dep.Jar(), - LicenseSpdxID: l.License, - Version: dep.Version, - }) - return + for _, version := range strings.Split(l.Version, ",") { + if l.Name == fmt.Sprintf("%s:%s", 
strings.Join(dep.GroupID, "."), dep.ArtifactID) && version == dep.Version { + report.Resolve(&Result{ + Dependency: dep.Jar(), + LicenseSpdxID: l.License, + Version: dep.Version, + }) + return + } } } state := NotFound @@ -187,12 +190,14 @@ func (resolver *MavenPomResolver) ResolveLicenseFromPom(config *ConfigDeps, stat pom, err := resolver.ReadLicensesFromPom(pomFile) if err != nil { return err - } else if pom != nil && len(pom.Licenses) != 0 { + } + + if pom != nil && len(pom.Licenses) != 0 { report.Resolve(&Result{ Dependency: dep.Jar(), LicenseFilePath: pomFile, LicenseContent: pom.Raw(), - LicenseSpdxID: pom.AllLicenses(), + LicenseSpdxID: pom.AllLicenses(config), Version: dep.Version, }) @@ -266,7 +271,7 @@ loop: } var ( - reMaybeLicense = regexp.MustCompile(`(?i)licen[sc]e|copyright|copying`) + reMaybeLicense = regexp.MustCompile(`(?i)licen[sc]e|copyright|copying$`) reHaveManifestFile = regexp.MustCompile(`(?i)^(\S*/)?manifest\.MF$`) reSearchLicenseInManifestFile = regexp.MustCompile(`(?im)^.*?licen[cs]e.*?(http.+)`) ) @@ -310,18 +315,18 @@ func LoadDependenciesTree(data []byte) []*Dependency { stack := []Elem{} unique := make(map[string]struct{}) - reFind := regexp.MustCompile(`(?im)^.*? ([| ]*)(\+-|\\-) (\b.+):(\b.+):(\b.+):(\b.+):(\b.+)$`) + reFind := regexp.MustCompile(`(?im)^.*? 
([| ]*)(\+-|\\-) (?P<gid>\b.+?):(?P<aid>\b.+?):(?P<packaging>\b.+)(:\b.+)?:(?P<version>\b.+):(?P<scope>\b.+?)$`) //nolint:lll // can't break down regex rawDeps := reFind.FindAllSubmatch(data, -1) deps := make([]*Dependency, 0, len(rawDeps)) for _, rawDep := range rawDeps { - gid := strings.Split(string(rawDep[3]), ".") + gid := strings.Split(string(rawDep[reFind.SubexpIndex("gid")]), ".") dep := &Dependency{ GroupID: gid, - ArtifactID: string(rawDep[4]), - Packaging: string(rawDep[5]), - Version: string(rawDep[6]), - Scope: string(rawDep[7]), + ArtifactID: string(rawDep[reFind.SubexpIndex("aid")]), + Packaging: string(rawDep[reFind.SubexpIndex("packaging")]), + Version: string(rawDep[reFind.SubexpIndex("version")]), + Scope: string(rawDep[reFind.SubexpIndex("scope")]), } if _, have := unique[dep.Path()]; have { @@ -391,7 +396,7 @@ func (s *State) String() string { m = append(m, "failed to resolve license content from manifest file found in jar") } - return strings.Join(m, "|") + return strings.Join(m, " | ") } type Dependency struct { @@ -457,12 +462,12 @@ type PomFile struct { } // AllLicenses return all licenses found in pom.xml file -func (pom *PomFile) AllLicenses() string { +func (pom *PomFile) AllLicenses(config *ConfigDeps) string { licenses := []string{} for _, l := range pom.Licenses { - licenses = append(licenses, l.Item()) + licenses = append(licenses, l.Item(config)) } - return strings.Join(licenses, ", ") + return strings.Join(licenses, " and ") } // Raw return raw data @@ -481,14 +486,23 @@ type XMLLicense struct { Comments string `xml:"comments,omitempty"` } -func (l *XMLLicense) Item() string { - return GetLicenseFromURL(l.URL) +func (l *XMLLicense) Item(config *ConfigDeps) string { + if l.URL != "" { + return GetLicenseFromURL(l.URL, config) + } + if l.Name != "" { + return l.Name + } + return l.URL } func (l *XMLLicense) Raw() string { return fmt.Sprintf(`License: {Name: %s, URL: %s, Distribution: %s, Comments: %s, }`, l.Name, l.URL, l.Distribution, l.Comments) } -func
GetLicenseFromURL(url string) string { +func GetLicenseFromURL(url string, config *ConfigDeps) string { + if l, err := license.Identify(url, config.Threshold); err == nil { + return l + } return url } diff --git a/pkg/deps/npm.go b/pkg/deps/npm.go index 3fc1a0c6..4cdb28fa 100644 --- a/pkg/deps/npm.go +++ b/pkg/deps/npm.go @@ -212,9 +212,11 @@ func (resolver *NpmResolver) ResolvePkgFile(result *Result, pkgPath string, lice result.Version = packageInfo.Version for _, l := range licenses { - if l.Name == packageInfo.Name && l.Version == packageInfo.Version { - result.LicenseSpdxID = l.License - return nil + for _, version := range strings.Split(l.Version, ",") { + if l.Name == packageInfo.Name && version == packageInfo.Version { + result.LicenseSpdxID = l.License + return nil + } } } @@ -286,9 +288,11 @@ func (resolver *NpmResolver) ResolveLcsFile(result *Result, pkgPath string, conf return nil } for _, l := range config.Licenses { - if l.Name == info.Name() && l.Version == result.Version { - result.LicenseSpdxID = l.License - return nil + for _, version := range strings.Split(l.Version, ",") { + if l.Name == info.Name() && version == result.Version { + result.LicenseSpdxID = l.License + return nil + } } } identifier, err := license.Identify(string(content), config.Threshold) diff --git a/pkg/deps/resolve.go b/pkg/deps/resolve.go index b48eb906..8769d798 100644 --- a/pkg/deps/resolve.go +++ b/pkg/deps/resolve.go @@ -30,6 +30,7 @@ var Resolvers = []Resolver{ new(GoModResolver), new(NpmResolver), new(MavenPomResolver), + new(JarResolver), } func Resolve(config *ConfigDeps, report *Report) error { diff --git a/pkg/license/identifier.go b/pkg/license/identifier.go index 5c42cf31..d12376b4 100644 --- a/pkg/license/identifier.go +++ b/pkg/license/identifier.go @@ -24,6 +24,7 @@ import ( "sync" "github.com/google/licensecheck" + "gopkg.in/yaml.v3" "github.com/apache/skywalking-eyes/assets" "github.com/apache/skywalking-eyes/internal/logger" @@ -40,8 +41,24 @@ var ( // It 
will be initialized once. func scanner() *licensecheck.Scanner { scannerOnce.Do(func() { + licenses := licensecheck.BuiltinLicenses() + if bs, err := assets.Asset("urls.yaml"); err == nil { + licenseURLs := make(map[string][]string) + if err := yaml.Unmarshal(bs, &licenseURLs); err == nil { + logger.Log.Debug("license URLs:", licenseURLs) + for id, urls := range licenseURLs { + for _, url := range urls { + licenses = append(licenses, licensecheck.License{ + ID: id, + URL: strings.ToLower(url), + Type: licensecheck.Unknown, + }) + } + } + } + } var err error - _scanner, err = licensecheck.NewScanner(licensecheck.BuiltinLicenses()) + _scanner, err = licensecheck.NewScanner(licenses) if err != nil { logger.Log.Fatalf("licensecheck.NewScanner: %v", err) } @@ -57,10 +74,17 @@ func Identify(content string, threshold int) (string, error) { return "", fmt.Errorf("cannot identify the license, coverage: %.1f%%", coverage.Percent) } + seen := make(map[string]bool) + var sb strings.Builder sb.WriteString(coverage.Match[0].ID) + seen[coverage.Match[0].ID] = true for i := 1; i < len(coverage.Match); i++ { + if seen[coverage.Match[i].ID] { + continue + } + seen[coverage.Match[i].ID] = true sb.WriteString(" and ") sb.WriteString(coverage.Match[i].ID) } diff --git a/pkg/license/identifier_test.go b/pkg/license/identifier_test.go index ae5b6661..7ac706f0 100644 --- a/pkg/license/identifier_test.go +++ b/pkg/license/identifier_test.go @@ -323,6 +323,36 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
`, want: "BSD-3-Clause", }, + { + name: "URL http://www.apache.org/licenses/LICENSE-2.0", + content: "http://www.apache.org/licenses/LICENSE-2.0", + want: "Apache-2.0", + }, + { + name: "URL https://opensource.org/licenses/Apache-2.0", + content: "https://opensource.org/licenses/Apache-2.0", + want: "Apache-2.0", + }, + { + name: "URL http://www.apache.org/licenses/LICENSE-2.0.txt", + content: "http://www.apache.org/licenses/LICENSE-2.0.txt", + want: "Apache-2.0", + }, + { + name: "URL https://www.bouncycastle.org/licence.html", + content: "https://www.bouncycastle.org/licence.html", + want: "MIT", + }, + { + name: "URL https://www.bouncycastle.org/licence.html", + content: "http://www.gnu.org/software/classpath/license.html, http://opensource.org/licenses/MIT", + want: "GPL-2.0-with-classpath-exception and MIT", + }, + { + name: "URL", + content: "http://www.mozilla.org/MPL/MPL-1.1.html, http://www.gnu.org/licenses/lgpl-2.1.html", + want: "MPL-1.1 and LGPL-2.1", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) {