From 1aa3ad45d89e5590fa7b9e7da0ad5a28e2c37e08 Mon Sep 17 00:00:00 2001 From: Julian Zhu Date: Fri, 22 May 2026 22:11:09 +0800 Subject: [PATCH 01/10] chore: ignore local build artifacts Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitignore b/.gitignore index 8cf2792..24248be 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ # Binaries for programs and plugins +go2spec *.exe *.exe~ *.dll @@ -24,6 +25,11 @@ go.work.sum # env file .env +# Local go2spec validation/output artifacts +comparison-runs/ +go-github-sandrolain-httpcache/ +go-github-sandrolain-httpcache_*.orig.tar.gz + # Editor/IDE .idea/ .vscode/ -- 2.52.0 From f41dfb5711774bf7080433047d6274a152622afd Mon Sep 17 00:00:00 2001 From: Julian Zhu Date: Fri, 22 May 2026 22:15:35 +0800 Subject: [PATCH 02/10] fix(spec): pass package path to library subpackage description Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- spec.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec.go b/spec.go index 029fef0..d5145a2 100644 --- a/spec.go +++ b/spec.go @@ -72,7 +72,7 @@ func writeSpec(dir, gopkg, openRuyiSrc, openRuyiLib, openRuyiProgram, version st // Header fmt.Fprintf(f, "Name: %s\n", openRuyiSrc) - // Some times typeLibrary is treat as typeProgram, + // Some times typeLibrary is treat as typeProgram, // So we add an additional Name line, and keep one of those mannually switch pkgType { case typeProgram: @@ -175,7 +175,7 @@ func writeRPMLibrarySubpackage(f *os.File, gopkg, openRuyiLib, openRuyiSrc, long fmt.Fprintf(f, "%%description -n %s\n", openRuyiLib) fmt.Fprintf(f, "%s\n", longdesc) fmt.Fprintf(f, "\n") - fmt.Fprintf(f, "This package provides the Go source files of %s for development.\n") + fmt.Fprintf(f, "This package provides the Go source files of %s for development.\n", gopkg) } // For program subpackage -- 2.52.0 From 437d50ba0f5942ff970dddcf2a1b79a37b341671 Mon Sep 17 00:00:00 2001 From: Julian Zhu Date: Fri, 22 May 2026 22:18:13 +0800 Subject: [PATCH 03/10] feat(hosters): add go.yaml.in short host mapping Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pack.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pack.go b/pack.go index b70aa62..ae0a5c6 100644 --- a/pack.go +++ b/pack.go @@ -478,6 +478,7 @@ func shortHostName(gopkg string, allowUnknownHoster bool) (host string, err erro "go.opentelemetry.io": "opentelemetry", "go.step.sm": "step", "go.uber.org": "uber", + "go.yaml.in": "yaml", "go4.org": "go4", "gocloud.dev": "gocloud", "golang.org": "golang", -- 2.52.0 From 2758d5f404009b315cee2a57b65fe54dd6c20188 Mon Sep 17 00:00:00 2001 From: Julian Zhu Date: Fri, 22 May 2026 22:20:38 +0800 Subject: [PATCH 04/10] refactor(pack): split runtime and test dependencies Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pack.go | 145 ++++++++++++++++++++++++++++++++++++++------------------ spec.go | 16 ++++--- 2 files changed, 108 insertions(+), 53 deletions(-) diff --git a/pack.go b/pack.go index ae0a5c6..48f37d6 100644 --- a/pack.go +++ b/pack.go @@ -1,6 +1,7 @@ package main import ( + "encoding/json" "errors" "flag" "fmt" @@ -11,6 +12,7 @@ import ( "os" "os/exec" "path/filepath" + "sort" "strings" "golang.org/x/net/publicsuffix" @@ -29,19 +31,21 @@ const ( // upstream describes the upstream repo we are about to package. type upstream struct { - rr *vcs.RepoRoot - tarPath string // path to the downloaded or generated orig tarball tempfile - compression string // compression method, either "gz" or "xz" - version string // upstream version number, e.g. 0.0~git20180204.1d24609 - tag string // Latest upstream tag, if any - commitIsh string // commit-ish corresponding to upstream version to be packaged - remote string // git remote, set to short hostname if upstream git history is included - firstMain string // import path of the first main package within repo, if any - vendorDirs []string // all vendor sub directories, relative to the repo directory - repoDeps []string // the repository paths of all dependencies (e.g. github.com/zyedidia/glob) - hasGodeps bool // whether the Godeps/_workspace directory exists - hasRelease bool // whether any release tags exist, for debian/watch - isRelease bool // whether what we end up packaging is a tagged release + rr *vcs.RepoRoot + tarPath string // path to the downloaded or generated orig tarball tempfile + compression string // compression method, either "gz" or "xz" + version string // upstream version number, e.g. 0.0~git20180204.1d24609 + tag string // Latest upstream tag, if any + commitIsh string // commit-ish corresponding to upstream version to be packaged + remote string // git remote, set to short hostname if upstream git history is included + firstMain string // import path of the first main package within repo, if any + vendorDirs []string // all vendor sub directories, relative to the repo directory + repoDeps []string // all non-stdlib imports needed for build or tests + repoRunDeps []string // non-stdlib imports needed by normal builds, excluding test-only imports + repoTestDeps []string // non-stdlib imports needed only by tests + hasGodeps bool // whether the Godeps/_workspace directory exists + hasRelease bool // whether any release tags exist, for debian/watch + isRelease bool // whether what we end up packaging is a tagged release } var errUnsupportedHoster = errors.New("unsupported hoster") @@ -263,10 +267,69 @@ func (u *upstream) findMains(gopath, repo string) error { return nil } +type goListPackage struct { + ImportPath string + Imports []string + TestImports []string + XTestImports []string + Error *goListPackageError +} + +type goListPackageError struct { + Err string +} + +func addGoDependencies(deps map[string]bool, repo string, imports []string) { + for _, p := range imports { + if p == "" { + continue + } + // Strip packages that are included in the repository we are packaging. + if strings.HasPrefix(p, repo+"/") || p == repo { + continue + } + if p == "C" { + // TODO: maybe parse the comments to figure out C deps from pkg-config files? + continue + } + deps[p] = true + } +} + +func sortedDependencySet(deps map[string]bool) []string { + result := make([]string, 0, len(deps)) + for dep := range deps { + result = append(result, dep) + } + sort.Strings(result) + return result +} + +// setRepoDependencies stores three views of imports: repoDeps is the union for +// BuildRequires, repoRunDeps is non-test imports for library Requires, and +// repoTestDeps is imports used only by tests. +func (u *upstream) setRepoDependencies(runtimeDeps, testDeps map[string]bool) { + buildDeps := make(map[string]bool, len(runtimeDeps)+len(testDeps)) + for dep := range runtimeDeps { + buildDeps[dep] = true + } + testOnlyDeps := make(map[string]bool) + for dep := range testDeps { + buildDeps[dep] = true + if !runtimeDeps[dep] { + testOnlyDeps[dep] = true + } + } + + u.repoDeps = sortedDependencySet(buildDeps) + u.repoRunDeps = sortedDependencySet(runtimeDeps) + u.repoTestDeps = sortedDependencySet(testOnlyDeps) +} + func (u *upstream) findDependencies(gopath, repo string) error { log.Printf("Determining dependencies\n") - cmd := exec.Command("go", "list", "-e", "-f", "{{join .Imports \"\\n\"}}\n{{join .TestImports \"\\n\"}}\n{{join .XTestImports \"\\n\"}}", repo+"/...") + cmd := exec.Command("go", "list", "-e", "-json", repo+"/...") cmd.Dir = filepath.Join(gopath, "src", repo) cmd.Env = passthroughEnv() cmd.Stderr = os.Stderr @@ -276,33 +339,37 @@ func (u *upstream) findDependencies(gopath, repo string) error { log.Println("WARNING: In findDependencies:", fmt.Errorf("%q: %w", cmd.Args, err)) // See https://bugs.debian.org/992610 log.Printf("Retrying without appending \"/...\" to repo") - cmd = exec.Command("go", "list", "-e", "-f", "{{join .Imports \"\\n\"}}\n{{join .TestImports \"\\n\"}}\n{{join .XTestImports \"\\n\"}}", repo) + cmd = exec.Command("go", "list", "-e", "-json", repo) cmd.Dir = filepath.Join(gopath, "src", repo) cmd.Env = passthroughEnv() cmd.Stderr = os.Stderr out, err = cmd.Output() if err != nil { - log.Println("WARNING: In findDependencies:", fmt.Errorf("%q: %w", cmd.Args, err)) + return fmt.Errorf("go list dependencies: %q: %w", cmd.Args, err) } } - godependencies := make(map[string]bool) - for _, p := range strings.Split(strings.TrimSpace(string(out)), "\n") { - if p == "" { - continue // skip separators between import types + runtimeDeps := make(map[string]bool) + testDeps := make(map[string]bool) + dec := json.NewDecoder(strings.NewReader(string(out))) + for { + var pkg goListPackage + err := dec.Decode(&pkg) + if errors.Is(err, io.EOF) { + break } - // Strip packages that are included in the repository we are packaging. - if strings.HasPrefix(p, repo+"/") || p == repo { - continue + if err != nil { + return fmt.Errorf("decode go list dependency output: %w", err) } - if p == "C" { - // TODO: maybe parse the comments to figure out C deps from pkg-config files? - } else { - godependencies[p] = true + if pkg.Error != nil && pkg.Error.Err != "" { + log.Printf("WARNING: go list reported package load error for %q: %s\n", pkg.ImportPath, pkg.Error.Err) } + addGoDependencies(runtimeDeps, repo, pkg.Imports) + addGoDependencies(testDeps, repo, pkg.TestImports) + addGoDependencies(testDeps, repo, pkg.XTestImports) } - if len(godependencies) == 0 { + if len(runtimeDeps) == 0 && len(testDeps) == 0 { return nil } @@ -317,7 +384,8 @@ func (u *upstream) findDependencies(gopath, repo string) error { } for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") { - delete(godependencies, line) + delete(runtimeDeps, line) + delete(testDeps, line) } // Resolve all packages to the root of their repository. @@ -336,11 +404,7 @@ func (u *upstream) findDependencies(gopath, repo string) error { // u.repoDeps = append(u.repoDeps, root) //} - // Alternatively, just list all import paths as dependencies. - u.repoDeps = make([]string, 0, len(godependencies)) - for dep := range godependencies { - u.repoDeps = append(u.repoDeps, dep) - } + u.setRepoDependencies(runtimeDeps, testDeps) return nil } @@ -735,19 +799,8 @@ func mainPack(args []string, usage func()) { log.Fatalf("Could not create repository: %v\n", err) } - seen := make(map[string]bool) - pkgdependencies := make([]string, 0, len(u.repoDeps)) - for _, dep := range u.repoDeps { - pkgname := nameFromGopkg(dep, typeLibrary, "", allowUnknownHoster) - if !seen[pkgname] { - seen[pkgname] = true - pkgdependencies = append(pkgdependencies, pkgname) - } - } - // optional: sort.Strings(debdependencies) - if err := writeSpec(dir, gopkg, openRuyiSrc, openRuyiLib, openRuyiProgram, u.version, - pkgType, pkgdependencies, u); err != nil { + pkgType, u); err != nil { log.Fatalf("Could not create spec file: %v\n", err) } diff --git a/spec.go b/spec.go index d5145a2..4e88042 100644 --- a/spec.go +++ b/spec.go @@ -11,7 +11,7 @@ import ( ) func writeSpec(dir, gopkg, openRuyiSrc, openRuyiLib, openRuyiProgram, version string, - pkgType packageType, dependencies []string, u *upstream) error { + pkgType packageType, u *upstream) error { f, err := os.Create(filepath.Join(dir, "", openRuyiSrc+".spec")) if err != nil { @@ -105,26 +105,28 @@ func writeSpec(dir, gopkg, openRuyiSrc, openRuyiLib, openRuyiProgram, version st fmt.Fprintf(f, "BuildRequires: go\n") fmt.Fprintf(f, "BuildRequires: go-rpm-macros\n") // And other BuildRequires from dependencies - rpmDeps := convertDependenciesToRPM(u.repoDeps) - sort.Strings(rpmDeps) - for _, dep := range rpmDeps { + rpmBuildDeps := convertDependenciesToRPM(u.repoDeps) + sort.Strings(rpmBuildDeps) + for _, dep := range rpmBuildDeps { fmt.Fprintf(f, "BuildRequires: %s\n", dep) } + rpmRuntimeDeps := convertDependenciesToRPM(u.repoRunDeps) + sort.Strings(rpmRuntimeDeps) // For different package types, write different sections switch pkgType { case typeLibrary: - writeRPMLibraryPackage(f, gopkg, openRuyiLib, longdescription, rpmDeps) + writeRPMLibraryPackage(f, gopkg, openRuyiLib, longdescription, rpmRuntimeDeps) case typeProgram: log.Printf("Nothing to do for program package.\n") // TODO: what can this be used for? ExclusiveArch %%{go_arches}? // writeRPMProgramPackage(f, gopkg, openRuyiProgram, longdescription) case typeLibraryProgram: - writeRPMLibraryPackage(f, gopkg, openRuyiLib, longdescription, rpmDeps) + writeRPMLibraryPackage(f, gopkg, openRuyiLib, longdescription, rpmRuntimeDeps) writeRPMProgramSubpackage(f, gopkg, openRuyiProgram, openRuyiSrc, description) case typeProgramLibrary: //writeRPMProgramPackage(f, gopkg, openRuyiProgram, longdescription) - writeRPMLibrarySubpackage(f, gopkg, openRuyiLib, openRuyiSrc, longdescription, rpmDeps) + writeRPMLibrarySubpackage(f, gopkg, openRuyiLib, openRuyiSrc, longdescription, rpmRuntimeDeps) default: log.Fatalf("Invalid pkgType %d in writeRPMSpec(), aborting", pkgType) } -- 2.52.0 From dbbf920578d5efde2c2c1a8583dcd3b7bd0f1b44 Mon Sep 17 00:00:00 2001 From: Julian Zhu Date: Fri, 22 May 2026 22:23:02 +0800 Subject: [PATCH 05/10] feat(pack): fall back to git archive when source download fails Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pack.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pack.go b/pack.go index 48f37d6..b3a0f72 100644 --- a/pack.go +++ b/pack.go @@ -208,7 +208,7 @@ func (u *upstream) tar(gopath, repo string) error { } else if err == errUnsupportedHoster { log.Printf("INFO: Hoster does not provide release tarball\n") } else { - return fmt.Errorf("tarball from hoster: %w", err) + log.Printf("WARNING: Could not download release tarball from hoster, falling back to local git archive: %v\n", err) } } } -- 2.52.0 From e28166a77312b2cfd6307bb8b0fd5560f11184b6 Mon Sep 17 00:00:00 2001 From: Julian Zhu Date: Fri, 22 May 2026 22:26:20 +0800 Subject: [PATCH 06/10] feat(pkgsite): add pkg.go.dev v1beta client Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- pkgsite.go | 300 ++++++++++++++++++++++++++++++++++++++++++++++++ pkgsite_test.go | 107 +++++++++++++++++ 2 files changed, 407 insertions(+) create mode 100644 pkgsite.go create mode 100644 pkgsite_test.go diff --git a/pkgsite.go b/pkgsite.go new file mode 100644 index 0000000..1519950 --- /dev/null +++ b/pkgsite.go @@ -0,0 +1,300 @@ +package main + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "sort" + "strings" + "sync" + "time" +) + +const pkgsiteAPIBase = "https://pkg.go.dev/v1beta" +const pkgsiteMaxResponseBytes = 20 << 20 + +var ( + pkgsiteHTTPClient = http.DefaultClient + pkgsiteMu sync.Mutex + pkgsiteInfoCache = make(map[string]*pkgsiteInfo) +) + +type pkgsiteInfo struct { + Package pkgsitePackage + Module pkgsiteModule +} + +type pkgsitePackage struct { + ModulePath string `json:"modulePath"` + Version string `json:"version"` + Path string `json:"path"` + Name string `json:"name"` + Synopsis string `json:"synopsis"` + Imports []string `json:"imports"` + Licenses []pkgsiteLicense `json:"licenses"` + IsRedistributable bool `json:"isRedistributable"` + IsStandardLibrary bool `json:"isStandardLibrary"` + AmbiguousCandidates []pkgsiteCandidate +} + +type pkgsiteModule struct { + Path string `json:"path"` + Version string `json:"version"` + RepoURL string `json:"repoUrl"` + Readme *pkgsiteReadme `json:"readme"` + Licenses []pkgsiteLicense `json:"licenses"` + IsRedistributable bool `json:"isRedistributable"` + IsStandardLibrary bool `json:"isStandardLibrary"` + HasGoMod bool `json:"hasGoMod"` +} + +type pkgsiteReadme struct { + Contents string `json:"contents"` + Filepath string `json:"filepath"` +} + +type pkgsiteLicense struct { + Types []string `json:"types"` + FilePath string `json:"filePath"` + Contents string `json:"contents"` +} + +type pkgsiteCandidate struct { + ModulePath string `json:"modulePath"` + PackagePath string `json:"packagePath"` +} + +type pkgsiteAPIError struct { + Code int `json:"code"` + Message string `json:"message"` + Fixes []string `json:"fixes"` + Candidates []pkgsiteCandidate `json:"candidates"` + Status string `json:"-"` +} + +func (e *pkgsiteAPIError) Error() string { + if e.Message != "" { + return e.Message + } + if e.Status != "" { + return e.Status + } + if e.Code != 0 { + return fmt.Sprintf("pkgsite API error: HTTP %d", e.Code) + } + return "pkgsite API error" +} + +func pkgsiteEscapedPath(importPath string) string { + parts := strings.Split(strings.Trim(importPath, "/"), "/") + for i, part := range parts { + parts[i] = url.PathEscape(part) + } + return strings.Join(parts, "/") +} + +func pkgsiteURL(endpoint, importPath string, values url.Values) string { + u := pkgsiteAPIBase + "/" + endpoint + "/" + pkgsiteEscapedPath(importPath) + if len(values) > 0 { + u += "?" + values.Encode() + } + return u +} + +// pkgsiteGetJSON performs a pkg.go.dev v1beta GET with up to 3 attempts and +// linear backoff for transport errors, HTTP 429, and 5xx responses. +func pkgsiteGetJSON(ctx context.Context, endpoint, importPath string, values url.Values, v any) error { + client := pkgsiteHTTPClient + if client == nil { + client = http.DefaultClient + } + + var lastErr error + for attempt := 1; attempt <= 3; attempt++ { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, pkgsiteURL(endpoint, importPath, values), nil) + if err != nil { + return fmt.Errorf("create pkgsite request: %w", err) + } + req.Header.Set("Accept", "application/json") + + resp, err := client.Do(req) + if err != nil { + lastErr = fmt.Errorf("pkgsite request: %w", err) + if attempt < 3 { + time.Sleep(time.Duration(attempt) * time.Second) + continue + } + return lastErr + } + + body, err := io.ReadAll(io.LimitReader(resp.Body, pkgsiteMaxResponseBytes+1)) + resp.Body.Close() + if err != nil { + return fmt.Errorf("read pkgsite response: %w", err) + } + if len(body) > pkgsiteMaxResponseBytes { + return fmt.Errorf("pkgsite response exceeds %d bytes", pkgsiteMaxResponseBytes) + } + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + var apiErr pkgsiteAPIError + if err := json.Unmarshal(body, &apiErr); err != nil || apiErr.Error() == "pkgsite API error" { + apiErr.Message = strings.TrimSpace(string(body)) + } + apiErr.Code = resp.StatusCode + apiErr.Status = resp.Status + lastErr = &apiErr + if attempt < 3 && (resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode >= 500) { + time.Sleep(time.Duration(attempt) * time.Second) + continue + } + return lastErr + } + + if err := json.Unmarshal(body, v); err != nil { + return fmt.Errorf("decode pkgsite response: %w", err) + } + return nil + } + return lastErr +} + +func getPkgsitePackage(ctx context.Context, gopkg, modulePath string) (pkgsitePackage, error) { + values := url.Values{} + values.Set("imports", "true") + values.Set("licenses", "true") + if modulePath != "" { + values.Set("module", modulePath) + } + + var p pkgsitePackage + err := pkgsiteGetJSON(ctx, "package", gopkg, values, &p) + if err == nil { + return p, nil + } + + var apiErr *pkgsiteAPIError + if modulePath != "" || !isPkgsiteAPIError(err, &apiErr) || len(apiErr.Candidates) == 0 { + return pkgsitePackage{}, err + } + + // Ambiguous import paths return candidate module/package pairs. Retry once + // with the longest module path, which is the most specific match. + best := apiErr.Candidates[0] + for _, candidate := range apiErr.Candidates[1:] { + if len(candidate.ModulePath) > len(best.ModulePath) { + best = candidate + } + } + if best.ModulePath == "" { + return pkgsitePackage{}, err + } + return getPkgsitePackage(ctx, gopkg, best.ModulePath) +} + +func isPkgsiteAPIError(err error, target **pkgsiteAPIError) bool { + return errors.As(err, target) +} + +func getPkgsiteModule(ctx context.Context, modulePath, version string) (pkgsiteModule, error) { + values := url.Values{} + values.Set("licenses", "true") + values.Set("readme", "true") + if version != "" { + values.Set("version", version) + } + + var m pkgsiteModule + if err := pkgsiteGetJSON(ctx, "module", modulePath, values, &m); err != nil { + return pkgsiteModule{}, err + } + return m, nil +} + +func getPkgsiteInfo(ctx context.Context, gopkg string) (*pkgsiteInfo, error) { + pkgsiteMu.Lock() + if info := pkgsiteInfoCache[gopkg]; info != nil { + pkgsiteMu.Unlock() + return info, nil + } + pkgsiteMu.Unlock() + + p, err := getPkgsitePackage(ctx, gopkg, "") + if err != nil { + return nil, fmt.Errorf("get pkgsite package: %w", err) + } + if p.ModulePath == "" { + return nil, fmt.Errorf("pkgsite package %q has empty module path", gopkg) + } + + m, err := getPkgsiteModule(ctx, p.ModulePath, p.Version) + if err != nil { + return nil, fmt.Errorf("get pkgsite module: %w", err) + } + + info := &pkgsiteInfo{Package: p, Module: m} + + pkgsiteMu.Lock() + pkgsiteInfoCache[gopkg] = info + if p.Path == p.ModulePath { + // Only cache module-path lookups when the package data also describes + // the module root. Subpackage metadata would give callers the wrong + // package name for later module-root lookups. + pkgsiteInfoCache[p.ModulePath] = info + } + pkgsiteMu.Unlock() + + return info, nil +} + +func pkgsiteLicenseExpression(licenses []pkgsiteLicense) string { + topLevel := make([]pkgsiteLicense, 0, len(licenses)) + for _, license := range licenses { + if !strings.Contains(strings.Trim(license.FilePath, "/"), "/") { + topLevel = append(topLevel, license) + } + } + if len(topLevel) > 0 { + // Prefer root license files; subdirectory licenses often describe vendored + // or generated code that should not affect the spec License field. + licenses = topLevel + } + + seenGroups := make(map[string]bool) + for _, license := range licenses { + seenTypes := make(map[string]bool) + for _, typ := range license.Types { + typ = strings.TrimSpace(typ) + if typ != "" { + seenTypes[typ] = true + } + } + if len(seenTypes) == 0 { + continue + } + types := make([]string, 0, len(seenTypes)) + for typ := range seenTypes { + types = append(types, typ) + } + sort.Strings(types) + group := types[0] + if len(types) > 1 { + group = "(" + strings.Join(types, " OR ") + ")" + } + seenGroups[group] = true + } + if len(seenGroups) == 0 { + return "TODO" + } + + groups := make([]string, 0, len(seenGroups)) + for group := range seenGroups { + groups = append(groups, group) + } + sort.Strings(groups) + return strings.Join(groups, " AND ") +} diff --git a/pkgsite_test.go b/pkgsite_test.go new file mode 100644 index 0000000..d0022be --- /dev/null +++ b/pkgsite_test.go @@ -0,0 +1,107 @@ +package main + +import ( + "io" + "net/http" + "strings" + "testing" +) + +type roundTripFunc func(*http.Request) (*http.Response, error) + +func (f roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { + return f(req) +} + +func TestPkgsiteLicenseExpression(t *testing.T) { + tests := []struct { + name string + licenses []pkgsiteLicense + want string + }{ + { + name: "prefers top-level licenses", + licenses: []pkgsiteLicense{ + {FilePath: "LICENSE", Types: []string{"MIT"}}, + {FilePath: "internal/LICENSE", Types: []string{"Apache-2.0"}}, + }, + want: "MIT", + }, + { + name: "joins separate license files with AND", + licenses: []pkgsiteLicense{ + {FilePath: "LICENSE", Types: []string{"MIT"}}, + {FilePath: "COPYING", Types: []string{"BSD-3-Clause"}}, + }, + want: "BSD-3-Clause AND MIT", + }, + { + name: "joins multiple matches in one license file with OR", + licenses: []pkgsiteLicense{ + {FilePath: "LICENSE", Types: []string{"MIT", "Apache-2.0"}}, + }, + want: "(Apache-2.0 OR MIT)", + }, + { + name: "uses TODO when pkgsite has no SPDX type", + licenses: []pkgsiteLicense{{FilePath: "LICENSE"}}, + want: "TODO", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := pkgsiteLicenseExpression(tt.licenses); got != tt.want { + t.Fatalf("pkgsiteLicenseExpression() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestGetPkgsitePackageRetriesAmbiguousPathWithLongestModule(t *testing.T) { + oldClient := pkgsiteHTTPClient + defer func() { pkgsiteHTTPClient = oldClient }() + + var requested []string + pkgsiteHTTPClient = &http.Client{ + Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) { + requested = append(requested, req.URL.RawQuery) + if req.URL.Query().Get("module") == "" { + return &http.Response{ + StatusCode: http.StatusBadRequest, + Status: "400 Bad Request", + Body: io.NopCloser(strings.NewReader(`{ + "message":"ambiguous package path", + "candidates":[ + {"modulePath":"example.com/a","packagePath":"example.com/a/b/c"}, + {"modulePath":"example.com/a/b","packagePath":"example.com/a/b/c"} + ] + }`)), + Header: make(http.Header), + }, nil + } + return &http.Response{ + StatusCode: http.StatusOK, + Status: "200 OK", + Body: io.NopCloser(strings.NewReader(`{ + "modulePath":"example.com/a/b", + "version":"v1.2.3", + "path":"example.com/a/b/c", + "name":"c" + }`)), + Header: make(http.Header), + }, nil + }), + } + + p, err := getPkgsitePackage(t.Context(), "example.com/a/b/c", "") + if err != nil { + t.Fatalf("getPkgsitePackage() returned error: %v", err) + } + if p.ModulePath != "example.com/a/b" { + t.Fatalf("ModulePath = %q, want %q", p.ModulePath, "example.com/a/b") + } + if len(requested) != 2 || requested[1] != "imports=true&licenses=true&module=example.com%2Fa%2Fb" { + t.Fatalf("requests = %#v, want retry with longest module path", requested) + } +} -- 2.52.0 From 1f596caf577dff4001e69e3dd1511285ae086e10 Mon Sep 17 00:00:00 2001 From: Julian Zhu Date: Fri, 22 May 2026 22:30:54 +0800 Subject: [PATCH 07/10] refactor: migrate metadata discovery to pkgsite Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- check_version.go | 7 +- description.go | 36 ++--- go.mod | 3 - go.sum | 10 -- main.go | 41 +----- metadata.go | 207 +++++++++++++++------------- pack.go | 345 ++++++++++++++++++++++++++++++++++++---------- pkgsite_test.go | 352 +++++++++++++++++++++++++++++++++++++++++++++++ spec.go | 120 ++++++++++++---- 9 files changed, 852 insertions(+), 269 deletions(-) diff --git a/check_version.go b/check_version.go index cb66361..665c945 100644 --- a/check_version.go +++ b/check_version.go @@ -24,7 +24,6 @@ var ( // uversionPrereleaseRegexp checks for upstream pre-release // so that '-' can be replaced with '~' in pkgVersionFromGit. - // To be kept in sync with the regexp portion of uversionmanglePattern in spec.go uversionPrereleaseRegexp = regexp.MustCompile(`(\d)[_\.\-\+]?(RC|rc|pre|dev|beta|alpha)[.]?(\d*)$`) ) @@ -42,8 +41,10 @@ func pkgVersionFromGit(gitdir string, u *upstream, preferredRev string, forcePre var cmd *exec.Cmd // the temporary shell commands we execute // If the user specifies a valid tag as the preferred revision, that tag should be used without additional heuristics. - if u.rr != nil { - if out, err := u.rr.VCS.Tags(gitdir); err == nil && slices.Contains(out, preferredRev) { + if preferredRev != "" { + cmd = exec.Command("git", "tag", "--list", preferredRev) + cmd.Dir = gitdir + if out, err := cmd.Output(); err == nil && slices.Contains(strings.Fields(string(out)), preferredRev) { latestTag = preferredRev } } diff --git a/description.go b/description.go index b6a9069..31b642d 100644 --- a/description.go +++ b/description.go @@ -13,8 +13,7 @@ import ( //go:embed description.json var descriptionJSONBytes []byte -// reformatForControl reformats the wrapped description -// to conform to Debian’s control format. +// reformatForControl reformats wrapped text for the RPM spec's %description. func reformatForControl(raw string) string { output := "" next_prefix := "" @@ -47,8 +46,8 @@ func reformatForControl(raw string) string { return output } -// markdownToLongDescription converts Markdown to plain text -// and reformat it for expanded description in debian/control. +// markdownToLongDescription converts Markdown to plain text for the RPM spec's +// %description section. func markdownToLongDescription(markdown string) (string, error) { r, _ := glamour.NewTermRenderer( glamour.WithStylesFromJSONBytes(descriptionJSONBytes), @@ -63,23 +62,17 @@ func markdownToLongDescription(markdown string) (string, error) { return reformatForControl(out), nil } -// getDescriptionForGopkg reads from README.md (or equivalent) from GitHub, -// intended for extended description in debian/control. +// getLongDescriptionForGopkg reads README.md (or equivalent) from pkg.go.dev, +// intended for the RPM spec's %description section. func getLongDescriptionForGopkg(gopkg string) (string, error) { - owner, repo, err := findGitHubRepo(gopkg) + info, err := getPkgsiteInfo(context.TODO(), gopkg) if err != nil { - return "", fmt.Errorf("find github repo: %w", err) + return "", fmt.Errorf("get pkgsite metadata: %w", err) } - - rr, _, err := gitHub.Repositories.GetReadme(context.TODO(), owner, repo, nil) - if err != nil { - return "", fmt.Errorf("get readme: %w", err) - } - - content, err := rr.GetContent() - if err != nil { - return "", fmt.Errorf("get content: %w", err) + if info.Module.Readme == nil || strings.TrimSpace(info.Module.Readme.Contents) == "" { + return "", fmt.Errorf("pkgsite module %q has no README", info.Module.Path) } + content := info.Module.Readme.Contents // Supported filename suffixes are from // https://github.com/github/markup/blob/master/README.md @@ -88,10 +81,11 @@ func getLongDescriptionForGopkg(gopkg string) (string, error) { // fairly involved, but it’d be the most correct solution to the problem at // hand. Our current code just knows markdown, which is good enough since // most (Go?) projects in fact use markdown for their README files. - if !strings.HasSuffix(rr.GetName(), "md") && - !strings.HasSuffix(rr.GetName(), "markdown") && - !strings.HasSuffix(rr.GetName(), "mdown") && - !strings.HasSuffix(rr.GetName(), "mkdn") { + readmeName := strings.ToLower(info.Module.Readme.Filepath) + if !strings.HasSuffix(readmeName, "md") && + !strings.HasSuffix(readmeName, "markdown") && + !strings.HasSuffix(readmeName, "mdown") && + !strings.HasSuffix(readmeName, "mkdn") { return reformatForControl(content), nil } diff --git a/go.mod b/go.mod index 6297e56..5fbaabb 100644 --- a/go.mod +++ b/go.mod @@ -4,11 +4,9 @@ go 1.25.3 require ( github.com/charmbracelet/glamour v0.10.0 - github.com/google/go-github/v60 v60.0.0 github.com/mattn/go-isatty v0.0.20 github.com/sandrolain/httpcache v1.4.0 golang.org/x/net v0.47.0 - golang.org/x/tools/go/vcs v0.1.0-deprecated ) require ( @@ -22,7 +20,6 @@ require ( github.com/charmbracelet/x/exp/slice v0.0.0-20250327172914-2fdc97757edf // indirect github.com/charmbracelet/x/term v0.2.1 // indirect github.com/dlclark/regexp2 v1.11.0 // indirect - github.com/google/go-querystring v1.1.0 // indirect github.com/gorilla/css v1.0.1 // indirect github.com/lucasb-eyer/go-colorful v1.2.0 // indirect github.com/mattn/go-runewidth v0.0.16 // indirect diff --git a/go.sum b/go.sum index c517765..67d4107 100644 --- a/go.sum +++ b/go.sum @@ -28,13 +28,6 @@ github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQ github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg= github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI= github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= -github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-github/v60 v60.0.0 h1:oLG98PsLauFvvu4D/YPxq374jhSxFYdzQGNCyONLfn8= -github.com/google/go-github/v60 v60.0.0/go.mod h1:ByhX2dP9XT9o/ll2yXAu2VD8l5eNVg8hD4Cr0S/LmQk= -github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= -github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17icRSOU623lUBU= github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8= github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= @@ -76,6 +69,3 @@ golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= -golang.org/x/tools/go/vcs v0.1.0-deprecated h1:cOIJqWBl99H1dH5LWizPa+0ImeeJq3t3cJjaeOWUAL4= -golang.org/x/tools/go/vcs v0.1.0-deprecated/go.mod h1:zUrvATBAvEI9535oC0yWYsLsHIV4Z7g63sNPVMtuBy8= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/main.go b/main.go index 7aad5ed..18b334e 100644 --- a/main.go +++ b/main.go @@ -3,26 +3,11 @@ package main import ( "net/http" "os" + "time" - "github.com/google/go-github/v60/github" "github.com/sandrolain/httpcache" ) -var ( - gitHub *github.Client -) - -// TokenTransport implements http.RoundTripper for Bearer token authentication -type TokenTransport struct { - Token string - Transport http.RoundTripper -} - -func (t *TokenTransport) RoundTrip(req *http.Request) (*http.Response, error) { - req.Header.Add("Authorization", "Bearer "+t.Token) - return t.Transport.RoundTrip(req) -} - func printHelp() { helpText := `go2spec - A tool to package Go modules into RPM spec files. @@ -41,28 +26,10 @@ If there are no commands provided, the tool will default to executing the 'pack' } func main() { - token := os.Getenv("GITHUB_TOKEN") - - var client *http.Client - if token != "" { - // Use token authentication for better rate limits - client = &http.Client{ - Transport: &TokenTransport{ - Token: token, - Transport: httpcache.NewMemoryCacheTransport(), - }, - } - } else { - // Fallback to basic auth if token is not provided - transport := github.BasicAuthTransport{ - Username: os.Getenv("GITHUB_USERNAME"), - Password: os.Getenv("GITHUB_PASSWORD"), - OTP: os.Getenv("GITHUB_OTP"), - Transport: httpcache.NewMemoryCacheTransport(), - } - client = transport.Client() + pkgsiteHTTPClient = &http.Client{ + Timeout: 30 * time.Second, + Transport: httpcache.NewMemoryCacheTransport(), } - gitHub = github.NewClient(client) args := os.Args[1:] diff --git a/metadata.go b/metadata.go index 80ca8aa..47420b3 100644 --- a/metadata.go +++ b/metadata.go @@ -3,128 +3,139 @@ package main import ( "context" "fmt" - "net/http" + "html" + "path" "regexp" "strings" - - "golang.org/x/net/html" ) -// To update, use: -// curl -s https://api.github.com/licenses | jq '.[].key' -var githubLicenseToSPDXLicense = map[string]string{ - //"agpl-3.0" - "apache-2.0": "Apache-2.0", - "artistic-2.0": "Artistic-2.0", - "bsd-2-clause": "BSD-2-Clause", - "bsd-3-clause": "BSD-3-Clause", - "cc0-1.0": "CC0-1.0", - //"epl-1.0" (eclipse public license) - "gpl-2.0": "GPL-2.0-only", - "gpl-3.0": "GPL-3.0-only", - "isc": "ISC", - "lgpl-2.1": "LGPL-2.1-only", - "lgpl-3.0": "LGPL-3.0-only", - "mit": "MIT", - "mpl-2.0": "MPL-2.0", - //"unlicense" +func getRepoURLForGopkg(gopkg string) (string, error) { + info, err := getPkgsiteInfo(context.TODO(), gopkg) + if err != nil { + return "", err + } + repoURL := strings.TrimSpace(info.Module.RepoURL) + if repoURL == "" { + return "", fmt.Errorf("pkgsite module %q has no repository URL", info.Module.Path) + } + return gitCloneURLFromRepoURL(repoURL), nil } -var githubRegexp = regexp.MustCompile(`github\.com/([^/]+/[^/]+)`) +var ( + htmlTagRegexp = regexp.MustCompile(`<[^>]*>`) + markdownImageRegex = regexp.MustCompile(`!\[[^\]]*\]\([^)]+\)`) + markdownLinkRegex = regexp.MustCompile(`\[([^\]]+)\]\([^)]+\)`) + packagePrefixRegex = regexp.MustCompile(`^Package\s+\S+\s+`) +) -func findGitHubOwnerRepo(gopkg string) (string, error) { - if strings.HasPrefix(gopkg, "github.com/") { - return strings.TrimPrefix(gopkg, "github.com/"), nil +// cleanSummaryCandidate turns a godoc synopsis or README line into an +// RPM-style Summary by stripping markup, keeping the first sentence, dropping +// the leading "Package foo" convention, and capitalizing the result. +func cleanSummaryCandidate(summary string) string { + summary = html.UnescapeString(strings.TrimSpace(summary)) + summary = markdownImageRegex.ReplaceAllString(summary, "") + summary = markdownLinkRegex.ReplaceAllString(summary, "$1") + summary = htmlTagRegexp.ReplaceAllString(summary, " ") + summary = strings.ReplaceAll(summary, "`", "") + summary = strings.Join(strings.Fields(summary), " ") + summary = strings.Trim(summary, " \t\n\r#*-_") + if end := strings.Index(summary, ". "); end >= 0 { + summary = summary[:end] } - resp, err := http.Get("https://" + gopkg + "?go-get=1") - if err != nil { - return "", fmt.Errorf("HTTP get: %w", err) + summary = packagePrefixRegex.ReplaceAllString(summary, "") + if summary != "" && summary[0] >= 'a' && summary[0] <= 'z' { + summary = string(summary[0]-('a'-'A')) + summary[1:] } - defer resp.Body.Close() - z := html.NewTokenizer(resp.Body) - for { - tt := z.Next() - if tt == html.ErrorToken { - return "", fmt.Errorf("%q is not on GitHub", gopkg) - } - token := z.Token() - if token.Data != "meta" { + if summary == "" || + strings.HasPrefix(summary, "[!") || + strings.HasPrefix(summary, "![") || + strings.HasPrefix(summary, "