Compare commits

...

3 Commits

Author SHA1 Message Date
Ludovic Courtès
e43958af27 DRAFT substitute: Fetch digests and restore store items from digests.
DRAFT: Tests missing, compression support missing.

* guix/scripts/substitute.scm (digest-cache-file, cache-digest!)
(digest-request, lookup-digest): New procedures.
(fetch-narinfos)[%not-slash]: New variable.
[handle-digest-response, handle-response]: New procedures.
[do-fetch]: Append digest requests to narinfo requests.  Pass
'handle-response' to 'http-multiple-get' instead of
'handle-narinfo-response'.
(process-substitution): Rename to...
(process-substitution/nar): ... this.  Make 'narinfo' a parameter.
(http-fetch-files, nar-hash)
(process-substitution, process-substitution/digest): New procedures.
(guix-substitute): Pass #:delete-entry to 'maybe-remove-expired-cache-entries'.
* guix/digests.scm (sexp->digest): New procedure.
2021-01-03 21:49:46 +01:00
Ludovic Courtès
f44a1e0b52 DRAFT publish: Handle /digest and /content URLs.
DRAFT: Missing tests, missing compression for /content, missing
'--cache' support for /content and /digest.

* guix/digests.scm (digest->sexp): New procedure.
* guix/scripts/publish.scm (render-digest)
(render-content-addressed-data): New procedures.
(make-request-handler): Handle /content and /digest.
2021-01-03 21:49:27 +01:00
Ludovic Courtès
a6c1dbff13 DRAFT Add (guix digests).
DRAFT: Missing tests.

* guix/digests.scm: New file.
* Makefile.am (MODULES): Add it.
* guix/serialization.scm (filter/sort-directory-entries): Export.
2021-01-03 21:44:58 +01:00
5 changed files with 489 additions and 13 deletions

View File

@@ -103,6 +103,7 @@ MODULES = \
guix/profiles.scm \
guix/serialization.scm \
guix/nar.scm \
guix/digests.scm \
guix/derivations.scm \
guix/grafts.scm \
guix/repl.scm \

257
guix/digests.scm Normal file
View File

@@ -0,0 +1,257 @@
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2020, 2021 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
(define-module (guix digests)
#:use-module (gcrypt hash)
#:use-module (guix base32)
#:use-module ((guix store) #:select (%store-prefix))
#:use-module (guix serialization)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-9)
#:use-module (srfi srfi-26)
#:use-module (srfi srfi-71)
#:use-module (ice-9 ftw)
#:use-module (ice-9 match)
#:export (digest?
digest-type
digest-size
digest-content
digest-entry?
digest-entry-name
digest-entry-value
store-deduplication-link
file-tree-digest
file-digest
restore-digest
digest->sexp
sexp->digest))
;;; Commentary:
;;;
;;; This module implements "digests", which can be thought of as
;;; content-addressed archives. A digest describes a directory (recursively),
;;; symlink, or regular file; in lieu of actual file contents, it contains the
;;; hash of those contents.
;;;
;;; Code:
;; Digest of a file.
(define-record-type <digest>
(digest type size content)
digest?
(type digest-type) ;'regular | 'executable | ...
(size digest-size) ;integer
(content digest-content)) ;hash | symlink target | entries
;; Directory entry for a digest with type = 'directory.
(define-record-type <digest-entry>
(digest-entry name value)
digest-entry?
(name digest-entry-name)
(value digest-entry-value))
(define* (file-tree-digest file
#:key
file-type+size
file-port
symlink-target
directory-entries
(postprocess-entries
filter/sort-directory-entries)
(hash-algorithm (hash-algorithm sha256)))
"Return a digest of FILE. The calling convention is the same as for
'write-file-tree'."
(let dump ((file file))
(define-values (type size)
(file-type+size file))
(define (nar-hash)
(let ((port get-hash (open-hash-port hash-algorithm)))
(write-file-tree file port
#:file-type+size (lambda _ (values type size))
#:file-port file-port)
(force-output port)
(let ((hash (get-hash)))
(close-port port)
hash)))
(match type
((or 'regular 'executable)
(digest type size
(list (hash-algorithm-name hash-algorithm) (nar-hash))))
('symlink
(digest 'symlink 0 (symlink-target file)))
('directory
(let ((entries (postprocess-entries (directory-entries file))))
(digest 'directory 0
(map (lambda (entry)
(digest-entry entry
(dump (string-append file "/" entry))))
entries)))))))
(define* (file-digest file
#:key (select? (const #t)))
"Return a digest for FILE, recursing into it and its sub-directories and
discarding files that do not pass SELECT?."
(file-tree-digest file
;; FIXME: deduplicate arguments
#:file-type+size
(lambda (file)
(let* ((stat (lstat file))
(size (stat:size stat)))
(case (stat:type stat)
((directory)
(values 'directory size))
((regular)
(values (if (zero? (logand (stat:mode stat)
#o100))
'regular
'executable)
size))
(else
(values (stat:type stat) size)))))
#:file-port (cut open-file <> "r0b")
#:symlink-target readlink
#:directory-entries
(lambda (directory)
;; 'scandir' defaults to 'string-locale<?' to sort files,
;; but this happens to be case-insensitive (at least in
;; 'en_US' locale on libc 2.18.) Conversely, we want
;; files to be sorted in a case-sensitive fashion.
(define basenames
(scandir directory (negate (cut member <> '("." "..")))
string<?))
(filter-map (lambda (base)
(let ((file (string-append directory
"/" base)))
(and (select? file (lstat file))
base)))
basenames))))
(define (store-deduplication-link hash)
"Return the file name in the content-addressed store for HASH, a nar hash."
(string-append (%store-prefix) "/.links/"
(bytevector->nix-base32-string hash)))
(define (copy-file-from-store digest target)
"Attempt to copy DIGEST from the content-addressed store into TARGET.
Return #t on success, and #f if DIGEST could not be found."
(match (digest-content digest)
(('sha256 hash)
(let* ((address (store-deduplication-link hash))
(perms (match (digest-type digest)
('executable #o555)
('regular #O444)))
(stat (stat address #f)))
(and stat (= (stat:perms stat) perms)
(= (stat:size stat) (digest-size digest))
(begin
(catch 'system-error
(lambda ()
(link address target))
(lambda args
(if (= EXDEV (system-error-errno args))
(begin
(copy-file address target)
(chmod target perms)
(utime target 1 1 0 0)
#t))))))))
(_
#f)))
(define* (restore-digest digest target
#:key
(copy-file copy-file-from-store))
"Restore DIGEST into directory TARGET. Copy files from the local
content-addressed store using COPY-FILE. Return the list of target
directory/digest pairs for all the digests for which 'copy-file' returned
false."
(let loop ((target target)
(digest digest)
(missing '()))
(match digest
(($ <digest> 'directory _ (entries ...))
(mkdir target)
(let ((missing* (fold (lambda (entry missing)
(match entry
(($ <digest-entry> name value)
(loop (string-append target "/" name)
value missing))))
missing
entries)))
;; If there are were missing files among ENTRIES, leave TARGET
;; untouched so that the caller can eventually create files
;; therein.
(unless (eq? missing missing*)
(chmod target #o555)
(utime target 1 1 0 0))
missing*))
(($ <digest> (or 'regular 'executable))
(if (copy-file digest target)
missing
(cons (cons target digest) missing)))
(($ <digest> 'symlink _ source)
(symlink source target)
(utime target 1 1 0 0 AT_SYMLINK_NOFOLLOW)
missing))))
(define (digest->sexp digest)
"Return an sexp serialization of DIGEST."
(define (->sexp digest)
(match digest
(($ <digest> 'directory _ entries)
`(d ,@(map (match-lambda
(($ <digest-entry> name digest)
`(,name ,(->sexp digest))))
entries)))
(($ <digest> (and type (or 'executable 'regular)) size
(algorithm hash))
`(,(if (eq? type 'executable) 'x 'f) ,size
(,algorithm ,(bytevector->nix-base32-string hash))))
(($ <digest> 'symlink _ target)
`(l ,target))))
`(digest (version 0)
,(->sexp digest)))
(define (sexp->digest sexp)
"Return a digest deserialized from SEXP."
(define (->digest sexp)
(match sexp
(('x size (algorithm hash) _ ...)
(digest 'executable size (list algorithm hash)))
(('f size (algorithm hash) _ ...)
(digest 'regular size
(list algorithm (nix-base32-string->bytevector hash))))
(('d entries ...)
(digest 'directory 0
(map (match-lambda
((name digest)
(digest-entry name (->digest digest))))
entries)))
(('l target)
(digest 'symlink 0 target))))
(match sexp
(('digest ('version 0) sexp)
(->digest sexp))))

View File

@@ -53,6 +53,8 @@
#:use-module (guix workers)
#:use-module (guix store)
#:use-module ((guix serialization) #:select (write-file))
#:autoload (guix digests) (store-deduplication-link
file-digest digest->sexp)
#:use-module (zlib)
#:autoload (lzlib) (call-with-lzip-output-port
make-lzip-output-port)
@@ -405,6 +407,13 @@ appropriate duration. NAR-PATH specifies the prefix for nar URLs."
#:compressions compressions)
<>)))))
(define* (render-digest store request hash)
(let ((item (hash-part->path store hash)))
(if (string-null? item)
(not-found request #:phrase "")
(values `((content-type . (application/x-guix-digest)))
(object->string (digest->sexp (file-digest item)))))))
(define* (nar-cache-file directory item
#:key (compression %no-compression))
(string-append directory "/"
@@ -746,6 +755,21 @@ has the given HASH of type ALGO."
(not-found request)))
(not-found request)))
(define* (render-content-addressed-data request algo hash
#:key (compression %no-compression))
"Return the file with HASH, a nar hash, from the content-addressed store."
(if (and (eq? algo 'sha256) (= 32 (bytevector-length hash)))
(let* ((file (store-deduplication-link hash))
(stat (stat file #f)))
(if stat
(values `((content-type . (application/octet-stream
(charset . "ISO-8859-1")))
;; TODO: Set 'Content-Encoding' to COMPRESSION.
(x-raw-file . ,file))
#f)
(not-found request)))
(not-found request)))
(define (render-log-file store request name)
"Render the log file for NAME, the base name of a store item. Don't attempt
to compress or decompress the log file; just return it as-is."
@@ -1006,7 +1030,7 @@ methods, return the applicable compression."
#:ttl narinfo-ttl
#:nar-path nar-path
#:compressions compressions)))
;; /nar/file/NAME/sha256/HASH
;; /file/NAME/sha256/HASH
(("file" name "sha256" hash)
(guard (c ((invalid-base32-character? c)
(not-found request)))
@@ -1014,6 +1038,19 @@ methods, return the applicable compression."
(render-content-addressed-file store request
name 'sha256 hash))))
;; /content/sha256/HASH
(("content" "sha256" hash)
(guard (c ((invalid-base32-character? c)
(not-found request)))
(let ((hash (nix-base32-string->bytevector hash)))
(render-content-addressed-data request 'sha256 hash
#:compression
(first compressions)))))
;; /digest/HASH
(("digest" hash)
(render-digest store request hash))
;; /log/OUTPUT
(("log" name)
(render-log-file store request name))

View File

@@ -1,5 +1,5 @@
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2014 Nikita Karetnikov <nikita@karetnikov.org>
;;; Copyright © 2018 Kyle Meyer <kyle@kyleam.com>
;;;
@@ -28,7 +28,8 @@
#:use-module (guix records)
#:use-module (guix diagnostics)
#:use-module (guix i18n)
#:use-module ((guix serialization) #:select (restore-file dump-file))
#:use-module ((guix serialization)
#:select (restore-file write-file dump-file dump-port*))
#:autoload (guix store deduplication) (dump-file/deduplicate)
#:autoload (guix scripts discover) (read-substitute-urls)
#:use-module (gcrypt hash)
@@ -43,7 +44,7 @@
(open-connection-for-uri
. guix:open-connection-for-uri)
store-path-abbreviation byte-count->string))
#:use-module (guix progress)
#:use-module ((guix progress) #:hide (dump-port*))
#:use-module ((guix build syscalls)
#:select (set-thread-name))
#:use-module (ice-9 rdelim)
@@ -66,6 +67,8 @@
#:use-module (web request)
#:use-module (web response)
#:use-module (guix http-client)
#:autoload (guix digests) (digest->sexp sexp->digest restore-digest
digest-type digest-size digest-content)
#:export (narinfo-signature->canonical-sexp
narinfo?
@@ -433,6 +436,19 @@ entry is stored in a sub-directory specific to CACHE-URL."
(bytevector->base32-string (sha256 (string->utf8 cache-url)))
"/" hash-part))))
(define (digest-cache-file cache-url path)
"Return the name of the local file that contains an entry for PATH. The
entry is stored in a sub-directory specific to CACHE-URL."
;; The daemon does not sanitize its input, so PATH could be something like
;; "/gnu/store/foo". Gracefully handle that.
(match (store-path-hash-part path)
(#f
(leave (G_ "'~a' does not name a store item~%") path))
((? string? hash-part)
(string-append %narinfo-cache-directory "/"
(bytevector->base32-string (sha256 (string->utf8 cache-url)))
"/" hash-part ".digest"))))
(define (cached-narinfo cache-url path)
"Check locally if we have valid info about PATH coming from CACHE-URL.
Return two values: a Boolean indicating whether we have valid cached info, and
@@ -498,6 +514,23 @@ indicates that PATH is unavailable at CACHE-URL."
(headers '((User-Agent . "GNU Guile"))))
(build-request (string->uri url) #:method 'GET #:headers headers)))
(define (cache-digest! cache-url path data)
"Cache DATA, a bytevector, as the digest for PATH obtained from CACHE-URL."
(define now
(current-time time-monotonic))
(let ((file (digest-cache-file cache-url path)))
(mkdir-p (dirname file))
(with-atomic-file-output file
(lambda (out)
(put-bytevector out data)))))
(define (digest-request cache-url path)
"Return an HTTP request for the digest of PATH at CACHE-URL."
(let ((url (string-append cache-url "/digest/" (store-path-hash-part path)))
(headers '((User-Agent . "GNU Guile"))))
(build-request (string->uri url) #:method 'GET #:headers headers)))
(define (at-most max-length lst)
"If LST is shorter than MAX-LENGTH, return it and the empty list; otherwise
return its MAX-LENGTH first elements and its tail."
@@ -686,20 +719,45 @@ port to it, or, if connection failed, print a warning and return #f. Pass
%narinfo-transient-error-ttl))
result))))
(define %not-slash
(char-set-complement (char-set #\/)))
(define (handle-digest-response request response port result)
(when (= 200 (response-code response))
(let ((len (response-content-length response)))
(match (string-tokenize (uri-path (request-uri request))
%not-slash)
(("digest" hash-part)
(let* ((data (if len
(get-bytevector-n port len)
(read-to-eof port)))
(digest (sexp->digest
(read (open-bytevector-input-port data)))))
(cache-digest! url (hash-part->path hash-part) data)))
(_ #f))))
result)
(define (handle-response request response port result)
(if (string-contains (uri-path (request-uri request))
"/digest/")
(handle-digest-response request response port result)
(handle-narinfo-response request response port result)))
(define (do-fetch uri)
(case (and=> uri uri-scheme)
((http https)
;; Note: Do not check HTTPS server certificates to avoid depending
;; on the X.509 PKI. We can do it because we authenticate
;; narinfos, which provides a much stronger guarantee.
(let* ((requests (map (cut narinfo-request url <>) paths))
(let* ((requests (append (map (cut narinfo-request url <>) paths)
(map (cut digest-request url <>) paths)))
(result (call-with-cached-connection uri
(lambda (port)
(if port
(begin
(update-progress!)
(http-multiple-get uri
handle-narinfo-response '()
handle-response '()
requests
#:open-connection
open-connection-for-uri/cached
@@ -806,6 +864,18 @@ was found."
((answer) answer)
(_ #f)))
(define (lookup-digest cache-url path)
"Return the digest for PATH in CACHE-URL or #f if it could not be found in
cache."
(catch 'system-error
(lambda ()
(call-with-input-file (digest-cache-file cache-url path)
(compose sexp->digest read)))
(lambda args
(if (= ENOENT (system-error-errno args))
#f
(apply throw args)))))
(define (cached-narinfo-expiration-time file)
"Return the expiration time for FILE, which is a cached narinfo."
(catch 'system-error
@@ -1065,18 +1135,14 @@ server certificates."
"Bind PORT with EXP... to a socket connected to URI."
(call-with-cached-connection uri (lambda (port) exp ...)))
(define* (process-substitution store-item destination
#:key cache-urls acl
deduplicate? print-build-trace?)
(define* (process-substitution/nar store-item narinfo destination
#:key cache-urls
deduplicate? print-build-trace?)
"Substitute STORE-ITEM (a store file name) from CACHE-URLS, and write it to
DESTINATION as a nar file. Verify the substitute against ACL, and verify its
hash against what appears in the narinfo. When DEDUPLICATE? is true, and if
DESTINATION is in the store, deduplicate its files. Print a status line on
the current output port."
(define narinfo
(lookup-narinfo cache-urls store-item
(cut valid-narinfo? <> acl)))
(define destination-in-store?
(string-prefix? (string-append (%store-prefix) "/")
destination))
@@ -1160,6 +1226,115 @@ the current output port."
(bytevector->nix-base32-string expected)
(bytevector->nix-base32-string actual)))))))
(define (http-fetch-files base-url files+digests)
"Fetch the files in FILES+DIGESTS, a list of file name/digest pairs as
returned by 'restore-digest'.scm"
(define (content-uri digest)
(match (digest-content digest)
(((algorithm hash) _ ...)
(string->uri
(string-append base-url "/content/" algorithm "/"
(bytevector->base32-string hash))))))
(define (content-request digest)
(build-request (content-uri digest)
#:method 'GET
#:headers '((User-Agent . "GNU Guile"))))
(define request->file
(fold (lambda (file+digest result)
(match file+digest
((file . digest)
(vhash-consq (content-request digest) file
result))))
vlist-null
files+digests))
(define total-size
(match files+digests
(((_ . digests) ...)
(fold (lambda (digest size)
(+ size (digest-size digest)))
0
digests))))
;; TODO: decompression
;; TODO: progress report
(http-multiple-get (string->uri base-url)
(lambda (request response port result)
(match (vhash-assq request request->file)
((digest . file)
;; TODO: deduplicate
(with-atomic-file-output file
(lambda (output)
(let ((len (response-content-length response)))
(dump-port* port output len))))
(chmod file (if (eq? (digest-type digest) 'regular)
#o444
#o555))
(utime file 1 1 0 0))))
#t
(vhash-fold-right (lambda (file request result)
(cons request result))
'()
request->file)))
(define (nar-hash file algorithm)
"Return the ALGORITHM hash of FILE."
(let-values (((port get-hash) (open-hash-port algorithm)))
(write-file file port)
(force-output port)
(let ((hash (get-hash)))
(close-port port)
hash)))
(define* (process-substitution/digest store-item narinfo destination
#:key digest
deduplicate? print-build-trace?)
(define destination-in-store?
(string-prefix? (string-append (%store-prefix) "/")
destination))
(let ((missing-files (restore-digest digest destination)))
(unless (null? missing-files)
(http-fetch-files (narinfo-uri-base narinfo) missing-files)))
(let*-values (((algorithm expected)
(narinfo-hash-algorithm+value narinfo))
((actual) (nar-hash destination algorithm)))
(if (bytevector=? actual expected)
;; Tell the daemon that we're done.
(format (current-output-port) "success ~a ~a~%"
(narinfo-hash narinfo) (narinfo-size narinfo))
;; The actual data has a different hash than that in NARINFO.
(format (current-output-port) "hash-mismatch ~a ~a ~a~%"
(hash-algorithm-name algorithm)
(bytevector->nix-base32-string expected)
(bytevector->nix-base32-string actual)))))
(define* (process-substitution store-item destination
#:key cache-urls acl
deduplicate? print-build-trace?)
(define narinfo
(lookup-narinfo cache-urls store-item
(cut valid-narinfo? <> acl)))
(define digest
(and narinfo
(lookup-digest (narinfo-uri-base narinfo) store-item)))
(if digest
(process-substitution/digest store-item narinfo destination
#:digest digest
#:deduplicate? deduplicate?
#:print-build-trace? print-build-trace?)
(process-substitution/nar store-item narinfo destination
#:cache-urls cache-urls
#:deduplicate? deduplicate?
#:print-build-trace? print-build-trace?)))
;;;
;;; Entry point.
@@ -1301,6 +1476,11 @@ default value."
cached-narinfo-files
#:entry-expiration
cached-narinfo-expiration-time
#:delete-entry
(lambda (file)
(delete-file* file)
(delete-file*
(string-append file ".digest")))
#:cleanup-period
%narinfo-expired-cache-entry-removal-delay)
(check-acl-initialized)

View File

@@ -50,6 +50,7 @@
write-file
write-file-tree
filter/sort-directory-entries
fold-archive
restore-file
dump-file))