Skip to content

Commit e883486

Browse files
committed
Repo contents cache
This mostly follows the design doc at https://docs.google.com/document/d/1ZScqiIQi9l7_8eikbsGI-rjupdbCI7wgm1RYD76FJfM/edit?pli=1&tab=t.0#heading=h.5mcn15i0e1ch - Adds a new flag `--repo_contents_cache` to specify the path of the repo contents cache, where we'll store fetched contents of repos that can be cached safely across workspaces. - The flag defaults to `<value of --repository_cache>/contents`. - If this path ends up being inside the main repo, we throw an error. This is because files inside the source tree are considered immutable during the lifetime of a Bazel invocation, which means writing into the repo contents cache can cause spurious failures. - A repo rule can indicate readiness for caching by returning `repository_ctx.repo_metadata(reproducible=True)` from its implementation function, similarly to module extensions. - Note that reproducibility/cacheability is not per repo rule (like "local"-ness), but per repo. Example: `http_archive` is only cacheable if the checksum is provided. - Before we fetch a repo, we first check if matching entries exist in the repo contents cache under the key of the hash of all its predeclared inputs (`HVal(Pre(R))` in the doc). If not, fetch as normal. - These include repo attrs, repo rule impl .bzl hashes, Starlark semantics, etc. - If matching entries exist, we go through each of them and examine if it's up-to-date by examining its "recorded inputs file" (analogous to the marker file in `outputBase/external`). If we find an up-to-date entry, we set up a symlink, and declare victory. Otherwise, fetch as normal. - After fetching, if the repo rule indicates cacheability, we move the fetched contents into the repo contents cache by appending a new entry under the predeclared inputs hash key. RELNOTES: Added a new flag `--repo_contents_cache` (defaults to the `contents` directory under the `--repository_cache`) where Bazel stores fetched contents of repos that can be safely cached across workspaces. A repo rule can indicate cacheability by returning `repository_ctx.repo_metadata(reproducible=True)` from its implementation function. Work towards #12227. Closes #25938. PiperOrigin-RevId: 758682171 Change-Id: Ie703152a98745f7382c3d095a1dbf4b35c3408eb
1 parent 2e51bc3 commit e883486

29 files changed

+628
-96
lines changed

scripts/bootstrap/bootstrap.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ _BAZEL_ARGS="--spawn_strategy=standalone \
3838
--strategy=Javac=worker --worker_quit_after_build --ignore_unsupported_sandboxing \
3939
--compilation_mode=opt \
4040
--repository_cache=derived/repository_cache \
41+
--repo_contents_cache= \
4142
--repo_env=BAZEL_HTTP_RULES_URLS_AS_DEFAULT_CANONICAL_ID=0 \
4243
--extra_toolchains=//scripts/bootstrap:all \
4344
--extra_toolchains=@rules_python//python:autodetecting_toolchain \

src/main/java/com/google/devtools/build/lib/bazel/BazelRepositoryModule.java

Lines changed: 43 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -356,35 +356,41 @@ public void beforeCommand(CommandEnvironment env) throws AbruptExitException {
356356
starlarkRepositoryFunction.setTimeoutScaling(1.0);
357357
singleExtensionEvalFunction.setTimeoutScaling(1.0);
358358
}
359-
if (repoOptions.experimentalRepositoryCache != null) {
360-
Path repositoryCachePath;
361-
if (repoOptions.experimentalRepositoryCache.isEmpty()) {
362-
// A set but empty path indicates a request to disable the repository cache.
363-
repositoryCachePath = null;
364-
} else if (repoOptions.experimentalRepositoryCache.isAbsolute()) {
365-
repositoryCachePath = filesystem.getPath(repoOptions.experimentalRepositoryCache);
366-
} else {
367-
repositoryCachePath =
368-
env.getBlazeWorkspace()
369-
.getWorkspace()
370-
.getRelative(repoOptions.experimentalRepositoryCache);
371-
}
372-
repositoryCache.setPath(repositoryCachePath);
359+
if (repoOptions.repositoryCache != null) {
360+
repositoryCache.setPath(toPath(repoOptions.repositoryCache, env));
373361
} else {
374-
Path repositoryCachePath =
362+
repositoryCache.setPath(
375363
env.getDirectories()
376364
.getServerDirectories()
377365
.getOutputUserRoot()
378-
.getRelative(DEFAULT_CACHE_LOCATION);
379-
try {
380-
repositoryCachePath.createDirectoryAndParents();
381-
repositoryCache.setPath(repositoryCachePath);
382-
} catch (IOException e) {
383-
env.getReporter()
384-
.handle(
385-
Event.warn(
386-
"Failed to set up cache at " + repositoryCachePath + ": " + e.getMessage()));
387-
}
366+
.getRelative(DEFAULT_CACHE_LOCATION));
367+
}
368+
// Note that the repo contents cache stuff has to happen _after_ the repo cache stuff, because
369+
// the specific settings about the repo contents cache might overwrite the repo cache
370+
// settings. In particular, if `--repo_contents_cache` is not set (it's null), we use whatever
371+
// default set by `repositoryCache.setPath(...)`.
372+
if (repoOptions.repoContentsCache != null) {
373+
repositoryCache.getRepoContentsCache().setPath(toPath(repoOptions.repoContentsCache, env));
374+
}
375+
Path repoContentsCachePath = repositoryCache.getRepoContentsCache().getPath();
376+
if (repoContentsCachePath != null
377+
&& env.getWorkspace() != null
378+
&& repoContentsCachePath.startsWith(env.getWorkspace())) {
379+
// Having the repo contents cache inside the workspace is very dangerous. During the
380+
// lifetime of a Bazel invocation, we treat files inside the workspace as immutable. This
381+
// can cause mysterious failures if we write files inside the workspace during the
382+
// invocation, as is often the case with the repo contents cache.
383+
// TODO: wyv@ - This is a crude check that disables some use cases (such as when the output
384+
// base itself is inside the main repo). Investigate a better check.
385+
throw new AbruptExitException(
386+
detailedExitCode(
387+
"""
388+
The repo contents cache [%s] is inside the workspace [%s]. This can cause spurious \
389+
failures. Disable the repo contents cache with `--repo_contents_cache=`, or \
390+
specify `--repo_contents_cache=<path outside the workspace>`.
391+
"""
392+
.formatted(repoContentsCachePath, env.getWorkspace()),
393+
Code.BAD_REPO_CONTENTS_CACHE));
388394
}
389395

390396
try {
@@ -630,6 +636,18 @@ private String getAbsolutePath(String path, CommandEnvironment env) {
630636
return path;
631637
}
632638

639+
/**
640+
* An empty path fragment is turned into {@code null}; otherwise, it's treated as relative to the
641+
* workspace root.
642+
*/
643+
@Nullable
644+
private Path toPath(PathFragment path, CommandEnvironment env) {
645+
if (path.isEmpty()) {
646+
return null;
647+
}
648+
return env.getBlazeWorkspace().getWorkspace().getRelative(path);
649+
}
650+
633651
@Override
634652
public ImmutableList<Injected> getPrecomputedValues() {
635653
Instant now = clock.now();

src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ java_library(
5353
name = "vendor",
5454
srcs = ["VendorManager.java"],
5555
deps = [
56+
"//src/main/java/com/google/devtools/build/lib/bazel/repository/cache",
5657
"//src/main/java/com/google/devtools/build/lib/bazel/repository/downloader",
5758
"//src/main/java/com/google/devtools/build/lib/cmdline",
5859
"//src/main/java/com/google/devtools/build/lib/profiler",

src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorManager.java

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import com.google.common.collect.ImmutableList;
1919
import com.google.common.hash.HashCode;
2020
import com.google.common.hash.Hasher;
21+
import com.google.devtools.build.lib.bazel.repository.cache.RepoContentsCache;
2122
import com.google.devtools.build.lib.bazel.repository.downloader.Checksum;
2223
import com.google.devtools.build.lib.cmdline.RepositoryName;
2324
import com.google.devtools.build.lib.profiler.Profiler;
@@ -75,30 +76,56 @@ public void vendorRepos(Path externalRepoRoot, ImmutableList<RepositoryName> rep
7576
continue;
7677
}
7778

78-
// At this point, the repo should exist under external dir, but check if the vendor src is
79-
// already up-to-date.
8079
Path markerUnderExternal = externalRepoRoot.getChild(repo.getMarkerFileName());
8180
Path markerUnderVendor = vendorDirectory.getChild(repo.getMarkerFileName());
82-
if (isRepoUpToDate(markerUnderVendor, markerUnderExternal)) {
81+
// If the marker file doesn't exist under outputBase/external, then the repo is either local
82+
// (which cannot be in this case since local repos aren't vendored) or in the repo contents
83+
// cache.
84+
boolean isCached = !markerUnderExternal.exists();
85+
Path actualMarkerFile;
86+
if (isCached) {
87+
Path cacheRepoDir = repoUnderExternal.resolveSymbolicLinks();
88+
actualMarkerFile =
89+
cacheRepoDir.replaceName(
90+
cacheRepoDir.getBaseName() + RepoContentsCache.RECORDED_INPUTS_SUFFIX);
91+
} else {
92+
actualMarkerFile = markerUnderExternal;
93+
}
94+
95+
// At this point, the repo should exist under external dir, but check if the vendor src is
96+
// already up-to-date.
97+
if (isRepoUpToDate(markerUnderVendor, actualMarkerFile)) {
8398
continue;
8499
}
85100

86-
// Actually vendor the repo:
101+
// Actually vendor the repo. If the repo is cached, copy it; otherwise move it.
87102
// 1. Clean up existing marker file and vendor dir.
88103
markerUnderVendor.delete();
89104
repoUnderVendor.deleteTree();
90105
repoUnderVendor.createDirectory();
91-
// 2. Move the marker file to a temporary one under vendor dir.
92-
Path tMarker = vendorDirectory.getChild(repo.getMarkerFileName() + ".tmp");
93-
FileSystemUtils.moveFile(markerUnderExternal, tMarker);
94-
// 3. Move the external repo to vendor dir. It's fine if this step fails or is interrupted,
95-
// because the marker file under external is gone anyway.
96-
FileSystemUtils.moveTreesBelow(repoUnderExternal, repoUnderVendor);
106+
// 2. Copy/move the marker file to a temporary one under vendor dir.
107+
Path temporaryMarker = vendorDirectory.getChild(repo.getMarkerFileName() + ".tmp");
108+
if (isCached) {
109+
FileSystemUtils.copyFile(actualMarkerFile, temporaryMarker);
110+
} else {
111+
FileSystemUtils.moveFile(actualMarkerFile, temporaryMarker);
112+
}
113+
// 3. Move/copy the external repo to vendor dir. Note that, in the "move" case, it's fine if
114+
// this step fails or is interrupted, because the marker file under external is gone anyway.
115+
if (isCached) {
116+
FileSystemUtils.copyTreesBelow(repoUnderExternal.resolveSymbolicLinks(), repoUnderVendor);
117+
} else {
118+
try {
119+
repoUnderExternal.renameTo(repoUnderVendor);
120+
} catch (IOException e) {
121+
FileSystemUtils.moveTreesBelow(repoUnderExternal, repoUnderVendor);
122+
}
123+
}
97124
// 4. Re-plant symlinks pointing a path under the external root to a relative path
98125
// to make sure the vendor src keep working after being moved or output base changed
99126
replantSymlinks(repoUnderVendor, externalRepoRoot);
100-
// 5. Rename the temporary marker file after the move is done.
101-
tMarker.renameTo(markerUnderVendor);
127+
// 5. Rename the temporary marker file after the move/copy is done.
128+
temporaryMarker.renameTo(markerUnderVendor);
102129
// 6. Leave a symlink in external dir to keep things working.
103130
repoUnderExternal.deleteTree();
104131
FileSystemUtils.ensureSymbolicLink(repoUnderExternal, repoUnderVendor);

src/main/java/com/google/devtools/build/lib/bazel/repository/LocalConfigPlatformFunction.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ public Object getResolvedInformation(XattrProvider xattrProvider) {
9696
});
9797

9898
// Return the needed info.
99-
return new FetchResult(ImmutableMap.of());
99+
return new FetchResult(ImmutableMap.of(), Reproducibility.YES);
100100
}
101101

102102
private static String workspaceFileContent(String repositoryName) {

src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryOptions.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,26 @@ public class RepositoryOptions extends OptionsBase {
4343
"Specifies the cache location of the downloaded values obtained "
4444
+ "during the fetching of external repositories. An empty string "
4545
+ "as argument requests the cache to be disabled, "
46-
+ "otherwise the default of '<output_user_root>/cache/repos/v1' is used")
47-
public PathFragment experimentalRepositoryCache;
46+
+ "otherwise the default of '<--output_user_root>/cache/repos/v1' is used")
47+
public PathFragment repositoryCache;
48+
49+
@Option(
50+
name = "repo_contents_cache",
51+
oldName = "repository_contents_cache",
52+
oldNameWarning = false,
53+
defaultValue = "null",
54+
documentationCategory = OptionDocumentationCategory.BAZEL_CLIENT_OPTIONS,
55+
effectTags = {OptionEffectTag.BAZEL_INTERNAL_CONFIGURATION},
56+
converter = OptionsUtils.PathFragmentConverter.class,
57+
help =
58+
"""
59+
Specifies the location of the repo contents cache, which contains fetched repo \
60+
directories shareable across workspaces. An empty string as argument requests the repo \
61+
contents cache to be disabled, otherwise the default of '<--repository_cache>/contents' \
62+
is used. Note that this means setting '--repository_cache=' would by default disable the \
63+
repo contents cache as well, unless '--repo_contents_cache=<some_path>' is also set.
64+
""")
65+
public PathFragment repoContentsCache;
4866

4967
@Option(
5068
name = "registry",

src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryResolvedEvent.java

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ public class RepositoryResolvedEvent implements ResolvedEvent {
6767
private final boolean informationReturned;
6868
private final String message;
6969

70-
public RepositoryResolvedEvent(Rule rule, StructImpl attrs, Path outputDirectory, Object result) {
70+
public RepositoryResolvedEvent(
71+
Rule rule, StructImpl attrs, Path outputDirectory, Map<?, ?> result) {
7172
this.outputDirectory = outputDirectory;
7273

7374
String originalClass =
@@ -102,17 +103,17 @@ public RepositoryResolvedEvent(Rule rule, StructImpl attrs, Path outputDirectory
102103

103104
repositoryBuilder.put(ResolvedFileValue.RULE_CLASS, originalClass);
104105

105-
if (result == Starlark.NONE) {
106+
if (result.isEmpty()) {
106107
// Rule claims to be already reproducible, so wants to be called as is.
107108
repositoryBuilder.put(ResolvedFileValue.ATTRIBUTES, origAttr);
108109
this.informationReturned = false;
109110
this.message = "Repository rule '" + rule.getName() + "' finished.";
110-
} else if (result instanceof Map) {
111+
} else {
111112
// Rule claims that the returned (probably changed) arguments are a reproducible
112113
// version of itself.
113114
repositoryBuilder.put(ResolvedFileValue.ATTRIBUTES, result);
114115
Pair<Map<String, Object>, List<String>> diff =
115-
compare(origAttr, defaults.buildOrThrow(), (Map<?, ?>) result);
116+
compare(origAttr, defaults.buildOrThrow(), result);
116117
if (diff.getFirst().isEmpty() && diff.getSecond().isEmpty()) {
117118
this.informationReturned = false;
118119
this.message = "Repository rule '" + rule.getName() + "' finished.";
@@ -143,13 +144,6 @@ public RepositoryResolvedEvent(Rule rule, StructImpl attrs, Path outputDirectory
143144
+ Starlark.repr(diff.getSecond());
144145
}
145146
}
146-
} else {
147-
// TODO(aehlig): handle strings specially to allow encodings of the former
148-
// values to be accepted as well.
149-
resolvedInformationBuilder.put(ResolvedFileValue.REPOSITORIES, result);
150-
repositoryBuilder = null; // We already added the REPOSITORIES entry
151-
this.informationReturned = true;
152-
this.message = "Repository rule '" + rule.getName() + "' returned: " + result;
153147
}
154148

155149
this.name = rule.getName();

src/main/java/com/google/devtools/build/lib/bazel/repository/cache/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ java_library(
2121
"RepositoryCache.java",
2222
],
2323
deps = [
24+
"//src/main/java/com/google/devtools/build/lib/util:file_system_lock",
2425
"//src/main/java/com/google/devtools/build/lib/vfs",
2526
"//src/main/java/com/google/devtools/build/lib/vfs/bazel",
2627
"//third_party:guava",

src/main/java/com/google/devtools/build/lib/bazel/repository/cache/RepoContentsCache.java

Lines changed: 93 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,29 +14,118 @@
1414

1515
package com.google.devtools.build.lib.bazel.repository.cache;
1616

17+
import static com.google.common.collect.ImmutableList.toImmutableList;
18+
1719
import com.google.common.base.Preconditions;
20+
import com.google.common.collect.ImmutableList;
21+
import com.google.devtools.build.lib.util.FileSystemLock;
22+
import com.google.devtools.build.lib.util.FileSystemLock.LockMode;
23+
import com.google.devtools.build.lib.vfs.FileSystemUtils;
1824
import com.google.devtools.build.lib.vfs.Path;
25+
import java.io.IOException;
26+
import java.nio.charset.StandardCharsets;
27+
import java.util.Comparator;
1928
import javax.annotation.Nullable;
2029

2130
/** A cache directory that stores the contents of fetched repos across different workspaces. */
2231
public class RepoContentsCache {
32+
public static final String RECORDED_INPUTS_SUFFIX = ".recorded_inputs";
33+
2334
@Nullable private Path path;
2435

36+
// TODO: wyv@ - implement garbage collection
37+
2538
public void setPath(@Nullable Path path) {
2639
this.path = path;
2740
}
2841

42+
@Nullable
43+
public Path getPath() {
44+
return path;
45+
}
46+
2947
public boolean isEnabled() {
3048
return path != null;
3149
}
3250

33-
public Path recordedInputsFile(String predeclaredInputHash) {
51+
/** A candidate repo in the repo contents cache for one predeclared input hash. */
52+
public record CandidateRepo(Path recordedInputsFile, Path contentsDir) {
53+
private static CandidateRepo fromRecordedInputsFile(Path recordedInputsFile) {
54+
String recordedInputsFileBaseName = recordedInputsFile.getBaseName();
55+
String contentsDirBaseName =
56+
recordedInputsFileBaseName.substring(
57+
0, recordedInputsFileBaseName.length() - RECORDED_INPUTS_SUFFIX.length());
58+
return new CandidateRepo(
59+
recordedInputsFile, recordedInputsFile.replaceName(contentsDirBaseName));
60+
}
61+
}
62+
63+
/** Returns the list of candidate repos for the given predeclared input hash. */
64+
public ImmutableList<CandidateRepo> getCandidateRepos(String predeclaredInputHash) {
3465
Preconditions.checkState(path != null);
35-
return path.getRelative(predeclaredInputHash + ".recorded_inputs");
66+
Path entryDir = path.getRelative(predeclaredInputHash);
67+
try {
68+
return entryDir.getDirectoryEntries().stream()
69+
.filter(path -> path.getBaseName().endsWith(RECORDED_INPUTS_SUFFIX))
70+
// We're just sorting for consistency here. (Note that "10.recorded_inputs" would sort
71+
// before "1.recorded_inputs".) If necessary, we could use some sort of heuristics here
72+
// to speed up the subsequent up-to-date-ness checking.
73+
.sorted(Comparator.comparing(Path::getBaseName))
74+
.map(CandidateRepo::fromRecordedInputsFile)
75+
.collect(toImmutableList());
76+
} catch (IOException e) {
77+
// This should only happen if `entryDir` doesn't exist yet or is not a directory. Either way,
78+
// don't outright fail; just treat it as if the cache is empty.
79+
return ImmutableList.of();
80+
}
3681
}
3782

38-
public Path contentsDir(String predeclaredInputHash) {
83+
/** Moves a freshly fetched repo into the contents cache. */
84+
public void moveToCache(
85+
Path fetchedRepoDir, Path fetchedRepoMarkerFile, String predeclaredInputHash)
86+
throws IOException {
3987
Preconditions.checkState(path != null);
40-
return path.getRelative(predeclaredInputHash);
88+
89+
Path entryDir = path.getRelative(predeclaredInputHash);
90+
if (!entryDir.isDirectory()) {
91+
entryDir.delete();
92+
}
93+
String counter = getNextCounterInDir(entryDir);
94+
Path cacheRecordedInputsFile = entryDir.getChild(counter + RECORDED_INPUTS_SUFFIX);
95+
Path cacheRepoDir = entryDir.getChild(counter);
96+
97+
cacheRepoDir.deleteTree();
98+
cacheRepoDir.createDirectoryAndParents();
99+
// Move the fetched marker file to a temp location, so that if following operations fail, both
100+
// the fetched repo and the cache locations are considered out-of-date.
101+
Path temporaryMarker = entryDir.getChild(counter + ".temp_recorded_inputs");
102+
FileSystemUtils.moveFile(fetchedRepoMarkerFile, temporaryMarker);
103+
// Now perform the move, and afterwards, restore the marker file.
104+
try {
105+
fetchedRepoDir.renameTo(cacheRepoDir);
106+
} catch (IOException e) {
107+
FileSystemUtils.moveTreesBelow(fetchedRepoDir, cacheRepoDir);
108+
}
109+
temporaryMarker.renameTo(cacheRecordedInputsFile);
110+
// Set up a symlink at the original fetched repo dir path.
111+
fetchedRepoDir.deleteTree();
112+
FileSystemUtils.ensureSymbolicLink(fetchedRepoDir, cacheRepoDir);
113+
}
114+
115+
private static String getNextCounterInDir(Path entryDir) throws IOException {
116+
Path counterFile = entryDir.getRelative("counter");
117+
try (var lock = FileSystemLock.get(entryDir.getRelative("lock"), LockMode.EXCLUSIVE)) {
118+
int c = 0;
119+
if (counterFile.exists()) {
120+
try {
121+
c = Integer.parseInt(FileSystemUtils.readContent(counterFile, StandardCharsets.UTF_8));
122+
} catch (NumberFormatException e) {
123+
// ignored
124+
}
125+
}
126+
String counter = Integer.toString(c + 1);
127+
FileSystemUtils.writeContent(counterFile, StandardCharsets.UTF_8, counter);
128+
return counter;
129+
}
41130
}
42131
}

0 commit comments

Comments
 (0)