18
18
19
19
import com .google .common .base .Preconditions ;
20
20
import com .google .common .collect .ImmutableList ;
21
+ import com .google .devtools .build .lib .server .IdleTask ;
22
+ import com .google .devtools .build .lib .server .IdleTaskException ;
21
23
import com .google .devtools .build .lib .util .FileSystemLock ;
22
24
import com .google .devtools .build .lib .util .FileSystemLock .LockMode ;
25
+ import com .google .devtools .build .lib .vfs .Dirent ;
23
26
import com .google .devtools .build .lib .vfs .FileSystemUtils ;
24
27
import com .google .devtools .build .lib .vfs .Path ;
28
+ import com .google .devtools .build .lib .vfs .Symlinks ;
25
29
import java .io .IOException ;
26
30
import java .nio .charset .StandardCharsets ;
31
+ import java .time .Duration ;
32
+ import java .time .Instant ;
27
33
import java .util .Comparator ;
34
+ import java .util .UUID ;
28
35
import javax .annotation .Nullable ;
29
36
30
- /** A cache directory that stores the contents of fetched repos across different workspaces. */
31
- public class RepoContentsCache {
37
+ /**
38
+ * A cache directory that stores the contents of fetched repos across different workspaces.
39
+ *
40
+ * <p>The repo contents cache is laid out in two layers. The first layer is a lookup by "predeclared
41
+ * inputs hash", which is defined as the hash of all predeclared inputs of a repo (such as
42
+ * transitive bzl digest, repo attrs, starlark semantics, etc). Each distinct predeclared inputs
43
+ * hash is its own entry directory in the first layer.
44
+ *
45
+ * <p>Inside each entry directory are pairs of directories and files {@code <N, N.recorded_inputs>}
46
+ * where {@code N} is an integer. The file {@code N.recorded_inputs} contains the recorded inputs
47
+ * and their values of a cached repo, and the directory {@code N} contains the cached repo contents.
48
+ * There is also a file named {@code counter} that stores the next available {@code N} for this
49
+ * entry directory, and a file named {@code lock} to ensure exclusive access to the {@code counter}
50
+ * file.
51
+ *
52
+ * <p>On a cache hit (that is, the predeclared inputs hash matches, and recorded inputs are
53
+ * up-to-date), the recorded inputs file has its mtime updated. Cached repos whose recorded inputs
54
+ * file is older than {@code --repo_contents_cache_gc_max_age} are garbage collected.
55
+ */
56
+ public final class RepoContentsCache {
32
57
public static final String RECORDED_INPUTS_SUFFIX = ".recorded_inputs" ;
33
58
34
- @ Nullable private Path path ;
59
+ /**
60
+ * The path to a "lock" file, relative to the root of the repo contents cache. While a shared lock
61
+ * is held, no garbage collection should happen. While an exclusive lock is held, no reads should
62
+ * happen.
63
+ */
64
+ public static final String LOCK_PATH = "gc_lock" ;
65
+
66
+ /**
67
+ * The path to a trash directory relative to the root of the repo contents cache.
68
+ *
69
+ * <p>Since deleting entire directories could take a bit of time, we create a trash directory
70
+ * where we move the garbage directories to (which should be very fast). Then we can delete this
71
+ * trash directory altogether at the end. This makes the GC process safe against being interrupted
72
+ * in the middle (any undeleted trash will get deleted by the next GC). Also be sure to name this
73
+ * trashDir something that couldn't ever be a predeclared inputs hash (starting with an underscore
74
+ * should suffice).
75
+ */
76
+ public static final String TRASH_PATH = "_trash" ;
35
77
36
- // TODO: wyv@ - implement garbage collection
78
+ @ Nullable private Path path ;
79
+ @ Nullable private FileSystemLock sharedLock ;
37
80
38
81
public void setPath (@ Nullable Path path ) {
39
82
this .path = path ;
@@ -58,6 +101,15 @@ private static CandidateRepo fromRecordedInputsFile(Path recordedInputsFile) {
58
101
return new CandidateRepo (
59
102
recordedInputsFile , recordedInputsFile .replaceName (contentsDirBaseName ));
60
103
}
104
+
105
+ /** Updates the mtime of the recorded inputs file, to delay GC for this entry. */
106
+ public void touch () {
107
+ try {
108
+ recordedInputsFile .setLastModifiedTime (Path .NOW_SENTINEL_TIME );
109
+ } catch (IOException e ) {
110
+ // swallow the exception. it's not a huge deal.
111
+ }
112
+ }
61
113
}
62
114
63
115
/** Returns the list of candidate repos for the given predeclared input hash. */
@@ -80,8 +132,19 @@ public ImmutableList<CandidateRepo> getCandidateRepos(String predeclaredInputHas
80
132
}
81
133
}
82
134
83
- /** Moves a freshly fetched repo into the contents cache. */
84
- public void moveToCache (
135
+ private Path ensureTrashDir () throws IOException {
136
+ Preconditions .checkState (path != null );
137
+ Path trashDir = path .getChild (TRASH_PATH );
138
+ trashDir .createDirectoryAndParents ();
139
+ return trashDir ;
140
+ }
141
+
142
+ /**
143
+ * Moves a freshly fetched repo into the contents cache.
144
+ *
145
+ * @return the repo dir in the contents cache.
146
+ */
147
+ public Path moveToCache (
85
148
Path fetchedRepoDir , Path fetchedRepoMarkerFile , String predeclaredInputHash )
86
149
throws IOException {
87
150
Preconditions .checkState (path != null );
@@ -98,7 +161,7 @@ public void moveToCache(
98
161
cacheRepoDir .createDirectoryAndParents ();
99
162
// Move the fetched marker file to a temp location, so that if following operations fail, both
100
163
// the fetched repo and the cache locations are considered out-of-date.
101
- Path temporaryMarker = entryDir .getChild (counter + ".temp_recorded_inputs" );
164
+ Path temporaryMarker = ensureTrashDir () .getChild (UUID . randomUUID (). toString () );
102
165
FileSystemUtils .moveFile (fetchedRepoMarkerFile , temporaryMarker );
103
166
// Now perform the move, and afterwards, restore the marker file.
104
167
try {
@@ -110,6 +173,7 @@ public void moveToCache(
110
173
// Set up a symlink at the original fetched repo dir path.
111
174
fetchedRepoDir .deleteTree ();
112
175
FileSystemUtils .ensureSymbolicLink (fetchedRepoDir , cacheRepoDir );
176
+ return cacheRepoDir ;
113
177
}
114
178
115
179
private static String getNextCounterInDir (Path entryDir ) throws IOException {
@@ -128,4 +192,80 @@ private static String getNextCounterInDir(Path entryDir) throws IOException {
128
192
return counter ;
129
193
}
130
194
}
195
+
196
+ public void acquireSharedLock () throws IOException {
197
+ Preconditions .checkState (path != null );
198
+ Preconditions .checkState (sharedLock == null , "this process already has the shared lock" );
199
+ sharedLock = FileSystemLock .get (path .getRelative (LOCK_PATH ), LockMode .SHARED );
200
+ }
201
+
202
+ public void releaseSharedLock () throws IOException {
203
+ Preconditions .checkState (sharedLock != null );
204
+ sharedLock .close ();
205
+ sharedLock = null ;
206
+ }
207
+
208
+ /**
209
+ * Creates a garbage collection {@link IdleTask} that deletes cached repos who are last accessed
210
+ * more than {@code maxAge} ago, with an idle delay of {@code idleDelay}.
211
+ */
212
+ public IdleTask createGcIdleTask (Duration maxAge , Duration idleDelay ) {
213
+ Preconditions .checkState (path != null );
214
+ return new IdleTask () {
215
+ @ Override
216
+ public String displayName () {
217
+ return "Repo contents cache garbage collection" ;
218
+ }
219
+
220
+ @ Override
221
+ public Duration delay () {
222
+ return idleDelay ;
223
+ }
224
+
225
+ @ Override
226
+ public void run () throws InterruptedException , IdleTaskException {
227
+ try {
228
+ Preconditions .checkState (path != null );
229
+ // If we can't grab the lock, abort GC. Someone will come along later.
230
+ try (var lock = FileSystemLock .tryGet (path .getRelative (LOCK_PATH ), LockMode .EXCLUSIVE )) {
231
+ runGc (maxAge );
232
+ }
233
+ // Empty the trash dir outside the lock. No one is reading from these files, so it should
234
+ // be safe. At worst, multiple servers performing GC will try to delete the same files,
235
+ // but whatever.
236
+ path .getChild (TRASH_PATH ).deleteTreesBelow ();
237
+ } catch (IOException e ) {
238
+ throw new IdleTaskException (e );
239
+ }
240
+ }
241
+ };
242
+ }
243
+
244
+ private void runGc (Duration maxAge ) throws InterruptedException , IOException {
245
+ path .setLastModifiedTime (Path .NOW_SENTINEL_TIME );
246
+ Instant cutoff = Instant .ofEpochMilli (path .getLastModifiedTime ()).minus (maxAge );
247
+ Path trashDir = ensureTrashDir ();
248
+
249
+ for (Dirent dirent : path .readdir (Symlinks .NOFOLLOW )) {
250
+ if (dirent .getType () != Dirent .Type .DIRECTORY || dirent .getName ().equals (TRASH_PATH )) {
251
+ continue ;
252
+ }
253
+ for (Path recordedInputsFile : path .getChild (dirent .getName ()).getDirectoryEntries ()) {
254
+ if (!recordedInputsFile .getBaseName ().endsWith (RECORDED_INPUTS_SUFFIX )) {
255
+ continue ;
256
+ }
257
+ if (Thread .interrupted ()) {
258
+ throw new InterruptedException ();
259
+ }
260
+
261
+ if (Instant .ofEpochMilli (recordedInputsFile .getLastModifiedTime ()).isBefore (cutoff )) {
262
+ // Sorry buddy, you're out.
263
+ recordedInputsFile .delete ();
264
+ var repoDir = CandidateRepo .fromRecordedInputsFile (recordedInputsFile ).contentsDir ;
265
+ // Use a UUID to avoid clashes.
266
+ repoDir .renameTo (trashDir .getChild (UUID .randomUUID ().toString ()));
267
+ }
268
+ }
269
+ }
270
+ }
131
271
}
0 commit comments