5
5
import logging
6
6
import math
7
7
import os
8
+ from itertools import chain
8
9
from functools import lru_cache
9
10
from typing import TYPE_CHECKING
10
11
16
17
if not TYPE_CHECKING :
17
18
import json
18
19
19
- from . .asyn import AsyncFileSystem
20
- from . .callbacks import DEFAULT_CALLBACK
21
- from . .core import filesystem , open , split_protocol
22
- from . .utils import isfilelike , merge_offset_ranges , other_paths
20
+ from fsspec .asyn import AsyncFileSystem
21
+ from fsspec .callbacks import DEFAULT_CALLBACK
22
+ from fsspec .core import filesystem , open , split_protocol
23
+ from fsspec .utils import isfilelike , merge_offset_ranges , other_paths
23
24
24
25
logger = logging .getLogger ("fsspec.reference" )
25
26
@@ -131,7 +132,6 @@ def __init__(
131
132
self .out_root = out_root or self .root
132
133
self .cat_thresh = categorical_threshold
133
134
self .cache_size = cache_size
134
- self .dirs = None
135
135
self .url = self .root + "/{field}/refs.{record}.parq"
136
136
# TODO: derive fs from `root`
137
137
self .fs = fsspec .filesystem ("file" ) if fs is None else fs
@@ -195,32 +195,36 @@ def create(root, storage_options=None, fs=None, record_size=10000, **kwargs):
195
195
fs .pipe ("/" .join ([root , ".zmetadata" ]), json .dumps (met ).encode ())
196
196
return LazyReferenceMapper (root , fs , ** kwargs )
197
197
198
- def listdir (self , basename = True ):
198
+ @lru_cache ()
199
+ def listdir (self ):
199
200
"""List top-level directories"""
200
- # cache me?
201
- if self .dirs is None :
202
- dirs = [p .split ("/" , 1 )[0 ] for p in self .zmetadata ]
203
- self .dirs = {p for p in dirs if p and not p .startswith ("." )}
204
- listing = self .dirs
205
- if basename :
206
- listing = [os .path .basename (path ) for path in listing ]
207
- return listing
201
+ dirs = (p .rsplit ("/" , 1 )[0 ] for p in self .zmetadata if not p .startswith (".z" ))
202
+ return set (dirs )
208
203
209
204
def ls (self , path = "" , detail = True ):
210
205
"""Shortcut file listings"""
211
- if not path :
212
- dirnames = self .listdir ()
213
- others = set (
214
- [".zmetadata" ]
215
- + [name for name in self .zmetadata if "/" not in name ]
216
- + [name for name in self ._items if "/" not in name ]
217
- )
206
+ path = path .rstrip ("/" )
207
+ pathdash = path + "/" if path else ""
208
+ dirnames = self .listdir ()
209
+ dirs = [
210
+ d
211
+ for d in dirnames
212
+ if d .startswith (pathdash ) and "/" not in d .lstrip (pathdash )
213
+ ]
214
+ if dirs :
215
+ others = {
216
+ f
217
+ for f in chain (
218
+ [".zmetadata" ],
219
+ (name for name in self .zmetadata ),
220
+ (name for name in self ._items ),
221
+ )
222
+ if f .startswith (pathdash ) and "/" not in f .lstrip (pathdash )
223
+ }
218
224
if detail is False :
219
- others .update (dirnames )
225
+ others .update (dirs )
220
226
return sorted (others )
221
- dirinfo = [
222
- {"name" : name , "type" : "directory" , "size" : 0 } for name in dirnames
223
- ]
227
+ dirinfo = [{"name" : name , "type" : "directory" , "size" : 0 } for name in dirs ]
224
228
fileinfo = [
225
229
{
226
230
"name" : name ,
@@ -234,10 +238,7 @@ def ls(self, path="", detail=True):
234
238
for name in others
235
239
]
236
240
return sorted (dirinfo + fileinfo , key = lambda s : s ["name" ])
237
- parts = path .split ("/" , 1 )
238
- if len (parts ) > 1 :
239
- raise FileNotFoundError ("Cannot list within directories right now" )
240
- field = parts [0 ]
241
+ field = path
241
242
others = set (
242
243
[name for name in self .zmetadata if name .startswith (f"{ path } /" )]
243
244
+ [name for name in self ._items if name .startswith (f"{ path } /" )]
0 commit comments