Skip to content

Issue regarding "9.1 Download pretrained transformers automatically" #3

Open
@CaterinaFabbri

Description

@CaterinaFabbri

While running translate_a_single_sentence(translation_config), I encountered an error in which the downloaded file en-de.tgz is not recognized as a gzip file. How can I fix this?
The relevant snippet of the error is reported below:
` downloading en-de.tgz
C:\Users..\pytorch-original-transformer\data\iwslt\en-de.tgz: 97.4kB [00:00, 1.60MB/s]

BadGzipFile Traceback (most recent call last)
~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in gzopen(cls, name, mode, fileobj, compresslevel, **kwargs)
1669 try:
-> 1670 t = cls.taropen(name, mode, fileobj, **kwargs)
1671 except OSError:

~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in taropen(cls, name, mode, fileobj, **kwargs)
1646 raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
-> 1647 return cls(name, mode, fileobj, **kwargs)
1648

~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in __init__(self, name, mode, fileobj, format, tarinfo, dereference, ignore_zeros, encoding, errors, pax_headers, debug, errorlevel, copybufsize)
1509 self.firstmember = None
-> 1510 self.firstmember = self.next()
1511

~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in next(self)
2310 try:
-> 2311 tarinfo = self.tarinfo.fromtarfile(self)
2312 except EOFHeaderError as e:

~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in fromtarfile(cls, tarfile)
1101 """
-> 1102 buf = tarfile.fileobj.read(BLOCKSIZE)
1103 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)

~\anaconda3\envs\pytorch-transformer\lib\gzip.py in read(self, size)
291 raise OSError(errno.EBADF, "read() on write-only GzipFile object")
--> 292 return self._buffer.read(size)
293

~\anaconda3\envs\pytorch-transformer\lib\_compression.py in readinto(self, b)
67 with memoryview(b) as view, view.cast("B") as byte_view:
---> 68 data = self.read(len(byte_view))
69 byte_view[:len(data)] = data

~\anaconda3\envs\pytorch-transformer\lib\gzip.py in read(self, size)
478 self._init_read()
--> 479 if not self._read_gzip_header():
480 self._size = self._pos

~\anaconda3\envs\pytorch-transformer\lib\gzip.py in _read_gzip_header(self)
426 if magic != b'\037\213':
--> 427 raise BadGzipFile('Not a gzipped file (%r)' % magic)
428

BadGzipFile: Not a gzipped file (b'<!')

During handling of the above exception, another exception occurred:

ReadError Traceback (most recent call last)
in
85
86 # Translate the given source sentence
---> 87 translate_a_single_sentence(translation_config)

in translate_a_single_sentence(translation_config)
5 print(2)
6 # Step 1: Prepare the field processor (tokenizer, numericalizer)
----> 7 _, _, src_field_processor, trg_field_processor = get_datasets_and_vocabs(
8 translation_config['dataset_path'],
9 translation_config['language_direction'],

in get_datasets_and_vocabs(dataset_path, language_direction, use_iwslt, use_caching_mechanism)
41 dataset_split_fn = datasets.IWSLT.splits if use_iwslt else datasets.WMT14.splits
42
---> 43 train_dataset, val_dataset, test_dataset = dataset_split_fn(
44 exts=(src_ext, trg_ext),
45 fields=fields,

~\anaconda3\envs\pytorch-transformer\lib\site-packages\torchtext\datasets\translation.py in splits(cls, exts, fields, root, train, validation, test, **kwargs)
142 cls.urls = [cls.base_url.format(exts[0][1:], exts[1][1:], cls.dirname)]
143 check = os.path.join(root, cls.name, cls.dirname)
--> 144 path = cls.download(root, check=check)
145
146 train = '.'.join([train, cls.dirname])

~\anaconda3\envs\pytorch-transformer\lib\site-packages\torchtext\data\dataset.py in download(cls, root, check)
189 # tarfile cannot handle bare .gz files
190 elif ext == '.tgz' or ext == '.gz' and ext_inner == '.tar':
--> 191 with tarfile.open(zpath, 'r:gz') as tar:
192 dirs = [member for member in tar.getmembers()]
193 tar.extractall(path=path, members=dirs)

~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in open(cls, name, mode, fileobj, bufsize, **kwargs)
1615 else:
1616 raise CompressionError("unknown compression type %r" % comptype)
-> 1617 return func(name, filemode, fileobj, **kwargs)
1618
1619 elif "|" in mode:

~\anaconda3\envs\pytorch-transformer\lib\tarfile.py in gzopen(cls, name, mode, fileobj, compresslevel, **kwargs)
1672 fileobj.close()
1673 if mode == 'r':
-> 1674 raise ReadError("not a gzip file")
1675 raise
1676 except:

ReadError: not a gzip file`

Thank you very much!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions