Skip to content

Commit f2a5877

Browse files
committed
transparent reading from gzipped proto and text files
1 parent 65125e2 commit f2a5877

File tree

3 files changed

+181
-65
lines changed

3 files changed

+181
-65
lines changed

ortools/base/BUILD.bazel

+1
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ cc_library(
223223
"@abseil-cpp//absl/status",
224224
"@abseil-cpp//absl/strings",
225225
"@com_google_protobuf//:protobuf",
226+
"@zlib",
226227
],
227228
)
228229

ortools/base/file.cc

+160-44
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include <sys/stat.h>
1717
#include <sys/types.h>
18+
#include <zlib.h>
1819

1920
#include <cstdint>
2021
#if defined(_MSC_VER)
@@ -41,39 +42,151 @@
4142
#include "google/protobuf/message.h"
4243
#include "google/protobuf/text_format.h"
4344

44-
File::File(FILE* descriptor, absl::string_view name)
45-
: f_(descriptor), name_(name) {}
45+
namespace {
46+
// On-disk encodings that File::Open() dispatches on.
enum class Format {
  NORMAL_FILE,  // Plain file, accessed through the C stdio API.
  GZIP_FILE,    // gzip-compressed file, accessed through zlib.
};
4650

47-
size_t File::Size() {
48-
struct stat f_stat;
49-
stat(name_.c_str(), &f_stat);
50-
return f_stat.st_size;
51+
// Deduces the file format from `name`.
//
// A name that ends with ".gz" and has at least one character in front of that
// suffix is reported as Format::GZIP_FILE; every other name is reported as
// Format::NORMAL_FILE. Only the name is inspected, never the file content.
static Format GetFormatFromName(absl::string_view name) {
  constexpr absl::string_view kGzipSuffix = ".gz";
  // Require a non-empty stem: "a.gz" is a gzip file, a bare ".gz" is not.
  // (The previous `size > 4` test wrongly rejected four-character names.)
  const bool is_gzip =
      name.size() > kGzipSuffix.size() &&
      name.substr(name.size() - kGzipSuffix.size()) == kGzipSuffix;
  return is_gzip ? Format::GZIP_FILE : Format::NORMAL_FILE;
}
5259

53-
bool File::Flush() { return fflush(f_) == 0; }
60+
class CFile : public File {
61+
public:
62+
CFile(FILE* c_file, absl::string_view name) : File(name), f_(c_file) {}
63+
virtual ~CFile() = default;
64+
65+
// Reads "size" bytes to buf from file, buf should be pre-allocated.
66+
size_t Read(void* buf, size_t size) override {
67+
return fread(buf, 1, size, f_);
68+
}
5469

55-
// Deletes "this" on closing.
56-
absl::Status File::Close(int /*flags*/) {
57-
absl::Status status;
58-
if (f_ == nullptr) {
70+
// Writes "size" bytes of buf to file, buf should be pre-allocated.
71+
size_t Write(const void* buf, size_t size) override {
72+
return fwrite(buf, 1, size, f_);
73+
}
74+
75+
// Closes the file and delete the underlying FILE* descriptor.
76+
absl::Status Close(int flags) override {
77+
absl::Status status;
78+
if (f_ == nullptr) {
79+
return status;
80+
}
81+
if (fclose(f_) == 0) {
82+
f_ = nullptr;
83+
} else {
84+
status.Update(
85+
absl::Status(absl::StatusCode::kInvalidArgument,
86+
absl::StrCat("Could not close file '", name_, "'")));
87+
}
88+
delete this;
5989
return status;
6090
}
61-
if (fclose(f_) == 0) {
62-
f_ = nullptr;
63-
} else {
64-
status.Update(
65-
absl::Status(absl::StatusCode::kInvalidArgument,
66-
absl::StrCat("Could not close file '", name_, "'")));
91+
92+
// Flushes buffer.
93+
bool Flush() override { return fflush(f_) == 0; }
94+
95+
// Returns file size.
96+
size_t Size() override {
97+
struct stat f_stat;
98+
stat(name_.c_str(), &f_stat);
99+
return f_stat.st_size;
67100
}
68-
delete this;
69-
return status;
70-
}
71101

72-
size_t File::Read(void* buf, size_t size) { return fread(buf, 1, size, f_); }
102+
bool Open() const override { return f_ != nullptr; }
73103

74-
size_t File::Write(const void* buf, size_t size) {
75-
return fwrite(buf, 1, size, f_);
76-
}
104+
private:
105+
FILE* f_;
106+
};
107+
108+
class GzFile : public File {
109+
public:
110+
GzFile(gzFile gz_file, absl::string_view name) : File(name), f_(gz_file) {}
111+
virtual ~GzFile() = default;
112+
113+
// Reads "size" bytes to buf from file, buf should be pre-allocated.
114+
size_t Read(void* buf, size_t size) override {
115+
return gzread(f_, buf, size);
116+
}
117+
118+
// Writes "size" bytes of buf to file, buf should be pre-allocated.
119+
size_t Write(const void* buf, size_t size) override {
120+
return gzwrite(f_, buf, size);
121+
}
122+
123+
// Closes the file and delete the underlying FILE* descriptor.
124+
absl::Status Close(int flags) override {
125+
absl::Status status;
126+
if (f_ == nullptr) {
127+
return status;
128+
}
129+
if (gzclose(f_) == 0) {
130+
f_ = nullptr;
131+
} else {
132+
status.Update(
133+
absl::Status(absl::StatusCode::kInvalidArgument,
134+
absl::StrCat("Could not close file '", name_, "'")));
135+
}
136+
delete this;
137+
return status;
138+
}
139+
140+
// Flushes buffer.
141+
bool Flush() override { return gzflush(f_, Z_FINISH) == Z_OK; }
142+
143+
// Returns file size.
144+
size_t Size() override {
145+
gzFile file;
146+
std::string null_terminated_name = std::string(name_);
147+
#if defined(_MSC_VER)
148+
file = gzopen (null_terminated_name.c_str(), "rb");
149+
#else
150+
file = gzopen (null_terminated_name.c_str(), "r");
151+
#endif
152+
if (! file) {
153+
LOG(FATAL) << "Cannot get the size of '" << name_
154+
<< "': " << strerror(errno);
155+
}
156+
157+
const int kLength = 5 * 1024;
158+
unsigned char buffer[kLength];
159+
size_t uncompressed_size = 0;
160+
while (1) {
161+
int err;
162+
int bytes_read;
163+
bytes_read = gzread(file, buffer, kLength - 1);
164+
uncompressed_size += bytes_read;
165+
if (bytes_read < kLength - 1) {
166+
if (gzeof(file)) {
167+
break;
168+
} else {
169+
const char* error_string;
170+
error_string = gzerror(file, &err);
171+
if (err) {
172+
LOG(FATAL) << "Error " << error_string;
173+
}
174+
}
175+
}
176+
}
177+
gzclose(file);
178+
return uncompressed_size;
179+
}
180+
181+
bool Open() const override { return f_ != nullptr; }
182+
183+
private:
184+
gzFile f_;
185+
};
186+
187+
} // namespace
188+
189+
// Base-class constructor: keeps a copy of the name the file was opened with.
File::File(absl::string_view name)
    : name_(name) {}
77190

78191
File* File::OpenOrDie(absl::string_view filename, absl::string_view mode) {
79192
File* f = File::Open(filename, mode);
@@ -84,11 +197,24 @@ File* File::OpenOrDie(absl::string_view filename, absl::string_view mode) {
84197
// Opens `filename` with the fopen()-style `mode` and returns a heap-allocated
// File, or nullptr if the underlying open call fails. Names recognized as gzip
// by GetFormatFromName() are opened through zlib, all others through stdio.
File* File::Open(absl::string_view filename, absl::string_view mode) {
  // Both C APIs need NUL-terminated strings, which string_view does not
  // guarantee.
  const std::string path(filename);
  const std::string open_mode(mode);

  if (GetFormatFromName(filename) == Format::GZIP_FILE) {
    gzFile gz_handle = gzopen(path.c_str(), open_mode.c_str());
    if (!gz_handle) return nullptr;
    return new GzFile(gz_handle, filename);
  }

  FILE* stream = fopen(path.c_str(), open_mode.c_str());
  if (stream == nullptr) return nullptr;
  return new CFile(stream, filename);
}
93219

94220
int64_t File::ReadToString(std::string* line, uint64_t max_length) {
@@ -121,8 +247,6 @@ size_t File::WriteString(absl::string_view str) {
121247

122248
// Returns a view of the stored file name; the view refers to this object's
// name_ member.
absl::string_view File::filename() const {
  return name_;
}
123249

124-
bool File::Open() const { return f_ != nullptr; }
125-
126250
// Initialization hook; the body is empty, so calling it has no effect.
void File::Init() {
}
127251

128252
namespace file {
@@ -160,29 +284,21 @@ absl::StatusOr<std::string> GetContents(absl::string_view path,
160284
// Reads the whole content of `filename` into `*output`.
//
// Returns the status of file::Open() if the file cannot be opened, the status
// of Close() after a complete read, and kInvalidArgument if fewer bytes than
// File::Size() reports could be read.
absl::Status GetContents(absl::string_view filename, std::string* output,
                         Options options) {
#if defined(_MSC_VER)
  // Binary mode is requested explicitly when building with MSVC.
  static constexpr char kReadMode[] = "rb";
#else
  static constexpr char kReadMode[] = "r";
#endif

  File* handle;
  auto status = file::Open(filename, kReadMode, &handle, options);
  if (!status.ok()) return status;

  const int64_t expected_size = handle->Size();
  const bool complete =
      handle->ReadToString(output, expected_size) == expected_size;
  if (!complete) {
    // The file is closed even though the read failed; the read failure is
    // what gets reported.
    handle->Close(options).IgnoreError();
    return absl::Status(absl::StatusCode::kInvalidArgument,
                        absl::StrCat("Could not read from '", filename, "'."));
  }

  status.Update(handle->Close(options));
  return status;
}

ortools/base/file.h

+20-21
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@
1515
#define OR_TOOLS_BASE_FILE_H_
1616

1717
#include <cstdint>
18-
#include <cstdio>
19-
#include <cstdlib>
2018
#include <string>
2119

2220
#include "absl/status/status.h"
@@ -41,40 +39,41 @@ class File {
4139
static File* OpenOrDie(absl::string_view filename, absl::string_view mode);
4240
#endif // SWIG
4341

44-
// Reads "size" bytes to buff from file, buff should be pre-allocated.
45-
size_t Read(void* buff, size_t size);
42+
File(absl::string_view name);
43+
virtual ~File() = default;
4644

47-
// Reads the whole file to a string, with a maximum length of 'max_length'.
48-
// Returns the number of bytes read.
49-
int64_t ReadToString(std::string* line, uint64_t max_length);
50-
51-
// Writes "size" bytes of buff to file, buff should be pre-allocated.
52-
size_t Write(const void* buff, size_t size);
45+
// Reads "size" bytes to buf from file, buf should be pre-allocated.
46+
virtual size_t Read(void* buf, size_t size) = 0;
5347

54-
// Writes a string to file.
55-
size_t WriteString(absl::string_view str);
48+
// Writes "size" bytes of buf to file, buf should be pre-allocated.
49+
virtual size_t Write(const void* buf, size_t size) = 0;
5650

5751
// Closes the underlying file handle and deletes this File object.
58-
absl::Status Close(int flags);
52+
virtual absl::Status Close(int flags) = 0;
5953

6054
// Flushes buffer.
61-
bool Flush();
55+
virtual bool Flush() = 0;
6256

6357
// Returns file size.
64-
size_t Size();
58+
virtual size_t Size() = 0;
59+
60+
// Returns whether the file is currently open.
61+
virtual bool Open() const = 0;
62+
63+
// Reads the whole file to a string, with a maximum length of 'max_length'.
64+
// Returns the number of bytes read.
65+
int64_t ReadToString(std::string* line, uint64_t max_length);
66+
67+
// Writes a string to file.
68+
size_t WriteString(absl::string_view str);
6569

6670
// Inits internal data structures.
6771
static void Init();
6872

6973
// Returns the file name.
7074
absl::string_view filename() const;
7175

72-
bool Open() const;
73-
74-
private:
75-
File(FILE* descriptor, absl::string_view name);
76-
77-
FILE* f_;
76+
protected:
7877
std::string name_;
7978
};
8079

0 commit comments

Comments
 (0)