Skip to content

Commit fa1865b

Browse files
authored
Ensure Buffer objects are returned by compression functions (#88)
Problem ======= The return values of the compression functions are assumed to be Buffer objects. In reality, however, they can be non-Buffer objects, which results in exceptions during reading/writing. #72 Solution ======== Detect the data type of the value returned by the various compression libraries and convert it to a Buffer object if necessary before returning. Change summary: --------------- Ensure Buffer objects are returned by compression functions Steps to Verify: ---------------- As pointed out in the original issue, the problem can be reproduced by using the browser build of parquetjs to read a file with snappy compression.
1 parent 17cb5ed commit fa1865b

File tree

8 files changed

+101
-16
lines changed

8 files changed

+101
-16
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ npm-debug.log
66
dist
77
!test/test-files/*.parquet
88
examples/server/package-lock.json
9+
test/browser/*.js

esbuild.js

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,21 @@ const baseConfig = {
1616
plugins: [compressionBrowserPlugin, wasmPlugin],
1717
target: "es2020" // default
1818
};
19+
// configuration for generating test code in browser
20+
const testConfig = {
21+
bundle: true,
22+
entryPoints: ['test/browser/main.ts'],
23+
define: {
24+
"process.env.NODE_DEBUG": false,
25+
"process.env.NODE_ENV": "\"production\"",
26+
global: "window"
27+
},
28+
inject: ['./esbuild-shims.js'],
29+
minify: false,
30+
platform: 'browser', // default
31+
plugins: [compressionBrowserPlugin, wasmPlugin],
32+
target: "es2020" // default
33+
}
1934
const targets = [
2035
{
2136
...baseConfig,
@@ -31,6 +46,11 @@ const targets = [
3146
...baseConfig,
3247
format: "cjs",
3348
outfile: path.resolve(__dirname, "dist","browser","parquet.cjs.js"),
49+
},
50+
// Browser test code below
51+
{
52+
...testConfig,
53+
outfile: path.resolve(__dirname, "test","browser","main.js"),
3454
}
3555
]
3656
Promise.all(targets.map(esbuild.build))

lib/browser/compression.js

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,16 @@ async function deflate(method, value) {
2929
}
3030

3131
function deflate_identity(value) {
32-
return value;
32+
return buffer_from_result(value);
3333
}
3434

3535
function deflate_gzip(value) {
3636
return zlib.gzipSync(value);
3737
}
3838

3939
function deflate_snappy(value) {
40-
return snappy.compress(value);
40+
const compressedValue = snappy.compress(value);
41+
return buffer_from_result(compressedValue);
4142
}
4243

4344
/**
@@ -52,15 +53,24 @@ async function inflate(method, value) {
5253
}
5354

5455
function inflate_identity(value) {
55-
return value;
56+
return buffer_from_result(value);
5657
}
5758

5859
function inflate_gzip(value) {
5960
return zlib.gunzipSync(value);
6061
}
6162

6263
function inflate_snappy(value) {
63-
return snappy.uncompress(value);
64+
const uncompressedValue = snappy.uncompress(value);
65+
return buffer_from_result(uncompressedValue);
66+
}
67+
68+
function buffer_from_result(result) {
69+
if (Buffer.isBuffer(result)) {
70+
return result;
71+
} else {
72+
return Buffer.from(result);
73+
}
6474
}
6575

6676
exports.PARQUET_COMPRESSION_METHODS = PARQUET_COMPRESSION_METHODS

lib/compression.ts

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ type d_brotli = (value: Uint8Array ) => Promise<Buffer>
99

1010
interface PARQUET_COMPRESSION_METHODS {
1111
[key:string]: {
12-
deflate: Function
13-
inflate: Function
12+
deflate: (value: any) => Buffer | Promise<Buffer>
13+
inflate: (value: any) => Buffer | Promise<Buffer>
1414
}
1515
}
1616
// LZO compression is disabled. See: https://github.com/LibertyDSNP/parquetjs/issues/18
@@ -36,7 +36,7 @@ export const PARQUET_COMPRESSION_METHODS: PARQUET_COMPRESSION_METHODS = {
3636
/**
3737
* Deflate a value using compression method `method`
3838
*/
39-
export async function deflate(method: string, value: unknown) {
39+
export async function deflate(method: string, value: unknown): Promise<Buffer> {
4040
if (!(method in PARQUET_COMPRESSION_METHODS)) {
4141
throw 'invalid compression method: ' + method;
4242
}
@@ -45,15 +45,16 @@ export async function deflate(method: string, value: unknown) {
4545
}
4646

4747
function deflate_identity(value: ArrayBuffer | Buffer | Uint8Array) {
48-
return value;
48+
return buffer_from_result(value);
4949
}
5050

5151
function deflate_gzip(value: ArrayBuffer | Buffer | string) {
5252
return zlib.gzipSync(value);
5353
}
5454

5555
function deflate_snappy(value: ArrayBuffer | Buffer | Uint8Array) {
56-
return snappy.compress(value);
56+
const compressedValue = snappy.compress(value);
57+
return buffer_from_result(compressedValue);
5758
}
5859

5960
async function deflate_brotli(value: Uint8Array) {
@@ -70,29 +71,36 @@ async function deflate_brotli(value: Uint8Array) {
7071
/**
7172
* Inflate a value using compression method `method`
7273
*/
73-
export async function inflate(method: string, value: unknown) {
74+
export async function inflate(method: string, value: unknown): Promise<Buffer> {
7475
if (!(method in PARQUET_COMPRESSION_METHODS)) {
7576
throw 'invalid compression method: ' + method;
7677
}
7778

7879
return await PARQUET_COMPRESSION_METHODS[method].inflate(value);
7980
}
8081

81-
function inflate_identity(value: ArrayBuffer | Buffer | Uint8Array) {
82-
return value;
82+
async function inflate_identity(value: ArrayBuffer | Buffer | Uint8Array): Promise<Buffer> {
83+
return buffer_from_result(value);
8384
}
8485

85-
function inflate_gzip(value: Buffer | ArrayBuffer | string) {
86+
async function inflate_gzip(value: Buffer | ArrayBuffer | string) {
8687
return zlib.gunzipSync(value);
8788
}
8889

8990
function inflate_snappy(value: ArrayBuffer | Buffer | Uint8Array) {
90-
return snappy.uncompress(value);
91+
const uncompressedValue = snappy.uncompress(value);
92+
return buffer_from_result(uncompressedValue);
9193
}
9294

9395
async function inflate_brotli(value: Uint8Array) {
9496
const uncompressedContent = await brotliDecompress(value)
9597
return Buffer.from(uncompressedContent);
9698
}
9799

98-
100+
function buffer_from_result(result: ArrayBuffer | Buffer | Uint8Array): Buffer {
101+
if (Buffer.isBuffer(result)) {
102+
return result;
103+
} else {
104+
return Buffer.from(result);
105+
}
106+
}

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
"build:browser": "node esbuild.js",
6363
"type": "tsc --noEmit",
6464
"lint": "echo 'Linting, it is on the TODO list...'",
65-
"test": "mocha -r ts-node/register 'test/**/*.{js,ts}'",
65+
"test": "mocha -r ts-node/register 'test/{,!(browser)/**}/*.{js,ts}'",
6666
"test:only": "mocha -r ts-node/register",
6767
"clean": "rm -Rf ./dist",
6868
"prepublishOnly": "npm run clean && npm run build:node && npm run build:types && npm run build:browser",

test/browser/index.html

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
4+
<title>Mocha Tests</title>
5+
<link rel="stylesheet" href="../../node_modules/mocha/mocha.css">
6+
</head>
7+
8+
<body>
9+
<div id="mocha"></div>
10+
<script src="../../node_modules/mocha/mocha.js"></script>
11+
<script src="../../node_modules/chai/chai.js"></script>
12+
<script>mocha.setup('bdd')</script>
13+
14+
<!-- load code you want to test here -->
15+
<script src="./main.js"></script>
16+
<!-- load your test files here -->
17+
18+
<script>
19+
mocha.run();
20+
</script>
21+
</body>
22+
</html>

test/browser/main.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import * as parquetjs from "../../dist/browser/parquet.esm";
2+
import { assert } from "chai";
3+
4+
const buffer = require("buffer");
5+
6+
describe("Browser tests", () => {
7+
describe("reader", () => {
8+
it("can read snappy compressed data", async () => {
9+
// Data from test/test-files/snappy-compressed.parquet
10+
const uint8Array = [80, 65, 82, 49, 21, 6, 21, 80, 21, 82, 92, 21, 8, 21, 0, 21, 8, 21, 0, 21, 0, 21, 0, 17, 28, 24, 5, 119, 111, 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 22, 0, 22, 8, 24, 5, 119, 111, 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 0, 0, 0, 40, 32, 5, 0, 0, 0, 104, 101, 108, 108, 111, 1, 9, 104, 119, 111, 114, 108, 100, 6, 0, 0, 0, 98, 97, 110, 97, 110, 97, 8, 0, 0, 0, 49, 112, 111, 97, 52, 98, 112, 102, 21, 12, 25, 37, 6, 0, 25, 24, 16, 99, 111, 109, 112, 114, 101, 115, 115, 101, 100, 83, 116, 114, 105, 110, 103, 21, 2, 22, 8, 22, 206, 1, 22, 206, 1, 38, 8, 60, 24, 5, 119, 111, 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 22, 0, 22, 8, 24, 5, 119, 111, 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 0, 0, 41, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 25, 24, 5, 119, 111, 114, 108, 100, 0, 25, 28, 22, 8, 21, 206, 1, 22, 0, 0, 0, 21, 2, 25, 44, 72, 4, 114, 111, 111, 116, 21, 2, 0, 21, 12, 37, 0, 24, 16, 99, 111, 109, 112, 114, 101, 115, 115, 101, 100, 83, 116, 114, 105, 110, 103, 37, 0, 0, 22, 8, 25, 28, 25, 28, 38, 214, 1, 28, 21, 12, 25, 37, 6, 0, 25, 24, 16, 99, 111, 109, 112, 114, 101, 115, 115, 101, 100, 83, 116, 114, 105, 110, 103, 21, 2, 22, 8, 22, 206, 1, 22, 206, 1, 38, 8, 60, 24, 5, 119, 111, 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 22, 0, 22, 8, 24, 5, 119, 111, 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 0, 0, 22, 154, 3, 21, 22, 22, 242, 2, 21, 40, 0, 22, 234, 2, 22, 8, 0, 25, 12, 24, 15, 64, 100, 115, 110, 112, 47, 112, 97, 114, 113, 117, 101, 116, 106, 115, 0, 163, 0, 0, 0, 80, 65, 82, 49];
11+
const snappyCompressedBuffer = buffer.Buffer.from(uint8Array);
12+
const reader = await parquetjs.ParquetReader.openBuffer(snappyCompressedBuffer);
13+
const data: any[] = [];
14+
for await (const record of reader) {
15+
data.push(record);
16+
}
17+
assert.equal(data.length, 4);
18+
19+
after(async () => {
20+
await reader.close();
21+
})
22+
});
23+
});
24+
});
387 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)