Skip to content

Commit d382d0f

Browse files
authored
Types Cleanup (#67)
* Switch to import
* Better optional types
* Remove duplicated types
* types/types -> declare for less confusion between types and types/types
* Make sure thrift types pass through
* Minimal WriteStream interface
* Fix an unknown type
* Better openUrl interface
1 parent c86f490 commit d382d0f

17 files changed

+110
-498
lines changed

lib/bloom/sbbf.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import parquet_thrift from "../../gen-nodejs/parquet_types";
22
import Long from 'long';
33
import XxHasher from "./xxhasher"
4-
import {Block} from "../types/types";
4+
import {Block} from "../declare";
55

66
/**
77
* @class SplitBlockBloomFilter

lib/bloomFilterIO/bloomFilterReader.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import * as parquet_util from "../util";
22
import parquet_thrift from "../../gen-nodejs/parquet_types";
33
import sbbf from "../bloom/sbbf";
44
import { ParquetEnvelopeReader } from "../reader"
5-
import { ColumnChunkData } from "../types/types";
5+
import { ColumnChunkData } from "../declare";
66

77
const filterColumnChunksWithBloomFilters = (
88
columnChunkDataCollection: Array<ColumnChunkData>

lib/bloomFilterIO/bloomFilterWriter.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import * as parquet_util from "../util";
22
import parquet_thrift from "../../gen-nodejs/parquet_types";
33
import SplitBlockBloomFilter from "../bloom/sbbf";
44

5-
import { Block } from "../types/types";
5+
import { Block } from "../declare";
66
import Int64 from 'node-int64'
77

88
export type createSBBFParams = {

lib/bufferReader.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { Statistics } from "../gen-nodejs/parquet_types"
22
import { ParquetEnvelopeReader } from "./reader"
3-
import { FileMetaDataExt } from "./types/types"
3+
import { FileMetaDataExt } from "./declare"
44

55
export interface BufferReaderOptions {
66
maxSpan?: number,

lib/codec/types.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
import { PrimitiveType } from "../types/types";
2-
import { ParquetCodec, OriginalType, ParquetField } from "../types/types";
1+
import { PrimitiveType } from "../declare";
2+
import { ParquetCodec, OriginalType, ParquetField } from "../declare";
33
import { Statistics } from "../../gen-nodejs/parquet_types";
44

55
export interface Options {

lib/types/types.ts renamed to lib/declare.ts

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
// Lifted from https://github.com/kbajalc/parquets
1+
// Thanks to https://github.com/kbajalc/parquets
22

3-
import parquet_thrift from "../../gen-nodejs/parquet_types";
4-
import { Statistics, OffsetIndex, ColumnIndex, PageType, DataPageHeader, DataPageHeaderV2, DictionaryPageHeader, IndexPageHeader, Type, ColumnMetaData } from "../../gen-nodejs/parquet_types";
5-
import SplitBlockBloomFilter from "../bloom/sbbf";
6-
import { createSBBFParams } from "../bloomFilterIO/bloomFilterWriter";
3+
import parquet_thrift from "../gen-nodejs/parquet_types";
4+
import { Statistics, OffsetIndex, ColumnIndex, PageType, DataPageHeader, DataPageHeaderV2, DictionaryPageHeader, IndexPageHeader, Type, ColumnMetaData } from "../gen-nodejs/parquet_types";
5+
import SplitBlockBloomFilter from "./bloom/sbbf";
6+
import { createSBBFParams } from "./bloomFilterIO/bloomFilterWriter";
77
import Int64 from 'node-int64'
88

99
export type ParquetCodec = 'PLAIN' | 'RLE';
@@ -27,8 +27,8 @@ export type OriginalType =
2727
| 'UTF8' // 0
2828
| 'MAP' // 1
2929
// | 'MAP_KEY_VALUE' // 2
30-
| 'LIST' // 3
31-
// | 'ENUM' // 4
30+
| 'LIST' // 3
31+
| 'ENUM' // 4
3232
// | 'DECIMAL' // 5
3333
| 'DATE' // 6
3434
| 'TIME_MILLIS' // 7
@@ -59,15 +59,15 @@ export interface FieldDefinition {
5959
optional?: boolean;
6060
repeated?: boolean;
6161
fields?: SchemaDefinition;
62-
statistics?: Statistics
62+
statistics?: Statistics | false;
6363
parent?: ParentField
6464
num_children?: NumChildrenField
6565
}
6666

6767
export interface ParquetField {
6868
name: string;
6969
path: string[];
70-
statistics?: Statistics
70+
statistics?: Statistics | false;
7171
primitiveType?: PrimitiveType;
7272
originalType?: OriginalType;
7373
repetitionType: RepetitionType;

lib/reader.ts

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import * as parquet_types from './types';
99
import BufferReader , { BufferReaderOptions } from './bufferReader';
1010
import * as bloomFilterReader from './bloomFilterIO/bloomFilterReader';
1111
import fetch from 'cross-fetch';
12-
import { ParquetCodec, Parameter,PageData, SchemaDefinition, ParquetType, FieldDefinition, ParquetField, ClientS3, ClientParameters, FileMetaDataExt, NewPageHeader, RowGroupExt, ColumnChunkExt } from './types/types';
12+
import { ParquetCodec, Parameter,PageData, SchemaDefinition, ParquetType, FieldDefinition, ParquetField, ClientS3, ClientParameters, FileMetaDataExt, NewPageHeader, RowGroupExt, ColumnChunkExt } from './declare';
1313
import { Cursor, Options } from './codec/types';
1414

1515
const {
@@ -111,12 +111,12 @@ export class ParquetReader {
111111
* Open the parquet file pointed to by the specified path and return a new
112112
* parquet reader
113113
*/
114-
static async openFile(filePath: string | Buffer | URL, options: BufferReaderOptions) {
114+
static async openFile(filePath: string | Buffer | URL, options?: BufferReaderOptions) {
115115
let envelopeReader = await ParquetEnvelopeReader.openFile(filePath, options);
116116
return this.openEnvelopeReader(envelopeReader, options);
117117
}
118118

119-
static async openBuffer(buffer: Buffer, options: BufferReaderOptions) {
119+
static async openBuffer(buffer: Buffer, options?: BufferReaderOptions) {
120120
let envelopeReader = await ParquetEnvelopeReader.openBuffer(buffer, options);
121121
return this.openEnvelopeReader(envelopeReader, options);
122122
}
@@ -126,7 +126,7 @@ export class ParquetReader {
126126
* The params have to include `Bucket` and `Key` to the file requested
127127
* This function returns a new parquet reader
128128
*/
129-
static async openS3(client: ClientS3, params: ClientParameters, options: BufferReaderOptions) {
129+
static async openS3(client: ClientS3, params: ClientParameters, options?: BufferReaderOptions) {
130130
let envelopeReader = await ParquetEnvelopeReader.openS3(client, params, options);
131131
return this.openEnvelopeReader(envelopeReader, options);
132132
}
@@ -137,13 +137,13 @@ export class ParquetReader {
137137
* a `url` property.
138138
* This function returns a new parquet reader
139139
*/
140-
static async openUrl(params: Parameter, options: BufferReaderOptions) {
140+
static async openUrl(params: Parameter | URL | string, options?: BufferReaderOptions) {
141141
let envelopeReader = await ParquetEnvelopeReader.openUrl(params, options);
142142
return this.openEnvelopeReader(envelopeReader, options);
143143
}
144144

145-
static async openEnvelopeReader(envelopeReader: ParquetEnvelopeReader, opts: BufferReaderOptions) {
146-
if (opts && opts.metadata) {
145+
static async openEnvelopeReader(envelopeReader: ParquetEnvelopeReader, opts?: BufferReaderOptions) {
146+
if (opts?.metadata) {
147147
return new ParquetReader(opts.metadata, envelopeReader, opts);
148148
}
149149
try {
@@ -164,7 +164,7 @@ export class ParquetReader {
164164
* and internal use cases. Consider using one of the open{File,Buffer} methods
165165
* instead
166166
*/
167-
constructor(metadata: FileMetaDataExt, envelopeReader: ParquetEnvelopeReader, opts: BufferReaderOptions) {
167+
constructor(metadata: FileMetaDataExt, envelopeReader: ParquetEnvelopeReader, opts?: BufferReaderOptions) {
168168
opts = opts || {};
169169
if (metadata.version != PARQUET_VERSION) {
170170
throw 'invalid parquet version';
@@ -266,7 +266,7 @@ export class ParquetReader {
266266

267267
async getBloomFiltersFor(columnNames: string[]) {
268268
const bloomFilterData = await getBloomFiltersFor(columnNames, this.envelopeReader!);
269-
return bloomFilterData.reduce((acc: Record<string, Array<unknown>>, value) => {
269+
return bloomFilterData.reduce((acc: Record<string, typeof bloomFilterData>, value) => {
270270
if (acc[value.columnName]) acc[value.columnName].push(value)
271271
else acc[value.columnName] = [value]
272272
return acc;
@@ -384,7 +384,7 @@ export class ParquetEnvelopeReader {
384384
metadata?: FileMetaDataExt;
385385
schema?: parquet_schema.ParquetSchema
386386

387-
static async openFile(filePath: string | Buffer | URL, options: BufferReaderOptions) {
387+
static async openFile(filePath: string | Buffer | URL, options?: BufferReaderOptions) {
388388
let fileStat = await parquet_util.fstat(filePath);
389389
let fileDescriptor = await parquet_util.fopen(filePath);
390390

@@ -401,7 +401,7 @@ export class ParquetEnvelopeReader {
401401
return new ParquetEnvelopeReader(readFn, closeFn, fileStat.size, options);
402402
}
403403

404-
static async openBuffer(buffer: Buffer, options: BufferReaderOptions) {
404+
static async openBuffer(buffer: Buffer, options?: BufferReaderOptions) {
405405
let readFn = (offset: number, length: number, file?: string) => {
406406
if (file) {
407407
return Promise.reject('external references are not supported');
@@ -414,7 +414,7 @@ export class ParquetEnvelopeReader {
414414
return new ParquetEnvelopeReader(readFn, closeFn, buffer.length, options);
415415
}
416416

417-
static async openS3(client: ClientS3, params: ClientParameters, options: BufferReaderOptions) {
417+
static async openS3(client: ClientS3, params: ClientParameters, options?: BufferReaderOptions) {
418418
let fileStat = async () => client.headObject(params).promise().then((d: {ContentLength: number}) => d.ContentLength);
419419

420420
let readFn = async (offset: number, length: number, file?: string) => {
@@ -432,9 +432,12 @@ export class ParquetEnvelopeReader {
432432
return new ParquetEnvelopeReader(readFn, closeFn, fileStat, options);
433433
}
434434

435-
static async openUrl(params: Parameter, options: BufferReaderOptions) {
436-
if (typeof params === 'string')
437-
params = {url: params};
435+
static async openUrl(url: Parameter | URL | string, options?: BufferReaderOptions) {
436+
let params: Parameter;
437+
if (typeof url === 'string') params = { url };
438+
else if(url instanceof URL) params = { url: url.toString() }
439+
else params = url;
440+
438441
if (!params.url)
439442
throw new Error('URL missing');
440443

@@ -465,7 +468,7 @@ export class ParquetEnvelopeReader {
465468
return new ParquetEnvelopeReader(readFn, closeFn, filesize, options);
466469
}
467470

468-
constructor(readFn: (offset: number, length: number, file?: string) => Promise<Buffer> , closeFn: () => unknown, fileSize: Function | number, options: BufferReaderOptions, metadata?: FileMetaDataExt) {
471+
constructor(readFn: (offset: number, length: number, file?: string) => Promise<Buffer> , closeFn: () => unknown, fileSize: Function | number, options?: BufferReaderOptions, metadata?: FileMetaDataExt) {
469472
options = options || {};
470473
this.readFn = readFn;
471474
this.id = ++ParquetEnvelopeReaderIdCounter;

lib/schema.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import * as parquet_codec from './codec';
22
import * as parquet_compression from './compression'
33
import * as parquet_types from './types'
4-
import { SchemaDefinition, ParquetField, RepetitionType } from './types/types'
4+
import { SchemaDefinition, ParquetField, RepetitionType } from './declare'
55

66
const PARQUET_COLUMN_KEY_SEPARATOR = '.';
77

@@ -106,7 +106,7 @@ function buildFields(schema: SchemaDefinition, rLevelParentMax?: number, dLevelP
106106
}
107107

108108
/* nested field */
109-
109+
110110
if (opts.fields) {
111111
fieldList[name] = {
112112
name: name,
@@ -123,7 +123,7 @@ function buildFields(schema: SchemaDefinition, rLevelParentMax?: number, dLevelP
123123
dLevelMax,
124124
path.concat(name))
125125
};
126-
126+
127127
if (opts.type == 'LIST' || opts.type == 'MAP') fieldList[name].originalType = opts.type;
128128

129129
continue;

lib/shred.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import * as parquet_types from './types'
22
import { ParquetSchema } from './schema'
3-
import { Page, PageData, ParquetField } from './types/types';
3+
import { Page, PageData, ParquetField } from './declare';
44

55
/**
66
* 'Shred' a record into a list of <value, repetition_level, definition_level>

0 commit comments

Comments (0)