Skip to content

Commit

Permalink
add alpha parquet data files loading with parquets lib (#13)
Browse files: browse the repository at this point in the history
  • Loading branch information
RandomFractals committed Feb 28, 2021
1 parent da83ab4 commit 5b2c17c
Show file tree
Hide file tree
Showing 6 changed files with 207 additions and 70 deletions.
105 changes: 92 additions & 13 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "vscode-data-preview",
"displayName": "Data Preview",
"description": "Data Preview 🈸 extension for importing 📤 viewing 🔎 slicing 🔪 dicing 🎲 charting 📊 & exporting 📥 large JSON array/config, YAML, Apache Arrow, Avro & Excel data files",
"version": "2.2.0",
"version": "2.3.0",
"icon": "images/data-preview.png",
"publisher": "RandomFractalsInc",
"author": "Taras Novak a.k.a. dataPixy devTools maker :)",
Expand Down Expand Up @@ -416,8 +416,9 @@
"json5": "^2.1.3",
"jsonc-parser": "2.3.1",
"json-spread": "0.3.2",
"parquets": "^0.10.10",
"properties": "^1.2.1",
"snappy": "^6.3.4",
"snappy": "^6.3.5",
"superagent": "^6.1.0",
"xlsx": "^0.16.7"
}
Expand Down
6 changes: 4 additions & 2 deletions src/data.manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ import {JsonDataProvider} from './data.providers/json.data.provider';
import {Json5DataProvider} from './data.providers/json5.data.provider';
import {JsonLineDataProvider} from './data.providers/json.line.data.provider';
import {MarkdownDataProvider} from './data.providers/markdown.data.provider';
import {TextDataProvider} from './data.providers/text.data.provider';
import {ParquetDataProvider} from './data.providers/parquet.data.provider';
import {PropertiesDataProvider} from './data.providers/properties.data.provider';
import {TextDataProvider} from './data.providers/text.data.provider';
import {YamlDataProvider} from './data.providers/yaml.data.provider';

/**
Expand Down Expand Up @@ -143,8 +144,9 @@ export class DataManager implements IDataManager {
this.addDataProvider(dataProviders, new Json5DataProvider());
this.addDataProvider(dataProviders, new JsonLineDataProvider());
this.addDataProvider(dataProviders, new MarkdownDataProvider());
this.addDataProvider(dataProviders, new TextDataProvider());
this.addDataProvider(dataProviders, new PropertiesDataProvider());
this.addDataProvider(dataProviders, new ParquetDataProvider());
this.addDataProvider(dataProviders, new TextDataProvider());
this.addDataProvider(dataProviders, new YamlDataProvider());
this._logger.debug('loadDataProviders(): loaded data providers:', Object.keys(dataProviders));
return dataProviders;
Expand Down
68 changes: 19 additions & 49 deletions src/data.preview.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import {
// fs data parsing imports
import * as fs from 'fs';
import * as path from 'path';
//import * as parquet from 'parquetjs';

// data preview imports
import * as config from './config';
Expand Down Expand Up @@ -404,7 +403,7 @@ export class DataPreview {
// update columns and rows state vars
this._columns = columns;
this._rowCount = rowCount;
let dataStats: string = `Rows: ${rowCount.toLocaleString()}\tColumns: ${columns.length.toLocaleString()}`;
let dataStats: string = `Rows: ${rowCount.toLocaleString()}\tColumns: ${columns?.length.toLocaleString()}`;
if (this._tableNames.length > 0) {
// add tables count to data preview data stats status display
dataStats = `Tables: ${this._tableNames.length.toLocaleString()}\t${dataStats}`;
Expand Down Expand Up @@ -666,30 +665,23 @@ export class DataPreview {
*/
private getData(dataUrl: string, dataTable: string = ''): any {
// Requests data for the given data url (and optional data table name) through the
// data.manager api and pushes the result into this preview from the callback.
//
// Return value: the outer `data` declared on the next line is never reassigned in this
// method (its only assignment is commented out, and every callback below declares its own
// `data` parameter that shadows it), so the final `return data` yields the initial empty array.
// Loaded data reaches the preview only through this.loadData() inside the callbacks.
//
// NOTE(review): this listing comes from a diff hunk (@@ -666,30 +665,23 @@); the line counts
// suggest the if/else below is the removed pre-commit code and the unconditional
// dataManager.getData() call after it is its replacement — confirm against the committed file.
// Read literally as one body, non-parquet files would be requested twice.
let data: any = [];
if (this._fileExtension === '.parquet') {
// parquet files are not loaded in this branch: only an information message is shown
// TODO: sort out node-gyp lzo lib loading for parquet data files parse
window.showInformationMessage('Parquet Data Preview 🈸 coming soon!');
// data = this.getParquetData(dataFilePath);
}
else { // get data, table names, and data schema via data.manager api
dataManager.getData(dataUrl, {
dataTable: dataTable,
createJsonFiles: this.createJsonFiles,
createJsonSchema: this.createJsonSchema
}, (data: any) => {
// callback `data` shadows the outer `data` variable declared at the top of this method
this._tableNames = dataManager.getDataTableNames(dataUrl);
this._dataSchema = dataManager.getDataSchema(dataUrl);
this.loadData(data);
// log data stats
if (typeof data === 'string') {
// string data is split on newlines and logged as an array of lines, without a schema
const dataLines: Array<string> = data.split('\n');
this.logDataStats(dataLines);
}
else {
this.logDataStats(data, this._dataSchema);
}
});
}
// unconditional request: every file extension goes through the data.manager api here,
// with the same options and the same callback steps as the else branch above
dataManager.getData(dataUrl, {
dataTable: dataTable,
createJsonFiles: this.createJsonFiles,
createJsonSchema: this.createJsonSchema
}, (data: any) => {
// callback `data` shadows the outer `data` variable declared at the top of this method
// refresh table names and data schema state before loading the data
this._tableNames = dataManager.getDataTableNames(dataUrl);
this._dataSchema = dataManager.getDataSchema(dataUrl);
this.loadData(data);
// log data stats
if (typeof data === 'string') {
// string data is split on newlines and logged as an array of lines, without a schema
const dataLines: Array<string> = data.split('\n');
this.logDataStats(dataLines);
}
else {
this.logDataStats(data, this._dataSchema);
}
});
// returns the outer `data`, which is still the empty array assigned at the top
return data;
} // end of getData()

Expand All @@ -716,29 +708,7 @@ export class DataPreview {
}
}

/**
* Gets binary Parquet file data.
* @param dataFilePath Parquet data file path.
* @returns Array of row objects.
*/ /*
private async getParquetData(dataFilePath: string) {
let dataSchema: any = {};
let dataRows: Array<any> = [];
const parquetReader = await parquet.ParquetReader.openFile(dataFilePath);
const cursor = parquetReader.getCursor();
// read all records
let record = null;
while (record = await cursor.next()) {
dataRows.push(record);
}
await parquetReader.close();
dataRows = dataRows.map(rowObject => this.flattenObject(rowObject));
this.logDataStats(dataRows, dataSchema);
// update web view
this.loadData(dataRows);
return dataRows;
} */


/**
* Saves posted data from data view.
* @param fileData File data to save.
Expand Down
Loading

0 comments on commit 5b2c17c

Please sign in to comment.