/**
* Module to manage the ndarray entries for sqlite.
*
* ![nqminds-blue-logo.png][1] ![interlinq-logo-darker.png][2]
*
* [1]: ./img/nqminds-blue-logo.png
* [2]: ./img/interlinq-logo-darker.png
*
* @module sqlite-ndarray
* @author Alexandru Mereacre <mereacre@gmail.com>
*/
module.exports = (function() {
"use strict";
const _ = require("lodash");
const fs = require("fs");
const os = require("os");
const path = require("path");
const shortid = require("shortid");
const sqliteConstants = require("./sqlite-constants.js");
const ndamodule = {};
/**
* @global
* @typedef {object} NdarrayData
* @property {Buffer|{object: any}} data - The ndarray data Buffer or Stream.
* @property {string} dtype - The ndarray data type is of type `NDARRAY_DTYPES`.
* @property {number[]} shape - The ndarray shape.
* @property {boolean} major - The ndarray major (`true` - row-major, `false` - column-major).
* @property {string} ftype - The ndarray file type is of type `NDARRAY_FTYPES`.
*/
/**
 * Reports the byte order of the host as a numpy byte-order prefix.
 * @returns {string} `">"` when `os.endianness()` reports `"BE"` (big endian),
 * otherwise `"<"` (little endian).
 */
function getEndianness() {
  const isBigEndian = os.endianness() === "BE";
  if (isBigEndian) {
    return ">";
  }
  return "<";
}
/**
 * Returns the buffer size in bytes needed to hold an array of a given numpy type.
 *
 * The first character of `numpyType` (the byte-order prefix) is ignored; the
 * remainder selects the element width. Unknown type codes are counted as one
 * byte per element.
 * @function
 * @alias module:sqlite-ndarray.getTypedBufferSize
 * @sync
 * @param {string} numpyType - The numpy array type, e.g. `"<f4"`.
 * @param {number[]} numpySize - The numpy array shape (size of every dimension).
 * @returns {number} - The buffer size in bytes.
 */
ndamodule.getTypedBufferSize = function(numpyType, numpySize) {
  // Seed the product with 1 so that an empty shape (a scalar) yields a single
  // element instead of making reduce() throw a TypeError.
  const size = numpySize.reduce((a, b) => (a * b), 1);
  const arrayType = numpyType.slice(1);
  switch (arrayType) {
    case "H":
    case "h":
      return 2 * size;
    // "u32" is the code this module itself emits for uint32 in getNdarrayMeta
    case "u32":
    case "i4":
    case "f4":
      return 4 * size;
    case "f8":
      return 8 * size;
    case "B":
    case "b":
    default:
      // Single byte elements, also the fallback for unknown type codes
      return size;
  }
};
/**
 * Returns the Javascript typed array holding the contents of a buffer of a given type.
 *
 * When `buffer` is a Node.js `Buffer` its raw bytes are reinterpreted as
 * elements of the requested type (in platform byte order, as is always the
 * case for typed arrays). The bytes are copied, so the result does not share
 * memory with `buffer`. Trailing bytes that do not make up a whole element are
 * dropped. Any other input is handed to the typed array constructor unchanged.
 * Unknown data types fall back to `Uint8Array`.
 * @function
 * @alias module:sqlite-ndarray.getTypedArrayFromBuffer
 * @sync
 * @param {Buffer} buffer - The data buffer.
 * @param {string} dtype - The data type is of type `NDARRAY_DTYPES`.
 * @returns {object} - The typed array.
 */
ndamodule.getTypedArrayFromBuffer = function(buffer, dtype) {
  let TypedArrayCtor;
  switch (dtype) {
    case sqliteConstants.NDARRAY_DTYPES.uint8:
      TypedArrayCtor = Uint8Array;
      break;
    case sqliteConstants.NDARRAY_DTYPES.uint16:
      TypedArrayCtor = Uint16Array;
      break;
    case sqliteConstants.NDARRAY_DTYPES.uint32:
      TypedArrayCtor = Uint32Array;
      break;
    case sqliteConstants.NDARRAY_DTYPES.int8:
      TypedArrayCtor = Int8Array;
      break;
    case sqliteConstants.NDARRAY_DTYPES.int16:
      TypedArrayCtor = Int16Array;
      break;
    case sqliteConstants.NDARRAY_DTYPES.int32:
      TypedArrayCtor = Int32Array;
      break;
    case sqliteConstants.NDARRAY_DTYPES.float:
    case sqliteConstants.NDARRAY_DTYPES.float32:
      TypedArrayCtor = Float32Array;
      break;
    case sqliteConstants.NDARRAY_DTYPES.double:
    case sqliteConstants.NDARRAY_DTYPES.float64:
      TypedArrayCtor = Float64Array;
      break;
    default:
      TypedArrayCtor = Uint8Array;
  }

  // Inputs that are not a Buffer keep the plain constructor semantics
  if (!Buffer.isBuffer(buffer)) {
    return new TypedArrayCtor(buffer);
  }

  // `new Uint16Array(someBuffer)` would treat the Buffer as a list of numbers
  // and produce one element per BYTE. To reinterpret the bytes, the typed
  // array has to be built over an ArrayBuffer instead.
  const elementSize = TypedArrayCtor.BYTES_PER_ELEMENT;
  const byteCount = Math.floor(buffer.byteLength / elementSize) * elementSize;

  // A Buffer may be a window into a larger (pooled) ArrayBuffer starting at an
  // offset that is not aligned to the element size. Copying the relevant bytes
  // gives an ArrayBuffer that starts at offset 0.
  const bytes = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + byteCount);
  return new TypedArrayCtor(bytes);
};
/**
 * Returns the ndarray metadata.
 *
 * The returned object has the fields:
 *  - `data`: the input data, untouched,
 *  - `t`: the numpy type string (native byte-order prefix + type code),
 *  - `s`: the shape,
 *  - `v`: the path type, always `"f"` (readNdarrayMany compares this field
 *    against `DATABASE_PATH_TYPE_FILE`),
 *  - `c`: the major flag,
 *  - `p`: a freshly generated file name.
 * @function
 * @alias module:sqlite-ndarray.getNdarrayMeta
 * @sync
 * @param {Buffer|{object: any}} data - The input data buffer or data stream.
 * @param {string} [dtype] - The data type is of type `NDARRAY_DTYPES`.
 * Defaults to `buffer` for a Buffer input and to `stream` otherwise.
 * @param {array} [shape] - The shape of the data. Defaults to `[data.length]`
 * for a Buffer input and to `[0]` otherwise.
 * @param {boolean} [major] - The data major (true - row-major, false - column-major).
 * Defaults to `true` when undefined.
 * @param {string} [ftype] - The ndarray file type is of type `NDARRAY_FTYPES`.
 * Defaults to `raw`.
 * @returns {object} - The ndarray metadata.
 */
ndamodule.getNdarrayMeta = function(data, dtype, shape, major, ftype) {
  const meta = {"data": data};

  // Default platform alignment byte order
  // ">" (big-endian), "<" (little-endian), "=" (hardware-native)
  let numpyType = getEndianness();

  if (data instanceof Buffer) {
    dtype = dtype || sqliteConstants.NDARRAY_DTYPES.buffer;
    shape = shape || [data.length];
  } else {
    dtype = dtype || sqliteConstants.NDARRAY_DTYPES.stream;
    shape = shape || [0];
  }

  // Default is row-major
  if (typeof major === "undefined") {
    major = true;
  }

  // Default is raw file type
  ftype = ftype || sqliteConstants.NDARRAY_FTYPES.raw;

  switch (dtype) {
    case sqliteConstants.NDARRAY_DTYPES.uint16:
      numpyType += "H";
      break;
    case sqliteConstants.NDARRAY_DTYPES.uint32:
      numpyType += "u32";
      break;
    case sqliteConstants.NDARRAY_DTYPES.int8:
      numpyType += "b";
      break;
    case sqliteConstants.NDARRAY_DTYPES.int16:
      numpyType += "h";
      break;
    case sqliteConstants.NDARRAY_DTYPES.int32:
      numpyType += "i4";
      break;
    case sqliteConstants.NDARRAY_DTYPES.float:
    case sqliteConstants.NDARRAY_DTYPES.float32:
      numpyType += "f4";
      break;
    case sqliteConstants.NDARRAY_DTYPES.double:
    case sqliteConstants.NDARRAY_DTYPES.float64:
      numpyType += "f8";
      break;
    case sqliteConstants.NDARRAY_DTYPES.uint8:
    case sqliteConstants.NDARRAY_DTYPES.stream:
    case sqliteConstants.NDARRAY_DTYPES.buffer:
    default:
      // Raw bytes, also the fallback for unknown data types
      numpyType += "B";
  }

  // Retrieve the timestamp in milliseconds as a list of its decimal digits
  const timestampArray = Date.now().toString().split("").map((value) => (parseInt(value, 10)));

  // Make the filename (check the Python construct):
  // base64 of the digit bytes + a short unique id + the data file suffix
  const fileName = Buffer.from(timestampArray).toString("base64") + shortid.generate() +
    sqliteConstants.DATABASE_DATA_SUFFIX;

  meta.t = numpyType;
  meta.s = shape;

  // The file type is one of NDARRAY_FTYPES; every file type is currently
  // stored with the same "f" path type.
  switch (ftype) {
    case sqliteConstants.NDARRAY_FTYPES.raw:
    case sqliteConstants.NDARRAY_FTYPES.b64:
    case sqliteConstants.NDARRAY_FTYPES.zip:
      meta.v = "f";
      break;
    default:
      meta.v = "f";
  }

  meta.c = major;
  meta.p = fileName;

  return meta;
};
/**
 * Returns the ndarray object built from a data buffer/stream and its metadata.
 *
 * The data type is decoded from the type code in `meta.t` (the leading
 * byte-order character is skipped); a missing or unknown type code yields
 * `uint8`. `meta.s` and `meta.c` are copied to `shape` and `major` as they are.
 * The file type is always reported as `raw`.
 * @function
 * @alias module:sqlite-ndarray.getNdarrayData
 * @sync
 * @param {Buffer|object} data - The input data buffer or data stream.
 * @param {object} [meta] - The ndarray metadata.
 * @returns {NdarrayData} - The ndarray object with the structure.
 * {data: Buffer|Stream, dtype: string, shape: array, major: boolean, ftype: string}.
 */
ndamodule.getNdarrayData = function(data, meta) {
  // The metadata is documented as optional, so tolerate its absence
  const source = meta || {};
  const ndData = {};

  ndData.data = data;

  // dtype: strip the byte-order prefix and decode the numpy type code
  const numpyType = (typeof source.t === "string") ? source.t.substring(1) : "";
  switch (numpyType) {
    case "H":
      ndData.dtype = sqliteConstants.NDARRAY_DTYPES.uint16;
      break;
    case "u32":
      ndData.dtype = sqliteConstants.NDARRAY_DTYPES.uint32;
      break;
    case "b":
      ndData.dtype = sqliteConstants.NDARRAY_DTYPES.int8;
      break;
    case "h":
      ndData.dtype = sqliteConstants.NDARRAY_DTYPES.int16;
      break;
    case "i4":
      ndData.dtype = sqliteConstants.NDARRAY_DTYPES.int32;
      break;
    case "f4":
      ndData.dtype = sqliteConstants.NDARRAY_DTYPES.float32;
      break;
    case "f8":
      ndData.dtype = sqliteConstants.NDARRAY_DTYPES.float64;
      break;
    case "B":
    default:
      ndData.dtype = sqliteConstants.NDARRAY_DTYPES.uint8;
  }

  ndData.shape = source.s;
  ndData.major = source.c;

  // ftype: every path type (`meta.v`) currently maps to the raw file type
  ndData.ftype = sqliteConstants.NDARRAY_FTYPES.raw;

  return ndData;
};
/**
 * Write ndarray documents to files.
 *
 * For every row and every ndarray key the `data` Buffer of the metadata is
 * written synchronously to `<db.dataFolder>/<meta.p>`. The file is opened with
 * the "wx" flag, so writing fails if the file already exists. Data that is not
 * a Buffer is not written. Processing stops at the first failure and the
 * returned promise is rejected; files written before the failure are left in place.
 * @function
 * @alias module:sqlite-ndarray.writeNdarrayMany
 * @async
 * @param {object} db - The sqlite3 db object from module node-sqlite3.
 * Its `dataFolder` property is used as the target folder.
 * @param {object[]} data - A list of all the data rows to write to file.
 * @param {string|string[]} key - The key(s) representing the ndarray(s).
 * @returns {Promise<object[]>} - The modified data list with ndarray metadata
 * (the rows with the `data` field removed from every ndarray entry).
 */
ndamodule.writeNdarrayMany = function(db, data, key) {
  return new Promise((resolve, reject) => {
    const retData = [];
    const dataKeys = [].concat(key);

    // Iterate over documents
    for (const dataRow of data) {
      const newRow = _.omit(dataRow, dataKeys);

      // Iterate over ndarray keys
      for (const keyValue of dataKeys) {
        const meta = dataRow[keyValue];
        newRow[keyValue] = _.omit(meta, "data");

        // Save data to file
        const filePath = path.join(db.dataFolder, meta.p);
        try {
          let bytesWritten;

          // Write to file using the sync method
          if (meta.data instanceof Buffer) {
            const fd = fs.openSync(filePath, "wx");
            try {
              bytesWritten = fs.writeSync(fd, meta.data, 0, meta.data.length);
            } finally {
              // Release the descriptor even when the write throws
              fs.closeSync(fd);
            }
          }

          // Check if what's written is consistent with metadata
          if (bytesWritten !== meta.data.length) {
            reject(Error(`Metadata size different to file size for ${JSON.stringify(meta)} while write`));
            // reject() does not stop execution: bail out so no further files are written
            return;
          }
        } catch (error) {
          reject(error);
          return;
        }
      }

      // Save to the new list
      retData.push(newRow);
    }

    resolve(retData);
  });
};
/**
 * Read ndarray documents from files.
 *
 * For every row and every ndarray key the file referenced by the metadata
 * (`meta.p`, resolved against `db.dataFolder` unless absolute) is read
 * synchronously into a Buffer sized from the metadata type and shape, and the
 * entry is replaced by the ndarray object from `getNdarrayData`. Missing
 * metadata fields default to a byte array with native byte order (`t`) and to
 * shape `[0]` (`s`). The input rows are not modified.
 *
 * The promise is rejected, and processing stops, when the metadata uses a
 * non-native byte order, when the path type is not a file, when the file
 * cannot be read, or when the file size does not match the size computed from
 * the metadata.
 * @function
 * @alias module:sqlite-ndarray.readNdarrayMany
 * @async
 * @param {object} db - The sqlite3 db object from module node-sqlite3.
 * Its `dataFolder` property is used to resolve relative paths.
 * @param {object[]} data - A list of all the data rows to read from files.
 * @param {string|string[]} key - The key(s) representing the ndarray(s).
 * @param {boolean} [type] - The return type (true - Buffer, false - Stream), default is true.
 * Currently ignored: the data is always returned as a Buffer.
 * @returns {Promise<object[]>} - The modified data list with ndarray objects.
 */
ndamodule.readNdarrayMany = function(db, data, key, type) {
  return new Promise((resolve, reject) => {
    const retData = [];
    const dataKeys = [].concat(key);
    const alignment = getEndianness();

    // Iterate over documents
    for (const dataRow of data) {
      const newRow = _.omit(dataRow, dataKeys);

      // Iterate over ndarray keys
      for (const keyValue of dataKeys) {
        const storedMeta = dataRow[keyValue] || {};

        // Stop the loop if the entry is not defined
        // (the remaining keys of this row are skipped as well)
        if (_.isEmpty(storedMeta)) break;

        // Work on a copy so the caller's rows are left untouched.
        // If not defined assume it is a buffer with native alignment.
        const meta = Object.assign({}, storedMeta, {
          t: storedMeta.t || `${alignment}B`,
          s: storedMeta.s || [0],
        });

        if (meta.t[0] !== alignment) {
          reject(Error("Non native byte alignment!"));
          return;
        }

        if (meta.v !== sqliteConstants.DATABASE_PATH_TYPE_FILE) {
          reject(Error("Non file paths are not supported yet!"));
          // reject() does not stop execution: bail out explicitly
          return;
        }

        // Prepare the file buffers
        const bufferSize = ndamodule.getTypedBufferSize(meta.t, meta.s);
        let dataBuffer;
        try {
          // May run out of memory
          dataBuffer = Buffer.alloc(bufferSize);

          // Check if the path is absolute, otherwise append the db path
          const filePath = path.isAbsolute(meta.p) ? meta.p : path.join(db.dataFolder, meta.p);

          // Check the size of the file
          const dataFileStats = fs.statSync(filePath);

          // Open the file for reading only, throw an error if it doesn't exist
          const fd = fs.openSync(filePath, "r");
          let bytesRead;
          try {
            bytesRead = fs.readSync(fd, dataBuffer, 0, dataBuffer.length, 0);
          } finally {
            // Release the descriptor even when the read throws
            fs.closeSync(fd);
          }

          // Check if what's read is consistent with metadata: the file has to
          // hold exactly the number of bytes the type and shape call for.
          // Comparing the bytes read with the file size alone would accept a
          // file shorter than the metadata and return a zero padded buffer.
          if (bytesRead !== bufferSize || dataFileStats.size !== bufferSize) {
            reject(Error(`Metadata size different to file size for ${JSON.stringify(meta)} while read`));
            return;
          }
        } catch (error) {
          reject(error);
          return;
        }

        // Assign the new ndarray object
        newRow[keyValue] = ndamodule.getNdarrayData(dataBuffer, meta);
      }

      // Save to the new list
      retData.push(newRow);
    }

    resolve(retData);
  });
};
return ndamodule;
}());