Source: sqlite-ndarray.js

/**
 * Module to manage the ndarray entries for sqlite.
 *
 * ![nqminds-blue-logo.png][1] ![interlinq-logo-darker.png][2]
 *
 * [1]: ./img/nqminds-blue-logo.png
 * [2]: ./img/interlinq-logo-darker.png
 *
 * @module sqlite-ndarray
 * @author Alexandru Mereacre <mereacre@gmail.com>
 */

module.exports = (function() {
  "use strict";

  const _ = require("lodash");
  const fs = require("fs");
  const os = require("os");
  const path = require("path");
  const shortid = require("shortid");
  const sqliteConstants = require("./sqlite-constants.js");

  const ndamodule = {};

  /**
   * @global
   * @typedef  {object} NdarrayData
   * @property  {Buffer|{object: any}} data - The ndarray data Buffer or Stream.
   * @property  {string} dtype - The ndarray data type is of type `NDARRAY_DTYPES`.
   * @property  {number[]} shape - The ndarray shape.
   * @property  {boolean} major - The ndarray major (`true` - row-major, `false` - column-major).
   * @property  {string} ftype - The ndarray file type is of type `NDARRAY_FTYPES`.
   */

  /**
   * Reports the byte order of the host as a numpy byte-order character.
   * @returns {string} `<` for little endian, `>` for big endian.
   */
  function getEndianness() {
    // os.endianness() yields "BE" or "LE"
    if (os.endianness() === "BE") {
      return ">";
    }

    return "<";
  }

  /**
   * Returns the buffer size in bytes needed to hold an array of a given numpy type and shape.
   *
   * The first character of `numpyType` is treated as the byte-order marker and is ignored;
   * the remainder selects the element width. Unrecognised type codes are sized as one byte
   * per element.
   * @function
   * @alias module:sqlite-ndarray.getTypedBufferSize
   * @sync
   * @param {string} numpyType - The numpy array type (byte-order character followed by the type code).
   * @param {number[]} numpySize - The numpy array shape; an empty shape counts as a single element.
   * @returns {number} - The buffer size in bytes.
   */
  ndamodule.getTypedBufferSize = function(numpyType, numpySize) {
    // Seed the product with 1: reduce() without an initial value throws a TypeError
    // on an empty array, and the product over no dimensions is one element.
    const elementCount = numpySize.reduce((total, dimension) => (total * dimension), 1);

    // Drop the leading byte-order character to get the bare type code
    switch (numpyType.slice(1)) {
      case "H":
      case "h":
        return 2 * elementCount;
      case "u32":
      case "i4":
      case "f4":
        return 4 * elementCount;
      case "f8":
        return 8 * elementCount;
      default:
        // "B", "b" and any unknown code occupy one byte per element
        return elementCount;
    }
  };

  /**
   * Returns the Javascript typed array from a buffer of a given type.
   *
   * When `buffer` is a Node.js `Buffer`, its raw bytes are copied and reinterpreted as
   * elements of the requested type (so 16 bytes yield two float64 values). Passing a
   * `Buffer` straight to a typed array constructor would instead create one element per
   * byte, which is not what a binary payload needs. Any other input is handed to the
   * typed array constructor unchanged.
   *
   * Unknown data types fall back to `Uint8Array`.
   * @function
   * @alias module:sqlite-ndarray.getTypedArrayFromBuffer
   * @sync
   * @param {Buffer} buffer - The data buffer.
   * @param {string} dtype - The data type is of type `NDARRAY_DTYPES`.
   * @returns {object} - The typed array (backed by its own memory, not by `buffer`).
   * @throws {RangeError} - If the byte length of a `Buffer` is not a multiple of the element size.
   */
  ndamodule.getTypedArrayFromBuffer = function(buffer, dtype) {
    const dtypes = sqliteConstants.NDARRAY_DTYPES;
    let TypedArrayClass;

    switch (dtype) {
      case dtypes.uint8:
        TypedArrayClass = Uint8Array;
        break;
      case dtypes.uint16:
        TypedArrayClass = Uint16Array;
        break;
      case dtypes.uint32:
        TypedArrayClass = Uint32Array;
        break;
      case dtypes.int8:
        TypedArrayClass = Int8Array;
        break;
      case dtypes.int16:
        TypedArrayClass = Int16Array;
        break;
      case dtypes.int32:
        TypedArrayClass = Int32Array;
        break;
      case dtypes.float:
      case dtypes.float32:
        TypedArrayClass = Float32Array;
        break;
      case dtypes.double:
      case dtypes.float64:
        TypedArrayClass = Float64Array;
        break;
      default:
        TypedArrayClass = Uint8Array;
    }

    // Non-Buffer inputs keep the plain constructor semantics
    if (!Buffer.isBuffer(buffer))
      return new TypedArrayClass(buffer);

    // A Buffer may be a window into a larger (pooled) ArrayBuffer at an arbitrary offset.
    // Copying exactly its bytes gives an aligned, independent ArrayBuffer to reinterpret.
    const bytes = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);

    return new TypedArrayClass(bytes);
  };

  /**
   * Returns the ndarray metadata.
   *
   * The returned object carries the original `data` plus the short metadata keys:
   * `t` (numpy type string: byte-order character + type code), `s` (shape),
   * `v` (path type, always `"f"`), `c` (major flag) and `p` (generated file name).
   *
   * Defaults: a `Buffer` input gets the `buffer` data type and shape `[data.length]`;
   * any other input gets the `stream` data type and shape `[0]`. `major` defaults to
   * `true` only when it is `undefined`. Unknown data types are encoded as `"B"`.
   * @function
   * @alias module:sqlite-ndarray.getNdarrayMeta
   * @sync
   * @param {Buffer|{object: any}} data - The input data buffer or data stream.
   * @param {string} [dtype] - The data type is of type `NDARRAY_DTYPES`.
   * @param {array} [shape] - The shape of the data.
   * @param {boolean} [major] - The data major (true - row-major, false - column-major).
   * @param {string} [ftype] - The ndarray file type is of type `NDARRAY_FTYPES`.
   * @returns {object} - The ndarray metadata.
   */
  ndamodule.getNdarrayMeta = function(data, dtype, shape, major, ftype) {
    const dtypes = sqliteConstants.NDARRAY_DTYPES;
    const ftypes = sqliteConstants.NDARRAY_FTYPES;
    const isBuffer = data instanceof Buffer;

    // Resolve the optional arguments into locals instead of reassigning the parameters
    const resolvedDtype = dtype || (isBuffer ? dtypes.buffer : dtypes.stream);
    const resolvedShape = shape || (isBuffer ? [data.length] : [0]);

    // Default is row-major; an explicit false (column-major) must be preserved
    const resolvedMajor = (typeof major === "undefined") ? true : major;

    // Default is raw file type
    const resolvedFtype = ftype || ftypes.raw;

    // Numpy type code for the element type
    let typeCode;
    switch (resolvedDtype) {
      case dtypes.uint16:
        typeCode = "H";
        break;
      case dtypes.uint32:
        typeCode = "u32";
        break;
      case dtypes.int8:
        typeCode = "b";
        break;
      case dtypes.int16:
        typeCode = "h";
        break;
      case dtypes.int32:
        typeCode = "i4";
        break;
      case dtypes.float:
      case dtypes.float32:
        typeCode = "f4";
        break;
      case dtypes.double:
      case dtypes.float64:
        typeCode = "f8";
        break;
      default:
        // uint8, stream, buffer and unknown types are all plain bytes
        typeCode = "B";
    }

    // Platform byte order: ">" (big-endian) or "<" (little-endian)
    const numpyType = getEndianness() + typeCode;

    // Every file type is currently stored as a file path ("f").
    // The cases are matched against the file type table, which is what `ftype` holds.
    let pathType;
    switch (resolvedFtype) {
      case ftypes.raw:
      case ftypes.b64:
      case ftypes.zip:
        pathType = "f";
        break;
      default:
        pathType = "f";
    }

    // Decimal digits of the current timestamp in milliseconds, one byte per digit
    const timestampDigits = Array.from(String(Date.now()), (digit) => (Number.parseInt(digit, 10)));

    // Make the filename (check the Python construct)
    const fileName = Buffer.from(timestampDigits).toString("base64") + shortid.generate() +
                      sqliteConstants.DATABASE_DATA_SUFFIX;

    return {
      "data": data,
      "t": numpyType,
      "s": resolvedShape,
      "v": pathType,
      "c": resolvedMajor,
      "p": fileName,
    };
  };

  /**
   * Returns the ndarray object built from raw data and its stored metadata.
   *
   * The data type is derived from `meta.t` with its leading byte-order character removed;
   * unknown or missing type codes map to `uint8`. `shape` and `major` are copied from
   * `meta.s` and `meta.c` as they are (so they are `undefined` when `meta` is omitted).
   * The file type is always reported as `raw`.
   * @function
   * @alias module:sqlite-ndarray.getNdarrayData
   * @sync
   * @param {Buffer|object} data - The input data buffer or data stream.
   * @param {object} [meta] - The ndarray metadata.
   * @returns {NdarrayData} - The ndarray object with the structure.
   *                      {data: Buffer|Stream, dtype: string, shape: array, major: boolean, ftype: string}.
   */
  ndamodule.getNdarrayData = function(data, meta) {
    const dtypes = sqliteConstants.NDARRAY_DTYPES;

    // `meta` is optional: fall back to an empty description instead of throwing
    const source = meta || {};

    // dtype: strip the byte-order character to get the bare numpy type code
    const numpyCode = (typeof source.t === "string") ? source.t.substring(1) : "";

    let dtype;
    switch (numpyCode) {
      case "H":
        dtype = dtypes.uint16;
        break;
      case "u32":
        dtype = dtypes.uint32;
        break;
      case "b":
        dtype = dtypes.int8;
        break;
      case "h":
        dtype = dtypes.int16;
        break;
      case "i4":
        dtype = dtypes.int32;
        break;
      case "f4":
        dtype = dtypes.float32;
        break;
      case "f8":
        dtype = dtypes.float64;
        break;
      default:
        // "B" and anything unrecognised are plain bytes
        dtype = dtypes.uint8;
    }

    return {
      "data": data,
      "dtype": dtype,
      "shape": source.s,
      "major": source.c,
      // Every path type (`meta.v`) currently maps to the raw file type
      "ftype": sqliteConstants.NDARRAY_FTYPES.raw,
    };
  };

  /**
   * Write ndarray documents to files.
   *
   * For every row and every ndarray key, the metadata (without its `data` field) replaces
   * the value in the returned row, and `data` is written synchronously to
   * `db.dataFolder/<meta.p>` when it is a `Buffer`. Files are created with the `wx` flag,
   * so an already existing file is an error. Processing stops at the first failure and
   * the promise is rejected with that error.
   * @function
   * @alias module:sqlite-ndarray.writeNdarrayMany
   * @async
   * @param {object} db - The sqlite3 db object from module node-sqlite3.
   * @param {object[]} data - A list of all the data rows to write to file.
   * @param {string|string[]} key - The key(s) representing the ndarray(s).
   * @returns {Promise<object[]>} - The modified data list with ndarray metadata.
   */
  ndamodule.writeNdarrayMany = function(db, data, key) {
    return new Promise((resolve, reject) => {
      const dataKeys = [].concat(key);

      try {
        const retData = [];

        // Iterate over documents
        for (const dataRow of data) {
          const newRow = _.omit(dataRow, dataKeys);

          // Iterate over ndarray keys
          for (const keyValue of dataKeys) {
            const meta = dataRow[keyValue];
            newRow[keyValue] = _.omit(meta, "data");

            const filePath = path.join(db.dataFolder, meta.p);
            let bytesWritten;

            // Only Buffer payloads are written; the write is synchronous
            if (meta.data instanceof Buffer) {
              const fd = fs.openSync(filePath, "wx");
              try {
                bytesWritten = fs.writeSync(fd, meta.data, 0, meta.data.length);
              } finally {
                // Release the descriptor even when the write fails
                fs.closeSync(fd);
              }
            }

            // Check if what's written is consistent with metadata
            if (bytesWritten !== meta.data.length)
              throw Error(`Metadata size different to file size for ${JSON.stringify(meta)} while write`);
          }

          // Save to the new list
          retData.push(newRow);
        }

        resolve(retData);
      } catch (error) {
        // First failure aborts the whole batch; no further files are written
        reject(error);
      }
    });
  };

  /**
   * Read ndarray documents from files.
   *
   * For every row and every ndarray key, the stored metadata is used to locate the data
   * file (`meta.p`, resolved against `db.dataFolder` unless absolute), the file is read
   * synchronously into a `Buffer`, and the key is replaced with the ndarray object.
   * A missing type defaults to native-order bytes and a missing shape to `[0]`; the
   * input rows are not modified.
   *
   * The promise is rejected at the first failure: non-native byte order, a path type other
   * than file, an I/O error, or a file whose size differs from the size implied by the
   * metadata.
   * @function
   * @alias module:sqlite-ndarray.readNdarrayMany
   * @async
   * @param {object} db - The sqlite3 db object from module node-sqlite3.
   * @param {object[]} data - A list of all the data rows to read from files.
   * @param {string|string[]} key - The key(s) representing the ndarray(s).
   * @param {boolean} [type] - The return type (true - Buffer, false - Stream), default is true.
   *                           Currently has no effect: data is always returned as a Buffer.
   * @returns {Promise<object[]>} - The modified data list with ndarray objects.
   */
  ndamodule.readNdarrayMany = function(db, data, key, type) {
    return new Promise((resolve, reject) => {
      const dataKeys = [].concat(key);
      const alignment = getEndianness();

      try {
        const retData = [];

        // Iterate over documents
        for (const dataRow of data) {
          const newRow = _.omit(dataRow, dataKeys);

          // Iterate over ndarray keys
          for (const keyValue of dataKeys) {
            const storedMeta = dataRow[keyValue] || {};

            // Stop the loop if the entry is not defined
            // NOTE(review): this also skips every later key of the row - confirm `break` is intended
            if (_.isEmpty(storedMeta)) break;

            // Work on a copy so the caller's rows are left untouched.
            // If not defined assume it is a buffer with native alignment.
            const meta = Object.assign({}, storedMeta, {
              "t": storedMeta.t || `${alignment}B`,
              "s": storedMeta.s || [0],
            });

            if (meta.t[0] !== alignment)
              throw Error("Non native byte alignment!");

            if (meta.v !== sqliteConstants.DATABASE_PATH_TYPE_FILE)
              throw Error("Non file paths are not supported yet!");

            const sizeError = Error(`Metadata size different to file size for ${JSON.stringify(meta)} while read`);

            // Size of the data as described by the metadata
            const bufferSize = ndamodule.getTypedBufferSize(meta.t, meta.s);

            // Check if the path is absolute, otherwise append the db path
            const filePath = path.isAbsolute(meta.p) ? meta.p : path.join(db.dataFolder, meta.p);

            // The file must hold exactly what the metadata describes; a shorter file
            // would otherwise come back silently padded with zeros
            const dataFileStats = fs.statSync(filePath);
            if (dataFileStats.size !== bufferSize)
              throw sizeError;

            // May run out of memory
            const dataBuffer = Buffer.alloc(bufferSize);

            // Open the file for reading only, throw an error if it doesn't exist
            const fd = fs.openSync(filePath, "r");
            let bytesRead;
            try {
              bytesRead = fs.readSync(fd, dataBuffer, 0, dataBuffer.length, 0);
            } finally {
              // Release the descriptor even when the read fails
              fs.closeSync(fd);
            }

            // Check if what's read is consistent with metadata
            if (bytesRead !== bufferSize)
              throw sizeError;

            // Assign the new ndarray object
            newRow[keyValue] = ndamodule.getNdarrayData(dataBuffer, meta);
          }

          // Save to the new list
          retData.push(newRow);
        }

        resolve(retData);
      } catch (error) {
        // First failure aborts the whole batch
        reject(error);
      }
    });
  };

  return ndamodule;
}());