const dayjs = require('dayjs');
const { has, isNil, map, pick } = require('lodash');

/**
 * Returns a copy of mlValue with one extra paragraph item at the end.
 * The new item carries the given text and an empty locations array; the input array is not modified.
 *
 * @param  {object} options
 * @param  {Array<object>} options.mlValue - existing mlValue items
 * @param  {string} options.appendedText - text of the paragraph to add
 * @return {Array<object>} new array holding the existing items followed by the new one
 */
function appendParagraph({ mlValue, appendedText }) {
  const newParagraph = { text_normalized: appendedText, locations: [] };

  return mlValue.concat(newParagraph);
}

/**
 * Tells whether the contract_date has no extracted parts at all.
 *
 * @param  {object}  options
 * @param  {object}  options.mlJsonData
 * @return {boolean} true when year, month and day are all missing, false as soon as one of them is present
 */
function isDateEmpty({ mlJsonData }) {
  const fragments = getDateFragments({ mlJsonData });
  const hasAnyPart = [fragments.year, fragments.month, fragments.day].some(Boolean);

  return !hasAnyPart;
}

/**
 * Formats a positive number as an ungrouped 'en-US' string, optionally left-padded with zeros.
 *
 * @param {number} value - The value to format.
 * @param {object} [options] - The options to use when formatting.
 * @param {number} [options.minDigits] - The minimum number of integer digits the formatted number should have.
 *
 * @return {string|undefined} - The formatted number, or undefined when the value is null or not greater than zero.
 */
function formatNumber(value, { minDigits } = {}) {
  const isPositive = value !== null && value > 0;

  if (!isPositive) {
    return undefined;
  }

  const formatOptions = { minimumIntegerDigits: minDigits, useGrouping: false };
  return value.toLocaleString('en-US', formatOptions);
}

/**
 * Splits the first contract_date paragraph (expected as a dash-separated `year-month-day` string) into its parts.
 *
 * @param  {object} options
 * @param  {object} options.mlJsonData
 * @return {{year: ?number, month: (string|undefined), day: (string|undefined)}}
 *         year parsed as an integer (null when there is no date); month and day formatted with
 *         at least two digits, or undefined when they are missing or not positive
 */
function getDateFragments({ mlJsonData }) {
  const [contractDate] = getParagraphs({ mlJsonData, mlKey: 'contract_date' });

  // Without a date every fragment starts out as null.
  const [year, month, day] = contractDate
    ? contractDate.split('-').map((piece) => parseInt(piece, 10))
    : [null, null, null];

  const twoDigits = { minDigits: 2 };

  return {
    year,
    month: formatNumber(month, twoDigits),
    day: formatNumber(day, twoDigits),
  };
}

/**
 * Joins date fragments into a single dash-separated `year-month-day` string.
 * Fragments are interpolated as-is, so a missing fragment shows up as its string form (e.g. 'undefined').
 *
 * @param  {object} options
 * @param  {*} options.year
 * @param  {*} options.month
 * @param  {*} options.day
 * @return {string}
 */
function normalizeDateFragments({ year, month, day }) {
  const separator = '-';

  return `${year}${separator}${month}${separator}${day}`;
}

/**
 * Finds the delimiter of a date format, i.e. its first character that is not a letter A-Z (case-insensitive).
 *
 * @param {string} dateFormat - The date format to inspect, e.g. 'MM/DD/YYYY'.
 *
 * @return {string|undefined} - The delimiting character, or undefined when the format contains only letters.
 */
function getDateFormatDelimiter(dateFormat) {
  const firstNonLetter = dateFormat.match(/[^A-Z]/i);

  return firstNonLetter ? firstNonLetter[0] : undefined;
}

/**
 * Renders the contract_date in the requested format, writing '__' in place of every fragment that is missing.
 *
 * The format is split on its delimiter and each resulting token is looked up among
 * DD, MM, MMM (abbreviated month name) and YYYY.
 *
 * @param  {object} options - The options to get the alt date value with.
 * @param  {object} options.mlJsonData - The ml data used to get the date fragments.
 * @param  {string} [options.dateFormat] - The preferred date format, 'MM/DD/YYYY' by default.
 *
 * @return {string} - The formatted date.
 */
function getAltDate({ mlJsonData, dateFormat = 'MM/DD/YYYY' }) {
  const placeholder = '__';
  const fragments = getDateFragments({ mlJsonData });
  const separator = getDateFormatDelimiter(dateFormat);

  let monthAbbreviation = placeholder;
  if (!isNil(fragments.month)) {
    // dayjs months are zero-based, the extracted month is one-based.
    monthAbbreviation = dayjs().month(Number(fragments.month) - 1).format('MMM');
  }

  const valueByToken = {
    DD: fragments.day || placeholder,
    MM: fragments.month || placeholder,
    MMM: monthAbbreviation,
    YYYY: fragments.year || placeholder,
  };

  const renderedTokens = dateFormat.split(separator).map((token) => valueByToken[token]);
  return renderedTokens.join(separator);
}

/**
 * Looks up the document segment each location points at and returns the segment texts.
 * Handy when you hold locations from ml_json and segments from the extracted document doc_json.
 *
 * @param  {Array<object>} locations - locations in the format stored in ml_json; a falsy value yields an empty array
 * @param  {Array<object>} segments - segments of the extracted document, indexed by location.segment_index
 * @param  {object} [options]
 * @param  {boolean} [options.translated] - use the segment's translated_text when the segment has one
 * @param  {boolean} [options.pageNumbers] - return objects carrying page information instead of bare strings
 *
 * @return {string[]|Array<{ text: string, page: number, stated_page: * }>} - one entry per location
 */
const getLocationsTexts = (locations, segments, { translated = false, pageNumbers = false } = {}) => {
  const pickText = (segment) => {
    if (translated && has(segment, 'translated_text')) {
      return segment.translated_text;
    }

    return segment.text;
  };

  return (locations || []).map((location) => {
    const segment = segments[location.segment_index];
    const text = pickText(segment);

    if (!pageNumbers) {
      return text;
    }

    return { text, page: segment.page_number, stated_page: location.stated_page };
  });
};

/**
 * Gets the paragraphs that make up the extracted text for a clause.
 *
 * @param {object} options
 * @param {object} [options.segments] - Extracted document text.
 * @param {object} options.mlJsonData - ML data indicating the content and locations of identified clauses.
 * @param {string} options.mlKey - The key to get the text for.
 * @param {boolean} [options.useTranslatedText] - If translated_text should be returned.
 *
 * @return {Array<string>} - One text per paragraph; empty when the ML data or the key's value is missing.
 */
function getParagraphs({ segments, mlJsonData, mlKey, useTranslatedText = false }) {
  if (!mlJsonData) {
    return [];
  }

  const mlValue = mlJsonData[mlKey];

  // A key stored as null is treated like an absent key instead of crashing on `.map`.
  if (mlValue === undefined || mlValue === null) {
    return [];
  }

  return mlValue.map((mlParagraph) => getParagraphText({ segments, mlParagraph, useTranslatedText }).text);
}

/**
 * Gets the paragraphs of a clause as objects, each enriched with properties copied from its ML data item.
 *
 * @param {object} options
 * @param {object} [options.segments] - Extracted document text.
 * @param {object} options.mlJsonData - ML data indicating the content and locations of identified clauses.
 * @param {string} options.mlKey - The key to get the paragraphs for.
 * @param {string[]} [options.additionalProperties] - Properties to copy onto each paragraph, `['locations']` by default.
 *
 * @return {Array<object>} - One paragraph object per ML data item; empty when the ML data or the key's value is missing.
 */
function getParagraphsWithAdditional({ segments, mlJsonData, mlKey, additionalProperties = ['locations'] }) {
  if (!mlJsonData) {
    return [];
  }

  const mlValue = mlJsonData[mlKey];

  // A key stored as null is treated like an absent key instead of crashing on `.map`.
  if (mlValue === undefined || mlValue === null) {
    return [];
  }

  return mlValue.map((mlParagraph) => getParagraphText({ segments, mlParagraph, additionalProperties }));
}

/**
 * Resolves the text of a single ML paragraph.
 *
 * Text stored on the paragraph itself (`text_normalized`, falling back to `text`) wins. When neither is set,
 * the text is assembled from the document segments the paragraph's locations point at.
 *
 * @param {object} options
 * @param {Array<object>} options.segments - Extracted document text.
 * @param {object} options.mlParagraph - ML data item from the mlValue array,
 *                  this object holds information about locations of text in the extracted document
 *                  and/or information about user edited text for this paragraph
 * @param {boolean} [options.useTranslatedText] - If translated_text should be used; only applies when
 *                  the text is read from the segments.
 * @param {boolean} [options.pageNumbers] - If page numbers should be returned.
 * @param {string[]} [options.additionalProperties] - Properties of mlParagraph to copy onto the result.
 *
 * @return {{text: string, page?: ?string, stated_page?: ?string}} - Pages of several locations are joined with '-'.
 */
function getParagraphText({
  segments,
  mlParagraph,
  useTranslatedText = false,
  pageNumbers = false,
  additionalProperties,
}) {
  const storedText = mlParagraph.text_normalized ?? mlParagraph.text;
  const joinProperty = (items, key, joiner = '-') => map(items, key).join(joiner);
  let paragraph;

  if (!isNil(storedText)) {
    // Page info is only reported when requested and when the paragraph has locations to read it from.
    const includePages = pageNumbers && mlParagraph.locations?.length;
    const pageString = (pageKey) => (includePages ? joinProperty(mlParagraph.locations, pageKey) : null);

    paragraph = { text: storedText, page: pageString('page'), stated_page: pageString('stated_page') };
  }
  else {
    const segmentTexts = getLocationsTexts(
      mlParagraph.locations,
      segments,
      { translated: useTranslatedText, pageNumbers },
    );

    if (pageNumbers) {
      paragraph = {
        text: joinProperty(segmentTexts, 'text', ' '),
        page: joinProperty(segmentTexts, 'page'),
        stated_page: joinProperty(segmentTexts, 'stated_page'),
      };
    }
    else {
      paragraph = { text: segmentTexts.join(' ') };
    }
  }

  return additionalProperties?.length
    ? Object.assign(paragraph, pick(mlParagraph, additionalProperties))
    : paragraph;
}

/**
 * Upper-cases the first character of a text and leaves the rest untouched.
 *
 * @param  {string} text
 * @return {string}
 */
const capitalize = (text) => {
  const head = text.charAt(0).toUpperCase();
  const tail = text.substring(1);

  return head + tail;
};

/**
 * Gets the capitalized document type from the first document_type paragraph.
 *
 * @param  {object} options
 * @param  {object} options.mlJsonData
 * @return {string} the capitalized document type, or an empty string when there is none
 */
function getDocumentType({ mlJsonData }) {
  const [documentType] = getParagraphs({ mlKey: 'document_type', mlJsonData });

  if (!documentType) {
    return '';
  }

  // need to capitalize as it might come from text vs text_normalized,
  // simplify when we no longer need to return 'text' from `getParagraphs`
  return capitalize(documentType);
}

/**
 * Creates a mlValue array whose items only carry text_normalized information (locations are left empty).
 * Note that you should not use this method to make edits to existing values
 * as it would remove all metadata information gathered when extracting text.
 *
 * @param  {{ newValue: string | string[] }} options - single value or multiple values to store under mlKey
 * @return {Array<{ text_normalized: string, locations: Array }>} one item per provided value
 */
function createMlValueWithoutMetadata({ newValue }) {
  // Array.isArray also recognizes arrays created in another realm (vm context, test sandbox),
  // which `instanceof Array` would mistake for a single value.
  const values = Array.isArray(newValue) ? newValue : [newValue];

  return values.map((value) => ({
    text_normalized: value,
    locations: [],
  }));
}

/**
 * Creates a mlValue array from one or more `{ text, locations }` items.
 * Each item's text is stored as text_normalized; locations default to an empty array when not provided.
 *
 * @param  {{ text: string, locations?: Array<object> } | Array<{ text: string, locations?: Array<object> }>} newValue
 *         single item or multiple items to convert
 * @return {Array<{ text_normalized: string, locations: Array<object> }>} one mlValue item per provided item
 */
function createMlValue(newValue) {
  // Array.isArray also recognizes arrays created in another realm (vm context, test sandbox),
  // which `instanceof Array` would mistake for a single item.
  const items = Array.isArray(newValue) ? newValue : [newValue];

  return items.map(({ text, locations = [] }) => ({ text_normalized: text, locations }));
}

/**
 * Prepares ml_text values for comparison: every line feed and carriage return becomes a space,
 * then leading and trailing whitespace is removed.
 *
 * @param {Array<string>} text Array of text values.
 *
 * @returns {Array<string>} Array of cleaned text values.
 */
const cleanMlText = (text) => text.map((value) => {
  const singleLine = value.replace(/[\n\r]/g, ' ');

  return singleLine.trim();
});

// Public API of this module: every helper listed here is available to callers through `require`.
module.exports = {
  appendParagraph,
  cleanMlText,
  // createEditedClause is deprecated, exposed so that migrations still work
  createEditedClause: createMlValueWithoutMetadata,
  createMlValue,
  createMlValueWithoutMetadata,
  getAltDate,
  getDateFormatDelimiter,
  getDateFragments,
  normalizeDateFragments,
  isDateEmpty,
  getDocumentType,
  getParagraphs,
  getParagraphsWithAdditional,
  getParagraphText,
  getLocationsTexts,
  formatNumber,
};
