import DiffMatchPatch from 'diff-match-patch';
import _ from 'lodash';

// Matches one character that is not an ASCII letter, digit or underscore.
// Without the `u` flag, `\W` also matches non-ASCII letters, so those count
// as boundaries too.
const WORD_BOUNDARY_PATTERN = /\W/;

/**
 * Finds the first word-boundary character at or after `startIndex`.
 *
 * @param target - Text to scan.
 * @param startIndex - Position at which scanning begins (inclusive).
 * @returns Index of the first character matching `WORD_BOUNDARY_PATTERN`,
 *   or -1 when the rest of `target` contains none.
 */
function indexOfWordBoundary(target: string, startIndex: number) {
  let position = startIndex;
  while (position < target.length) {
    const isBoundary = WORD_BOUNDARY_PATTERN.test(target[position]);
    if (isBoundary) {
      return position;
    }
    position += 1;
  }
  return -1;
}

/**
 * Splits `text` into tokens and reports each one, in order, to `callback`.
 *
 * A token is either a maximal run of non-boundary characters (a "word") or a
 * single boundary character as located by `indexOfWordBoundary`. Every
 * character of `text` is emitted exactly once, so concatenating the tokens
 * reproduces the input. Empty tokens are never emitted, and an empty `text`
 * produces no callbacks.
 *
 * @param text - Text to split.
 * @param callback - Invoked once per token, left to right.
 */
function tokenize(text: string, callback: (token: string) => void): void {
  let wordStart = 0;
  while (wordStart < text.length) {
    const boundary = indexOfWordBoundary(text, wordStart);
    if (boundary === -1) {
      // No more boundaries: the remainder of the text is one final word.
      callback(text.substring(wordStart));
      return;
    }
    if (boundary > wordStart) {
      callback(text.substring(wordStart, boundary));
    }
    // The boundary character itself is always its own token.
    callback(text[boundary]);
    wordStart = boundary + 1;
  }
}

/**
 * Encodes two texts as strings in which every character stands for one token
 * produced by `tokenize`, so that a character-level diff of the encoded
 * strings is effectively a token-level diff of the originals.
 *
 * Both texts share one token table: equal tokens map to the same character
 * in `chars1` and `chars2`.
 *
 * NOTE: indices are written with `String.fromCharCode`, which keeps only the
 * low 16 bits. Inputs with more than 65535 distinct tokens would therefore
 * produce colliding characters; this function does not guard against that.
 *
 * @param text1 - First text to encode.
 * @param text2 - Second text to encode.
 * @returns `chars1` / `chars2`, the encoded texts, and `lineArray`, the token
 *   table where `lineArray[c]` is the token for char code `c`. Index 0 holds
 *   an empty-string placeholder, so no token is ever encoded as char code 0.
 */
function diffWordsToChars(text1: string, text2: string) {
  const wordArray: string[] = [''];
  // A Map (rather than a plain object) keeps tokens such as "hasOwnProperty",
  // "constructor" or "__proto__" from colliding with Object.prototype members.
  const wordIndex = new Map<string, number>();

  const encode = (text: string) => {
    let chars = '';
    tokenize(text, (word: string) => {
      let index = wordIndex.get(word);
      if (index === undefined) {
        index = wordArray.length;
        wordIndex.set(word, index);
        wordArray.push(word);
      }
      chars += String.fromCharCode(index);
    });
    return chars;
  };

  const chars1 = encode(text1);
  const chars2 = encode(text2);
  return { chars1, chars2, lineArray: wordArray };
}

/**
 * Computes a diff between two texts at token granularity.
 *
 * The texts are encoded with `diffWordsToChars`, the encoded strings are
 * diffed with `checklines` set to `false`, and the resulting diff is expanded
 * back to the original tokens via `diff_charsToLines_`.
 *
 * @param differ - diff-match-patch instance used for the diff.
 * @param text1 - Original text.
 * @param text2 - Modified text.
 * @returns The diff list returned by `diff_main`, after token expansion.
 */
function diffWordMode(differ: DiffMatchPatch, text1: string, text2: string) {
  const encoded = diffWordsToChars(text1, text2);
  const tokenDiffs = differ.diff_main(encoded.chars1, encoded.chars2, false);
  // eslint-disable-next-line no-underscore-dangle
  differ.diff_charsToLines_(tokenDiffs, encoded.lineArray);
  return tokenDiffs;
}

/**
 * Diffs two texts with diff-match-patch.
 *
 * @param text1 - Original text.
 * @param text2 - Modified text.
 * @param improveReadability - When true, diff token by token and then run
 *   `diff_cleanupSemantic` on the result; when false, return the plain
 *   `diff_main` output for the raw texts.
 * @returns The list of diffs.
 */
export function getDiffs(text1: string, text2: string, improveReadability: boolean) {
  const differ = new DiffMatchPatch();

  if (!improveReadability) {
    return differ.diff_main(text1, text2);
  }

  const wordDiffs = diffWordMode(differ, text1, text2);
  differ.diff_cleanupSemantic(wordDiffs);
  return wordDiffs;
}

/**
 * For every character of `text`, finds the lowest `count` among the trigrams
 * whose words match a span covering that character.
 *
 * Each trigram's `words` value is split on newlines; the parts are
 * regex-escaped and joined so that one or more characters outside
 * `[a-z0-9$]` may separate them. Matching is case-insensitive and global.
 *
 * @param text - Text to scan.
 * @param trigrams - Items read through their `words` (newline-separated
 *   string) and `count` properties.
 * @returns An array of length `text.length`. Positions covered by at least
 *   one match hold the smallest matching `count`; uncovered positions are
 *   left unset.
 */
export function getLowestCharacterTrigramCounts(text: string, trigrams: Array<any>) {
  const highlightChars = new Array(text.length);
  trigrams.forEach(tri => {
    const re = new RegExp(tri.words.split('\n').map(_.escapeRegExp).join('[^a-z0-9$]+'), 'ig');

    for (let match = re.exec(text); match !== null; match = re.exec(text)) {
      const start = match.index;
      const end = start + match[0].length;
      for (let i = start; i < end; i += 1) {
        const current = highlightChars[i];
        // Compare against undefined explicitly so a stored count of 0 is kept
        // as the minimum instead of being treated as "not set yet".
        highlightChars[i] = Math.min(current === undefined ? tri.count : current, tri.count);
      }
      if (end === start) {
        // A zero-length match does not advance lastIndex; step past it so the
        // loop cannot spin on the same position.
        re.lastIndex += 1;
      }
    }
  });

  return highlightChars;
}
