import { HTML_ENTITIES_LIST } from '../data/html-entities';
import { ErrorResponse } from '../message/error';

/**
 * Replacement text for the characters that are escaped when inserting text
 * into element content (`<` and `&`).
 */
const HTML_INNER_ENTITIES: Record<string, string> = { '<': '&lt;', '&': '&amp;' };

/**
 * Lazily built lookup from entity name to replacement text.
 * Stays `undefined` until `htmlEntities()` populates it.
 */
let HTML_ENTITIES: Record<string, string | undefined> | undefined;

/** Elements whose inner text is left out of the plaintext output. */
const HTML_IGNORE = new Set<string | undefined>([
  'a',
  'head',
  'link',
  'meta',
  'script',
  'style',
  'title',
]);

/**
 * Elements that never have a closing tag; the trailing slash (`<br />`) is optional.
 * `doctype` is listed so that `<!DOCTYPE ...>` can be parsed like any other void tag.
 */
const HTML_VOID = new Set<string | undefined>([
  'area',
  'base',
  'br',
  'col',
  'doctype',
  'embed',
  'hr',
  'img',
  'input',
  'link',
  'meta',
  'param',
  'source',
  'track',
  'wbr',
]);

// Block-level HTML elements (currently unused, kept for reference).
// const HTML_BLOCK = new Set<string | undefined>(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'pre', 'address', 'blockquote', 'dl', 'div', 'fieldset', 'form', 'hr', 'noscript', 'table']);

/**
 * Get the lookup table from HTML entity name to its replacement text.
 *
 * The table is decompressed from `HTML_ENTITIES_LIST` on the first call and
 * cached in `HTML_ENTITIES`; later calls return the same object.
 */
export function htmlEntities() {
  if (HTML_ENTITIES !== undefined) return HTML_ENTITIES;

  // The packed list alternates `name,value,name,value,...`.
  const tokens = HTML_ENTITIES_LIST.split(',');
  const table: Record<string, string | undefined> = {};
  for (let k = 0; k < tokens.length; k += 2) {
    table[tokens[k]!] = tokens[k + 1]!;
  }

  // A comma cannot be stored in the comma-separated list, so add it by hand.
  table['comma'] = ',';

  HTML_ENTITIES = table;
  return table;
}

/**
 * Strict recursive-descent parser for converting HTML to plaintext.
 *
 * Text is emitted with character references decoded. Inner text of elements
 * listed in `HTML_IGNORE` is dropped, and the `href` of an `<a>` that is not
 * inside an ignored element is emitted instead of its link text. Malformed
 * markup is not recovered from: it is reported as an `ErrorResponse`.
 */
export class HTMLParser {
  /** Elements whose content is raw text (never markup); it is skipped without being tokenized. */
  private static readonly RAW_TEXT = new Set<string>(['script', 'style']);
  /** Whitespace characters allowed between the parts of a tag. */
  private static readonly SPACE = ' \t\r\n\f';
  /** Characters that terminate an attribute name. */
  private static readonly NAME_END = ' \t\r\n\f=/><"\'';
  /** Characters allowed in a tag name (letters, digits, `-`, and `:` `_` `.` for namespaced/custom tags). */
  private static readonly NAME_CHAR = /[A-Za-z0-9:_.-]/;
  /** Characters allowed in a named character reference. */
  private static readonly ALNUM = /[A-Za-z0-9]/;
  /** Decimal digits of a numeric character reference. */
  private static readonly DIGIT = /[0-9]/;
  /** Hexadecimal digits of a numeric character reference. */
  private static readonly HEX_DIGIT = /[0-9A-Fa-f]/;

  /**
   * Sanitize HTML to plaintext for email.
   *
   * @param html HTML source to convert.
   * @returns The extracted plaintext, or an `ErrorResponse` describing the first parse error.
   */
  static plaintext(html: string): string | ErrorResponse {
    try {
      const plaintext: string[] = [];
      HTMLParser.inner(html, 0, plaintext, false);
      return plaintext.join('');
    } catch (e: unknown) {
      // Parser failures are plain `Error`s: report only their message. Anything else
      // (e.g. a RangeError from extremely deep nesting) keeps its full description.
      const reason = e instanceof Error && e.name === 'Error' ? e.message : String(e);
      return new ErrorResponse(`Error converting HTML to plaintext: ${reason}`);
    }
  }

  /**
   * Recursively parse inner text inside html.
   *
   * @param html Full HTML source.
   * @param i Index of the first character of the content.
   * @param plaintext Output fragments, appended to in document order.
   * @param ignore When true, text is parsed but not emitted.
   * @param open Name of the enclosing element, or `undefined` at the top level.
   * @returns Index just past the closing tag of `open`, or the end of input.
   * @throws Error on a stray `>`, an unexpected closing tag or an unknown `<!` construct.
   */
  private static inner(html: string, i: number, plaintext: string[], ignore: boolean, open?: string): number {
    // Start of the literal text that has not been flushed to the output yet.
    let s = i;

    while (i < html.length) {
      const c = html[i];

      if (c === '&') {
        // Flush pending text, then the decoded character reference.
        const [entity, escape] = HTMLParser.entity(html, i);
        if (!ignore) plaintext.push(html.slice(s, i), escape);
        i += entity.length;
        s = i;
      } else if (c === '<') {
        // Append characters so far.
        if (!ignore) plaintext.push(html.slice(s, i));

        const next = html[i + 1];
        if (next === '/') {
          // Closing tag: it must belong to the element being parsed.
          if (open === undefined) throw new Error('Unexpected closing tag');
          return HTMLParser.close(html, i + 2, open);
        }

        if (next === '!') {
          const tag = html.slice(i + 2, i + 2 + 'DOCTYPE'.length);
          if (tag.startsWith('--')) i = HTMLParser.comment(html, i + 4);
          else if (tag.toUpperCase() === 'DOCTYPE') i = HTMLParser.open(html, i + 2, plaintext, ignore);
          else throw new Error('Unexpected character ! after open tag');
        } else {
          // Open tag; returns once the whole element has been consumed.
          i = HTMLParser.open(html, i + 1, plaintext, ignore);
        }
        s = i;
      } else if (c === '>') {
        throw new Error('Unexpected >');
      } else {
        ++i;
      }
    }

    // End of input: flush whatever text is left (unclosed elements are tolerated).
    if (!ignore) plaintext.push(html.slice(s, i));
    return i;
  }

  /**
   * Parse <open> tag of html, and for non-void elements the whole element up to its close tag.
   *
   * @param html Full HTML source.
   * @param i Index of the first character of the tag name.
   * @param plaintext Output fragments.
   * @param ignore Whether the tag sits inside an ignored element.
   * @returns Index just past the element (or past the tag for void elements).
   * @throws Error on `/` without `>`, a self-closed non-void element, or `<` inside the tag.
   */
  private static open(html: string, i: number, plaintext: string[], ignore: boolean): number {
    const name = HTMLParser.ident(html, i);
    i += name.length;

    while (i < html.length) {
      switch (html[i]) {
        case ' ': case '\t': case '\r': case '\n': case '\f':
          ++i;
          break;
        case '/':
          // Explicit self-terminating tag.
          if (html[i + 1] !== '>') throw new Error('Unexpected / without />');
          if (!HTML_VOID.has(name)) throw new Error(`Invalid self-terminating element: <${name} />`);
          return i + 2;
        case '>':
          // Implicit self-terminating HTML tag.
          if (HTML_VOID.has(name)) return i + 1;
          // Script and style bodies are not markup: jump straight to their close tag.
          if (HTMLParser.RAW_TEXT.has(name)) return HTMLParser.raw(html, i + 1, name);
          // Parse children until the matching close tag.
          return HTMLParser.inner(html, i + 1, plaintext, ignore || HTML_IGNORE.has(name), name);
        case '<':
          throw new Error('Unexpected <');
        default:
          // `attribute` always consumes at least one character, so this loop terminates.
          i = HTMLParser.attribute(html, i, plaintext, name, ignore);
      }
    }

    // End of input inside the tag is tolerated.
    return i;
  }

  /**
   * Skip the content of a raw text element (`<script>`, `<style>`).
   *
   * @param html Full HTML source.
   * @param i Index of the first character after the open tag.
   * @param name Lower-case element name; must be a plain identifier as it is placed in a RegExp.
   * @returns Index just past the close tag, or the end of input when the element is never closed.
   */
  private static raw(html: string, i: number, name: string): number {
    // Case-insensitive search that does not stop at longer names such as `</scripts`.
    const closer = new RegExp(`</${name}(?![A-Za-z0-9:_.-])`, 'gi');
    closer.lastIndex = i;
    const match = closer.exec(html);
    if (match === null) return html.length;
    return HTMLParser.close(html, match.index + 2, name);
  }

  /**
   * Parse </close> tag of html.
   *
   * @param html Full HTML source.
   * @param i Index of the first character of the tag name (after `</`).
   * @param open Name of the element that is expected to be closed.
   * @returns Index just past the `>` of the close tag.
   * @throws Error when the name does not match `open` or the tag is not terminated.
   */
  private static close(html: string, i: number, open: string): number {
    const tag = HTMLParser.ident(html, i);
    if (tag !== open.toLowerCase()) throw new Error(`Found close tag: </${tag}> expected: </${open}>`);

    const end = html.indexOf('>', i + tag.length);
    if (end < 0) throw new Error('Unexpected end of input in close tag');
    return end + 1;
  }

  /**
   * Parse one attribute (`name`, `name=value`, `name="value"`, `name='value'`).
   * Inside a doctype, a bare quoted literal is accepted as well.
   *
   * The decoded `href` of an `<a>` is appended to the output unless `ignore` is set.
   *
   * @param html Full HTML source.
   * @param i Index of the first character of the attribute.
   * @param plaintext Output fragments.
   * @param open Lower-case name of the tag the attribute belongs to.
   * @param ignore Whether the tag sits inside an ignored element.
   * @returns Index just past the attribute; always greater than `i`.
   * @throws Error on a stray quote or on end of input inside the name or value.
   */
  private static attribute(html: string, i: number, plaintext: string[], open: string, ignore = false): number {
    const first = html[i];
    if (first === '"' || first === "'") {
      // Bare quoted literals only occur in a doctype (public / system identifiers).
      if (open !== 'doctype') throw new Error(`Unexpected character ${first} inside tag`);
      return i + HTMLParser.value(html, i + 1, first).length + 2;
    }

    // Attribute name: everything up to whitespace, `=`, `/`, `<`, `>` or a quote.
    const start = i;
    while (i < html.length && !HTMLParser.NAME_END.includes(html[i]!)) ++i;
    if (i >= html.length) throw new Error('Unexpected end of input inside tag name');
    const tag = html.slice(start, i).toLowerCase();

    // Whitespace is allowed on both sides of `=`.
    let j = HTMLParser.skip(html, i);
    if (html[j] !== '=') {
      // Guard against making no progress on an unexpected character.
      if (tag === '') throw new Error(`Unexpected character ${html[i]} inside tag`);
      return i;
    }
    j = HTMLParser.skip(html, j + 1);

    let value: string;
    let end: number;
    const quote = html[j];
    if (quote === '"' || quote === "'") {
      value = HTMLParser.value(html, j + 1, quote);
      end = j + value.length + 2;
    } else {
      // Unquoted value: runs until whitespace or the end of the tag.
      end = j;
      while (end < html.length && html[end] !== '>' && !HTMLParser.SPACE.includes(html[end]!)) ++end;
      if (end >= html.length) throw new Error('Unexpected end of input inside attribute value');
      value = html.slice(j, end);
    }

    if (!ignore && open === 'a' && tag === 'href') plaintext.push(HTMLParser.decode(value));
    return end;
  }

  /** Return the index of the first non-whitespace character at or after `i`. */
  private static skip(html: string, i: number): number {
    while (i < html.length && HTMLParser.SPACE.includes(html[i]!)) ++i;
    return i;
  }

  /**
   * Parse tag name inside html.
   *
   * @returns The lower-cased name starting at `i`; empty when `html[i]` is not a name character.
   * @throws Error when the input ends before the name is terminated.
   */
  private static ident(html: string, i: number): string {
    for (let end = i; end < html.length; ++end) {
      if (!HTMLParser.NAME_CHAR.test(html[end]!)) return html.slice(i, end).toLowerCase();
    }

    throw new Error('Unexpected end of input inside tag name');
  }

  /**
   * Parse value inside a quoted attribute string.
   *
   * @param html Full HTML source.
   * @param i Index of the first character after the opening quote.
   * @param quote Quote character that terminates the value.
   * @returns The raw text between the quotes.
   * @throws Error when the closing quote is missing.
   */
  private static value(html: string, i: number, quote = '"'): string {
    const end = html.indexOf(quote, i);
    if (end < 0) throw new Error('Unexpected end of input inside attribute value');
    return html.slice(i, end);
  }

  /**
   * Decode the character references of an attribute value.
   *
   * Only references terminated by `;` are decoded, so that a raw `&` in a
   * URL query (`?a=1&lang=en`) is never mistaken for an entity.
   */
  private static decode(text: string): string {
    const parts: string[] = [];
    let s = 0;

    for (let i = text.indexOf('&'); i >= 0; i = text.indexOf('&', i)) {
      const [entity, escape] = HTMLParser.entity(text, i);
      parts.push(text.slice(s, i), entity.endsWith(';') ? escape : entity);
      i += entity.length;
      s = i;
    }

    parts.push(text.slice(s));
    return parts.join('');
  }

  /**
   * Skip past comment tag.
   *
   * @param i Index of the first character after `<!--`.
   * @returns Index just past the terminating `-->`.
   * @throws Error when the comment is not terminated.
   */
  private static comment(html: string, i: number): number {
    const end = html.indexOf('-->', i);
    if (end < 0) throw new Error('Unexpected end of input inside comment');
    return end + 3;
  }

  /**
   * Parse an HTML character reference at position.
   *
   * Supports named (`&amp;`), decimal (`&#38;`) and hexadecimal (`&#x26;`)
   * references; the trailing `;` is optional. Anything that is not a known
   * reference is returned unchanged, so a bare `&` stays a `&`.
   *
   * @param html Full HTML source.
   * @param i Index of the `&`.
   * @returns The consumed source text (never empty) and the text to emit for it.
   */
  private static entity(html: string, i: number): [entity: string, escape: string] {
    const start = i;
    let end = i + 1;

    if (html[end] === '#') {
      // Numeric character reference.
      const hex = html[end + 1] === 'x' || html[end + 1] === 'X';
      const digit = hex ? HTMLParser.HEX_DIGIT : HTMLParser.DIGIT;
      const from = end + (hex ? 2 : 1);
      end = from;
      while (end < html.length && digit.test(html[end]!)) ++end;

      // `&#` without digits is ordinary text: consume only the ampersand.
      if (end === from) return ['&', '&'];

      const code = parseInt(html.slice(from, end), hex ? 16 : 10);
      if (html[end] === ';') ++end;

      // NUL, surrogates and out-of-range values decode to U+FFFD.
      const valid = code > 0 && code <= 0x10ffff && (code < 0xd800 || code > 0xdfff);
      return [html.slice(start, end), valid ? String.fromCodePoint(code) : '\ufffd'];
    }

    // Named character reference.
    while (end < html.length && HTMLParser.ALNUM.test(html[end]!)) ++end;
    const name = html.slice(start + 1, end);
    if (html[end] === ';') ++end;
    const full = html.slice(start, end);

    // The `typeof` check keeps inherited object members (`constructor`, ...) from matching.
    const escape = name === '' ? undefined : htmlEntities()[name];
    return [full, typeof escape === 'string' ? escape : full];
  }
}

/**
 * Perform some basic uglifying on HTML: remove runs of spaces, tabs and
 * line breaks that sit directly between a `>` and the following `<`.
 */
export function htmlUglify(text: string): string {
  // Cut the text at every `>` + whitespace + `<` and glue the pieces back with `><`.
  const pieces = text.split(/>[ \t\r\n]*</);
  return pieces.join('><');
}

/**
 * Sanitize text for insertion into element content by replacing every
 * `<` and `&` with its entry in `HTML_INNER_ENTITIES`.
 */
export function htmlSanitizeInner(text: string): string {
  const escapeChar = (unsafe: string): string => HTML_INNER_ENTITIES[unsafe]!;
  return text.replace(/[<&]/g, escapeChar);
}

/**
 * Sanitize a URL to be added to an href.
 *
 * Only absolute `http://` and `https://` URLs are accepted (the scheme is
 * matched case-insensitively); they are percent-encoded with `encodeURI`.
 * Anything else, including text that cannot be encoded because it contains a
 * lone surrogate, yields `about:blank`.
 *
 * @param text Untrusted URL.
 * @returns The encoded URL, or `about:blank` when the URL is rejected.
 */
export function htmlSanitizeLink(text: string) {
  if (!/^https?:\/\//i.test(text)) return 'about:blank';

  try {
    return encodeURI(text);
  } catch (e: unknown) {
    // encodeURI throws URIError on malformed UTF-16; treat such input as unsafe.
    if (e instanceof URIError) return 'about:blank';
    throw e;
  }
}