import { BufferLike } from "./binary";
import { errorResponse } from "./message";
import { XmlParser } from "./xml";
import { Unzip, zipExtract } from "./zip";

/**
 * Path, inside the DOCX archive, of the entry that `docxExtract` pulls out
 * with `zipExtract` and converts to plaintext.
 */
export const DOCX_DOCUMENT_PATH = 'word/document.xml';

/**
 * Parse contents of DOCX into plaintext.
 *
 * `xml` is parsed with `parser` as `text/xml`, and every `w:p` element found
 * in the resulting document becomes one paragraph of output. Within a
 * paragraph, each `w:t` contributes its text content (a single space when
 * that content is empty) and each `w:br` contributes a newline; every other
 * element is ignored.
 *
 * A `w:p` nested inside another `w:p` (e.g. text-box content anchored in a
 * run) is emitted as its own paragraph only; its text is not repeated in the
 * enclosing paragraph.
 *
 * @param xml - Markup of the document part to convert.
 * @param parser - XML parser used to build the DOM that is walked.
 * @returns The paragraphs, in document order, separated by blank lines (`\n\n`).
 */
export function docxPlaintext(xml: string, parser: XmlParser): string {
  const doc = parser.parseFromString(xml, 'text/xml');
  const paragraphs: string[] = [];

  for (const paragraph of Array.from(doc.getElementsByTagName('w:p'))) {
    // Elements that live inside a nested paragraph belong to that paragraph,
    // which is visited on its own by the outer loop. Collect them so they are
    // not counted a second time as part of this one.
    const ownedByNested = new Set<unknown>();
    for (const inner of Array.from(paragraph.getElementsByTagName('w:p'))) {
      for (const owned of Array.from(inner.getElementsByTagName('*'))) {
        ownedByNested.add(owned);
      }
    }

    let text = '';
    for (const element of Array.from(paragraph.getElementsByTagName('*'))) {
      if (ownedByNested.has(element)) continue;

      switch (element.tagName) {
      case 'w:br':
        text += '\n';
        break;
      case 'w:t':
        // An empty text run is rendered as a single space.
        text += element.textContent || ' ';
        break;
      }
    }

    paragraphs.push(text);
  }

  return paragraphs.join('\n\n');
}

/**
 * Extract plaintext from DOCX file.
 *
 * Pulls the `DOCX_DOCUMENT_PATH` entry out of `data` via `zipExtract` and
 * converts it with `docxPlaintext`.
 *
 * @param unzip - Forwarded unchanged to `zipExtract`.
 * @param parser - XML parser forwarded to `docxPlaintext`.
 * @param data - Archive contents, forwarded to `zipExtract`.
 * @returns The extraction result itself when `errorResponse` flags it,
 *   otherwise the plaintext of the extracted document.
 */
export async function docxExtract(unzip: Unzip, parser: XmlParser, data: BufferLike) {
  const xml = await zipExtract(unzip, data, DOCX_DOCUMENT_PATH);
  if (errorResponse(xml)) return xml;
  // docxPlaintext is synchronous, so there is nothing to await here.
  return docxPlaintext(xml, parser);
}