import JSZip from 'jszip';

/**
 * Text content of a single table cell.
 */
export interface TableCell {
  // Text extracted from the cell's text body (empty string when the cell has no text).
  text: string;
}

/**
 * A single row of a table, holding its cells in document order.
 */
export interface TableRow {
  cells: TableCell[];
}

/**
 * A table found on a slide, stored as an ordered list of rows.
 */
export interface Table {
  // Rows in the order they appear in the table's XML.
  rows: TableRow[];
}

/**
 * Information extracted from a single slide of a presentation.
 */
export interface Slide {
  slideNumber: number; // Number parsed from the slide's file name (slide1.xml -> 1); 0 if the name has no digits
  title?: string;     // Text of the first title placeholder (type "title" or "ctrTitle") that has text
  textContent: string; // Newline-separated text of the slide's non-title shapes, trimmed
  tables: Table[];     // Tables found inside the slide's graphic frames
  slideNotes?: string; // Speaker-notes text for the slide; undefined when no notes were found
}

/**
 * Downloads a PowerPoint (.pptx) file from a URL and parses it into slides.
 *
 * The URL must be syntactically valid. A path that does not end in `.pptx`, or a
 * response whose `Content-Type` is neither the PPTX MIME type nor
 * `application/octet-stream`, only produces a console warning; the download and
 * parse are still attempted.
 *
 * @param url - Absolute URL of the .pptx file to fetch.
 * @returns The slides extracted by `parsePowerPointFromZip`.
 * @throws Error with message `Invalid URL provided: "<url>"` when `url` cannot be parsed.
 * @throws Error with message `Could not fetch PPTX data from URL. <reason>` when the
 *   request fails, the response status is not OK, or the body cannot be read.
 *   Errors raised while loading the archive or parsing slides propagate unchanged.
 */
export const parsePowerPointFromUrl = async (url: string): Promise<Slide[]> => {
  console.log(`Attempting to parse PPTX from URL: ${url}`);

  // 1. Validate the URL format; the extension check is advisory only.
  let parsedUrl: URL;
  try {
    parsedUrl = new URL(url);
  } catch {
    throw new Error(`Invalid URL provided: "${url}"`);
  }
  if (!parsedUrl.pathname.toLowerCase().endsWith('.pptx')) {
    console.warn(`URL "${url}" does not end with .pptx. Proceeding with fetch.`);
  }

  let arrayBuffer: ArrayBuffer;
  try {
    // 2. Fetch the file content from the URL.
    console.log(`Fetching content from ${url}...`);
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`HTTP error fetching file: ${response.status} ${response.statusText}`);
    }

    // Servers often mislabel downloads, so an unexpected Content-Type is only a warning.
    const contentType = response.headers.get('content-type');
    const looksLikePptx =
      !contentType ||
      contentType.includes('application/vnd.openxmlformats-officedocument.presentationml.presentation') ||
      contentType.includes('application/octet-stream');
    if (!looksLikePptx) {
      console.warn(`Content-Type header "${contentType}" might not indicate a PPTX file. Trying anyway.`);
    }

    arrayBuffer = await response.arrayBuffer();
    console.log(`Successfully fetched ${arrayBuffer.byteLength} bytes.`);
  } catch (error: unknown) {
    console.error(`Error fetching or reading data from URL "${url}":`, error);
    // Anything can be thrown in JavaScript; only Error instances are guaranteed a message.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Could not fetch PPTX data from URL. ${reason}`);
  }

  // 3. Open the archive and hand it to the shared slide parser.
  const zip = await JSZip.loadAsync(arrayBuffer);
  console.log("Fetched data loaded into JSZip.");

  return await parsePowerPointFromZip(zip);
};

/**
 * Parses a user-supplied `.pptx` File into slides.
 *
 * @param file - File whose name must end in `.pptx` (case-insensitive).
 * @returns The slides extracted by `parsePowerPointFromZip`.
 * @throws Error when the file name does not end in `.pptx`. Any error raised while
 *   reading, unzipping or parsing the file is logged to the console and rethrown as-is.
 */
export const readSlidesFromPPTXFile = async (file: File): Promise<Slide[]> => {
  const hasPptxExtension = file.name.toLowerCase().endsWith('.pptx');
  if (!hasPptxExtension) {
    throw new Error("Invalid file type. Only .pptx files are supported by this function.");
  }

  try {
    const fileBytes = await file.arrayBuffer();
    const archive = await JSZip.loadAsync(fileBytes);
    const parsedSlides = await parsePowerPointFromZip(archive);
    return parsedSlides;
  } catch (error) {
    // Log for diagnostics, then let the caller see the original error.
    console.error("Error processing PPTX file:", error);
    throw error;
  }
};

/**
 * Extracts titles, body text, tables and speaker notes from an opened .pptx archive.
 *
 * Slides are the `slide*.xml` entries under `ppt/slides`, returned in ascending order
 * of the number in their file name. A slide whose processing throws is logged and
 * skipped so that one bad slide does not discard the rest.
 *
 * Speaker notes are awaited before the slide is emitted. The notes part is located
 * through the slide's relationships part (`ppt/slides/_rels/<slide>.rels`), because
 * notes parts are numbered independently of slides. Only when a slide has no
 * relationships part does the lookup fall back to `notesSlide<N>.xml` with the
 * slide's own number.
 *
 * Requires a global `DOMParser`.
 *
 * @param zip - JSZip instance loaded with the contents of the .pptx file.
 * @returns One `Slide` per successfully processed slide part.
 */
const parsePowerPointFromZip = async (zip: JSZip): Promise<Slide[]> => {
  const DRAWINGML_NS = 'http://schemas.openxmlformats.org/drawingml/2006/main';
  const PRESENTATIONML_NS = 'http://schemas.openxmlformats.org/presentationml/2006/main';
  const SLIDES_DIR = 'ppt/slides';
  const NOTES_DIR = 'ppt/notesSlides';

  const slides: Slide[] = [];
  const parser = new DOMParser(); // Parses the XML parts stored inside the archive

  // --- Helper Functions ---

  /** Parses the text of an XML part into a DOM document. */
  const parseXml = (xml: string): Document => parser.parseFromString(xml, 'application/xml');

  /**
   * Extracts the text of an element's <a:t> runs. Runs inside one <a:p> paragraph are
   * joined with a space, non-empty paragraphs are joined with a newline.
   */
  const extractTextFromElement = (element: Element | null | undefined): string => {
    if (!element) return '';

    const runsToText = (container: Element): string =>
      Array.from(container.getElementsByTagNameNS(DRAWINGML_NS, 't'))
        .map((run) => run.textContent ?? '')
        .join(' ')
        .trim();

    const paragraphs = Array.from(element.getElementsByTagNameNS(DRAWINGML_NS, 'p'));
    if (paragraphs.length === 0) {
      // No paragraph wrappers below this element: treat it as a single paragraph.
      return runsToText(element);
    }
    return paragraphs
      .map(runsToText)
      .filter((line) => line.length > 0)
      .join('\n');
  };

  /**
   * Returns the `type` attribute of a shape's placeholder (<p:nvPr>/<p:ph>), or null.
   * Namespace-aware lookups are used rather than prefix-escaped CSS selectors, whose
   * handling of XML prefixes depends on the selector engine.
   */
  const placeholderType = (shapeElement: Element): string | null => {
    const nvPr = shapeElement.getElementsByTagNameNS(PRESENTATIONML_NS, 'nvPr')[0];
    const placeholder = nvPr?.getElementsByTagNameNS(PRESENTATIONML_NS, 'ph')[0];
    return placeholder?.getAttribute('type') ?? null;
  };

  /** Checks if a shape is a title placeholder. */
  const isTitleShape = (shapeElement: Element): boolean => {
    const type = placeholderType(shapeElement);
    return type === 'title' || type === 'ctrTitle';
  };

  /** Parses an <a:tbl> element into our Table structure. */
  const parseTableElement = (tableElement: Element): Table => ({
    rows: Array.from(tableElement.getElementsByTagNameNS(DRAWINGML_NS, 'tr')).map(
      (rowElement): TableRow => ({
        cells: Array.from(rowElement.getElementsByTagNameNS(DRAWINGML_NS, 'tc')).map((cellElement) => ({
          // Text within a cell lives inside its <a:txBody>
          text: extractTextFromElement(cellElement.getElementsByTagNameNS(DRAWINGML_NS, 'txBody')[0]),
        })),
      }),
    ),
  });

  /** Resolves a relationship Target against the directory of the part that owns it. */
  const resolveZipPath = (baseDir: string, target: string): string => {
    if (target.startsWith('/')) return target.slice(1); // Absolute within the package
    const segments = baseDir.split('/').filter((segment) => segment.length > 0);
    for (const segment of target.split('/')) {
      if (segment === '..') {
        segments.pop();
      } else if (segment !== '' && segment !== '.') {
        segments.push(segment);
      }
    }
    return segments.join('/');
  };

  /** Finds the notes part belonging to a slide, or null when the slide has none. */
  const findNotesEntry = async (
    slideFileName: string,
    slideNumber: number,
  ): Promise<JSZip.JSZipObject | null> => {
    const relsEntry = zip.file(`${SLIDES_DIR}/_rels/${slideFileName}.rels`);
    if (!relsEntry) {
      // No relationships part: fall back to matching by file number.
      return zip.file(`${NOTES_DIR}/notesSlide${slideNumber}.xml`);
    }
    const relsDoc = parseXml(await relsEntry.async('string'));
    const notesRelationship = Array.from(relsDoc.getElementsByTagNameNS('*', 'Relationship')).find(
      (relationship) => (relationship.getAttribute('Type') ?? '').endsWith('/notesSlide'),
    );
    const target = notesRelationship?.getAttribute('Target');
    return target ? zip.file(resolveZipPath(SLIDES_DIR, target)) : null;
  };

  /**
   * Reads the speaker notes of a slide. Prefers the shape whose placeholder type is
   * `body`; otherwise uses the first <p:txBody> in the notes part. Failures are logged
   * and reported as "no notes" so they never discard the slide itself.
   */
  const readSlideNotes = async (slideFileName: string, slideNumber: number): Promise<string | undefined> => {
    try {
      const notesEntry = await findNotesEntry(slideFileName, slideNumber);
      if (!notesEntry) return undefined;

      const notesDoc = parseXml(await notesEntry.async('string'));
      const bodyShape = Array.from(notesDoc.getElementsByTagNameNS(PRESENTATIONML_NS, 'sp')).find(
        (shape) => placeholderType(shape) === 'body',
      );
      const textBody = bodyShape
        ? bodyShape.getElementsByTagNameNS(PRESENTATIONML_NS, 'txBody')[0]
        : notesDoc.getElementsByTagNameNS(PRESENTATIONML_NS, 'txBody')[0];
      return extractTextFromElement(textBody);
    } catch (error) {
      console.error(`Error reading notes for slide ${slideFileName}:`, error);
      return undefined;
    }
  };

  // --- Main Parsing Logic ---

  // Collect slide parts (e.g. "slide1.xml", "slide10.xml") with the number from their name.
  const slideFiles: { name: string; slideNumber: number; entry: JSZip.JSZipObject }[] = [];
  zip.folder(SLIDES_DIR)?.forEach((relativePath, zipEntry) => {
    if (!zipEntry.dir && relativePath.startsWith('slide') && relativePath.endsWith('.xml')) {
      const slideNumber = parseInt(relativePath.match(/(\d+)/)?.[0] ?? '0', 10);
      slideFiles.push({ name: relativePath, slideNumber, entry: zipEntry });
    }
  });

  // Numeric sort so that slide10 comes after slide9, not after slide1.
  slideFiles.sort((a, b) => a.slideNumber - b.slideNumber);

  for (const slideFile of slideFiles) {
    try {
      const xmlDoc = parseXml(await slideFile.entry.async('string'));

      let slideTitle: string | undefined;
      const textParts: string[] = [];

      // Process shapes (<p:sp>) for text and titles
      for (const shape of Array.from(xmlDoc.getElementsByTagNameNS(PRESENTATIONML_NS, 'sp'))) {
        const shapeText = extractTextFromElement(
          shape.getElementsByTagNameNS(PRESENTATIONML_NS, 'txBody')[0],
        );
        if (!shapeText) continue;

        if (isTitleShape(shape)) {
          // Only the first title with text is kept; later title shapes are ignored.
          if (slideTitle === undefined) slideTitle = shapeText;
        } else {
          textParts.push(shapeText);
        }
      }

      // Process graphic frames (<p:graphicFrame>) for tables (<a:tbl>)
      const slideTables = Array.from(xmlDoc.getElementsByTagNameNS(PRESENTATIONML_NS, 'graphicFrame'))
        .map((frame) => frame.getElementsByTagNameNS(DRAWINGML_NS, 'tbl')[0])
        .filter((tableElement): tableElement is Element => tableElement != null)
        .map(parseTableElement);

      // Awaited here so the notes are present before the slide is emitted.
      const slideNotes = await readSlideNotes(slideFile.name, slideFile.slideNumber);

      slides.push({
        slideNumber: slideFile.slideNumber,
        title: slideTitle,
        textContent: textParts.join('\n').trim(),
        tables: slideTables,
        slideNotes: slideNotes,
      });
    } catch (error) {
      // Best effort: a slide that fails to parse is skipped, the rest are still returned.
      console.error(`Error processing slide ${slideFile.name}:`, error);
    }
  }

  return slides;
};
