import fs from 'fs';
import { parse, HTMLElement } from 'node-html-parser';

/**
 * Fields extracted from a single article detail page.
 *
 * The parser in this file initialises every field to an empty string, so an
 * empty value means nothing was found for that field.
 */
interface ArticleDetail {
  /** Headline text of the article. */
  title: string;
  /** Article URL, taken from the canonical link or the `og:url` meta tag. */
  link: string;
  /** Featured image URL, taken from `og:image` or the first matching `<img>`. */
  image: string;
  /** Body text; paragraphs are joined with a blank line between them. */
  content: string;
  /** Publish time: a `datetime` attribute value, or the visible timestamp text as a fallback. */
  publishDate: string;
  /** Byline name text. */
  author: string;
}

/**
 * Parse a saved article detail page and extract its main fields.
 *
 * Each field is looked up with a primary selector and, where one exists, a
 * fallback; a field for which nothing matches is returned as an empty string,
 * so a non-null result does not guarantee that any given field is populated.
 *
 * @param htmlFilePath - Path to the HTML file, read synchronously as UTF-8
 * @returns The extracted article details, or `null` when reading or parsing
 *   throws (the error is logged to the console)
 */
function parseArticleDetail(htmlFilePath: string): ArticleDetail | null {
  try {
    const root = parse(fs.readFileSync(htmlFilePath, 'utf-8'));

    // 1. Title
    const title = root.querySelector('.headline__text')?.text.trim() ?? '';

    // 2. Link: canonical URL first, Open Graph URL as fallback.
    // `||` (not `??`) is deliberate here and below: an empty attribute value
    // must fall through to the next source just like a missing one.
    const link =
      root.querySelector('link[rel="canonical"]')?.getAttribute('href') ||
      root.querySelector('meta[property="og:url"]')?.getAttribute('content') ||
      '';

    // 3. Featured image: Open Graph image first, otherwise the first image
    // (in document order) found inside any of the known image containers.
    let image = root.querySelector('meta[property="og:image"]')?.getAttribute('content') || '';
    if (!image) {
      const firstImage = root.querySelector('.article__content img, .image__picture img, picture img');
      image = firstImage?.getAttribute('src') || firstImage?.getAttribute('data-src') || '';
    }

    // 4. Publish date: machine-readable `datetime` attribute first, visible
    // timestamp text as fallback.
    const publishDate =
      root.querySelector('time[datetime]')?.getAttribute('datetime') ||
      root.querySelector('.timestamp__time-since')?.text.trim() ||
      '';

    // 5. Author
    const author = root.querySelector('.byline__name')?.text.trim() ?? '';

    // 6. Content
    // The candidate containers are queried one at a time, most specific first.
    // A single comma-separated selector would return whichever candidate comes
    // first in the document, letting the generic `.l-container` win over the
    // real article body.
    const articleBody =
      root.querySelector('.article__content') ??
      root.querySelector('.article-body') ??
      root.querySelector('.l-container');

    // Keep paragraphs longer than 20 characters and drop any that mention
    // "CNN" or "©" (presumably branding/copyright boilerplate).
    const bodyParagraphs: string[] = articleBody
      ? articleBody
          .querySelectorAll('p.paragraph, p')
          .map((p: HTMLElement) => p.text.trim())
          .filter((text: string) => text.length > 20 && !text.includes('CNN') && !text.includes('©'))
      : [];

    // Last resort when the container yielded nothing: the first ten
    // paragraphs anywhere on the page that are longer than 50 characters.
    const paragraphs: string[] =
      bodyParagraphs.length > 0
        ? bodyParagraphs
        : root
            .querySelectorAll('p')
            .map((p: HTMLElement) => p.text.trim())
            .filter((text: string) => text.length > 50)
            .slice(0, 10);

    return {
      title,
      link,
      image,
      content: paragraphs.join('\n\n'),
      publishDate,
      author
    };
  } catch (error) {
    // Best-effort parser: report the failure and let the caller decide what
    // to do with a missing article instead of propagating the exception.
    console.error('Error parsing article detail:', error);
    return null;
  }
}

/**
 * Parse a batch of article detail pages.
 *
 * Files that fail to parse, and pages whose extracted title is empty, are
 * left out of the result.
 *
 * @param htmlFiles - Paths of the HTML files to parse, processed in order
 * @returns The parsed articles that have a non-empty title
 */
function parseMultipleArticles(htmlFiles: string[]): ArticleDetail[] {
  const parsed: ArticleDetail[] = [];
  for (const htmlFile of htmlFiles) {
    const detail = parseArticleDetail(htmlFile);
    // Skip both parse failures (null) and pages without a title.
    if (!detail?.title) continue;
    parsed.push(detail);
  }
  return parsed;
}

/**
 * Manual smoke test: parses one hard-coded sample page, prints the extracted
 * fields to the console and writes them to `data/article-detail.json`.
 */
function main(): void {
  const samplePage = 'source-pages/china-people-cafe-change-name-intl-hnk.html';
  const outputDir = 'data';

  // Make sure the output folder exists before anything is written into it.
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir);
  }

  console.log('Parsing article detail...\n');
  const parsed = parseArticleDetail(samplePage);

  if (!parsed) {
    console.log('Failed to parse article');
    return;
  }

  const { title, link, image, author, publishDate, content } = parsed;
  const summaryFields: [string, string][] = [
    ['Title', title],
    ['Link', link],
    ['Image', image],
    ['Author', author],
    ['Publish Date', publishDate]
  ];

  console.log('=== ARTICLE DETAILS ===\n');
  for (const [label, value] of summaryFields) {
    console.log(`${label}: ${value}\n`);
  }
  // Only the first 500 characters of the body are shown.
  console.log(`Content Preview:\n${content.substring(0, 500)}...\n`);
  console.log(`\nTotal content length: ${content.length} characters`);

  fs.writeFileSync('data/article-detail.json', JSON.stringify(parsed, null, 2), 'utf-8');
  console.log('\nArticle details saved to data/article-detail.json');
}

// Run main.
// NOTE(review): this call is unconditional, so it also executes (creating the
// data folder and writing the JSON file) whenever this module is imported for
// the exports below — confirm that is intended.
main();

// Export the parsing functions for use by other modules.
export { parseArticleDetail, parseMultipleArticles };