import emojiRegex from 'emoji-regex';
import { truncate } from 'lodash-es';
import { confluenceToMarkdown } from './confluenceToMarkdown';
import { docxToMarkdown } from './docxToMarkdown';
import { fileToMarkdown } from './fileToMarkdown';
import { htmlToMarkdown } from './htmlToMarkdown';
import parseTitle from './parseTitle';
import {
	readFileInputEventAsArrayBuffer,
	readFileInputEventAsText,
} from './utils';

/** Raw file payload handed from a reader to a markdown converter. */
type ImportableContent = ArrayBuffer | string;

/**
 * Describes how one kind of uploaded file is turned into markdown: the MIME
 * type it is registered under, how its raw content is read from the `File`,
 * and how that content is converted.
 */
interface ImportableFile {
	/** MIME type this entry is registered under. */
	type: string;
	/** Reads the raw content out of the uploaded file. */
	getContent: (file: File) => Promise<ImportableContent>;
	/** Converts the raw content to markdown, synchronously or asynchronously. */
	getMarkdown: (content: ImportableContent) => Promise<string> | string;
}

// Table of supported import formats. documentImporter uses the first entry
// that matches the upload, so the order of entries is significant.
const importMapping: ImportableFile[] = [
	{
		// Read as text and converted with confluenceToMarkdown.
		// NOTE(review): presumably Confluence exports arrive with this MIME
		// type; confirm before changing.
		type: 'application/msword',
		getContent: readFileInputEventAsText,
		getMarkdown: confluenceToMarkdown,
	},
	{
		// Generic binary type: documentImporter only accepts this entry when
		// the file name has a "docx" extension.
		type: 'application/octet-stream',
		getContent: readFileInputEventAsArrayBuffer,
		getMarkdown: docxToMarkdown,
	},
	{
		type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
		getContent: readFileInputEventAsArrayBuffer,
		getMarkdown: docxToMarkdown,
	},
	{
		type: 'text/html',
		getContent: readFileInputEventAsText,
		getMarkdown: htmlToMarkdown,
	},
	{
		type: 'text/plain',
		getContent: readFileInputEventAsText,
		getMarkdown: fileToMarkdown,
	},
	{
		// Also selected by documentImporter for any file with an "md"
		// extension when no earlier entry matched by MIME type.
		type: 'text/markdown',
		getContent: readFileInputEventAsText,
		getMarkdown: fileToMarkdown,
	},
];

/**
 * Returns the lowercased extension of a file name, without the leading dot.
 *
 * @param fileName The file name to inspect, e.g. `Report.DOCX`.
 * @returns The text after the last dot, lowercased, or an empty string when
 * the name contains no dot or ends with one.
 */
function getExtension(fileName: string): string {
	const lastDot = fileName.lastIndexOf('.');

	// A name without any dot has no extension. Returning the whole name here
	// would make a file literally called "docx" or "md" look like that type.
	if (lastDot === -1) {
		return '';
	}

	return fileName.slice(lastDot + 1).toLowerCase();
}

/**
 * Converts an uploaded file to markdown and derives a document title for it.
 *
 * The converter is chosen from `importMapping` by MIME type, with two
 * special cases: `application/octet-stream` is only accepted for files with a
 * `docx` extension, and any file with an `md` extension may fall back to the
 * `text/markdown` entry.
 *
 * The title defaults to the file name without its extension. It is replaced
 * by the leading `# ` heading when the converted text starts with one, is
 * prefixed with an emoji found at the very top of the text, and is truncated
 * to 120 characters.
 *
 * @param file The uploaded file to read.
 * @param options.mimeType MIME type reported for the file.
 * @param options.fileName Original name of the file, including extension.
 * @returns The converted markdown `text` and the derived `title`.
 * @throws Error when no entry in `importMapping` supports the file.
 */
async function documentImporter(
	file: File,
	{
		mimeType,
		fileName,
	}: {
		mimeType: string;
		fileName: string;
	}
): Promise<{
	text: string;
	title: string;
}> {
	const extension = getExtension(fileName);

	// First matching entry wins, so the order of importMapping matters.
	const fileInfo = importMapping.find((item) => {
		if (item.type === mimeType) {
			// A generic binary upload is only trusted when it is named *.docx.
			return mimeType !== 'application/octet-stream' || extension === 'docx';
		}

		// Markdown files are recognised by extension when the MIME type did
		// not match this entry.
		return item.type === 'text/markdown' && extension === 'md';
	});

	if (!fileInfo) {
		throw new Error(`File type ${mimeType} not supported`);
	}

	// Default title: the file name with its final extension stripped.
	let title = fileName.replace(/\.[^/.]+$/, '');
	const content = await fileInfo.getContent(file);
	let text = (await fileInfo.getMarkdown(content)).trim();

	// Find and extract the first emoji; in some imports it sits outside of the
	// title, at the very top of the document.
	const matches = emojiRegex().exec(text);
	const firstEmoji = matches ? matches[0] : undefined;
	const leadingEmoji =
		firstEmoji && text.startsWith(firstEmoji) ? firstEmoji : undefined;
	if (leadingEmoji) {
		text = text.slice(leadingEmoji.length).trim();
	}

	// If the first line of the imported text looks like a markdown heading
	// then we can use this as the document title.
	if (text.startsWith('# ')) {
		title = parseTitle(text).title;

		const heading = `# ${title}`;
		if (text.startsWith(heading)) {
			text = text.slice(heading.length);
		} else {
			// The parsed title is not a verbatim prefix of the text (for
			// example because of extra whitespace in the heading), so searching
			// for it could leave the heading in the body or remove a later
			// occurrence. Drop the heading line itself instead.
			const lineEnd = text.indexOf('\n');
			text = lineEnd === -1 ? '' : text.slice(lineEnd + 1);
		}
		text = text.trimStart();
	}

	// If we extracted an emoji from above the title, add it back as a prefix.
	if (leadingEmoji) {
		title = `${leadingEmoji} ${title}`;
	}

	// Replace any <br> generated by the turndown plugin with escaped newlines
	// to match our hardbreak parser.
	text = text.replace(/<br>/gi, '\\n');

	// It's better to truncate particularly long titles than fail the import.
	title = truncate(title, { length: 120 });

	return {
		text,
		title,
	};
}

export default documentImporter;
