import type MarkdownIt from 'markdown-it';
import type Token from 'markdown-it/lib/token';

/**
 * This is how line breaks work for us:
 *
 * 1. A \n character that is not preceded by 2 or more spaces or by a backslash will be parsed as a 'softbreak' token by markdown-it.
 * This will be rendered as a space in the editor according to the markdown specs: https://spec.commonmark.org/0.31.2/#soft-line-breaks
 *
 * 2. A \n character preceded by 2 or more spaces (  \n) or a backslash (\\n) will be parsed as a 'hardbreak' token by markdown-it. See below for more on the 'hardbreak' token.
 *
 * A 'hardbreak' token will break any paragraph into two paragraphs by the code below.
 *
 * In addition to breaking paragraphs, the code below will also convert any double backslashes in a line (\\) into a new empty line.
 * This is a workaround since markdown doesn't support empty lines.
 */

// Matches the literal two-character sequence backslash + "n" (as typed in the
// text, not an actual newline character) when it is not escaped by a preceding
// backslash; a sequence at the very start of the string also matches.
//
// A negative lookbehind (ES2018) is used instead of a consuming `[^\\]` so the
// character in front of the sequence is NOT part of the match. This matters
// because the same pattern is handed to String.prototype.split: anything the
// pattern consumes is removed from the resulting parts.
const UNESCAPED_SOFT_BREAK_REGEX = /(?<!\\)\\n/;

/**
 * Tells whether a token is the "empty line" marker: a `text` token whose
 * entire content is a single backslash character.
 *
 * @param token - The token to inspect.
 * @returns `true` only for a `text` token with content exactly `\`.
 */
function isEmptyLine(token: Token) {
	if (token.type !== 'text') {
		return false;
	}

	return token.content === '\\';
}

/**
 * Tells whether a token is a `hardbreak` token.
 *
 * @param token - The token to inspect.
 * @returns `true` when the token's type is exactly `'hardbreak'`.
 */
function isHardbreak(token: Token) {
	const { type } = token;

	return type === 'hardbreak';
}

/**
 * markdown-it plugin that registers a core rule named "breaks", placed right
 * after the "inline" rule. For every token except the one at index 0 the rule
 * does two things:
 *
 * 1. If the token is an `inline` token whose content contains an unescaped
 *    literal `\n` sequence, its children are rebuilt: each child (other than
 *    `code_inline`) whose content splits on that sequence is replaced by
 *    trimmed `text` tokens separated by `br` tokens. Only the content is
 *    carried over to the replacement `text` tokens.
 * 2. If the token's children contain a `hardbreak` token or an "empty line"
 *    marker, the token and its two neighbours are replaced by a run of
 *    paragraphs, one per segment between hard breaks. Empty-line markers have
 *    their content cleared and stay in their paragraph.
 *
 * NOTE(review): step 2 replaces the tokens at `i - 1` and `i + 1` without
 * inspecting them; presumably they are always the surrounding
 * paragraph_open / paragraph_close pair — confirm for other containers.
 *
 * @param md - The markdown-it instance to install the rule on.
 */
export default function markdownBreakToParagraphs(md: MarkdownIt) {
	// Hook in once the "inline" core rule has produced the child tokens.
	md.core.ruler.after('inline', 'breaks', (state) => {
		const { Token, tokens } = state;

		// Wraps a list of children in a new level-1 'inline' token with empty content.
		const wrapInline = (children: Array<Token>) => {
			const wrapper = new Token('inline', '', 0);
			wrapper.level = 1;
			wrapper.children = children;
			wrapper.content = '';
			return wrapper;
		};

		// Iterate from the end so that splicing in replacement tokens never
		// shifts the positions that are still to be visited.
		for (let index = tokens.length - 1; index >= 1; index--) {
			const inlineToken = tokens[index];

			// Step 1: turn unescaped literal \n sequences into 'br' tokens.
			if (
				inlineToken.type === 'inline' &&
				inlineToken.content?.match(UNESCAPED_SOFT_BREAK_REGEX)
			) {
				const previousChildren = inlineToken.children || [];
				const rebuiltChildren: Array<Token> = [];
				inlineToken.children = rebuiltChildren;

				for (const child of previousChildren) {
					const segments = child.content.split(UNESCAPED_SOFT_BREAK_REGEX);

					// Nothing to split, or inline code (identified by token type,
					// for lack of a schema-agnostic check): keep the child as is.
					if (segments.length <= 1 || child.type === 'code_inline') {
						rebuiltChildren.push(child);
						continue;
					}

					let remaining = segments.length;

					for (const segment of segments) {
						const textToken = new Token('text', '', 1);
						textToken.content = segment.trim();
						rebuiltChildren.push(textToken);

						// A 'br' goes between segments, never after the last one.
						remaining -= 1;
						if (remaining > 0) {
							rebuiltChildren.push(new Token('br', 'br', 1));
						}
					}
				}
			}

			// Step 2: split on hard breaks / empty-line markers into paragraphs.
			const children = inlineToken.children || [];
			const needsSplitting = children.some(
				(child) => isHardbreak(child) || isEmptyLine(child)
			);

			if (!needsSplitting) {
				continue;
			}

			const replacement: Array<Token> = [new Token('paragraph_open', 'p', 1)];
			let pending: Array<Token> = [];

			for (const child of children) {
				if (isHardbreak(child)) {
					// Finish the current paragraph (even when it has no children
					// yet) and start the next one; the hardbreak itself is dropped.
					replacement.push(wrapInline(pending));
					pending = [];

					replacement.push(new Token('paragraph_close', 'p', -1));
					replacement.push(new Token('paragraph_open', 'p', 1));
					continue;
				}

				if (isEmptyLine(child)) {
					// The marker only stands for an empty line, so blank it out.
					child.content = '';
				}

				pending.push(child);
			}

			// The trailing paragraph only gets an inline token when it has children.
			if (pending.length > 0) {
				replacement.push(wrapInline(pending));
			}

			replacement.push(new Token('paragraph_close', 'p', -1));

			// Swap out the token together with the one before and the one after it.
			tokens.splice(index - 1, 3, ...replacement);
		}

		return false;
	});
}
