feat: move title extraction into commons package

Signed-off-by: Tilman Vatteroth <git@tilmanvatteroth.de>
This commit is contained in:
Tilman Vatteroth 2023-04-08 21:31:27 +02:00
parent 8de8a50bec
commit 3962cafa5d
6 changed files with 124 additions and 36 deletions

View file

@ -4,13 +4,11 @@
* SPDX-License-Identifier: AGPL-3.0-only
*/
import { NodeProcessor } from '../../node-preprocessors/node-processor'
import { extractFirstHeading } from '@hedgedoc/commons'
import { Optional } from '@mrdrogdrog/optional'
import type { Document, Node, Element } from 'domhandler'
import { isTag, isText } from 'domhandler'
import type { Document } from 'domhandler'
import type { EventEmitter2 } from 'eventemitter2'
// Matches exactly the tag names h1 through h6, ignoring case.
// Deliberately not global: the pattern is anchored at both ends and can match at most once, and a
// global regex shared at module level would carry `lastIndex` state between `test`/`exec` calls.
const headlineTagRegex = /^h[1-6]$/i
/**
* Searches for the first headline tag and extracts its plain text content.
*/
@ -22,40 +20,9 @@ export class ExtractFirstHeadlineNodeProcessor extends NodeProcessor {
}
/**
 * Asks `extractFirstHeading` for the heading text of the given document and, when a non-empty
 * result is present, emits it on the event emitter under
 * `ExtractFirstHeadlineNodeProcessor.EVENT_NAME`.
 *
 * @param nodes The parsed document to inspect.
 * @returns The same `nodes` object that was passed in.
 */
process(nodes: Document): Document {
  Optional.ofNullable(extractFirstHeading(nodes))
    // An empty heading is treated like a missing one: no event is emitted.
    .filter((text) => text !== '')
    .ifPresent((text) => this.eventEmitter.emit(ExtractFirstHeadlineNodeProcessor.EVENT_NAME, text))
  return nodes
}
/**
 * Looks through the given nodes (without descending into children) for the first tag whose name
 * matches `headlineTagRegex`.
 *
 * @param nodes The nodes to inspect, in order.
 * @returns The first matching node, or `undefined` if none of the nodes matches.
 */
private checkNodesForHeadline(nodes: Node[]): Node | undefined {
  for (const candidate of nodes) {
    if (isTag(candidate) && candidate.name.match(headlineTagRegex) !== null) {
      return candidate
    }
  }
  return undefined
}
/**
 * Produces the plain text for a single node.
 *
 * Text nodes yield their `nodeValue`, tags are delegated to `extractInnerTextFromTag`, and every
 * other kind of node yields an empty string.
 *
 * @param node The node to convert.
 * @returns The extracted text.
 */
private extractInnerTextFromNode(node: Node): string {
  if (isText(node)) {
    return node.nodeValue
  }
  return isTag(node) ? this.extractInnerTextFromTag(node) : ''
}
/**
 * Produces the plain text for a tag.
 *
 * - An `a` tag whose `class` attribute contains `heading-anchor` contributes nothing.
 * - An `img` tag contributes the value of its `alt` attribute, or nothing if it has none.
 * - Any other tag contributes the concatenated text of its children, in order.
 *
 * @param node The tag to convert.
 * @returns The extracted text.
 */
private extractInnerTextFromTag(node: Element): string {
  const isHeadingAnchor =
    node.name === 'a' && this.findAttribute(node, 'class')?.value.includes('heading-anchor')
  if (isHeadingAnchor) {
    return ''
  }

  if (node.name === 'img') {
    return this.findAttribute(node, 'alt')?.value ?? ''
  }

  let collectedText = ''
  for (const child of node.children) {
    collectedText += this.extractInnerTextFromNode(child)
  }
  return collectedText
}
/**
 * Looks up an attribute of the given element by its exact name.
 *
 * @param node The element whose attributes are searched.
 * @param attributeName The attribute name to look for.
 * @returns The first attribute with that name, or `undefined` if there is none.
 */
private findAttribute(node: Element, attributeName: string) {
  for (const attribute of node.attributes) {
    if (attribute.name === attributeName) {
      return attribute
    }
  }
  return undefined
}
}