mirror of
https://github.com/hedgedoc/hedgedoc.git
synced 2025-05-20 10:15:17 -04:00
feat: move title extraction into commons package
Signed-off-by: Tilman Vatteroth <git@tilmanvatteroth.de>
This commit is contained in:
parent
8de8a50bec
commit
3962cafa5d
6 changed files with 124 additions and 36 deletions
|
@ -4,13 +4,11 @@
|
|||
* SPDX-License-Identifier: AGPL-3.0-only
|
||||
*/
|
||||
import { NodeProcessor } from '../../node-preprocessors/node-processor'
|
||||
import { extractFirstHeading } from '@hedgedoc/commons'
|
||||
import { Optional } from '@mrdrogdrog/optional'
|
||||
import type { Document, Node, Element } from 'domhandler'
|
||||
import { isTag, isText } from 'domhandler'
|
||||
import type { Document } from 'domhandler'
|
||||
import type { EventEmitter2 } from 'eventemitter2'
|
||||
|
||||
/**
 * Matches a complete tag name that is a headline tag (h1 to h6), ignoring case.
 *
 * The pattern is deliberately NOT global: it is fully anchored, so there can never be
 * more than one match, and a global flag would make this shared instance stateful
 * (via lastIndex) for any caller that uses test() or exec().
 */
const headlineTagRegex = /^h[1-6]$/i
|
||||
|
||||
/**
|
||||
* Searches for the first headline tag and extracts its plain text content.
|
||||
*/
|
||||
|
@ -22,40 +20,9 @@ export class ExtractFirstHeadlineNodeProcessor extends NodeProcessor {
|
|||
}
|
||||
|
||||
process(nodes: Document): Document {
|
||||
Optional.ofNullable(this.checkNodesForHeadline(nodes.children))
|
||||
.map((foundHeadlineNode) => this.extractInnerTextFromNode(foundHeadlineNode).trim())
|
||||
Optional.ofNullable(extractFirstHeading(nodes))
|
||||
.filter((text) => text !== '')
|
||||
.ifPresent((text) => this.eventEmitter.emit(ExtractFirstHeadlineNodeProcessor.EVENT_NAME, text))
|
||||
return nodes
|
||||
}
|
||||
|
||||
private checkNodesForHeadline(nodes: Node[]): Node | undefined {
|
||||
return nodes.find((node) => isTag(node) && node.name.match(headlineTagRegex))
|
||||
}
|
||||
|
||||
private extractInnerTextFromNode(node: Node): string {
|
||||
if (isText(node)) {
|
||||
return node.nodeValue
|
||||
} else if (isTag(node)) {
|
||||
return this.extractInnerTextFromTag(node)
|
||||
} else {
|
||||
return ''
|
||||
}
|
||||
}
|
||||
|
||||
private extractInnerTextFromTag(node: Element): string {
|
||||
if (node.name === 'a' && this.findAttribute(node, 'class')?.value.includes('heading-anchor')) {
|
||||
return ''
|
||||
} else if (node.name === 'img') {
|
||||
return this.findAttribute(node, 'alt')?.value ?? ''
|
||||
} else {
|
||||
return node.children.reduce((state, child) => {
|
||||
return state + this.extractInnerTextFromNode(child)
|
||||
}, '')
|
||||
}
|
||||
}
|
||||
|
||||
private findAttribute(node: Element, attributeName: string) {
|
||||
return node.attributes.find((attribute) => attribute.name === attributeName)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue