// Builds a flat array with one record per regex match found in each speech
// transcript. Every record has the shape
//   { president, date, word, surroundingPhrases, locations, bloodSpeechesIndex }
// where `word` is the trimmed matched text and `bloodSpeechesIndex` is the
// position of the speech inside `bloodSpeeches`.
//
// NOTE: `locations` and `surroundingPhrases` are collected once per
// (speech, regex) pair, so all records produced by the same pair reference the
// same two arrays (holding every match index / every context phrase for that
// pair), not a single value per record.
//
// `bloodSpeeches` and `bloodRegexes` are defined outside this statement.
// Each regex must carry the `g` flag: String.prototype.matchAll throws a
// TypeError for a non-global RegExp.
bloodyWords = bloodSpeeches.reduce((o, {president, transcript, date}, bloodSpeechesIndex) => {
  // Maximum number of whitespace characters crossed on each side of a match
  // while expanding it into a context phrase.
  const spaceLimit = 3;
  // Characters that end a context phrase early.
  const punctuationRE = /[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&()*+,\-.\/:;<=>?@\[\]^_`{|}~]/;
  const whitespaceRE = /\s/;

  for (const re of bloodRegexes) {
    const words = [];
    const locations = [];
    const surroundingPhrases = [];
    const toRecord = (word) => ({
      president, date, word: word.trim(), surroundingPhrases, locations, bloodSpeechesIndex
    });

    // Plain iteration: the loop exists only for its side effects on the three
    // collector arrays above, so no mapped result is built.
    for (const match of transcript.matchAll(re)) {
      const matchLen = match[0].length;
      words.push(match[0]);
      locations.push(match.index);

      // Walk backwards from the first character of the match until enough
      // whitespace has been crossed, punctuation is hit, or the transcript
      // start is reached.
      let phraseStartIndex = match.index;
      let startSpaces = 0;
      let currentChar = transcript[phraseStartIndex];
      while (
        phraseStartIndex > 0
        && startSpaces < spaceLimit
        && !punctuationRE.test(currentChar)
      ) {
        phraseStartIndex -= 1;
        currentChar = transcript[phraseStartIndex];
        if (whitespaceRE.test(currentChar)) {
          startSpaces += 1;
        }
      }
      // If the walk ended on a punctuation character, step past it so the
      // phrase does not begin with that character. The end boundary already
      // excludes its punctuation because substring()'s end index is exclusive.
      // The `< match.index` guard leaves a match that itself starts with
      // punctuation untouched.
      if (phraseStartIndex < match.index && punctuationRE.test(currentChar)) {
        phraseStartIndex += 1;
      }

      // Walk forwards from the character just after the match under the same
      // stopping rules.
      let phraseEndIndex = match.index + matchLen;
      let endSpaces = 0;
      currentChar = transcript[phraseEndIndex];
      while (
        phraseEndIndex < transcript.length
        && endSpaces < spaceLimit
        && !punctuationRE.test(currentChar)
      ) {
        phraseEndIndex += 1;
        currentChar = transcript[phraseEndIndex];
        if (whitespaceRE.test(currentChar)) {
          endSpaces += 1;
        }
      }

      surroundingPhrases.push(transcript.substring(phraseStartIndex, phraseEndIndex).trim());
    }

    // concat() appends the elements of an array argument one by one, so this
    // yields the same result as spreading the records as arguments while
    // avoiding the engine's argument-count limit on very large match sets.
    o = o.concat(words.map(toRecord));
  }
  return o;
}, [])