Public
Edited
Jan 5, 2024
1 fork
2 stars
Insert cell
Insert cell
Insert cell
{
let doc=nlp("Hi Dr. Miller the price is 4.59 for the U.C.L.A. Ph.Ds.")
// sentence parsing happens automatically...

// return an array of sentence texts:
let arr = doc.json().map(o=> o.text)
return arr
}
Insert cell
Insert cell
{
let abbrevs = nlp().model.one.abbreviations
let mine = {
abbr: true, //add known abbreviation
co: false //remove known abbreviation
}
// add them in
Object.assign(abbrevs, mine)

return nlp('the new abbr. is working. whoo hoo').out('array')
}
Insert cell
Insert cell
md`You can always see the term-splitting in a number of ways:`
Insert cell
// Inspect the JSON output for 'i wanna' to see how the text was split into terms.
nlp('i wanna').json()
Insert cell
// Same idea, but call .terms() first and take the JSON output of that result.
nlp('new york, new york. it\'s a hell-of-a town.').terms().json()
Insert cell
Insert cell
{
// add 'semi' as a non-word prefix:
nlp.world().model.one.prefixes.semi = true
return nlp('semi-detatched but slightly-ajar').terms().json({normal:true}).map(t=>t.normal)
}
Insert cell
Insert cell
{
nlp('spencer\'s cool').debug(); // see dev console
return null
}

Insert cell
Insert cell
Insert cell
// For each term in docs[0], list its leading characters (pre), its text, and its trailing characters (post).
nlp(`"Oh! say, do you see?!"`).docs[0].map(({ pre, text, post }) => [pre, text, post])
Insert cell
Insert cell
{
let world = nlp.world()
// support "=foo=" as a word
world.model.one.prePunctuation['='] = true
world.model.one.postPunctuation['='] = true
return nlp.tokenize('=cool=').json()[0].terms[0]
}
Insert cell
{
let world = nlp.world()
// re-interpret "=foo=" as "foo" (default)
world.model.one.prePunctuation['='] = false
world.model.one.postPunctuation['='] = false
return nlp.tokenize('=cool=').json()[0].terms[0]
}
Insert cell
Insert cell
{
let doc = nlp.tokenize('spencer kelly is working', {working:'NotCool'})
return JSON.stringify(doc.out('tags')[0], null, 2)
}
Insert cell
Insert cell
{
const methods = nlp.world().methods
//change the sentence-splitting tokenizer
methods.one.tokenize.splitSentences = function (str) {
return str.split(/[.?!]/) //(demonstration purposes!)
}
//change the term-splitting tokenizer
methods.one.tokenize.splitTerms = function (str) {
return str.split(/ /) //works for me!
}
//now the hyphenated term is combined:
return nlp('one two-three four five').terms().out('array')
}
Insert cell
Insert cell
// Show the tokenize methods object (where splitSentences and splitTerms live).
nlp.world().methods.one.tokenize
Insert cell
Insert cell
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more