Public
Edited
Jan 5, 2024
1 fork
2 stars
Insert cell
Insert cell
Insert cell
{
let doc=nlp("Hi Dr. Miller the price is 4.59 for the U.C.L.A. Ph.Ds.")
// sentence parsing happens automatically...

// return an array of sentence texts:
let arr = doc.json().map(o=> o.text)
return arr
}
Insert cell
Insert cell
{
let abbrevs = nlp().model.one.abbreviations
let mine = {
abbr: true, //add known abbreviation
co: false //remove known abbreviation
}
// add them in
Object.assign(abbrevs, mine)

return nlp('the new abbr. is working. whoo hoo').out('array')
}
Insert cell
Insert cell
md`You can always see the term-splitting in a number of ways:`
Insert cell
// View the parsed document as JSON, one way to see how the text was split into terms.
nlp('i wanna').json()
Insert cell
// Select the individual terms of the document, then view them as JSON.
nlp('new york, new york. it\'s a hell-of-a town.').terms().json()
Insert cell
Insert cell
{
// add 'semi' as a non-word prefix:
nlp.world().model.one.prefixes.semi = true
return nlp('semi-detatched but slightly-ajar').terms().json({normal:true}).map(t=>t.normal)
}
Insert cell
Insert cell
{
nlp('spencer\'s cool').debug(); // see dev console
return null
}

Insert cell
Insert cell
Insert cell
// For each term in the first entry of docs, list its pre, text, and post fields.
nlp(`"Oh! say, do you see?!"`).docs[0].map(t=>[t.pre, t.text, t.post])
Insert cell
Insert cell
{
let world = nlp.world()
// support "=foo=" as a word
world.model.one.prePunctuation['='] = true
world.model.one.postPunctuation['='] = true
return nlp.tokenize('=cool=').json()[0].terms[0]
}
Insert cell
{
let world = nlp.world()
// re-interpret "=foo=" as "foo" (default)
world.model.one.prePunctuation['='] = false
world.model.one.postPunctuation['='] = false
return nlp.tokenize('=cool=').json()[0].terms[0]
}
Insert cell
Insert cell
{
let doc = nlp.tokenize('spencer kelly is working', {working:'NotCool'})
return JSON.stringify(doc.out('tags')[0], null, 2)
}
Insert cell
Insert cell
{
const methods = nlp.world().methods
//change the sentence-splitting tokenizer
methods.one.tokenize.splitSentences = function (str) {
return str.split(/[.?!]/) //(demonstration purposes!)
}
//change the term-splitting tokenizer
methods.one.tokenize.splitTerms = function (str) {
return str.split(/ /) //works for me!
}
//now the hyphenated term is combined:
return nlp('one two-three four five').terms().out('array')
}
Insert cell
Insert cell
// Inspect the tokenize methods object held on the world.
nlp.world().methods.one.tokenize
Insert cell
Insert cell
Insert cell
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more