Public
Edited
Nov 9, 2023
2 stars
Insert cell
Insert cell
Insert cell
nlp(input).ngrams( {min:2} ) //sequences with at least 2 terms
Insert cell
Insert cell
Insert cell
{
const r = nlp('one two three. One two. Three four.')
return r.ngrams()
}
Insert cell
Insert cell
{
const r = nlp('one two three. One two. Three four.')
return r.unigrams()
}
Insert cell
Insert cell
{
const r = nlp('one two three. One two. Three four.')
return r.bigrams()
}
Insert cell
Insert cell
{
const r = nlp('one two three. One two. Three four.')
return r.trigrams()
}
Insert cell
Insert cell
{
const r = nlp('one two three. One two. Three four.')
return r.startgrams()
}
Insert cell
Insert cell
{
const r = nlp('one two three. One two. Three four.')
return r.endgrams()
}
Insert cell
Insert cell
{
const r = nlp('one two three. One two. Three four.')
return r.edgegrams()
}
Insert cell
Insert cell
{
let doc = nlp(`Bubba, the owner of bubba's shrip.`)
doc.normalize({possessives:true,})
return doc.ngrams()
}
Insert cell
Insert cell
{
// Demo: clean up a sentence (drop adverbs and parenthesised text), then
// return its n-grams with `size: 2`.
let doc = nlp('He played (briefly) one year in Toronto, then suddenly played 1 year in Los Angeles.')
doc.remove('#Adverb') // remove 'suddenly'
doc.parentheses().remove() // remove '(briefly)'
// once compromise-numbers is installed...
//doc.numbers().toNumber() // 'one' -> '1'
// NOTE(review): the result of splitAfter() is discarded. This only affects the
// grams below if splitAfter() mutates `doc` in place; if it returns a new view
// instead, the split never reaches doc.ngrams() — confirm against the compromise docs.
doc.splitAfter('@hasComma') // avoid 'Toronto then' gram
return doc.ngrams({size:2})
}
Insert cell
Insert cell
Insert cell
nlp = {
let nlp = await require('compromise')
let plugin = await require('compromise-stats/builds/compromise-stats.min.js')
nlp.extend(plugin)
return nlp
}
Insert cell
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more