function tokenize(text) {
  // Split the text into typed tokens using named capture groups, tried in order:
  //   url:     scheme://... up to the next whitespace (greedy, so trailing
  //            punctuation such as a comma sticks to the URL)
  //   mention: @handle references
  //   word:    runs of word characters
  //   space:   runs of whitespace (\s already covers \n)
  //   noword:  runs of non-word, non-space characters (punctuation, symbols)
  let tokens = [
    ...text.matchAll(
      /(?<url>\w+:\/\/\S+)|(?<mention>@\w+)|(?<word>\w+)|(?<space>\s+)|(?<noword>((?!\s)\W)+)/g
    )
  ];
  // Drop the `input` property (the full source string) from each match and
  // record where the token ends in the original text.
  tokens = tokens.map(({ input, ...t }) => {
    return { ...t, end_index: t.index + t[0].length };
  });
  // Whitespace tokens carry no content; discard them. Adjacency can still be
  // recovered from each token's index/end_index if needed downstream.
  tokens = tokens.filter(t => !t.groups.space);
  // Group the remaining tokens into chunks, extract n-grams of length
  // min_n..max_n from each chunk, and deduplicate before returning.
  const chunks = get_chunks(tokens);
  const ngrams = get_ngrams(chunks, max_n, min_n);
  return Array.from(new Set(ngrams));
}
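
// NOTE: get_chunks, get_ngrams, max_n, and min_n are defined elsewhere and are
// not part of this section. The sketch below is one plausible reading, not the
// actual implementation: it assumes a "chunk" is a maximal run of tokens
// between punctuation (noword) tokens, and an n-gram is any contiguous run of
// min_n..max_n token texts within a chunk, joined by single spaces. The
// min_n/max_n values here are placeholder defaults.

const min_n = 1; // assumed default
const max_n = 3; // assumed default

function get_chunks(tokens) {
  // Accumulate word/url/mention tokens; start a new chunk at each noword token.
  const chunks = [];
  let current = [];
  for (const t of tokens) {
    if (t.groups.noword !== undefined) {
      if (current.length > 0) chunks.push(current);
      current = [];
    } else {
      current.push(t);
    }
  }
  if (current.length > 0) chunks.push(current);
  return chunks;
}

function get_ngrams(chunks, max_n, min_n) {
  // Emit every contiguous run of min_n..max_n tokens within each chunk.
  const ngrams = [];
  for (const chunk of chunks) {
    const words = chunk.map(t => t[0]); // t[0] is the matched text
    for (let n = min_n; n <= Math.min(max_n, words.length); n++) {
      for (let i = 0; i + n <= words.length; i++) {
        ngrams.push(words.slice(i, i + n).join(" "));
      }
    }
  }
  return ngrams;
}

// With these assumed helpers:
// tokenize("hello brave world!") -> ["hello", "brave", "world",
//   "hello brave", "brave world", "hello brave world"]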