/**
 * Assembles the queueing filter used for NASA Mars news pages.
 *
 * Three filters are built and handed to `crawler.chain`, in this order:
 *   1. a URL allow-filter restricted to the Mars news URL pattern,
 *   2. a duplicate filter keyed on the numeric news id found in the URL,
 *   3. `allowMaximum(5, console)`.
 *
 * NOTE(review): `crawler` and `allowMaximum` are defined outside this block;
 * their exact semantics (presumably short-circuiting predicates and a cap of
 * five queued items) should be confirmed against their definitions.
 *
 * @returns {*} Whatever `crawler.chain` returns for the three filters.
 */
createShouldQueue = () => {
  // Matches ".../mars.nasa.gov/news/<digits>/..." and captures the digits.
  const nasaMarsNewsUrl = /\/mars\.nasa\.gov\/news\/([\d]+)\//;

  // Extracts the string the URL allow-filter is matched against.
  const hrefOf = ({ href }) => href;

  // Yields the captured news id, or undefined when the URL does not match.
  const newsIdOf = ({ href }) => href.match(nasaMarsNewsUrl)?.[1];

  const allowUrlRegex = crawler.allowRegex(hrefOf);
  const ignoreMarsNewsDoubles = crawler.ignoreDoubles(newsIdOf);

  return crawler.chain(
    allowUrlRegex([nasaMarsNewsUrl]),
    ignoreMarsNewsDoubles(),
    allowMaximum(5, console),
  );
};