/**
 * Looks up a Wikipedia page through the MediaWiki `action=parse` API and
 * returns the title segment of the first article link found in its body
 * paragraphs.
 *
 * Processing steps:
 *  1. Fetch the rendered HTML of `currentPage`.
 *  2. If the first paragraph is exactly "Redirect to:", follow the first
 *     anchor in the document and start over with that page.
 *  3. For every non-empty body paragraph, drop `small`, `span.nowrap`,
 *     `span.IPA` and `span#coordinates` elements and strip parenthesised
 *     text, then collect the remaining anchors (excluding `.extiw`).
 *  4. Pick the first anchor whose href ends in `/wiki/<segment>` where
 *     `<segment>` contains no ":".
 *
 * @param {string} currentPage - Page title, interpolated as-is into the API
 *   query string.
 * @returns {Promise<string|false>} The last path segment of the chosen link
 *   with any `#fragment` removed, or `false` when the API response has no
 *   `parse` member, a redirect page has no anchor, or no qualifying link
 *   exists.
 */
async function getNextPage(currentPage) {
  // NOTE(review): `currentPage` is not URL-encoded here. Values produced by
  // this function come from anchor hrefs and are presumably already
  // percent-encoded, so encoding again would double-encode them. Callers
  // passing raw titles containing "&" or "#" should encode them first.
  const response = await fetch(
    `https://en.wikipedia.org/w/api.php?format=json&origin=*&action=parse&prop=text&page=${currentPage}`
  );
  const text = await response.json();
  if (!text.parse) {
    return false;
  }

  const lastSegment = (href) => href.split("/").slice(-1)[0];

  const parser = new DOMParser();
  const docText = parser.parseFromString(text.parse.text["*"], "text/html");

  // Redirect stubs render as a "Redirect to:" paragraph followed by a link
  // to the real page.
  const firstParagraph = docText.querySelectorAll("p")[0];
  if (firstParagraph && firstParagraph.innerHTML === "Redirect to:") {
    const redirectLink = docText.querySelectorAll("a")[0];
    if (!redirectLink) {
      return false;
    }
    return getNextPage(lastSegment(redirectLink.href));
  }

  const bodyParagraphs = [
    ...docText.querySelectorAll(".mw-parser-output>p:not(.mw-empty-elt)"),
  ];

  const cleanedParagraphs = bodyParagraphs
    .filter((paragraph) => paragraph.textContent.trim())
    .map((paragraph) => {
      // querySelectorAll returns a static list, so removing nodes while
      // iterating over it is safe.
      const noise = paragraph.querySelectorAll(
        "small, span.nowrap, span.IPA, span#coordinates"
      );
      for (const element of noise) {
        element.parentNode.removeChild(element);
      }

      // Strip "(...)" groups so links inside parentheses are ignored. A "("
      // preceded by "_" is kept because that is how article hrefs such as
      // "Foo_(bar)" are written. The character before the "(" is captured
      // and re-inserted: dropping it would corrupt markup like "<i>(x)</i>".
      const withoutParens = paragraph.outerHTML.replace(
        /([^_])\(([^)]+)\)/g,
        "$1"
      );
      return parser
        .parseFromString(withoutParens, "text/html")
        .querySelectorAll("p")[0];
    })
    // Skip anything that failed to re-parse into a paragraph.
    .filter(Boolean);

  const candidateLinks = cleanedParagraphs.flatMap((paragraph) => [
    ...paragraph.querySelectorAll("a:not(.extiw)"),
  ]);

  // An article link looks like ".../wiki/<Title>"; a ":" in the last segment
  // disqualifies the link.
  const nextLink = candidateLinks.find((anchor) => {
    const segments = anchor.href.split("/");
    return (
      segments[segments.length - 2] === "wiki" &&
      !segments[segments.length - 1].includes(":")
    );
  });

  if (!nextLink) {
    return false;
  }
  return lastSegment(nextLink.href).split("#")[0];
}