Published
Edited
Oct 27, 2018
1 fork
1 star
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Input cell built with `select` (imported from @jashkenas/inputs); its options
// are the keys of `category_to_id_map`, spread into an array.
// NOTE(review): `category_to_id_map` is not defined in the visible cells — presumably a Map; confirm.
viewof category_selector = select([...category_to_id_map.keys()])
Insert cell
Insert cell
// Downloads the NLTK Brown corpus archive and resolves to the text of the
// archive entry `brown/${id_selector}`.
// Rejects with a descriptive Error when the archive has no such entry
// (JSZip's `file(name)` returns null in that case, which previously surfaced
// as an opaque "cannot read property 'async' of null" TypeError).
// NOTE(review): the cell is named "abc" but reads from the Brown corpus; the
// name is kept because other cells may reference it.
selected_abc_text = d3
  .buffer('https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/brown.zip')
  .then((arrayBuffer) => JSZip.loadAsync(arrayBuffer))
  .then((zip) => {
    const entryPath = `brown/${id_selector}`;
    const entry = zip.file(entryPath);
    if (entry === null) {
      throw new Error(`No entry "${entryPath}" found in brown.zip`);
    }
    return entry.async('string');
  })
Insert cell
JSZip = require("jszip@3.1.5/dist/jszip.min.js")
Insert cell
d3=require('d3')

Insert cell
import {select} from "@jashkenas/inputs"
Insert cell
//https://stackoverflow.com/a/35073573/1175496
/**
 * Lazily flattens arbitrarily nested arrays and yields a window of the leaves.
 *
 * Leaves are counted in depth-first order; the first `start` leaves are
 * skipped and at most `count` leaves are yielded. Iteration stops as soon as
 * `count` leaves have been produced, so the remainder of the input is never
 * visited (which also makes unbounded outer iterables usable).
 *
 * @param {Iterable} arrayOfArrays - iterable whose items are leaves or (nested) arrays.
 * @param {number} [start=0] - number of leading leaves to skip.
 * @param {number} [count=Infinity] - maximum number of leaves to yield.
 * @returns {Generator} generator over the selected leaves.
 */
function generator_slicer(arrayOfArrays, start = 0, count = Infinity) {
  let seen = 0; // leaves visited so far (skipped or yielded)
  let emitted = 0; // leaves yielded so far

  function* walk(items) {
    if (emitted >= count) {
      return;
    }
    for (const item of items) {
      if (Array.isArray(item)) {
        yield* walk(item);
      } else {
        if (seen >= start) {
          emitted++;
          yield item;
        }
        seen++;
      }
      // Stop right away once the window is full instead of draining the input.
      if (emitted >= count) {
        return;
      }
    }
  }

  return walk(arrayOfArrays);
}
Insert cell
Insert cell
// Preview: collect 50 flattened leaves of ca01_paras after skipping the first 100.
[...generator_slicer(ca01_paras, 100, 50)]
Insert cell

/**
 * Like a flattening slicer, but also descends into strings, so the leaves are
 * individual characters (plus any non-array, non-string values).
 *
 * Leaves are counted in depth-first order; the first `start` leaves are
 * skipped and at most `count` leaves are yielded, after which iteration stops.
 *
 * @param {Iterable} arrayOfArrays - nested arrays whose innermost items are strings.
 * @param {number} [start=0] - number of leading leaves to skip.
 * @param {number} [count=Infinity] - maximum number of leaves to yield.
 * @returns {Generator} generator over the selected leaves.
 */
function generator_word_slicer(arrayOfArrays, start = 0, count = Infinity) {
  let seen = 0; // leaves visited so far (skipped or yielded)
  let emitted = 0; // leaves yielded so far

  function* walk(items) {
    if (emitted >= count) {
      return;
    }
    // Iterating a string yields whole code points. Those must be treated as
    // leaves: a single astral character (e.g. an emoji) has length 2, and
    // descending into it again would recurse forever.
    const insideString = typeof items === 'string';
    for (const item of items) {
      const isSplittableString =
        typeof item === 'string' && !insideString && item.length > 1;
      if (Array.isArray(item) || isSplittableString) {
        yield* walk(item);
      } else {
        if (seen >= start) {
          emitted++;
          yield item;
        }
        seen++;
      }
      // Stop right away once the window is full instead of draining the input.
      if (emitted >= count) {
        return;
      }
    }
  }

  return walk(arrayOfArrays);
}
Insert cell
// Preview: the first 20 leaves produced by generator_word_slicer for ca01_paras.
[...generator_word_slicer(ca01_paras, 0, 20)]
Insert cell
// Dealing with two issues:
// Readability, by inferring where spaces will go (which count against character count), and showing paragraph breaks
// Filtering out weird characters I don't understand, they *shouldn't* count against character count
/**
 * Placeholder for an NLTK-aware slicer; the traversal is not implemented yet,
 * so the returned generator currently yields nothing.
 *
 * @param {Array} paragraphArray - the paragraphs to walk.
 * @param {number} start - reserved for the implementation (currently unused).
 * @param {number} count - reserved for the implementation (currently unused).
 * @returns {Generator} an (for now empty) generator.
 */
function generator_nltk_slicer(paragraphArray, start, count) {
  // The inner generator must be invoked with the parameter that exists here;
  // it used to be called with an undefined `arrayOfArrays`, which threw a
  // ReferenceError on every call.
  return (function* array_flatten_iterator(arrayOfArrays) {
    //....
  })(paragraphArray);
}
Insert cell
// Preview: the first 20 leaves produced by generator_word_slicer for ca01_paras.
[...generator_word_slicer(ca01_paras, 0, 20)]
Insert cell
// I'm impressed it supports nested backticks without requiring me to escape the inner backticks.
html`${[1, 2].map((n) => `<p>${n}</p>`)}`
Insert cell
Insert cell
one_two ={
return html`${[1,2].map((i)=>{
return `
<p>
${i}
</p>`
})}` ;
}
Insert cell
/**
 * Builds one "<p>" markup string per number in [1, 2] and passes the resulting
 * array to the `html` tagged template as its only interpolated value.
 *
 * @returns whatever the `html` tag returns for that template.
 */
function one_two_func() {
  const paragraphs = [];
  for (const n of [1, 2]) {
    // Template lines stay at column 0 so the generated markup is unchanged.
    paragraphs.push(`
<p>
${n}
</p>`);
  }
  return html`${paragraphs}`;
}
Insert cell
Insert cell
// Early, unfinished draft of a character sampler; the later cell
// `sample_of_chars_gen` is the variant that actually yields output.
//
// As written, this generator never yields anything itself: the only `yield`
// sits inside the nested `function*` handed to `.map`, and the `html` template
// built below is a bare expression statement whose value is discarded.
//
// NOTE(review): `.map(function* ...)` only creates generator objects; no code
// in this cell iterates them, so unless `html` itself does, the callback
// bodies never run.
// NOTE(review): `returned_chars < count` compares an array with `count`.
// NOTE(review): '<p>${para__index}</p>' is single-quoted, so it is a plain
// string and `${para__index}` is not interpolated.
// NOTE(review): `start`, `read_chars` and `returned_char_count` are never
// read; `read_char_count` is only touched inside the nested callbacks.
sample_of_chars = function* (array_of_paras, start, count) {
let read_chars = [];
let returned_chars = [];
let read_char_count = 0;
let returned_char_count = 0;
html`${array_of_paras.slice(0,2).map(function*(para__array_of_sent, para__index){
let para;
while (returned_chars < count && (para=para__array_of_sent.shift())){
yield '<p>${para__index}</p>'
}
para__array_of_sent.map(function*(sent__array_of_word){
// if (read_char_count > start){
// yield `<p>`;
// }
sent__array_of_word.map((word__string_of_chars)=>{
//yield ' '
Array.from(word__string_of_chars).map(()=>{
});
//if the next word is not a period (?) then join with spaces:
read_char_count++;
//if (read_char_
return ' ';
})
})
// return `
// <p>
// ${para__array_of_sent.join(' ')}
// </p>`
})}` ;
}
Insert cell
Insert cell
Insert cell
Insert cell
/**
 * Streams an HTML rendering of tokenized text, one small string per `yield`.
 *
 * The input is walked paragraph -> sentence -> token -> character. Tokens are
 * joined with a single space, except that no space is placed before ",", "."
 * or "''", after "``", or at the very start of a paragraph. The quote tokens
 * "``" and "''" are rendered as '"'. Every space and character occupies one
 * position; positions below `start` are skipped and at most `count` positions
 * are emitted. Markup ("<p>", "</p>", the blinker span) is not counted.
 *
 * Each paragraph that contributes output is wrapped in "<p>...</p>". When the
 * limit is hit exactly, a "blinker" span is emitted before the closing tag.
 *
 * @param {Array<Array<Array<string>>>} array_of_paras - paragraphs of sentences of tokens.
 * @param {number} [start=0] - number of leading positions to skip.
 * @param {number} [count] - maximum positions to emit; any falsy value
 *   (undefined, 0) means "no limit", as before.
 * @yields {string} markup fragments and single characters, in output order.
 */
function* sample_of_chars_gen(array_of_paras, start = 0, count) {
  // Kept as `||` on purpose: callers may rely on 0 meaning "everything".
  const limit = count || Infinity;
  // Built once instead of once per token.
  const noSpaceBefore = new Set([',', '.', `''`]);
  const noSpaceAfter = new Set(['``']);
  const blinker = `<span class="blinker" style="display:inline-block; height:24px; min-width:15px; margin-left:2px; line-height:1.5em" > </span>`;

  let readCount = 0; // positions consumed so far (skipped or emitted)
  let emittedCount = 0; // positions emitted so far

  for (const paragraph of array_of_paras) {
    if (emittedCount >= limit) {
      break;
    }
    let isParaOpen = false;
    let isParaStart = true;

    for (const sentence of paragraph) {
      if (emittedCount >= limit) {
        break;
      }
      for (let wordIndex = 0; wordIndex < sentence.length; wordIndex++) {
        if (emittedCount >= limit) {
          break;
        }
        const token = sentence[wordIndex];
        if (!token) {
          // An empty token contributes nothing; skip it rather than cutting
          // the rest of the sentence short.
          continue;
        }

        // The separator belongs between tokens, never in front of the first
        // token of a paragraph (the old guard here was always true).
        const needsSpace =
          !isParaStart &&
          !noSpaceBefore.has(token) &&
          !noSpaceAfter.has(sentence[wordIndex - 1]);
        isParaStart = false;
        if (needsSpace) {
          if (readCount >= start) {
            if (!isParaOpen) {
              yield `<p>`;
              isParaOpen = true;
            }
            yield ' ';
            emittedCount++;
          }
          readCount++;
        }

        //Fix things about the brown corpus and/or my nltk tagging process which I don't understand
        const text = token.replace('``', '"').replace(`''`, '"');

        // Iterating the string keeps multi-unit characters in one piece.
        for (const char of text) {
          if (emittedCount >= limit) {
            break;
          }
          // Same "position before increment" test as for spaces, so exactly
          // `start` positions are skipped.
          if (readCount >= start) {
            if (!isParaOpen) {
              yield `<p>`;
              isParaOpen = true;
            }
            emittedCount++;
            yield char;
          }
          readCount++;
        }
      }
    }

    if (emittedCount === limit) {
      yield blinker;
    }
    if (isParaOpen) {
      yield `</p>`;
    }
  }
}
Insert cell
Insert cell
import { button } from '@jashkenas/inputs'
Insert cell
import { number } from '@jashkenas/inputs'
Insert cell
Insert cell
Insert cell
Insert cell
// html`<div style="font-size:1.5em; text-align:justify;">
// ${[...(sample_of_chars_gen(ca01_paras, 0, 500))]}
// </div>`
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more