mutable data = corpusData.titles
.filter(t=>excluded.indexOf(t.id)===-1).map(t=>{
console.log('selected title', t.id, t.title, '('+t.txt_length+')');
let obj = {
'source': t.id,
'year': corpusData.publications.find(d=>d.id===t.id).year,
'title': t.title,
'length': +t.txt_length,
'soggetto': 0,
'dubbio': 0,
'misto': 0,
'definitivo': 0,
'soggetto_perc': 0,
'dubbio_perc': 0,
'misto_perc': 0,
'definitivo_perc': 0,
'chunks': [],
'levels_doubt': []
}
obj.details = data_to_use.filter(dd=>{
return dd["ID opera"]===t.id;
});
obj.details.forEach(dd=>{
delete dd['id JS'];
delete dd['Alternative'];
delete dd['Testo Alternativo'];
delete dd['category'];
delete dd['id'];
delete dd['occorrenza'];
delete dd['soggetto'];
dd.includes_mixed_text=false;
})
// get data about this composition and calculate the amount of characters for each category
let _this_data = raw_data.find(d=>d.key===t.id);
if (_this_data) {
// data about all individual characters
let char_data = [];
// consecutive chucnks of text of the same category
let chunks = [];
for (let char = 0; char < t.txt_length; char++) {
// sections of 'soggetto' and 'dubbio' to which this individual character belongs
const _s = _this_data.all_sogg.filter(s=> char>=s[0] && char<s[1]);
const _d = _this_data.all_dubb.filter(d=>{
return char>=d[0] && char<d[1]
});
// category to which this character corresponds
let char_cat;
if (_s.length>0 && _d.length>0){
obj.misto++;
char_cat = "misto";
// Add a flag to the original data if mixed text
obj.details.forEach(dd=>{
if (char >= Number(dd.soggetto_starts_at) && char <= Number(dd.soggetto_ends_at)) {
// console.log(dd.includes_mixed_text)
dd.includes_mixed_text=true;
// console.log(dd.includes_mixed_text)
}
})
} else if (_s.length>0) {
obj.soggetto++;
char_cat = "soggetto"
} else if (_d.length>0) {
obj.dubbio++;
char_cat = "dubbio"
} else {
obj.definitivo++;
char_cat = "definitivo"
}
// NOT WORKING WELL
// data about this character
char_data.push({
index: char,
category: char_cat,
was_subject: _s.length, // how many times it was part of a 'subject'
was_doubt: _d.length, // how many times it was part of a 'doubt'
});
// calculate chuncks
let this_chunk = {
start: char,
end: char+1,
category: char_cat
}
if (chunks.length < 1) {
chunks.push(this_chunk);
} else if (chunks[chunks.length-1].category === char_cat) {
chunks[chunks.length-1].end = char+1
} else {
chunks.push(this_chunk);
}
} // close for loop
// store data about chunks
obj.chunks = chunks;
// NOT WORKING WELL
// calculate "levels doubt" (see below for explaination)
char_data = d3.nest()
.key(d=>d.category)
.key(d=>d.was_subject)
.rollup(values=>values.length)
.entries(char_data)
.map(d=>{
d.values = d.values.map(dd=>{
return {
...dd,
'percentage':dd.value/t.txt_length*100
}
}).sort((a,b)=>+a.key-b.key)
return d
});
// NOT WORKING WELL
// format for treemap
char_data = char_data.map(l=>{
l.name = l.key;
l.children = l.values.map(c=>{
return {
'name': c.key,
'value': c.value
}
});
delete l.key;
delete l.values;
return l;
});
// NOT WORKING WELL
// store
obj.levels_doubt=char_data;
}
// case in which all text is 'definitivo'
if (obj.dubbio + obj.misto + obj.soggetto === 0) {
obj.definitivo = obj.length;
}
// calculate percentages
obj.definitivo_perc = +(obj.definitivo/obj.length * 100).toFixed(3);
obj.soggetto_perc = +(obj.soggetto/obj.length * 100).toFixed(3);
obj.misto_perc = +(obj.misto/obj.length * 100).toFixed(3);
obj.dubbio_perc = +(obj.dubbio/obj.length * 100).toFixed(3);
// cleaning
obj["id"]=obj["source"];
delete obj["source"];
// check length of text with sum of amount of characters for each category
const check_length = obj.misto + obj.soggetto + obj.dubbio + obj.definitivo;
if (check_length!==+t.txt_length) {
console.warn('Warning:', t.title, check_length, t.txt_length);
}
// return data
// console.log(obj);
return obj;
})