Public
Edited
Jun 20, 2024
Insert cell
Insert cell
Insert cell
bigclivedotcom
Type Table, then Shift-Enter. Ctrl-space for more options.

Insert cell
// Rows of the attached CSV, keeping only the first row seen for each URL.
// URLs are compared case-insensitively when looking for duplicates.
bigclivedotcom = _.uniqBy(
  await FileAttachment("bigclivedotcom.csv").csv(),
  ({ URL }) => URL.toLowerCase()
)
Insert cell
// Title column of every catalogue row, ordered with locale-aware comparison.
titles = bigclivedotcom
  .map((video) => video.Title)
  .sort((left, right) => left.localeCompare(right));
Insert cell
availableSubTitleFiles
Type Table, then Shift-Enter. Ctrl-space for more options.

Insert cell
// Zip archive attached to the notebook. Other cells read its `filenames` list
// and fetch individual entries with `file(name)`.
SubFiles = FileAttachment("bigclivedotcom@1.zip").zip()
Insert cell
// Build one record per zip entry with a cleaned-up name:
//   1. drop the last 7 characters (the length of ".en.srt"),
//   2. drop a trailing " (2)" duplicate marker if one remains.
// Records that end up with the same cleaned name are collapsed to the first.

availableSubTitleFiles = _.uniqBy(
  SubFiles.filenames.map((zfilename) => {
    const stem = zfilename.slice(0, -7);
    const filename = stem.endsWith(' (2)') ? stem.slice(0, -4) : stem;

    // URL holds whatever findMatchingURL returns for the cleaned name.
    return { zfilename, filename, URL: findMatchingURL(filename) };
  }),
  'filename'
);
Insert cell
/**
 * Look up the video id for a title in `bigclivedotcom`.
 *
 * Matching is case-insensitive. An entry whose Title equals the given title
 * wins; otherwise the first entry whose Title contains it is used.
 *
 * @param {string} title - Title text to search for (e.g. a cleaned subtitle filename).
 * @returns {string|null} The id taken from the entry's URL (the `v=` query
 *   value, or the path segment of a youtu.be link), or null when the title is
 *   blank, nothing matches, or the URL carries no recognisable id.
 */
function findMatchingURL(title) {
  // A blank needle would "match" every entry via includes(""), so bail out early.
  if (typeof title !== "string" || title.trim() === "") {
    return null;
  }

  const needle = title.toLowerCase();
  const lowerTitle = (entry) => String(entry.Title || "").toLowerCase();

  // Prefer an exact title so a short title is not captured by an earlier,
  // longer title that merely contains it.
  const match =
    bigclivedotcom.find((entry) => lowerTitle(entry) === needle) ||
    bigclivedotcom.find((entry) => lowerTitle(entry).includes(needle));
  if (!match) {
    return null;
  }

  // Extract the id without assuming "v=" is present (the old split-based code
  // threw a TypeError on URLs that lacked it).
  const url = String(match.URL || "");
  const idMatch =
    /[?&]v=([^&#]+)/.exec(url) || /youtu\.be\/([^?&#/]+)/.exec(url);
  return idMatch ? idMatch[1] : null;
}


Insert cell
titles.length // before filter
Insert cell
availableSubTitleFiles.length // after filter
Insert cell
Insert cell
Insert cell
Insert cell
// Split processedFiles into consecutive groups of up to 740 items each.
// NOTE(review): processedFiles is not defined in the visible cells, and the
// reason for the 740 batch size is not stated here — confirm both.
chunks = _.chunk(processedFiles, 740)
Insert cell
chunk0 = chunks[0]
Insert cell
chunk1 = chunks[1]
Insert cell
chunk2 = chunks[2]
Insert cell
Insert cell
Insert cell
ShowFile = SubFiles.filenames[0]
Insert cell
srtText = SubFiles.file(ShowFile).text()
Insert cell
parseSrtText(srtText)
Insert cell
/**
 * Convert the contents of an .srt subtitle file into one plain-text string.
 *
 * An .srt file is a sequence of cues separated by blank lines. Each cue is a
 * counter line, a timing line ("00:00:00,080 --> 00:00:01,839") and one or
 * more lines of text. Only the text is kept; the text of all cues is joined
 * with single spaces.
 *
 * Accepts CRLF, LF or bare-CR line endings, cues with any number of text
 * lines, and an optional leading byte-order mark.
 *
 * @param {string} text - Raw .srt file contents.
 * @returns {string} The spoken text of every cue joined by spaces, or "" when
 *   the input is empty, not a string, or contains no cues.
 */
function parseSrtText(text) {
  if (typeof text !== "string" || text.length === 0) {
    return "";
  }

  // Normalise every line-ending style to "\n" so that cue boundaries are
  // simply runs of blank lines, whatever tool produced the file.
  const normalized = text.replace(/^\uFEFF/, "").replace(/\r\n?/g, "\n");

  const bodies = [];
  for (const cue of normalized.split(/\n\s*\n/)) {
    const lines = cue
      .split("\n")
      .map((line) => line.trim())
      .filter((line) => line !== "");

    // The timing line anchors the cue; everything after it is spoken text.
    // Anchoring on it (rather than on a fixed 4-line stride) keeps the parse
    // in step when a cue has more or fewer than one text line.
    const timingIndex = lines.findIndex((line) => line.includes("-->"));
    if (timingIndex === -1) {
      continue; // stray text or trailing whitespace, not a cue
    }

    const body = lines.slice(timingIndex + 1).join(" ");
    if (body !== "") {
      bodies.push(body);
    }
  }

  return bodies.join(" ");
}
Insert cell
// otext = old_parseSrtText(srtText).reduce((acc, subtitle) => acc + subtitle.body + " ", "")
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more