Public
Edited
Jul 7, 2023
Paused
1 fork
Insert cell
Insert cell
Insert cell
Insert cell
full_data
Type Table, then Shift-Enter. Ctrl-space for more options.

Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
complaints (1).db
-- downsampled country data index (original cell note)
-- Returns every column of beta for one model; 19 is a hard-coded model_index
-- (presumably one of the values stored in metadata.model_index -- TODO confirm).
SELECT *
FROM beta
WHERE model_index = 19
Insert cell
complaints (1).db
-- downsampled country data index (original cell note)
-- Looks up the model_index of the metadata row(s) for the
-- paraphrase-multilingual-MiniLM-L12-v2 model with use_ner = 1 and
-- use_translation = 0.
SELECT model_index
FROM metadata
WHERE model_path = 'paraphrase-multilingual-MiniLM-L12-v2'
    AND use_ner = 1
    AND use_translation = 0
Insert cell
Insert cell
Insert cell
paraphrase_beta_table.parquet
-- Word/score rows for the topic value(s) supplied by the select1 input.
SELECT
    word,
    score,
    topic
FROM "paraphrase_beta_table"
WHERE topic IN (${select1})
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Number of non-NULL ratings per airline (COUNT(col) skips NULLs).
db.sql`
SELECT
    COUNT(ratings) AS tot_N,
    airline
FROM reviews
GROUP BY airline
`
Insert cell
// Rating distribution per airline.
// Returns one row per (airline, ratings) pair with:
//   n      - number of non-NULL ratings in that pair
//   tot_n  - number of non-NULL ratings for the whole airline
// Rows are ordered by airline, then ratings, so the output order is
// deterministic (the previous ORDER BY left the order within an airline
// undefined). The GROUP BY list is written without wrapping parentheses so
// it cannot be read as a single row-valued grouping expression.
ratings_by_airline = db.query(`
WITH airline_totals AS (
    -- total non-NULL ratings per airline
    SELECT
        airline,
        COUNT(ratings) AS tot_n
    FROM reviews
    GROUP BY airline
)

SELECT
    COUNT(r.ratings) AS n,
    r.airline,
    t.tot_n,
    r.ratings
FROM reviews AS r
-- LEFT JOIN keeps reviews whose airline is NULL (their tot_n is NULL)
LEFT JOIN airline_totals AS t
    ON t.airline = r.airline
GROUP BY
    r.airline,
    r.ratings,
    t.tot_n
ORDER BY
    r.airline,
    r.ratings
`)
Insert cell
// The compound value and airline of every row in reviews
// (compound is presumably a sentiment score -- TODO confirm).
sent_by_airline = db.query(`
SELECT
    compound,
    airline
FROM reviews
`)
Insert cell
// Number of reviews per lang value; COUNT(lang) ignores NULLs, so a NULL
// lang group would report n = 0.
count_lang = db.query(`
SELECT
    COUNT(lang) AS n,
    lang
FROM reviews
GROUP BY lang
`)
Insert cell
// Number of reviews per airline; COUNT(airline) ignores NULLs, so a NULL
// airline group would report n = 0.
count_airline = db.query(`
SELECT
    COUNT(airline) AS n,
    airline
FROM reviews
GROUP BY airline
`)
Insert cell
// Number of reviews per country; COUNT(country) ignores NULLs, so a NULL
// country group would report n = 0.
count_country = db.query(`
SELECT
    COUNT(country) AS n,
    country
FROM reviews
GROUP BY country
`)
Insert cell
// DuckDB client used by the db.query / db.sql cells in this notebook.
// Each key below becomes a table name and is loaded from the attached file.
// NOTE(review): the two True/False suffixes presumably encode the use_ner and
// use_translation flags seen in the metadata table -- TODO confirm the order.
// NOTE(review): key naming is inconsistent (MiniLM_L12_v2_True_True vs
// doc2vec_false_true vs allMinilmL6V2_...). The model table is chosen at
// runtime through form.select_m, so do not rename keys without checking the
// values that form can produce.
db = DuckDBClient.of({
// Review-level data (airline, country, ratings, lang, ...).
reviews: FileAttachment("tripadvisor_reviews_multi_downsampled_trans.parquet"),
// paraphrase-multilingual-MiniLM-L12-v2 variants.
paraphrase_multilingual_MiniLM_L12_v2_True_True: FileAttachment("paraphrase-multilingual-MiniLM-L12-v2_True_True@2.parquet"),
paraphrase_multilingual_MiniLM_L12_v2_True_False: FileAttachment("paraphrase_multilingual_MiniLM_L12_v2_True_False.parquet"),
paraphrase_multilingual_MiniLM_L12_v2_False_True: FileAttachment("paraphrase-multilingual-MiniLM-L12-v2_False_True@1.parquet"),
paraphrase_multilingual_MiniLM_L12_v2_False_False: FileAttachment("paraphrase-multilingual-MiniLM-L12-v2_False_False@1.parquet"),
// doc2vec variants.
doc2vec_False_False: FileAttachment("doc2vec_False_False.parquet"),
doc2vec_false_true: FileAttachment("doc2vec_False_True.parquet"),
doc2vec_true_false: FileAttachment("doc2vec_True_False.parquet"),
doc2vec_true_true: FileAttachment("doc2vec_True_True.parquet"),
// all-MiniLM-L6-v2 variants.
allMinilmL6V2_false_true: FileAttachment("all-MiniLM-L6-v2_False_True.parquet"),
allMinilmL6V2_true_false: FileAttachment("all-MiniLM-L6-v2_True_False.parquet"),
allMinilmL6V2_true_true: FileAttachment("all-MiniLM-L6-v2_True_True.parquet"),
allMinilmL6V2_false_false: FileAttachment("all-MiniLM-L6-v2_False_False.parquet"),
// LaBSE variants.
sentence_transformerslabse_false_false: FileAttachment("sentence_transformersLaBSE_False_False.parquet"),
sentence_transformerslabse_false_true: FileAttachment("sentence_transformersLaBSE_False_True.parquet"),
sentence_transformerslabse_true_false: FileAttachment("sentence_transformersLaBSE_True_False.parquet"),
sentence_transformerslabse_true_true: FileAttachment("sentence_transformersLaBSE_True_True.parquet"),
// Lookup table joined on original_topic by the full_data query.
reduced_topic: FileAttachment("reduced_topic_map@1.csv")
})
Insert cell
// Ad-hoc preview of the reviews table with all of its columns.
db.query(`
SELECT *
FROM reviews
`)
Insert cell
// Joins the model table selected in form.select_m to per-review metadata
// (on review_id) and to the reduced_topic lookup (doc_top = original_topic).
// Both joins are LEFT JOINs, so every row of the model table is kept.
// The CTE restricts which reviews columns are visible to the unqualified
// names in the outer select list.
// NOTE(review): form.select_m is spliced into the SQL text as a table name
// (identifiers cannot be bound as parameters); it should only ever take
// values from a fixed list of table names -- confirm where form is defined.
full_data = db.query(`
WITH review_meta AS (
    SELECT
        review_id,
        airline,
        country,
        ratings,
        lang,
        translated_text,
        title,
        date_pub
    FROM reviews
)

SELECT
    embedding_x,
    embedding_y,
    doc_top,
    reduced_topic,
    text,
    airline,
    country,
    review_meta.review_id,
    ratings,
    lang,
    translated_text,
    title,
    date_pub::DATE AS date_pub
FROM ${form.select_m} AS m
LEFT JOIN review_meta
    ON m.review_id = review_meta.review_id
LEFT JOIN reduced_topic AS r
    ON m.doc_top = r.original_topic
`)
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
// Binds the result of plot_ratings() to pd. plot_ratings is not defined in
// this excerpt; presumably it renders the ratings chart -- TODO confirm.
pd = plot_ratings()
Insert cell

Purpose-built for displays of data

Observable is your go-to platform for exploring data and creating expressive data visualizations. Use reactive JavaScript notebooks for prototyping and a collaborative canvas for visual data exploration and dashboard creation.
Learn more