Published
Edited
Apr 4, 2019
Insert cell
Insert cell
Insert cell
Insert cell
// Load a file locally
viewof text = html`<input type=file * ">`
Insert cell
Insert cell
// Read the selected file's contents as text; the JSON it holds is parsed in the cells below
tweetsStrings = Files.text(text)
Insert cell
// Parse the JSON text and keep only the values of the top-level structure,
// giving the array of tweet records that the rest of the notebook treats as a dataframe
tweetsDF = Object.values(JSON.parse(tweetsStrings))
Insert cell
// Reference copy showing what tweetsDF looked like originally.
// JSON.parse runs a second time here, so these records are separate objects
// from the ones held by tweetsDF.
tweetsDF_temp = Object.values(JSON.parse(tweetsStrings))
Insert cell
// Distinct values of the "location" column in the raw (uncleaned) data set
unique_locations_tweetsDF = z.unique(z.getCol("location",tweetsDF_temp))
Insert cell
//group the raw tweets by user location
count_tweetsDF = {
let countArray = []
let groupedLocations = z.groupBy(x => x.location, tweetsDF_temp)
for (const key of Object.keys(groupedLocations)) {
countArray.push({location: key, count: groupedLocations[key].length})
}
return countArray
}
Insert cell
// Order the per-location tallies by their `count` column ('des' direction).
// Observation from the result: variations of 'Boston, MA' are the most common.
countSorted_tweetsDF = z.sortByCol('count', 'des', count_tweetsDF)
Insert cell
// Loop through all the locations, and see if they are similar to Boston, then create a new column
newTweets = {
let cleanTweets = []
let locations = z.getCol("location", tweetsDF)
//for each row in tweetsDF...
for (let i in tweetsDF) {
//make a duplicate tweet structure to load into cleanTweets
let tweet = tweetsDF[i]
if (tweetsDF[i].location.toLowerCase().includes('boston')){
tweet.location = 'Boston, MA'
}
else if(tweetsDF[i].location.toLowerCase().includes('cambridge')){
tweet.location = 'Cambridge, MA'
}
else if(tweetsDF[i].location==""){
tweet.location = 'no location'
}
else {
//push the clean tweet to cleanTweets
}
cleanTweets.push(tweet)
}
return cleanTweets
}
Insert cell
//group newTweets by count
count_cleaned = {
let countArray = []
let groupedLocations = z.groupBy(x => x.location, newTweets)
for (const key of Object.keys(groupedLocations)) {
countArray.push({location: key, count: groupedLocations[key].length})
}
return countArray
}
Insert cell
// Order the cleaned per-location tallies by their `count` column ('des' direction)
countSorted_clean = z.sortByCol('count', 'des', count_cleaned)
Insert cell
Insert cell
// Bar chart of the first ten rows of countSorted_clean:
// one bar per location, bar height = tweet count, bar color = location.
vegalite({
data: {values: countSorted_clean.slice(0,10)},
mark: "bar",
encoding: {
// NOTE(review): "*" does not look like a documented Vega-Lite sort value — confirm what ordering is intended
x: {field: "location", type: "nominal", sort:"*"},
y: {field: "count", type: "quantitative"},
color: {"field": "location", "type": "nominal"}
}
})
Insert cell
Insert cell
// Keep only the tweets that carry a latitude (`!= null` rejects both null and undefined).
// These coordinates aren't necessarily the stated locations of the users; they can
// differ from the free-text "location" field.
notNull = z.filter(r => r.lat != null, tweetsDF)
Insert cell
// Scatter plot of the tweets that have coordinates.
// Longitude runs along x and latitude along y so the points are laid out like a
// north-up map. The scales are restricted to longitudes -71.15 to -71 and
// latitudes 42.33 to 42.38, with each domain listed in ascending order.
vegalite({
  data: {values: notNull},
  mark: "point",
  encoding: {
    x: {field: "lon", type: "quantitative", scale: {domain: [-71.15, -71]}},
    y: {field: "lat", type: "quantitative", scale: {domain: [42.33, 42.38]}}
  }
})
Insert cell
Insert cell
// Load a file locally
viewof hashtag_text = html`<input type=file * ">`
Insert cell
// Read the selected hashtag file's contents as text; the JSON it holds is parsed in the next cell
tweetsStringsBrexit = Files.text(hashtag_text)
Insert cell
// Parse the JSON text and keep only the values of the top-level structure
tweets_brexit = Object.values(JSON.parse(tweetsStringsBrexit))
Insert cell
//reorganizing tweets_brexit into the same structure as the first exercise
tweets_brexit_DF = {
let otherArray = []
for(let i in tweets_brexit){
let curr = tweets_brexit[i][1]
otherArray.push(curr)
}
return otherArray
}
Insert cell
Insert cell
//get all the locations in the tweets_brexit_DF
locationsArray = {
let newArray = []
for(let i in tweets_brexit_DF){
let currentText = tweets_brexit_DF[i].location
newArray.push(currentText)
}
return newArray
}
Insert cell
// Distinct values of the "location" column; many are England or cities in England
unique_locations_brexit = z.unique(z.getCol("location", tweets_brexit_DF))
Insert cell
//group by user location
count_brexit = {
let countArray = []
let groupedLocations = z.groupBy(x => x.location, tweets_brexit_DF)
for (const key of Object.keys(groupedLocations)) {
countArray.push({location: key, count: groupedLocations[key].length})
}
return countArray
}
Insert cell
// Order the per-location tallies by their `count` column ('des' direction).
// Observations from the result:
//  - many tweets have no usable location
//  - locations vary in specificity (e.g. "England"; "London, England"; "UK")
//  - London was chosen for clean-up because it has many entries written as
//    "London" or "London, England"
countSorted_brexit = z.sortByCol('count', 'des', count_brexit)
Insert cell
cleanedTweetsBrexit = {
let newLocations = []
let locations = z.getCol("location", tweets_brexit_DF)
for (let id in locations) {
if (locations[id].toLowerCase().includes('london')){
newLocations.push('London, England')
}
else if(locations[id]==""){
newLocations.push('null value')
}
else {
newLocations.push(locations[id])
}
}
return z.addCol("cleanLocations", newLocations, tweets_brexit_DF)
}
Insert cell
Insert cell
// Keep only the rows that carry a latitude (`!= null` rejects both null and undefined).
// None of the tweets collected have lat/lon, so this comes back empty for this data set.
notNullBrexit = z.filter(r => r.lat != null, tweets_brexit_DF)
Insert cell
// Bar chart of the first ten rows of countSorted_brexit.
// Those rows only carry `location` and `count` (none of these tweets have
// lat/lon), so the chart encodes exactly those two fields:
// one bar per location, bar height = tweet count, bar color = location.
vegalite({
  data: {values: countSorted_brexit.slice(0, 10)},
  mark: "bar",
  encoding: {
    x: {field: "location", type: "nominal"},
    y: {field: "count", type: "quantitative"},
    color: {field: "location", type: "nominal"}
  }
})
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell

One platform to build and deploy the best data apps

Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Learn more