Platform
Solutions
Resources
Pricing
Sign in
Sign up
jk979
Workspace
Fork
Published
Big Data
By
jk979
Edited
Apr 4, 2019
Insert cell
Insert cell
Insert cell
Insert cell
// Load a file locally
viewof
text
=
html
`<input type=file * ">`
Insert cell
Insert cell
// Read the file picked in the `text` cell as text; the result is handed to
// JSON.parse by the cells that build the tweet tables.
tweetsStrings = Files.text(text);
Insert cell
//make text into dataframe
tweetsDF
=
Object
.
values
(
JSON
.
parse
(
tweetsStrings
)
)
Insert cell
//create temporary file where we see what tweetsDF was originally
tweetsDF_temp
=
Object
.
values
(
JSON
.
parse
(
tweetsStrings
)
)
Insert cell
//get unique location names from this raw data set
unique_locations_tweetsDF
=
z
.
unique
(
z
.
getCol
(
"location"
,
tweetsDF_temp
)
)
//get unique location names
Insert cell
//group the raw tweets by user location
count_tweetsDF
=
{
let
countArray
=
[
]
let
groupedLocations
=
z
.
groupBy
(
x
=>
x
.
location
,
tweetsDF_temp
)
for
(
const
key
of
Object
.
keys
(
groupedLocations
)
)
{
countArray
.
push
(
{
location
:
key
,
count
:
groupedLocations
[
key
]
.
length
}
)
}
return
countArray
}
Insert cell
//sort by count of tweets; we see that variations of 'Boston, MA' are most common
countSorted_tweetsDF
=
z
.
sortByCol
(
'count'
,
'des'
,
count_tweetsDF
)
Insert cell
// Loop through all the locations, and see if they are similar to Boston, then create a new column
newTweets
=
{
let
cleanTweets
=
[
]
let
locations
=
z
.
getCol
(
"location"
,
tweetsDF
)
//for each row in tweetsDF...
for
(
let
i
in
tweetsDF
)
{
//make a duplicate tweet structure to load into cleanTweets
let
tweet
=
tweetsDF
[
i
]
if
(
tweetsDF
[
i
]
.
location
.
toLowerCase
(
)
.
includes
(
'boston'
)
)
{
tweet
.
location
=
'Boston, MA'
}
else
if
(
tweetsDF
[
i
]
.
location
.
toLowerCase
(
)
.
includes
(
'cambridge'
)
)
{
tweet
.
location
=
'Cambridge, MA'
}
else
if
(
tweetsDF
[
i
]
.
location
==
""
)
{
tweet
.
location
=
'no location'
}
else
{
//push the clean tweet to cleanTweets
}
cleanTweets
.
push
(
tweet
)
}
return
cleanTweets
}
Insert cell
//group newTweets by count
count_cleaned
=
{
let
countArray
=
[
]
let
groupedLocations
=
z
.
groupBy
(
x
=>
x
.
location
,
newTweets
)
for
(
const
key
of
Object
.
keys
(
groupedLocations
)
)
{
countArray
.
push
(
{
location
:
key
,
count
:
groupedLocations
[
key
]
.
length
}
)
}
return
countArray
}
Insert cell
//sort by count of tweets
countSorted_clean
=
z
.
sortByCol
(
'count'
,
'des'
,
count_cleaned
)
Insert cell
Insert cell
// Bar chart of the ten most frequent cleaned locations: one bar per location,
// bar height = number of tweets, colored by location.
vegalite({
  // countSorted_clean is already ordered by count, so the first ten rows are
  // the ten largest groups.
  data: {values: countSorted_clean.slice(0, 10)},
  mark: "bar",
  encoding: {
    x: {
      field: "location",
      type: "nominal",
      // `null` is the documented Vega-Lite value for "keep the data order"
      // (here: the count ordering of the input). The previous value "*" is
      // not a documented sort option.
      sort: null
    },
    y: {field: "count", type: "quantitative"},
    color: {field: "location", type: "nominal"}
  }
})
Insert cell
Insert cell
//these aren't necessarily the stated locations of the users; they can be different from the "location"
notNull
=
z
.
filter
(
r
=>
r
.
lat
!=
null
,
tweetsDF
)
Insert cell
// Scatter plot of the tweets that have coordinates (notNull), laid out like a
// map: longitude on the horizontal axis, latitude on the vertical axis, both
// domains given in increasing order. The earlier version put latitude on x
// and longitude on y with a reversed domain, which drew the map rotated by
// 90 degrees. The numeric window itself is unchanged.
vegalite({
  data: {values: notNull},
  mark: "point",
  encoding: {
    x: {
      field: "lon",
      type: "quantitative",
      scale: {domain: [-71.15, -71]}
    },
    y: {
      field: "lat",
      type: "quantitative",
      scale: {domain: [42.33, 42.38]}
    }
    //color: {"field": "location", "type": "nominal"}
  }
})
Insert cell
Insert cell
// Load a file locally
viewof
hashtag_text
=
html
`<input type=file * ">`
Insert cell
// Read the file picked in the `hashtag_text` cell as text; the result is
// handed to JSON.parse by the tweets_brexit cell.
tweetsStringsBrexit = Files.text(hashtag_text);
Insert cell
tweets_brexit
=
Object
.
values
(
JSON
.
parse
(
tweetsStringsBrexit
)
)
Insert cell
// Reshape tweets_brexit into the same row structure as the first exercise by
// taking the element at index 1 of every entry.
tweets_brexit_DF = tweets_brexit.map(entry => entry[1]);
Insert cell
Insert cell
// Collect the `location` value of every row in tweets_brexit_DF, in row order.
locationsArray = tweets_brexit_DF.map(tweet => tweet.location);
Insert cell
//get unique location names, there are a lot of England or cities in England
unique_locations_brexit
=
z
.
unique
(
z
.
getCol
(
"location"
,
tweets_brexit_DF
)
)
Insert cell
//group by user location
count_brexit
=
{
let
countArray
=
[
]
let
groupedLocations
=
z
.
groupBy
(
x
=>
x
.
location
,
tweets_brexit_DF
)
for
(
const
key
of
Object
.
keys
(
groupedLocations
)
)
{
countArray
.
push
(
{
location
:
key
,
count
:
groupedLocations
[
key
]
.
length
}
)
}
return
countArray
}
Insert cell
//sort by count; lots of null locations
//locations have varying degrees of specificity (i.e. England; London, England; UK)
//chose to format London since it had a large number of entries as "London" or "London, England"
countSorted_brexit
=
z
.
sortByCol
(
'count'
,
'des'
,
count_brexit
)
Insert cell
cleanedTweetsBrexit
=
{
let
newLocations
=
[
]
let
locations
=
z
.
getCol
(
"location"
,
tweets_brexit_DF
)
for
(
let
id
in
locations
)
{
if
(
locations
[
id
]
.
toLowerCase
(
)
.
includes
(
'london'
)
)
{
newLocations
.
push
(
'London, England'
)
}
else
if
(
locations
[
id
]
==
""
)
{
newLocations
.
push
(
'null value'
)
}
else
{
newLocations
.
push
(
locations
[
id
]
)
}
}
return
z
.
addCol
(
"cleanLocations"
,
newLocations
,
tweets_brexit_DF
)
}
Insert cell
Insert cell
//none of the tweets collected have lat/lon
notNullBrexit
=
z
.
filter
(
r
=>
r
.
lat
!=
null
,
tweets_brexit_DF
)
Insert cell
// Bar chart of the ten most frequent Brexit tweet locations: one bar per
// location, bar height = number of tweets, colored by location.
// The rows of countSorted_brexit only have `location` and `count`; the
// earlier encoding referenced `lat`, `lon` and `search`, none of which exist
// on these rows, so the chart had nothing meaningful to draw.
vegalite({
  // countSorted_brexit is already ordered by count, so the first ten rows are
  // the ten largest groups (same cut-off as the chart of cleaned locations).
  data: {values: countSorted_brexit.slice(0, 10)},
  mark: "bar",
  encoding: {
    // sort: null keeps the bars in data order instead of re-sorting the labels.
    x: {field: "location", type: "nominal", sort: null},
    y: {field: "count", type: "quantitative"},
    color: {field: "location", type: "nominal"}
  }
})
Insert cell
Insert cell
Insert cell
Insert cell
Insert cell
One platform
to build and deploy the best data apps
Experiment and prototype by building visualizations in live JavaScript notebooks. Collaborate with your team and decide which concepts to build out.
Use Observable Framework to build data apps locally. Use data loaders to build in any language or library, including Python, SQL, and R.
Seamlessly deploy to Observable. Test before you ship, use automatic deploy-on-commit, and ensure your projects are always up-to-date.
Try it for free
Learn more
Fork
View
Export
Edit
Add comment
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
Edit
Add comment
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
Edit
Add comment
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
text
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
Edit
Add comment
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
tweetsStrings
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
tweetsDF
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
tweetsDF_temp
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
unique_locations_tweetsDF
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
count_tweetsDF
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
countSorted_tweetsDF
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
newTweets
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
count_cleaned
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
countSorted_clean
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
Edit
Add comment
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
Add comment
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
Edit
Add comment
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
notNull
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
Add comment
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
Edit
Add comment
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
hashtag_text
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
tweetsStringsBrexit
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
tweets_brexit
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
tweets_brexit_DF
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
Edit
Add comment
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
locationsArray
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
unique_locations_brexit
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
count_brexit
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
countSorted_brexit
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
cleanedTweetsBrexit
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
Edit
Add comment
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
notNullBrexit
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
Add comment
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
Edit
Add comment
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
vegalite
Edit
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
z
Edit
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML
d3
Edit
Add comment
Copy import
Select
Duplicate
Copy link
Embed
Delete
JavaScript
Markdown
HTML