Add reddit comments

This commit is contained in:
Omar Roth 2018-03-03 15:06:14 -06:00
parent 2e892e8dd4
commit 24cea8f3e7
3 changed files with 152 additions and 18 deletions

View File

@ -46,6 +46,36 @@ class Video
}) })
end end
# Top-level envelope of the JSON parsed by `RedditSubmit.from_json`.
# Only the "data" member is mapped.
class RedditSubmit
  JSON.mapping(data: RedditSubmitData)
end
# Payload of a RedditSubmit: the list of threads found under "children".
class RedditSubmitData
  JSON.mapping(children: Array(RedditThread))
end
# One entry of RedditSubmitData#children; the thread's fields live under "data".
class RedditThread
  JSON.mapping(data: RedditThreadData)
end
# Fields read from a single Reddit thread. All of them are required by the
# mapping (none is declared nilable).
class RedditThreadData
  JSON.mapping(
    subreddit: String,
    id: String,
    num_comments: Int32,
    score: Int32,
    author: String,
    permalink: String,
    title: String
  )
end
# See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html # See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
def ci_lower_bound(pos, n) def ci_lower_bound(pos, n)
if n == 0 if n == 0
@ -226,3 +256,67 @@ def make_client(url, context)
client.connect_timeout = 10.seconds client.connect_timeout = 10.seconds
return client return client
end end
# Finds Reddit submissions of the YouTube video `id` and fetches the comments
# of the highest-scoring one.
#
# id     - video id, interpolated into the YouTube watch URL that is searched for
# client - HTTP client used for both requests; the request paths are relative,
#          so it is presumably already bound to a Reddit API host
#
# Returns two values: the "children" of the second element of the parsed
# comments response, and the selected RedditThread.
# Raises when there is no submission to pick from or when a response does not
# have the expected shape.
def get_reddit_comments(id, client)
  watch_url = URI.escape("https://youtube.com/watch?v=#{id}")
  submissions = RedditSubmit.from_json(client.get("/submit.json?url=#{watch_url}").body)

  # Ascending sort by score leaves the best-scoring thread in the last slot.
  ranked_threads = submissions.data.children.sort_by { |thread| thread.data.score }
  top_reddit_thread = ranked_threads[-1]

  subreddit = top_reddit_thread.data.subreddit
  thread_id = top_reddit_thread.data.id
  response = client.get("/r/#{subreddit}/comments/#{thread_id}?sort=top&depth=3")
  listing = JSON.parse(response.body)

  return listing[1]["data"]["children"], top_reddit_thread
end
# Renders a collection of Reddit comment objects as nested HTML.
#
# root - collection yielding comment objects from `each`. Every object is read
#        through child["data"] using the keys "body_html", "author", "score",
#        "depth" and "replies".
#
# Entries without a "body_html" key are skipped. Replies are rendered by
# recursing into child["data"]["replies"]["data"]["children"]. Comments with a
# depth greater than zero are wrapped with a 1/24-width spacer column so that
# each nesting level is indented.
#
# Returns the HTML as a String in which absolute links to youtube.com have had
# their scheme and host removed (leaving site-relative paths). Links to any
# other host are left untouched.
def template_comments(root)
  html = ""

  root.each do |child|
    data = child["data"]

    # Only entries that carry a rendered body are templated.
    next unless data["body_html"]?

    author = data["author"]
    score = data["score"]
    # body_html is unescaped before being embedded, so it is presumably
    # delivered entity-escaped by the API.
    body_html = HTML.unescape(data["body_html"].as_s)

    # "replies" is compared against "" to detect a comment with no replies;
    # anything else is treated as a nested listing.
    replies_html = ""
    if data["replies"] != ""
      replies_html = template_comments(data["replies"]["data"]["children"])
    end

    # TODO: Allow for expanding comments instead of just dismissing them
    content = <<-END_HTML
    <p>
    <a class="link" href="javascript:void(0)" onclick="dismiss(this.parentNode.parentNode)">[ - ]</a>
    #{score}
    <b>#{author}</b>
    </p>
    <p>#{body_html}</p>
    #{replies_html}
    END_HTML

    if data["depth"].as_i > 0
      html += <<-END_HTML
      <div class="pure-g">
      <div class="pure-u-1-24"></div>
      <div class="pure-u-23-24">
      #{content}
      </div>
      </div>
      END_HTML
    else
      html += <<-END_HTML
      <div class="pure-g">
      <div class="pure-u-1">
      #{content}
      </div>
      </div>
      END_HTML
    end
  end

  # Strip only the YouTube origin. The scheme is part of the same alternative
  # as the host, so a bare "https://" in front of another site is not matched.
  return html.gsub(/(https?:\/\/)?(www\.)?youtube\.com/, "")
end

View File

@ -37,7 +37,7 @@ end
Kemal::CLI.new Kemal::CLI.new
PG_DB = DB.open "postgres://kemal:kemal@localhost:5432/invidious" PG_DB = DB.open "postgres://kemal:kemal@localhost:5432/invidious"
URL = URI.parse("https://www.youtube.com") YT_URL = URI.parse("https://www.youtube.com")
CONTEXT = OpenSSL::SSL::Context::Client.new CONTEXT = OpenSSL::SSL::Context::Client.new
CONTEXT.verify_mode = OpenSSL::SSL::VerifyMode::NONE CONTEXT.verify_mode = OpenSSL::SSL::VerifyMode::NONE
CONTEXT.add_options( CONTEXT.add_options(
@ -45,26 +45,29 @@ CONTEXT.add_options(
OpenSSL::SSL::Options::NO_SSL_V2 | OpenSSL::SSL::Options::NO_SSL_V2 |
OpenSSL::SSL::Options::NO_SSL_V3 OpenSSL::SSL::Options::NO_SSL_V3
) )
pool = Deque.new((threads * 1.2 + 1).to_i) do youtube_pool = Deque.new((threads * 1.2 + 1).to_i) do
make_client(URL, CONTEXT) make_client(YT_URL, CONTEXT)
end
reddit_pool = Deque.new((threads * 1.2 + 1).to_i) do
make_client(URI.parse("https://api.reddit.com"), CONTEXT)
end end
# Refresh pool by crawling YT # Refresh youtube_pool by crawling YT
threads.times do threads.times do
spawn do spawn do
io = STDOUT io = STDOUT
ids = Deque(String).new ids = Deque(String).new
random = Random.new random = Random.new
client = get_client(pool) client = get_client(youtube_pool)
search(random.base64(3), client) do |id| search(random.base64(3), client) do |id|
ids << id ids << id
end end
pool << client youtube_pool << client
loop do loop do
client = get_client(pool) yt_client = get_client(youtube_pool)
if ids.empty? if ids.empty?
search(random.base64(3), client) do |id| search(random.base64(3), client) do |id|
@ -73,8 +76,8 @@ threads.times do
end end
if rand(300) < 1 if rand(300) < 1
pool << make_client(URL, CONTEXT) youtube_pool << make_client(YT_URL, CONTEXT)
client = get_client(pool) yt_client = get_client(youtube_pool)
end end
begin begin
@ -82,7 +85,7 @@ threads.times do
video = get_video(id, client, PG_DB) video = get_video(id, client, PG_DB)
rescue ex rescue ex
io << id << " : " << ex.message << "\n" io << id << " : " << ex.message << "\n"
pool << make_client(URL, CONTEXT) youtube_pool << make_client(YT_URL, CONTEXT)
next next
ensure ensure
ids.delete(id) ids.delete(id)
@ -105,7 +108,20 @@ threads.times do
end end
end end
pool << client youtube_pool << client
end
end
end
threads.times do
spawn do
loop do
client = get_client(reddit_pool)
client.get("/")
sleep 10.seconds
reddit_pool << client
end end
end end
end end
@ -115,7 +131,7 @@ top_videos = [] of Video
spawn do spawn do
loop do loop do
top = rank_videos(PG_DB, 40) top = rank_videos(PG_DB, 40)
client = get_client(pool) client = get_client(youtube_pool)
args = [] of String args = [] of String
if top.size > 0 if top.size > 0
@ -137,7 +153,7 @@ spawn do
top_videos = videos top_videos = videos
pool << client youtube_pool << client
end end
end end
@ -163,9 +179,9 @@ get "/watch" do |env|
env.params.query.delete_all("listen") env.params.query.delete_all("listen")
end end
client = get_client(pool) yt_client = get_client(youtube_pool)
begin begin
video = get_video(id, client, PG_DB) video = get_video(id, yt_client, PG_DB)
rescue ex rescue ex
error_message = ex.message error_message = ex.message
next templated "error" next templated "error"
@ -220,7 +236,17 @@ get "/watch" do |env|
calculated_rating = 0.0 calculated_rating = 0.0
end end
pool << client reddit_client = get_client(reddit_pool)
begin
reddit_comments, reddit_thread = get_reddit_comments(id, reddit_client)
rescue ex
reddit_comments = JSON.parse("[]")
reddit_thread = nil
end
reddit_pool << reddit_client
youtube_pool << yt_client
templated "watch" templated "watch"
end end
@ -235,7 +261,7 @@ get "/search" do |env|
page = env.params.query["page"]? && env.params.query["page"].to_i? ? env.params.query["page"].to_i : 1 page = env.params.query["page"]? && env.params.query["page"].to_i? ? env.params.query["page"].to_i : 1
client = get_client(pool) client = get_client(youtube_pool)
html = client.get("https://www.youtube.com/results?q=#{URI.escape(query)}&page=#{page}&sp=EgIQAVAU").body html = client.get("https://www.youtube.com/results?q=#{URI.escape(query)}&page=#{page}&sp=EgIQAVAU").body
html = XML.parse_html(html) html = XML.parse_html(html)
@ -286,7 +312,7 @@ get "/search" do |env|
end end
end end
pool << client youtube_pool << client
templated "search" templated "search"
end end

View File

@ -60,6 +60,10 @@ var player = videojs('player', options, function() {
} }
}); });
}); });
// Hides the given element by setting its inline `display` style to 'none'.
// Used by the "[ - ]" link rendered next to each comment.
function dismiss(target) {
    var targetStyle = target.style;
    targetStyle.display = 'none';
}
</script> </script>
<h1> <h1>
@ -94,6 +98,16 @@ var player = videojs('player', options, function() {
<div style="margin-right:1em;"> <div style="margin-right:1em;">
<%= video.description %> <%= video.description %>
</div> </div>
<% if reddit_thread && !reddit_comments.as_a.empty? %>
<hr style="margin-right:1em;">
<div style="margin-right:1em;">
<h3><%= reddit_thread.data.title %></h3>
<b>
<a target="_blank" class="link" href="https://reddit.com<%= reddit_thread.data.permalink %>">View comments on Reddit</a>
</b>
<%= template_comments(reddit_comments) %>
</div>
<% end %>
</div> </div>
<div class="pure-u-1 pure-u-md-1-5"> <div class="pure-u-1 pure-u-md-1-5">