From 55b5fa845f7e137b9f98a4eafa1f0eb2114e17a4 Mon Sep 17 00:00:00 2001 From: Stefan Sundin Date: Sun, 5 May 2024 19:10:01 -0700 Subject: [PATCH] YouTube improvements. Add a hidden way to use the RSS Box endpoint (hold the shift key when submitting the form). Add YouTube feed options to the modal. Fix the shorts filter by querying youtube.com/shorts/ for each video (it will make the query more expensive, consider using `min_length` instead). --- app.rb | 63 ++++++++++++++++++++++++---------- app/services/google.rb | 1 - app/services/youtube.rb | 29 ++++++++++++++++ config/initializers/05-bool.rb | 8 +++++ public/js/main.js | 53 +++++++++++++++++++++++----- views/index.erb | 26 ++++++++++++++ 6 files changed, 151 insertions(+), 29 deletions(-) create mode 100644 app/services/youtube.rb diff --git a/app.rb b/app.rb index 4ac4938..2fc3945 100644 --- a/app.rb +++ b/app.rb @@ -282,15 +282,14 @@ # https://www.youtube.com/channel/UC4a-Gbdw7vOaccHmFo40b9g/videos # https://www.youtube.com/channel/SWu5RTwuNMv6U # https://www.youtube.com/channel/UCd6MoB9NC6uYN2grvUNT-Zg/search?query=aurora - elsif /youtube\.com\/(?user|c|show)\/(?[^\/?#]+)(?:\/search\?query=(?[^&#]+))?/ =~ params[:q] + elsif /youtube\.com\/user\/(?[^\/?#]+)(?:\/search\?query=(?[^&#]+))?/ =~ params[:q] # https://www.youtube.com/user/khanacademy/videos - # https://www.youtube.com/c/khanacademy - # https://www.youtube.com/show/redvsblue # https://www.youtube.com/user/AmazonWebServices/search?query=aurora + elsif /youtube\.com\/(?c\/[^\/?#]+)(?:\/search\?query=(?[^&#]+))?/ =~ params[:q] + # https://www.youtube.com/c/khanacademy # https://www.youtube.com/c/khanacademy/search?query=Frequency+stability # there is no way to resolve these accurately through the API, the best way is to look for the channelId meta tag in the website HTML # note that slug != username, e.g. https://www.youtube.com/c/kawaiiguy and https://www.youtube.com/user/kawaiiguy are two different channels - user = "#{type}/#{slug}" elsif /(?:youtu\.be|youtube\.com\/(?:embed|v|shorts))\/(?[^?#]+)/ =~ params[:q] # https://youtu.be/vVXbgbMp0oY?t=1s # https://www.youtube.com/embed/vVXbgbMp0oY @@ -307,14 +306,17 @@ # https://www.youtube.com/watch?v=vVXbgbMp0oY&t=5s elsif /youtube\.com\/.*[?&]list=(?[^&#]+)/ =~ params[:q] # https://www.youtube.com/playlist?list=PL0QrZvg7QIgpoLdNFnEePRrU-YJfr9Be7 - elsif /youtube\.com\/(?[^\/?#]+)/ =~ params[:q] + elsif /youtube\.com\/(?[^\/?#]+)/ =~ params[:q] # https://www.youtube.com/khanacademy # https://www.youtube.com/@awscommunity elsif /\b(?(?:UC[^\/?#]{22,}|S[^\/?#]{12,}))/ =~ params[:q] # it's a channel id + elsif params[:q].start_with?("@") + # it's a handle + handle = params[:q] else - # it's probably a channel name - user = params[:q] + # maybe it is a handle? + handle = "@#{params[:q]}" end if playlist_id @@ -324,9 +326,28 @@ if user channel_id, _ = App::Cache.cache("youtube.user", user.downcase, 60*60, 60) do - response = App::HTTP.get("https://www.youtube.com/#{user}") + response = App::YouTube.get("/channels", query: { forUsername: user }) + raise(App::GoogleError, response) if !response.success? + if response.json["items"] && response.json["items"].length > 0 + response.json["items"][0]["id"] + else + "Error: Could not find the user. Please try with a video url instead." + end + end + elsif handle + channel_id, _ = App::Cache.cache("youtube.handle", handle.downcase, 60*60, 60) do + response = App::YouTube.get("/channels", query: { forHandle: handle }) + raise(App::GoogleError, response) if !response.success? + if response.json["items"] && response.json["items"].length > 0 + response.json["items"][0]["id"] + else + "Error: Could not find the user. Please try with a video url instead." + end + end + elsif path + channel_id, _ = App::Cache.cache("youtube.path", path.downcase, 60*60, 60) do + response = App::HTTP.get("https://www.youtube.com/#{path}") if response.redirect? - # https://www.youtube.com/tyt -> https://www.youtube.com/user/theyoungturks (different from https://www.youtube.com/user/tyt) response = App::HTTP.get(response.redirect_url) end next "Error: Could not find the user. Please try with a video url instead." if response.code == 404 @@ -336,7 +357,7 @@ end elsif video_id channel_id, _ = App::Cache.cache("youtube.video", video_id, 60*60, 60) do - response = App::Google.get("/youtube/v3/videos", query: { part: "snippet", id: video_id }) + response = App::YouTube.get("/videos", query: { part: "snippet", id: video_id }) raise(App::GoogleError, response) if !response.success? if response.json["items"].length > 0 response.json["items"][0]["snippet"]["channelId"] @@ -354,7 +375,7 @@ return [422, "Something went wrong. Try again later."] if channel_id.nil? return [422, channel_id] if channel_id.start_with?("Error:") - if query || params[:type] + if query || params.has_key?(:shift) username, _ = App::Cache.cache("youtube.channel", channel_id, 60*60, 60) do # it is no longer possible to get usernames using the API # note that the values include " - YouTube" at the end if the User-Agent is a browser @@ -370,7 +391,11 @@ query = CGI.unescape(query) # youtube uses + here instead of %20 redirect Addressable::URI.new(path: "/youtube/#{channel_id}/#{username}", query_values: { q: query }.merge(params.slice(:tz))).normalize.to_s, 301 elsif channel_id - redirect "https://www.youtube.com/feeds/videos.xml" + Addressable::URI.new(query: "channel_id=#{channel_id}").normalize.to_s, 301 + if params.has_key?(:shift) + redirect Addressable::URI.new(path: "/youtube/#{channel_id}/#{username}", query_values: params.slice(:tz)).normalize.to_s, 301 + else + redirect "https://www.youtube.com/feeds/videos.xml" + Addressable::URI.new(query: "channel_id=#{channel_id}").normalize.to_s, 301 + end else return [404, "Could not find the channel."] end @@ -386,12 +411,12 @@ data, _ = App::Cache.cache("youtube.ics", channel_id, 60*60, 60) do # The API is really inconsistent in listing scheduled live streams, but the RSS endpoint seems to consistently list them, so experiment with using that response = App::HTTP.get("https://www.youtube.com/feeds/videos.xml?channel_id=#{channel_id}") - next "Error: It seems like this channel no longer exists." if response.code == 404 + next "Error: This channel no longer exists or has no videos." if response.code == 404 raise(App::GoogleError, response) if !response.success? doc = Nokogiri::XML(response.body) ids = doc.xpath("//yt:videoId").map(&:text) - response = App::Google.get("/youtube/v3/videos", query: { part: "snippet,liveStreamingDetails,contentDetails", id: ids.join(",") }) + response = App::YouTube.get("/videos", query: { part: "snippet,liveStreamingDetails,contentDetails", id: ids.join(",") }) raise(App::GoogleError, response) if !response.success? items = response.json["items"].sort_by! do |video| @@ -455,12 +480,12 @@ data, @updated_at = App::Cache.cache("youtube.videos", channel_id, 60*60, 60) do # The results from this query are not sorted by publishedAt for whatever reason.. probably due to some uploads being scheduled to be published at a certain time - response = App::Google.get("/youtube/v3/playlistItems", query: { part: "snippet", playlistId: playlist_id, maxResults: 10 }) - next "Error: It seems like this channel no longer exists." if response.code == 404 + response = App::YouTube.get("/playlistItems", query: { part: "snippet", playlistId: playlist_id, maxResults: 10 }) + next "Error: This channel no longer exists or has no videos." if response.code == 404 raise(App::GoogleError, response) if !response.success? ids = response.json["items"].sort_by { |v| Time.parse(v["snippet"]["publishedAt"]) }.reverse.map { |v| v["snippet"]["resourceId"]["videoId"] } - response = App::Google.get("/youtube/v3/videos", query: { part: "snippet,liveStreamingDetails,contentDetails", id: ids.join(",") }) + response = App::YouTube.get("/videos", query: { part: "snippet,liveStreamingDetails,contentDetails", id: ids.join(",") }) raise(App::GoogleError, response) if !response.success? response.json["items"].map do |video| @@ -507,7 +532,7 @@ if params.has_key?(:shorts) remove_shorts = (params[:shorts] == "0") - @data.select! { |v| v["title"].downcase.include?("#shorts") != remove_shorts } + @data.select! { |v| App::YouTube.is_short?(v["id"]) != remove_shorts } end if params.has_key?(:min_length) && min_length = params[:min_length].parse_duration @@ -1495,7 +1520,7 @@ data, @updated_at = App::Cache.cache("imgur.user", @username.downcase, 60*60, 60) do # can't use user_id in this request unfortunately response = App::Imgur.get("/account/#{@username}/submissions") - next "Error: It seems like this user no longer exists." if response.code == 404 + next "Error: This user no longer exists." if response.code == 404 raise(App::ImgurError, response) if !response.success? || response.body.empty? response.json["data"].map do |image| image.slice("animated", "cover", "datetime", "description", "gifv", "height", "id", "images_count", "is_album", "nsfw", "score", "size", "title", "type", "width") diff --git a/app/services/google.rb b/app/services/google.rb index 186576b..6538ea0 100644 --- a/app/services/google.rb +++ b/app/services/google.rb @@ -1,5 +1,4 @@ # frozen_string_literal: true -# https://developers.google.com/youtube/v3/docs/ module App class GoogleError < HTTPError; end diff --git a/app/services/youtube.rb b/app/services/youtube.rb new file mode 100644 index 0000000..4df7af5 --- /dev/null +++ b/app/services/youtube.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true +# https://developers.google.com/youtube/v3/docs/ + +module App + class YouTube < Google + BASE_URL = "https://www.googleapis.com/youtube/v3" + + def self.is_short?(video_id) + is_short, _ = App::Cache.cache("youtube.shorts", video_id, 7*24*60*60, 60) do + url = "https://www.youtube.com/shorts/#{video_id}" + uri = Addressable::URI.parse(url) + opts = { + use_ssl: uri.scheme == "https", + open_timeout: 10, + read_timeout: 10, + } + Net::HTTP.start(uri.host, uri.port, opts) do |http| + response = http.request_get(uri.request_uri) + $metrics[:requests_total].increment(labels: { service: "youtube", response_code: response.code }) + next (response.code == "200").to_i.to_s + end + rescue => e + raise(self::ERROR_CLASS, e) + end + + return is_short == "1" + end + end +end diff --git a/config/initializers/05-bool.rb b/config/initializers/05-bool.rb index 0c14081..4e18663 100644 --- a/config/initializers/05-bool.rb +++ b/config/initializers/05-bool.rb @@ -8,6 +8,10 @@ def <=>(other) return 0 end end + + def to_i + return 0 + end end class TrueClass @@ -18,4 +22,8 @@ def <=>(other) return 0 end end + + def to_i + return 1 + end end diff --git a/public/js/main.js b/public/js/main.js index 73fc417..f1c14ae 100644 --- a/public/js/main.js +++ b/public/js/main.js @@ -130,12 +130,23 @@ $(document).ready(async function() { }); }); + let shiftKey = false; + document.addEventListener("keydown", function (e) { + shiftKey = e.shiftKey; + }); + document.addEventListener("keyup", function (e) { + shiftKey = e.shiftKey; + }); + $("#services form").submit(async function(event) { event.preventDefault(); const form = $(this); const action = form.attr("action"); - const qs = form.serialize(); + let qs = form.serialize(); + if (shiftKey) { + qs += "&shift"; + } const submit = form.find('input[type="submit"]'); const submit_value = submit.attr("value"); submit.attr("value", "Working..."); @@ -172,7 +183,7 @@ $(document).ready(async function() { if (pathname.endsWith("/")) { pathname = pathname.substring(0, pathname.length-1); } - url = `${window.location.protocol}//${window.location.host}${pathname}${data}`; + url = `${window.location.origin}${pathname}${data}`; // initiate a request just to get a head start on resolving urls fetch(url); } else { @@ -181,6 +192,11 @@ $(document).ready(async function() { } } + // Normalize URL + const uri = new URL(url); + uri.search = uri.searchParams.toString(); + url = uri.toString(); + const feed_modal = $("#feed-modal"); const feed_url = $("#feed-url"); feed_url.val(url).trigger("input"); @@ -197,7 +213,16 @@ $(document).ready(async function() { const url = $("#feed-url").val(); console.log(url); modal.find("form").hide(); - modal.find(`#${action}-options`).show().attr("action", url).trigger("change"); + if (url.startsWith(window.location.origin)) { + if (action === "youtube") { + const uri = new URL(url); + const q = uri.searchParams.get("q"); + if (q) { + $("#youtube_title_filter").val(q); + } + } + modal.find(`#${action}-options`).show().attr("action", url).trigger("change"); + } }); $("#copy-button").click(function() { @@ -216,14 +241,24 @@ $(document).ready(async function() { return false; }); - $("#feed-modal form").change(function() { + $("#feed-modal form").on("input", function(e) { const form = $(this); - const qs = $.param(form.serializeArray().filter(input => input.value !== "")); - let url = form.attr("action"); - if (qs !== "") { - url += `?${qs}`; + const uri = new URL(form.attr("action")); + const inputs = form.serializeArray(); + for (const input of inputs) { + if (input.value === "") { + if (uri.searchParams.has(input.name)) { + uri.searchParams.delete(input.name); + } + continue; + } + uri.searchParams.set(input.name, input.value); + } + const url = uri.toString(); + $("#feed-url").val(url).trigger("input"); + if (e.target.tagName !== "INPUT" || e.target.type !== "text") { + $("#feed-url").select(); } - $("#feed-url").val(url).trigger("input").select(); }); $("[data-download-filename]").click(async function() { diff --git a/views/index.erb b/views/index.erb index 25428ac..17459a9 100644 --- a/views/index.erb +++ b/views/index.erb @@ -221,6 +221,32 @@ +
+
+ +
+
+ +
+
+ +
+
+
+ +
+
+ +
+
+
+