Skip to content

Commit

Permalink
feat(scraper): add each scraper and its parser flow
Browse files Browse the repository at this point in the history
  • Loading branch information
sinkaroid committed Apr 18, 2023
1 parent 5cf38dc commit 759c79c
Show file tree
Hide file tree
Showing 14 changed files with 971 additions and 0 deletions.
78 changes: 78 additions & 0 deletions src/scraper/pornhub/pornhubGetController.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import { load } from "cheerio";
import LustPress from "../../LustPress";
import { IVideoData } from "../../interfaces";

const lust = new LustPress();

/**
 * Fetches a PornHub video page and extracts its metadata.
 *
 * Values are read from the canonical link, `<meta property=...>` tags and a
 * few CSS selectors. Scalar fields fall back to "None" (the duration to "0")
 * when the page yields nothing for them.
 *
 * @param url - Address of the video page to fetch.
 * @returns The scraped metadata in `IVideoData` form.
 * @throws Error carrying the message of whatever failed while fetching or parsing.
 */
export async function scrapeContent(url: string) {
  try {
    const html = await lust.fetchBody(url);
    const $ = load(html);

    // Reads the `content` attribute of a <meta property="..."> tag.
    const meta = (property: string) => $(`meta[property='${property}']`).attr("content");

    const canonical = $("link[rel='canonical']").attr("href") || "None";
    // The id is whatever follows the first "=" in the canonical URL.
    const videoId = canonical.split("=")[1] || "None";
    const rawTitle = meta("og:title") || "None";
    const thumbnail = meta("og:image") || "None";
    const seconds = meta("video:duration") || "0";
    const viewCount = $("div.views > span.count").text() || "None";
    const ratingPercent = $("div.ratingPercent > span.percent").text() || "None";
    const uploadInfo = $("div.videoInfo").text() || "None";
    const likes = $("span.votesUp").attr("data-rating") || "None";
    const dislikes = $("span.votesDown").attr("data-rating") || "None";
    const embedUrl = meta("og:video:url") || "None";

    // The first anchor of the info rows is dropped before the texts are cleaned.
    const cleanedTags = $("div.video-info-row")
      .find("a")
      .map((_, anchor) => $(anchor).text())
      .get()
      .slice(1)
      .map((label) => lust.removeHtmlTagWithoutSpace(label));

    const performers = $("div.pornstarsWrapper.js-pornstarsWrapper")
      .find("a")
      .map((_, anchor) => $(anchor).attr("data-mxptext"))
      .get();

    // Anchors whose text is "Suggest" / " Suggest" are excluded from the tag list.
    const ignoredTags = ["Suggest", " Suggest"];

    const data: IVideoData = {
      success: true,
      data: {
        title: lust.removeHtmlTagWithoutSpace(rawTitle),
        id: videoId,
        image: thumbnail,
        duration: lust.secondToMinute(Number(seconds)),
        views: viewCount,
        rating: ratingPercent,
        uploaded: uploadInfo,
        upvoted: likes,
        downvoted: dislikes,
        models: performers,
        tags: cleanedTags.filter((label) => !ignoredTags.includes(label))
      },
      source: canonical,
      assets: [embedUrl, thumbnail]
    };
    return data;
  } catch (err) {
    // Re-throw as a plain Error that carries only the original message.
    throw Error((err as Error).message);
  }
}
59 changes: 59 additions & 0 deletions src/scraper/pornhub/pornhubSearchController.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import { load } from "cheerio";
import LustPress from "../../LustPress";
import c from "../../utils/options";
import { ISearchVideoData } from "../../interfaces";

const lust = new LustPress();

/**
 * Fetches a PornHub search-result page and extracts one entry per `div.wrap`.
 *
 * @param url - Address of the search page to fetch; echoed back as `source`.
 * @returns The search results in `ISearchVideoData` form.
 * @throws Error "No result found" when the page has no `div.wrap` element,
 *         or an Error carrying the message of any fetch/parse failure.
 */
export async function scrapeContent(url: string) {
  try {
    const html = await lust.fetchBody(url);
    const $ = load(html);

    const entries = $("div.wrap")
      .map((_, node) => {
        const card = $(node);
        const anchor = card.find("a");
        const href = anchor.attr("href");
        // The id is whatever follows the first "=" in the href.
        const videoId = href?.split("=")[1];
        return {
          link: `${c.PORNHUB}${href}`,
          id: videoId,
          title: anchor.attr("title"),
          image: card.find("img").attr("src"),
          duration: card.find("var.duration").text(),
          views: card.find("div.videoDetailsBlock").find("span.views").text(),
          video: `${c.PORNHUB}/embed/${videoId}`,
        };
      })
      .get();

    // Keep entries that have a real link and an image that is present and
    // not an inline data URI (a missing image also excludes the entry).
    const visible: object = entries.filter((entry) => {
      const isPlaceholderLink = entry.link.includes("javascript:void(0)");
      return !isPlaceholderLink && entry.image?.startsWith("data:image") === false;
    });

    // Emptiness is judged on the unfiltered list.
    if (entries.length === 0) throw Error("No result found");

    const result: ISearchVideoData = {
      success: true,
      data: visible as string[],
      source: url,
    };
    return result;
  } catch (err) {
    // Re-throw as a plain Error that carries only the original message.
    throw Error((err as Error).message);
  }
}
79 changes: 79 additions & 0 deletions src/scraper/redtube/redtubeGetController.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import { load } from "cheerio";
import LustPress from "../../LustPress";
import { IVideoData } from "../../interfaces";

const lust = new LustPress();

/**
 * Fetches a RedTube video page and extracts its metadata.
 *
 * Values are read from the canonical link, meta tags and a few CSS
 * selectors. Scalar fields fall back to "None" (the duration to "0") when
 * the page yields nothing for them.
 *
 * @param url - Address of the video page to fetch.
 * @returns The scraped metadata in `IVideoData` form. `upvoted` mirrors
 *          `rating`, and `downvoted` is always `null`.
 * @throws Error carrying the message of whatever failed while fetching or parsing.
 */
export async function scrapeContent(url: string) {
  try {
    const html = await lust.fetchBody(url);
    const $ = load(html);

    const link = $("link[rel='canonical']").attr("href") || "None";
    // The id is the fourth "/"-separated piece of the canonical URL.
    const id = link.split("/")[3] || "None";
    const title = $("meta[property='og:title']").attr("content") || "None";
    const image = $("meta[property='og:image']").attr("content") || "None";
    const duration = $("meta[property='og:video:duration']").attr("content") || "0";
    const views = $("span.video_view_count").text() || "None";

    // Append "%" only when the attribute is present. Concatenating first
    // produces the always-truthy string "undefined%", which made the "None"
    // fallback unreachable.
    const percent = $("div.rating_percent.js_rating_percent").attr("data-percent");
    const rating = percent ? `${percent}%` : "None";

    const publish = $("span.video-infobox-date-added").text().replace("Published on ", "") || "None";
    const video = $("meta[name='twitter:player']").attr("content") || "None";

    const tags = $("a.item.video_carousel_item.video_carousel_category, a.item.video_carousel_item.video_carousel_tag")
      .map((_, el) => $(el).text())
      .get()
      .map((tag) => lust.removeHtmlTagWithoutSpace(tag));

    const models = $("div.pornstar-name.pornstarPopupWrapper")
      .find("a")
      .map((_, el) => $(el).text())
      .get()
      .map((name) => lust.removeHtmlTagWithoutSpace(name))
      // Drop anchors whose text contains "Subscribe" or "Rank".
      .filter((name) => !name.includes("Subscribe") && !name.includes("Rank"))
      // De-duplicate while keeping the first occurrence of each name.
      .filter((name, index, all) => all.indexOf(name) === index);

    const data: IVideoData = {
      success: true,
      data: {
        title: lust.removeHtmlTagWithoutSpace(title),
        id: id,
        image: image,
        duration: lust.secondToMinute(Number(duration)),
        views: views,
        rating: rating,
        uploaded: publish,
        upvoted: rating,
        downvoted: null,
        models: models,
        tags: tags
      },
      source: link,
      assets: [video, image]
    };
    return data;
  } catch (err) {
    // Thrown values are not guaranteed to be Error instances; fall back to
    // their string form so the message is never lost.
    const message = err instanceof Error ? err.message : String(err);
    throw Error(message);
  }
}
72 changes: 72 additions & 0 deletions src/scraper/redtube/redtubeSearchController.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import { load } from "cheerio";
import LustPress from "../../LustPress";
import c from "../../utils/options";
import { ISearchVideoData } from "../../interfaces";

const lust = new LustPress();

/**
 * Fetches a RedTube search-result page and extracts one entry per
 * `a.video_link` element.
 *
 * @param url - Address of the search page to fetch; echoed back as `source`.
 * @returns The search results in `ISearchVideoData` form.
 * @throws Error "No result found" when the page has no `a.video_link`
 *         element, or an Error carrying the message of any fetch/parse failure.
 */
export async function scrapeContent(url: string) {
  try {
    const html = await lust.fetchBody(url);
    const $ = load(html);

    // View counts are collected separately and paired with entries by index.
    const viewCounts = $("span.video_count")
      .map((_, node) => $(node).text())
      .get();

    const entries = $("a.video_link")
      .map((index, node) => {
        const anchor = $(node);
        const thumbnail = anchor.find("img");
        const href = anchor.attr("href");
        // The id is the second "/"-separated piece of the href.
        const videoId = href?.split("/")[1];

        // Keep only the space-separated tokens that still contain ":" after
        // everything except digits and colons has been stripped.
        const timeTokens = anchor
          .find("span.duration")
          .text()
          .split(" ")
          .map((token: string) => token.replace(/[^0-9:]/g, ""))
          .filter((token: string) => token.includes(":"));

        return {
          link: `${c.REDTUBE}${href}`,
          id: videoId,
          title: thumbnail.attr("alt"),
          image: thumbnail.attr("data-src"),
          duration: timeTokens.join(" "),
          views: viewCounts[index],
          video: `https://embed.redtube.com/?id=${videoId}`,
        };
      })
      .get();

    // Keep entries that have a real link and an image that is present and
    // not an inline data URI (a missing image also excludes the entry).
    const visible: object = entries.filter((entry) => {
      const isPlaceholderLink = entry.link.includes("javascript:void(0)");
      return !isPlaceholderLink && entry.image?.startsWith("data:image") === false;
    });

    // Emptiness is judged on the unfiltered list.
    if (entries.length === 0) throw Error("No result found");

    const result: ISearchVideoData = {
      success: true,
      data: visible as string[],
      source: url,
    };
    return result;
  } catch (err) {
    // Re-throw as a plain Error that carries only the original message.
    throw Error((err as Error).message);
  }
}
83 changes: 83 additions & 0 deletions src/scraper/xhamster/xhamsterGetController.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import { load } from "cheerio";
import LustPress from "../../LustPress";
import { IVideoData } from "../../interfaces";

const lust = new LustPress();

/**
 * Extracts `videoModel.duration` from the body of the page's
 * `script#initials-script` tag, which holds `window.initials=<JSON>;`.
 *
 * @param scriptBody - Inner HTML of the script tag, or `null` when absent.
 * @returns The duration value, or "None" when the script is missing, is not
 *          valid JSON, or carries no truthy duration.
 */
function readInitialsDuration(scriptBody: string | null): number | string {
  if (!scriptBody) return "None";

  // Strip only the assignment prefix and the trailing semicolon; removing
  // every ";" would also alter semicolons inside JSON string values.
  const json = scriptBody
    .trim()
    .replace(/^window\.initials\s*=\s*/, "")
    .replace(/;\s*$/, "");

  try {
    const initials: unknown = JSON.parse(json);
    const duration = (initials as { videoModel?: { duration?: unknown } } | null)?.videoModel?.duration;
    if (typeof duration === "number" || typeof duration === "string") {
      return duration || "None";
    }
    return "None";
  } catch {
    // Malformed script content: treat the duration as unavailable.
    return "None";
  }
}

/**
 * Fetches an xHamster video page and extracts its metadata.
 *
 * Values are read from the canonical link, Open Graph meta tags, the
 * `window.initials` script and a few CSS selectors. Every scalar field falls
 * back to "None" when the page yields nothing for it.
 *
 * @param url - Address of the video page to fetch.
 * @returns The scraped metadata in `IVideoData` form.
 * @throws Error carrying the message of whatever failed while fetching or parsing.
 */
export async function scrapeContent(url: string) {
  try {
    const html = await lust.fetchBody(url);
    const $ = load(html);

    const canonical = $("link[rel='canonical']").attr("href");
    const link = canonical || "None";

    // The id is "<4th>/<5th>" of the "/"-separated canonical URL. Check the
    // pieces before joining: a joined string is always truthy, so a trailing
    // `|| "None"` could never fire.
    const segments = link.split("/");
    const id = segments[3] && segments[4] ? `${segments[3]}/${segments[4]}` : "None";

    const title = $("meta[property='og:title']").attr("content") || "None";
    const image = $("meta[property='og:image']").attr("content") || "None";
    const duration = readInitialsDuration($("script#initials-script").html());

    const headerStats = $("div.header-icons").find("span");
    const views = headerStats.first().text() || "None";
    const rating = headerStats.eq(1).text() || "None";
    const publish = $("div.entity-info-container__date").attr("data-tooltip") || "None";

    // The vote text is "<up>/<down>"; either side may be missing, so guard
    // before trimming instead of calling .trim() on undefined.
    const [up, down] = $("div.rb-new__info").text().split("/");
    const upVote = up?.trim() || "None";
    const downVote = down?.trim() || "None";

    // The embed URL uses the last "-"-separated piece of the canonical link;
    // without a canonical link there is nothing to build it from.
    const video = canonical ? "https://xheve2.com/embed/" + link.split("-").pop() : "None";

    const tagAnchors = $("a.video-tag");
    const tags = tagAnchors
      .map((_, el) => $(el).text())
      .get()
      .map((tag) => lust.removeHtmlTagWithoutSpace(tag));

    // Models are the tag anchors that point at a pornstar page; the model
    // name is the part of the href after that prefix.
    const modelPrefix = "https://xheve2.com/pornstars/";
    const models = tagAnchors
      .map((_, el) => $(el).attr("href"))
      .get()
      .filter((href) => href.startsWith(modelPrefix))
      .map((href) => href.slice(modelPrefix.length));

    const data: IVideoData = {
      success: true,
      data: {
        title: lust.removeHtmlTagWithoutSpace(title),
        id: id,
        image: image,
        duration: lust.secondToMinute(Number(duration)),
        views: views,
        rating: rating,
        uploaded: publish,
        upvoted: upVote,
        downvoted: downVote,
        models: models,
        tags: tags
      },
      source: link,
      assets: [video, image]
    };
    return data;
  } catch (err) {
    // Thrown values are not guaranteed to be Error instances; fall back to
    // their string form so the message is never lost.
    const message = err instanceof Error ? err.message : String(err);
    throw Error(message);
  }
}
Loading

0 comments on commit 759c79c

Please sign in to comment.