Skip to content

Commit

Permalink
feat(crawler): read data from npm and display on page (#44)
Browse files: browse the repository at this point in the history
* chore(crawler): refactor github crawler

* feat(crawler): read data from npm

* remove github env
  • Loading branch information
marianfoo committed Apr 2, 2022
1 parent eabbcbe commit 26c7d4b
Show file tree
Hide file tree
Showing 7 changed files with 198 additions and 211 deletions.
249 changes: 80 additions & 169 deletions crawler/src/gh-repos.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@ const MyOctokit = Octokit.plugin(throttling);

import axios from "axios";
import { readFileSync, writeFileSync } from "fs";
import { Artifact, Package, Type, Sources, DataJson } from "./types";
import { Artifact, Package, Type, Source, DataJson } from "./types";

export default class GitHubRepositoriesProvider {
static source = "github-packages";


static octokit = new MyOctokit({
auth: process.env.GITHUB_TOKEN,
throttle: {
Expand All @@ -30,207 +29,119 @@ export default class GitHubRepositoriesProvider {
},
});

static transformRepo(rawRepo: any, lastMonth: any) {
const id = `gh-repo-stars-${rawRepo.full_name}`;
const old: Artifact | undefined = Object.values(lastMonth).find(
//@ts-ignore
(old: Artifact) => old.id === id
);
const tags = [`${rawRepo.stargazers_count} ${rawRepo.stargazers_count === 1 ? "star" : "stars"}`, `${rawRepo.forks} ${rawRepo.forks === 1 ? "fork" : "forks"}`];
rawRepo.license && tags.push(rawRepo.license.spdx_id);
if (old) {
return {
...old,
name: rawRepo.full_name,
description: rawRepo.description,
link: rawRepo.html_url,
tags,
updatedAt: rawRepo.updated_at,
type: "code-repository",
aggregatedCount: rawRepo.stargazers_count + rawRepo.forks,
currentCount: rawRepo.stargazers_count + rawRepo.forks - old.aggregatedCount,
};
static async get(sources: Source[]): Promise<Package[]> {


let packages: Package[] = [];




for (const source of sources) {
source.path = `${source.owner}/${source.repo}`;
if (source.subpath && source.subpackages) {
let repoInfo = await this.getRepoInfo(source);
for (const subpackage of source.subpackages) {
let path: string = `${source.subpath}/${subpackage}/`;
let packageInfo = await this.fetchRepo(source, path, repoInfo);
packages.push(packageInfo);
}
} else {
let repoInfo = await this.getRepoInfo(source);
let packageInfo = await this.fetchRepo(source, "", repoInfo);
packages.push(packageInfo);
}
}
return {
id,
name: rawRepo.full_name,
description: rawRepo.description,
link: rawRepo.html_url,
createdAt: rawRepo.created_at,
aggregatedCount: rawRepo.stargazers_count + rawRepo.forks,
currentCount: rawRepo.stargazers_count + rawRepo.forks,
updatedAt: rawRepo.updated_at,
type: "code-repository",
tags,
};

return packages;
}

static async getRepoInfo(source: Sources) {
static async getRepoInfo(source: Source) {
let packageObject: Package = {
name: source.repo,
description: "",
type: "cc",
link: ``,
type: "",
githublink: ``,
readme: "",
createdAt: "",
updatedAt: "",
author: "",
license: "",
stars: 0,
forks: 0
}
forks: 0,
downloads: 0,
npmlink: ""
};
let repo = await GitHubRepositoriesProvider.octokit.rest.repos.get({
owner: source.owner,
repo: source.repo,
});
});
packageObject.createdAt = repo.data.created_at;
packageObject.updatedAt = repo.data.updated_at;
packageObject.link = repo.data.html_url;
packageObject.githublink = repo.data.html_url;
packageObject.forks = repo.data.forks;
packageObject.stars = repo.data.stargazers_count;
packageObject.license = repo.data.license.key;
return packageObject;
}

static async fetchMonoRepos(source: Sources): Promise<Package[]> {
let packagesReturn: Package[] = [];
let repoInfo = await this.getRepoInfo(source);
for (const subpackage of source.subpackages) {
static async fetchRepo(source: Source, path: string, repoInfo: any): Promise<Package> {
let packageJson: Package = {
name: "",
description: "",
author: "",
license: "",
type: "",
readme: "",
forks: 0,
stars: 0,
updatedAt: "",
createdAt: "",
githublink: "",
downloads: 0,
npmlink: ""
};
try {
const data = await GitHubRepositoriesProvider.octokit.rest.repos.getContent({
mediaType: {
format: "raw",
},
owner: source.owner,
repo: source.repo,
path: `${path}package.json`,
});
let string = data.data.toString();
packageJson = JSON.parse(string);
// TODO: replace with specific reference to type
try {
const data = await GitHubRepositoriesProvider.octokit.rest.repos.getContent({
mediaType: {
format: "raw",
},
owner: source.owner,
repo: source.repo,
path: `${source.subpath}/${subpackage}/package.json`,
});
let string = data.data.toString();
let packageJson: Package = JSON.parse(string);
try {
let nameArray = packageJson.name.split("-");
packageJson.type = nameArray[1];
} catch (error) {}
packageJson.license = repoInfo.license;
packageJson.forks = repoInfo.forks;
packageJson.stars = repoInfo.stars;
packageJson.updatedAt = repoInfo.updatedAt;
packageJson.createdAt = repoInfo.createdAt;
packageJson.link = `${repoInfo.link}/tree/main/${source.subpath}/${subpackage}`;
try {
const readme = await GitHubRepositoriesProvider.octokit.rest.repos.getContent({
mediaType: {
format: "raw",
},
owner: source.owner,
repo: source.repo,
path: `${source.subpath}/${subpackage}/README.md`,
});
let readmeString = readme.data.toString();
packageJson.readme = readmeString;
} catch (error) {
console.log("No README found");
}


packagesReturn.push(packageJson);
} catch (error) {
console.log(error);
}
}
return packagesReturn;
}

static async fetchSingleRepos(source: Sources): Promise<Package[]> {
let repoInfo = await this.getRepoInfo(source);

let packagesReturn: Package[] = [];
let nameArray = packageJson.name.split("-");
packageJson.type = nameArray[1];
} catch (error) {}
packageJson.license = repoInfo.license;
packageJson.forks = repoInfo.forks;
packageJson.stars = repoInfo.stars;
// updatedAt and createdAt are taken only from npm, so the GitHub values are not copied here
// packageJson.updatedAt = repoInfo.updatedAt;
// packageJson.createdAt = repoInfo.createdAt;
packageJson.githublink = `${repoInfo.githublink}/tree/main/${path}`;
try {
const data = await GitHubRepositoriesProvider.octokit.rest.repos.getContent({
const readme = await GitHubRepositoriesProvider.octokit.rest.repos.getContent({
mediaType: {
format: "raw",
},
owner: source.owner,
repo: source.repo,
path: `package.json`,
path: `${path}README.md`,
});
let string = data.data.toString();
let packageJson: Package = JSON.parse(string);
packageJson.license = repoInfo.license;
packageJson.forks = repoInfo.forks;
packageJson.stars = repoInfo.stars;
packageJson.updatedAt = repoInfo.updatedAt;
packageJson.createdAt = repoInfo.createdAt;
packageJson.link = repoInfo.link;
try {
// TODO: read from keywords
packageJson.type = "cc";
} catch (error) {}
try {
const readme = await GitHubRepositoriesProvider.octokit.rest.repos.getContent({
mediaType: {
format: "raw",
},
owner: source.owner,
repo: source.repo,
path: `README.md`
});
let readmeString = readme.data.toString();
packageJson.readme = readmeString;
} catch (error) {
console.log("No README found");
}


packagesReturn.push(packageJson);
let readmeString = readme.data.toString();
packageJson.readme = readmeString;
} catch (error) {
console.log(error);
}

return packagesReturn;
}

static async get(lastMonth: any): Promise<Package[]> {
// new DataJson
let dataJson: DataJson = {
types: [],
packages: [],
}

let packages: Package[] = [];

const sourcesJsonString = readFileSync(`${__dirname}/../sources.json`, "utf8");
let Sources: Sources[] = JSON.parse(sourcesJsonString);

for (const source of Sources) {
source.path = `${source.owner}/${source.repo}`;
if (source.subpath && source.subpackages) {
const monoRepos = await this.fetchMonoRepos(source);
packages = packages.concat(monoRepos);

} else {
const singleRepos = await this.fetchSingleRepos(source);
packages = packages.concat(singleRepos);
console.log(`No README.md found for ${packageJson.githublink}`);
}
} catch (error) {
console.log(error);
}

let typesArray: Type[] = [];
for (const packageContent of packages) {
let type: Type = {
name: packageContent.type,
};
if (!typesArray.find((type) => type.name === packageContent.type)) {
typesArray.push(type);
}
}

dataJson.packages = packages;
dataJson.types = typesArray;

writeFileSync(`${__dirname}/../../uimodule/src/model/data.json`, JSON.stringify(dataJson));

return packages;


return packageJson;
}
}
62 changes: 29 additions & 33 deletions crawler/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,40 +1,36 @@
// require("dotenv").config();
// import { readFileSync, writeFileSync } from "fs";
import { readFileSync, writeFileSync } from "fs";

import GitHubRepositoriesProvider from "./gh-repos";
import { Artifact, Package } from "./types";
import NPMProvider from "./npm";
import { Artifact, Package, Source, Type, DataJson } from "./types";

(async () => {
// get current month data
let currentTrendsJson: any = {};
// let currentAllItemsJson: any = {};
// try {
// const filecontentTrends = readFileSync(`${__dirname}/../../frontend/trends/webapp/model/trends.json`, "utf8");
// currentTrendsJson = JSON.parse(filecontentTrends);
// } catch (e) {
// if (e.code !== "ENOENT") {
// throw e;
// }
// }
// try {
// const filecontentAllItems = readFileSync(`${__dirname}/../../frontend/trends/webapp/model/allItems.json`, "utf8");
// currentAllItemsJson = JSON.parse(filecontentAllItems);
// } catch (e) {
// if (e.code !== "ENOENT") {
// throw e;
// }
// }

// update only github and npm
const Providers = [GitHubRepositoriesProvider];

const artifacts = await Promise.all(
Providers.map(async (provider) => {
// console.log(`Start provider '${provider.name}'.`);
const items: Package[] = await provider.get(currentTrendsJson);
// console.log(`Provider '${provider.name}' returned ${items.length} items.`);
return items;
})
);
let dataJson: DataJson = {
types: [],
packages: [],
};

const sourcesJsonString = readFileSync(`${__dirname}/../sources.json`, "utf8");
let sources: Source[] = JSON.parse(sourcesJsonString);

let githubPackages: Package[] = await GitHubRepositoriesProvider.get(sources);
githubPackages = await NPMProvider.get(githubPackages);

// extract the distinct package types from the package info
let typesArray: Type[] = [];
for (const packageContent of githubPackages) {
let type: Type = {
name: packageContent.type,
};
if (!typesArray.find((type) => type.name === packageContent.type)) {
typesArray.push(type);
}
}

dataJson.packages = githubPackages;
dataJson.types = typesArray;

writeFileSync(`${__dirname}/../../uimodule/src/model/data.json`, JSON.stringify(dataJson));

})();
Loading

0 comments on commit 26c7d4b

Please sign in to comment.