-
Notifications
You must be signed in to change notification settings - Fork 48
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Workaround for the GitHub Search API limit #276
Changes from 2 commits
32d6b3b
7a48992
af51d80
31cbe08
26f7c90
47e1dd0
5db3523
78d0a64
543c1ec
b9055a5
91592f2
ef1c33a
3e6ef71
1d91f1f
a024b05
791d08f
8d806a7
dc9f5bc
33dba20
2fcb023
6eab75a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -73,6 +73,8 @@ static ArgumentParser getArgumentParser() { | |
.help("regex of repository names to exclude from pull request generation"); | ||
parser.addArgument("-B") | ||
.help("additional body text to include in pull requests"); | ||
parser.addArgument("-l", "--" + Constants.GIT_API_SEARCH_LIMIT) | ||
.help("limit the search results for github api (default: 1000)"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also add There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And you won't need the null check you added either ;) |
||
return parser; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,14 +48,22 @@ public void execute(final Namespace ns, final DockerfileGitHubUtil dockerfileGit | |
Multimap<String, String> pathToDockerfilesInParentRepo = ArrayListMultimap.create(); | ||
|
||
Set<Map.Entry<String, JsonElement>> imageToTagStore = parseStoreToImagesMap(ns.get(Constants.STORE)); | ||
Integer gitApiSearchLimit; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need for any of the below integer parsing b/c of adding the |
||
if (ns.get(Constants.GIT_API_SEARCH_LIMIT) == null || Integer.parseInt(ns.get(Constants.GIT_API_SEARCH_LIMIT)) > Constants.GIT_API_SEARCH_LIMIT_NUMBER) { | ||
gitApiSearchLimit = Constants.GIT_API_SEARCH_LIMIT_NUMBER; | ||
} else { | ||
gitApiSearchLimit = Integer.parseInt(ns.get(Constants.GIT_API_SEARCH_LIMIT)); | ||
} | ||
for (Map.Entry<String, JsonElement> imageToTag : imageToTagStore) { | ||
String image = imageToTag.getKey(); | ||
log.info("Repositories with image {} being forked.", image); | ||
imageToTagMap.put(image, imageToTag.getValue().getAsString()); | ||
PagedSearchIterable<GHContent> contentsWithImage = | ||
this.dockerfileGitHubUtil.findFilesWithImage(image, ns.get(Constants.GIT_ORG)); | ||
forkRepositoriesFound(pathToDockerfilesInParentRepo, | ||
imagesFoundInParentRepo, contentsWithImage, image); | ||
List<PagedSearchIterable<GHContent>> contentsWithImage = | ||
this.dockerfileGitHubUtil.findFilesWithImage(image, ns.get(Constants.GIT_ORG), null, gitApiSearchLimit); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Better to use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You'll need to change your method signature as well, but it is worth it to avoid |
||
for (int i = 0; i < contentsWithImage.size(); i++) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: cleaner to do this the modern java way:
|
||
forkRepositoriesFound(pathToDockerfilesInParentRepo, | ||
imagesFoundInParentRepo, contentsWithImage.get(i), image); | ||
} | ||
} | ||
|
||
GHMyself currentUser = this.dockerfileGitHubUtil.getMyself(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,29 +58,41 @@ public void execute(final Namespace ns, DockerfileGitHubUtil dockerfileGitHubUti | |
new GitForkBranch(ns.get(Constants.IMG), ns.get(Constants.TAG), ns.get(Constants.GIT_BRANCH)); | ||
|
||
log.info("Finding Dockerfiles with the given image..."); | ||
Optional<PagedSearchIterable<GHContent>> contentsWithImage = dockerfileGitHubUtil.getGHContents(ns.get(Constants.GIT_ORG), img); | ||
|
||
Integer gitApiSearchLimit; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto here as well...this just boils down to |
||
if (ns.get(Constants.GIT_API_SEARCH_LIMIT) == null || Integer.parseInt(ns.get(Constants.GIT_API_SEARCH_LIMIT)) > Constants.GIT_API_SEARCH_LIMIT_NUMBER) { | ||
gitApiSearchLimit = Constants.GIT_API_SEARCH_LIMIT_NUMBER; | ||
} else { | ||
gitApiSearchLimit = Integer.parseInt(ns.get(Constants.GIT_API_SEARCH_LIMIT)); | ||
} | ||
Optional<List<PagedSearchIterable<GHContent>>> contentsWithImage = dockerfileGitHubUtil.getGHContents(ns.get(Constants.GIT_ORG), img, gitApiSearchLimit); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line is longer than 100 characters (found 161). |
||
if (contentsWithImage.isPresent()) { | ||
Multimap<String, GitHubContentToProcess> pathToDockerfilesInParentRepo = | ||
pullRequestSender.forkRepositoriesFoundAndGetPathToDockerfiles(contentsWithImage.get(), gitForkBranch); | ||
List<IOException> exceptions = new ArrayList<>(); | ||
List<String> skippedRepos = new ArrayList<>(); | ||
|
||
for (String currUserRepo : pathToDockerfilesInParentRepo.keySet()) { | ||
Optional<GitHubContentToProcess> forkWithContentPaths = | ||
pathToDockerfilesInParentRepo.get(currUserRepo).stream().findFirst(); | ||
if (forkWithContentPaths.isPresent()) { | ||
try { | ||
changeDockerfiles(ns, pathToDockerfilesInParentRepo, forkWithContentPaths.get(), skippedRepos); | ||
} catch (IOException e) { | ||
log.error(String.format("Error changing Dockerfile for %s", forkWithContentPaths.get().getParent().getFullName()), e); | ||
exceptions.add(e); | ||
List<PagedSearchIterable<GHContent>> contentsFoundWithImage = contentsWithImage.get(); | ||
for (int i = 0; i < contentsFoundWithImage.size(); i++ ) { | ||
Multimap<String, GitHubContentToProcess> pathToDockerfilesInParentRepo = | ||
pullRequestSender.forkRepositoriesFoundAndGetPathToDockerfiles(contentsFoundWithImage.get(i), gitForkBranch); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line is longer than 100 characters (found 133). |
||
|
||
|
||
List<IOException> exceptions = new ArrayList<>(); | ||
List<String> skippedRepos = new ArrayList<>(); | ||
|
||
for (String currUserRepo : pathToDockerfilesInParentRepo.keySet()) { | ||
Optional<GitHubContentToProcess> forkWithContentPaths = | ||
pathToDockerfilesInParentRepo.get(currUserRepo).stream().findFirst(); | ||
if (forkWithContentPaths.isPresent()) { | ||
try { | ||
changeDockerfiles(ns, pathToDockerfilesInParentRepo, forkWithContentPaths.get(), skippedRepos); | ||
} catch (IOException e) { | ||
log.error(String.format("Error changing Dockerfile for %s", forkWithContentPaths.get().getParent().getFullName()), e); | ||
exceptions.add(e); | ||
} | ||
} else { | ||
log.warn("Didn't find fork for {} so not changing Dockerfiles", currUserRepo); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line is longer than 100 characters (found 102). |
||
} | ||
} else { | ||
log.warn("Didn't find fork for {} so not changing Dockerfiles", currUserRepo); | ||
} | ||
} | ||
|
||
ResultsProcessor.processResults(skippedRepos, exceptions, log); | ||
ResultsProcessor.processResults(skippedRepos, exceptions, log); | ||
} | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,4 +34,6 @@ private Constants() { | |
public static final String GIT_PR_BODY = "B"; | ||
public static final String GIT_ADDITIONAL_COMMIT_MESSAGE = "c"; | ||
public static final String GIT_REPO_EXCLUDES = "excludes"; | ||
public static final String GIT_API_SEARCH_LIMIT = "git-api-search-limit"; | ||
public static final Integer GIT_API_SEARCH_LIMIT_NUMBER = 1000; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need based on above suggestion |
||
} |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -20,8 +20,7 @@ | |||||
import org.slf4j.LoggerFactory; | ||||||
|
||||||
import java.io.*; | ||||||
import java.util.List; | ||||||
import java.util.Optional; | ||||||
import java.util.*; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using the '.*' form of import should be avoided - java.util.*. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wrong lexicographical order for 'java.util.*' import. Should be before 'org.slf4j.LoggerFactory'. |
||||||
import java.util.concurrent.TimeUnit; | ||||||
|
||||||
/** | ||||||
|
@@ -83,28 +82,78 @@ public GHRepository getRepo(String repoName) throws IOException { | |||||
return gitHubUtil.getRepo(repoName); | ||||||
} | ||||||
|
||||||
public PagedSearchIterable<GHContent> findFilesWithImage(String image, String org) throws IOException { | ||||||
public List<PagedSearchIterable<GHContent>> findFilesWithImage(String image, String orgToInclude, String orgToExclude, Integer gitApiSearchLimit) throws IOException { | ||||||
GHContentSearchBuilder search = gitHubUtil.startSearch(); | ||||||
// Filename search appears to yield better / more results than language:Dockerfile | ||||||
// Root cause: linguist doesn't currently deal with prefixes of files: | ||||||
// https://github.com/github/linguist/issues/4566 | ||||||
search.filename("Dockerfile"); | ||||||
if (org != null) { | ||||||
search.user(org); | ||||||
if (orgToInclude != null) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we should pass in a |
||||||
search.user(orgToInclude); | ||||||
} | ||||||
if (orgToExclude != null) { | ||||||
String queryExcludingOrg = String.format("-org:{}", orgToExclude); | ||||||
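There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. String.format takes `%s`-style format specifiers, not SLF4J-style `{}` placeholders, so the `{}` here is emitted literally instead of being replaced by the org name. |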
||||||
search.q(queryExcludingOrg); | ||||||
} | ||||||
|
||||||
if (image.substring(image.lastIndexOf(' ') + 1).length() <= 1) { | ||||||
throw new IOException("Invalid image name."); | ||||||
} | ||||||
List<String> terms = GitHubImageSearchTermList.getSearchTerms(image); | ||||||
log.info("Searching for {} with terms: {}", image, terms); | ||||||
terms.forEach(search::q); | ||||||
PagedSearchIterable<GHContent> files = search.list(); | ||||||
List<PagedSearchIterable<GHContent>> allFiles = new ArrayList<>(); | ||||||
int totalCount = files.getTotalCount(); | ||||||
if (totalCount > 1000) { | ||||||
log.warn("Number of search results is above 1000! The GitHub Search API will only return around 1000 results - https://developer.github.com/v3/search/#about-the-search-api"); | ||||||
} | ||||||
log.info("Number of files found for {}: {}", image, totalCount); | ||||||
return files; | ||||||
if (totalCount > gitApiSearchLimit && orgToInclude != null) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need this warning any more? |
||||||
log.warn("Number of search results is above {}! The GitHub Search API will only return around 1000 results - https://developer.github.com/v3/search/#about-the-search-api", Constants.GIT_API_SEARCH_LIMIT_NUMBER.toString()); | ||||||
} | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: our style is not multi-line if else blocks |
||||||
else if (totalCount > gitApiSearchLimit) { | ||||||
return getSearchResultsExcludingOrgWithMostHits(image, files, gitApiSearchLimit); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not totally convinced that this will filter out multiple orgs if we need two or more cycles of excludes. For example, with 3 orgs: We'll need to do something like: I don't think we do that here b/c we don't seem to make the org exclude str additive, but I might be missing something. |
||||||
} | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need an else clause to warn that we encountered an unexpected code path? |
||||||
allFiles.add(files); | ||||||
return allFiles; | ||||||
} | ||||||
|
||||||
protected String getOrgNameWithMaximumHits(PagedSearchIterable<GHContent> files) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Modern java way of doing this:
|
||||||
GHRepository org; | ||||||
String orgName; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need for expanded scope of these |
||||||
String orgWithMaximumHits = ""; | ||||||
int maxCount = 0; | ||||||
HashMap<String, Integer> orgHitsMap = new HashMap<>(); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
for (GHContent ghContent : files) { | ||||||
org = ghContent.getOwner(); | ||||||
orgName = org.getOwnerName(); | ||||||
if (orgHitsMap.containsKey(orgName)) { | ||||||
int hits = orgHitsMap.get(orgName); | ||||||
orgHitsMap.put(orgName, hits + 1); | ||||||
} else { | ||||||
orgHitsMap.put(orgName, 1); | ||||||
} | ||||||
} | ||||||
for (Map.Entry<String, Integer> orgNameHits : orgHitsMap.entrySet()) { | ||||||
orgName = orgNameHits.getKey(); | ||||||
int numberOfHits = orgNameHits.getValue(); | ||||||
if (numberOfHits > maxCount) { | ||||||
orgWithMaximumHits = orgName; | ||||||
maxCount = numberOfHits; | ||||||
} | ||||||
} | ||||||
return orgWithMaximumHits; | ||||||
} | ||||||
|
||||||
protected List<PagedSearchIterable<GHContent>> getSearchResultsExcludingOrgWithMostHits(String image, PagedSearchIterable<GHContent> files, Integer gitApiSearchLimit) throws IOException { | ||||||
List<PagedSearchIterable<GHContent>> contentsForOrgWithMaximumHits; | ||||||
List<PagedSearchIterable<GHContent>> contentsExcludingOrgWithMaximumHits; | ||||||
List<PagedSearchIterable<GHContent>> allContentsWithImage = new ArrayList<>(); | ||||||
String orgWithMaximumHits = getOrgNameWithMaximumHits(files); | ||||||
contentsForOrgWithMaximumHits = findFilesWithImage(image, orgWithMaximumHits, null, gitApiSearchLimit); | ||||||
contentsExcludingOrgWithMaximumHits = findFilesWithImage(image, null, orgWithMaximumHits, gitApiSearchLimit); | ||||||
allContentsWithImage.add(contentsForOrgWithMaximumHits.get(0)); | ||||||
allContentsWithImage.add(contentsExcludingOrgWithMaximumHits.get(0)); | ||||||
|
||||||
return allContentsWithImage; | ||||||
} | ||||||
|
||||||
/* Workaround: The GitHub API caches API calls for up to 60 seconds, so back-to-back API calls with the same | ||||||
|
@@ -319,19 +368,21 @@ public boolean thisUserIsOwner(GHRepository repo) throws IOException { | |||||
* @param org GitHub organization | ||||||
* @param img image to find | ||||||
*/ | ||||||
public Optional<PagedSearchIterable<GHContent>> getGHContents(String org, String img) | ||||||
public Optional<List<PagedSearchIterable<GHContent>>> getGHContents(String org, String img, Integer gitApiSearchLimit) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Abbreviation in name 'getGHContents' must contain no more than '2' consecutive capital letters. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Line is longer than 100 characters (found 122). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Method There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Method |
||||||
throws IOException, InterruptedException { | ||||||
PagedSearchIterable<GHContent> contentsWithImage = null; | ||||||
List<PagedSearchIterable<GHContent>> contentsWithImage = null; | ||||||
for (int i = 0; i < 5; i++) { | ||||||
contentsWithImage = findFilesWithImage(img, org); | ||||||
if (contentsWithImage.getTotalCount() > 0) { | ||||||
contentsWithImage = findFilesWithImage(img, org, null, gitApiSearchLimit); | ||||||
if (contentsWithImage.get(0).getTotalCount() > 0) { | ||||||
break; | ||||||
} else { | ||||||
getGitHubUtil().waitFor(TimeUnit.SECONDS.toMillis(1)); | ||||||
} | ||||||
} | ||||||
|
||||||
int numOfContentsFound = contentsWithImage.getTotalCount(); | ||||||
int numOfContentsFound = 0; | ||||||
for (int i = 0; i < contentsWithImage.size(); i++) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A "NullPointerException" could be thrown; "contentsWithImage" is nullable here. |
||||||
numOfContentsFound += contentsWithImage.get(i).getTotalCount(); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. More idiomatic these days to do: |
||||||
} | ||||||
if (numOfContentsFound <= 0) { | ||||||
log.info("Could not find any repositories with given image: {}", img); | ||||||
return Optional.empty(); | ||||||
|
This comment was marked as resolved.
Sorry, something went wrong.