diff --git a/java/ql/automodel/publish.sh b/java/ql/automodel/publish.sh deleted file mode 100755 index 7304f0da1809..000000000000 --- a/java/ql/automodel/publish.sh +++ /dev/null @@ -1,197 +0,0 @@ -#!/bin/bash -set -e - -help="Usage: ./publish [--override-release] [--dry-run] -Publish the automodel query pack. - -If no arguments are provided, publish the version of the codeql repo specified by the latest official release of the codeml-automodel repo. -If the --override-release argument is provided, your current local HEAD is used (for unofficial releases or patching). -If the --dry-run argument is provided, the release is not published (for testing purposes)." - -# Echo the help message -if [ "$1" = "-h" ] || [ "$1" = "--help" ]; then - echo "$help" - exit 0 -fi - -# Check the number of arguments are valid -if [ $# -gt 2 ]; then - echo "Error: Invalid arguments provided" - echo "$help" - exit 1 -fi - -OVERRIDE_RELEASE=0 -DRY_RUN=0 -for arg in "$@" -do - case $arg in - --override-release) - OVERRIDE_RELEASE=1 - shift # Remove --override-release from processing - ;; - --dry-run) - DRY_RUN=1 - shift # Remove --dry-run from processing - ;; - *) - echo "Error: Invalid argument provided: $arg" - echo "$help" - exit 1 - ;; - esac -done - -# Describe what we're about to do based on the command-line arguments -if [ $OVERRIDE_RELEASE = 1 ]; then - echo "Publishing the current HEAD of the automodel repo" -else - echo "Publishing the version of the automodel repo specified by the latest official release of the codeml-automodel repo" -fi -if [ $DRY_RUN = 1 ]; then - echo "Dry run: we will step through the process but we won't publish the query pack" -else - echo "Not a dry run! Publishing the query pack" -fi - -# If we're publishing the codeml-automodel release then we will checkout the sha specified in the release. -# So we need to check that there are no uncommitted changes in the local branch. -# And, if we're publishing the current HEAD, it's cleaner to ensure that there are no uncommitted changes. -if ! git diff --quiet; then - echo "Error: Uncommitted changes exist. Please commit or stash your changes before publishing." - exit 1 -fi - -# Check the above environment variables are set -if [ -z "${GITHUB_TOKEN}" ]; then - echo "Error: GITHUB_TOKEN environment variable not set. Please set this to a token with package:write permissions to codeql." - exit 1 -fi -if [ -z "${GH_TOKEN}" ]; then - echo "Error: GH_TOKEN environment variable not set. Please set this to a token with repo permissions to github/codeml-automodel." - exit 1 -fi - -# Get the sha of the previous release, i.e. the last commit to the main branch that updated the query pack version -PREVIOUS_RELEASE_SHA=$(git rev-list -n 1 main -- ./src/qlpack.yml) -if [ -z "$PREVIOUS_RELEASE_SHA" ]; then - echo "Error: Could not get the sha of the previous release of codeml-automodel query pack" - exit 1 -else - echo "Previous query-pack release sha: $PREVIOUS_RELEASE_SHA" -fi - -CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD) -CURRENT_SHA=$(git rev-parse HEAD) - -if [ $OVERRIDE_RELEASE = 1 ]; then - # Check that the current HEAD is downstream from PREVIOUS_RELEASE_SHA - if ! git merge-base --is-ancestor "$PREVIOUS_RELEASE_SHA" "$CURRENT_SHA"; then - echo "Error: The current HEAD is not downstream from the previous release" - exit 1 - fi -else - # Get the latest release of codeml-automodel - TAG_NAME=$(gh api -H 'Accept: application/vnd.github+json' -H 'X-GitHub-Api-Version: 2022-11-28' /repos/github/codeml-automodel/releases/latest | jq -r .tag_name) - # Check TAG_NAME is not empty - if [ -z "$TAG_NAME" ]; then - echo "Error: Could not get latest release of codeml-automodel" - exit 1 - fi - echo "Updating to latest automodel release: $TAG_NAME" - # Before downloading, delete any existing release.zip, and ignore failure if not present - rm release.zip || true - gh release download $TAG_NAME -A zip -O release.zip --repo 'https://github.com/github/codeml-automodel' - # Before unzipping, delete any existing release directory, and ignore failure if not present - rm -rf release || true - unzip -o release.zip -d release - REVISION=$(jq -r '.["codeql-sha"]' release/codeml-automodel*/codeml-automodel-release.json) - echo "The latest codeml-automodel release specifies the codeql sha $REVISION" - # Check that REVISION is downstream from PREVIOUS_RELEASE_SHA - if ! git merge-base --is-ancestor "$PREVIOUS_RELEASE_SHA" "$REVISION"; then - echo "Error: The codeql version $REVISION is not downstream of the query-pack version $PREVIOUS_RELEASE_SHA" - exit 1 - fi - # Get the version of the codeql code specified by the codeml-automodel release - git checkout "$REVISION" -fi - -# Get the absolute path of the automodel repo -AUTOMODEL_ROOT="$(readlink -f "$(dirname $0)")" -# Get the absolute path of the workspace root -WORKSPACE_ROOT="$AUTOMODEL_ROOT/../../.." -# Specify the groups of queries to test and publish -GRPS="automodel,-test" - -# Install the codeql gh extension -gh extensions install github/gh-codeql - -pushd "$AUTOMODEL_ROOT" -echo Testing automodel queries -gh codeql test run test -popd - -pushd "$WORKSPACE_ROOT" -echo "Preparing the release" -gh codeql pack release --groups $GRPS -v - -if [ $DRY_RUN = 1 ]; then - echo "Dry run: not publishing the query pack" - gh codeql pack publish --groups $GRPS --dry-run -v -else - echo "Not a dry run! Publishing the query pack" - gh codeql pack publish --groups $GRPS -v -fi - -echo "Bumping versions" -gh codeql pack post-release --groups $GRPS -v -popd - -# The above commands update -# ./src/CHANGELOG.md -# ./src/codeql-pack.release.yml -# ./src/qlpack.yml -# and add a new file -# ./src/change-notes/released/.md - -# Get the filename of the most recently created file in ./src/change-notes/released/*.md -# This will be the file for the new release -NEW_CHANGE_NOTES_FILE=$(ls -t ./src/change-notes/released/*.md | head -n 1) - -# Make a copy of the modified files -mv ./src/CHANGELOG.md ./src/CHANGELOG.md.dry-run -mv ./src/codeql-pack.release.yml ./src/codeql-pack.release.yml.dry-run -mv ./src/qlpack.yml ./src/qlpack.yml.dry-run -mv "$NEW_CHANGE_NOTES_FILE" ./src/change-notes/released.md.dry-run - -if [ $OVERRIDE_RELEASE = 1 ]; then - # Restore the original files - git checkout ./src/CHANGELOG.md - git checkout ./src/codeql-pack.release.yml - git checkout ./src/qlpack.yml -else - # Restore the original files - git checkout "$CURRENT_BRANCH" --force -fi - -if [ $DRY_RUN = 1 ]; then - echo "Inspect the updated dry-run version files:" - ls -l ./src/*.dry-run - ls -l ./src/change-notes/*.dry-run -else - # Add the updated files to the current branch - echo "Adding the version changes" - mv -f ./src/CHANGELOG.md.dry-run ./src/CHANGELOG.md - mv -f ./src/codeql-pack.release.yml.dry-run ./src/codeql-pack.release.yml - mv -f ./src/qlpack.yml.dry-run ./src/qlpack.yml - mv -f ./src/change-notes/released.md.dry-run "$NEW_CHANGE_NOTES_FILE" - git add ./src/CHANGELOG.md - git add ./src/codeql-pack.release.yml - git add ./src/qlpack.yml - git add "$NEW_CHANGE_NOTES_FILE" - echo "Added the following updated version files to the current branch:" - git status -s - echo "To complete the release, please commit these files and merge to the main branch" -fi - -echo "Done" \ No newline at end of file diff --git a/java/ql/automodel/src/AutomodelAlertSinkUtil.qll b/java/ql/automodel/src/AutomodelAlertSinkUtil.qll deleted file mode 100644 index f20c8e57b6cc..000000000000 --- a/java/ql/automodel/src/AutomodelAlertSinkUtil.qll +++ /dev/null @@ -1,183 +0,0 @@ -private import java -private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow -private import semmle.code.java.dataflow.TaintTracking -private import semmle.code.java.security.RequestForgeryConfig -private import semmle.code.java.security.CommandLineQuery -private import semmle.code.java.security.SqlConcatenatedQuery -private import semmle.code.java.security.SqlInjectionQuery -private import semmle.code.java.security.UrlRedirectQuery -private import semmle.code.java.security.TaintedPathQuery -private import semmle.code.java.security.SqlInjectionQuery -private import AutomodelJavaUtil - -private newtype TSinkModel = - MkSinkModel( - string package, string type, boolean subtypes, string name, string signature, string ext, - string input, string kind, string provenance - ) { - ExternalFlow::sinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance, - _) - } - -class SinkModel extends TSinkModel { - string package; - string type; - boolean subtypes; - string name; - string signature; - string ext; - string input; - string kind; - string provenance; - - SinkModel() { - this = MkSinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance) - } - - /** Gets the package for this sink model. */ - string getPackage() { result = package } - - /** Gets the type for this sink model. */ - string getType() { result = type } - - /** Gets whether this sink model considers subtypes. */ - boolean getSubtypes() { result = subtypes } - - /** Gets the name for this sink model. */ - string getName() { result = name } - - /** Gets the signature for this sink model. */ - string getSignature() { result = signature } - - /** Gets the input for this sink model. */ - string getInput() { result = input } - - /** Gets the extension for this sink model. */ - string getExt() { result = ext } - - /** Gets the kind for this sink model. */ - string getKind() { result = kind } - - /** Gets the provenance for this sink model. */ - string getProvenance() { result = provenance } - - /** Gets the number of instances of this sink model. */ - int getInstanceCount() { result = count(PotentialSinkModelExpr p | p.getSinkModel() = this) } - - /** Gets a string representation of this sink model. */ - string toString() { - result = - "SinkModel(" + package + ", " + type + ", " + subtypes + ", " + name + ", " + signature + ", " - + ext + ", " + input + ", " + kind + ", " + provenance + ")" - } - - /** Gets a string representation of this sink model as it would appear in a Models-as-Data file. */ - string getRepr() { - result = - "\"" + package + "\", \"" + type + "\", " + pyBool(subtypes) + ", \"" + name + "\", \"" + - signature + "\", \"" + ext + "\", \"" + input + "\", \"" + kind + "\", \"" + provenance + - "\"" - } -} - -/** An expression that may correspond to a sink model. */ -class PotentialSinkModelExpr extends Expr { - /** - * Holds if this expression has the given signature. The signature should contain enough - * information to determine a corresponding sink model, if one exists. - */ - pragma[nomagic] - predicate hasSignature( - string package, string type, boolean subtypes, string name, string signature, string input - ) { - exists(Call call, Callable callable, int argIdx | - call.getCallee().getSourceDeclaration() = callable and - ( - this = call.getArgument(argIdx) - or - this = call.getQualifier() and argIdx = -1 - ) and - (if argIdx = -1 then input = "Argument[this]" else input = "Argument[" + argIdx + "]") and - package = callable.getDeclaringType().getPackage().getName() and - type = callable.getDeclaringType().getErasure().(RefType).getNestedName() and - subtypes = considerSubtypes(callable) and - name = callable.getName() and - signature = ExternalFlow::paramsString(callable) - ) - } - - /** Gets a sink model that corresponds to this expression. */ - SinkModel getSinkModel() { - this.hasSignature(result.getPackage(), result.getType(), result.getSubtypes(), result.getName(), - result.getSignature(), result.getInput()) - } -} - -private string pyBool(boolean b) { - b = true and result = "True" - or - b = false and result = "False" -} - -/** - * Gets a string representation of the existing sink model at the expression `e`, in the format in - * which it would appear in a Models-as-Data file. Also restricts the provenance of the sink model - * to be `ai-generated`. - */ -string getSinkModelRepr(PotentialSinkModelExpr e) { - result = e.getSinkModel().getRepr() and - e.getSinkModel().getProvenance() = "ai-generated" -} - -/** - * Gets the string representation of a sink model in a format suitable for appending to an alert - * message. - */ -string getSinkModelQueryRepr(PotentialSinkModelExpr e) { - result = "\nsinkModel: " + getSinkModelRepr(e) -} - -/** - * A parameterised module that takes a dataflow config, and exposes a predicate for counting the - * number of AI-generated sink models that appear in alerts for that query. - */ -private module SinkTallier { - module ConfigFlow = TaintTracking::Global; - - predicate getSinkModelCount(int c, SinkModel s) { - s = any(ConfigFlow::PathNode sink).getNode().asExpr().(PotentialSinkModelExpr).getSinkModel() and - c = - strictcount(ConfigFlow::PathNode sink | - ConfigFlow::flowPath(_, sink) and - s = sink.getNode().asExpr().(PotentialSinkModelExpr).getSinkModel() - ) - } -} - -predicate sinkModelTallyPerQuery(string queryName, int alertCount, SinkModel sinkModel) { - queryName = "java/request-forgery" and - SinkTallier::getSinkModelCount(alertCount, sinkModel) - or - queryName = "java/command-line-injection" and - SinkTallier::getSinkModelCount(alertCount, sinkModel) - or - queryName = "java/concatenated-sql-query" and - SinkTallier::getSinkModelCount(alertCount, sinkModel) - or - queryName = "java/ssrf" and - SinkTallier::getSinkModelCount(alertCount, sinkModel) - or - queryName = "java/path-injection" and - SinkTallier::getSinkModelCount(alertCount, sinkModel) - or - queryName = "java/unvalidated-url-redirection" and - SinkTallier::getSinkModelCount(alertCount, sinkModel) - or - queryName = "java/sql-injection" and - SinkTallier::getSinkModelCount(alertCount, sinkModel) -} - -predicate sinkModelTally(int alertCount, SinkModel sinkModel) { - sinkModelTallyPerQuery(_, _, sinkModel) and - alertCount = sum(int c | sinkModelTallyPerQuery(_, c, sinkModel)) -} diff --git a/java/ql/automodel/src/AutomodelAlertSinks.ql b/java/ql/automodel/src/AutomodelAlertSinks.ql deleted file mode 100644 index e9af51b4d637..000000000000 --- a/java/ql/automodel/src/AutomodelAlertSinks.ql +++ /dev/null @@ -1,16 +0,0 @@ -/** - * @name Number of alerts per sink model - * @description Counts the number of alerts using `ai-generated` sink models. - * @kind table - * @id java/ml/metrics-count-alerts-per-sink-model - * @tags internal automodel metrics - */ - -private import java -private import AutomodelAlertSinkUtil - -from int alertCount, SinkModel s -where sinkModelTally(alertCount, s) and s.getProvenance() = "ai-generated" -select alertCount, s.getPackage() as package, s.getType() as type, s.getSubtypes() as subtypes, - s.getName() as name, s.getSignature() as signature, s.getInput() as input, s.getExt() as ext, - s.getKind() as kind, s.getProvenance() as provenance order by alertCount desc diff --git a/java/ql/automodel/src/AutomodelAlertSinksPerQuery.ql b/java/ql/automodel/src/AutomodelAlertSinksPerQuery.ql deleted file mode 100644 index 64a5038d1166..000000000000 --- a/java/ql/automodel/src/AutomodelAlertSinksPerQuery.ql +++ /dev/null @@ -1,19 +0,0 @@ -/** - * @name Number of alerts per sink model and query - * @description Counts the number of alerts per query using `ai-generated` sink models. - * @kind table - * @id java/ml/metrics-count-alerts-per-sink-model-and-query - * @tags internal automodel metrics - */ - -private import java -private import AutomodelAlertSinkUtil - -from string queryId, int alertCount, SinkModel s -where - sinkModelTallyPerQuery(queryId, alertCount, s) and - s.getProvenance() = "ai-generated" -select queryId, alertCount, s.getPackage() as package, s.getType() as type, - s.getSubtypes() as subtypes, s.getName() as name, s.getSignature() as signature, - s.getInput() as input, s.getExt() as ext, s.getKind() as kind, s.getProvenance() as provenance - order by queryId, alertCount desc diff --git a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll b/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll deleted file mode 100644 index 750d776891f3..000000000000 --- a/java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll +++ /dev/null @@ -1,677 +0,0 @@ -/** - * For internal use only. - */ - -private import java -private import semmle.code.Location as Location -private import semmle.code.java.dataflow.DataFlow -private import semmle.code.java.dataflow.TaintTracking -private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow -private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl -private import semmle.code.java.security.ExternalAPIs as ExternalAPIs -private import semmle.code.java.Expr as Expr -private import semmle.code.java.security.QueryInjection -private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclusions -private import AutomodelJavaUtil as AutomodelJavaUtil -private import semmle.code.java.security.PathSanitizer as PathSanitizer -import AutomodelSharedCharacteristics as SharedCharacteristics -import AutomodelEndpointTypes as AutomodelEndpointTypes - -newtype JavaRelatedLocationType = - CallContext() or - MethodDoc() or - ClassDoc() - -newtype TApplicationModeEndpoint = - TExplicitArgument(Call call, DataFlow::Node arg) { - AutomodelJavaUtil::isFromSource(call) and - exists(Argument argExpr | - arg.asExpr() = argExpr and call = argExpr.getCall() and not argExpr.isVararg() - ) and - not AutomodelJavaUtil::isUnexploitableType(arg.getType()) - } or - TInstanceArgument(Call call, DataFlow::Node arg) { - AutomodelJavaUtil::isFromSource(call) and - arg = DataFlow::getInstanceArgument(call) and - not call instanceof ConstructorCall and - not AutomodelJavaUtil::isUnexploitableType(arg.getType()) - } or - TImplicitVarargsArray(Call call, DataFlow::ImplicitVarargsArray arg, int idx) { - AutomodelJavaUtil::isFromSource(call) and - call = arg.getCall() and - idx = call.getCallee().getVaragsParameterIndex() and - not AutomodelJavaUtil::isUnexploitableType(arg.getType()) - } or - TMethodReturnValue(MethodCall call) { - AutomodelJavaUtil::isFromSource(call) and - not AutomodelJavaUtil::isUnexploitableType(call.getType()) - } or - TOverriddenParameter(Parameter p, Method overriddenMethod) { - AutomodelJavaUtil::isFromSource(p) and - p.getCallable().(Method).overrides(overriddenMethod) - } - -/** - * An endpoint is a node that is a candidate for modeling. - */ -abstract private class ApplicationModeEndpoint extends TApplicationModeEndpoint { - /** - * Gets the callable to be modeled that this endpoint represents. - */ - abstract Callable getCallable(); - - /** - * Gets the input (if any) for this endpoint, eg.: `Argument[0]`. - * - * For endpoints that are source candidates, this will be `none()`. - */ - abstract string getMaDInput(); - - /** - * Gets the output (if any) for this endpoint, eg.: `ReturnValue`. - * - * For endpoints that are sink candidates, this will be `none()`. - */ - abstract string getMaDOutput(); - - abstract Top asTop(); - - /** - * Converts the endpoint to a node that can be used in a data flow graph. - */ - abstract DataFlow::Node asNode(); - - string getExtensibleType() { - if not exists(this.getMaDInput()) and exists(this.getMaDOutput()) - then result = "sourceModel" - else - if exists(this.getMaDInput()) and not exists(this.getMaDOutput()) - then result = "sinkModel" - else none() // if both exist, it would be a summaryModel (not yet supported) - } - - abstract string toString(); -} - -class TCallArgument = TExplicitArgument or TInstanceArgument or TImplicitVarargsArray; - -/** - * An endpoint that represents an "argument" to a call in a broad sense, including - * both explicit arguments and the instance argument. - */ -abstract class CallArgument extends ApplicationModeEndpoint, TCallArgument { - Call call; - DataFlow::Node arg; - - override Callable getCallable() { result = call.getCallee().getSourceDeclaration() } - - override string getMaDOutput() { none() } - - override DataFlow::Node asNode() { result = arg } - - Call getCall() { result = call } - - override string toString() { result = arg.toString() } -} - -/** - * An endpoint that represents an explicit argument to a call. - */ -class ExplicitArgument extends CallArgument, TExplicitArgument { - ExplicitArgument() { this = TExplicitArgument(call, arg) } - - private int getArgIndex() { this.asTop() = call.getArgument(result) } - - override string getMaDInput() { result = "Argument[" + this.getArgIndex() + "]" } - - override Top asTop() { result = arg.asExpr() } -} - -/** - * An endpoint that represents the instance argument to a call. - */ -class InstanceArgument extends CallArgument, TInstanceArgument { - InstanceArgument() { this = TInstanceArgument(call, arg) } - - override string getMaDInput() { result = "Argument[this]" } - - override Top asTop() { if exists(arg.asExpr()) then result = arg.asExpr() else result = call } - - override string toString() { result = arg.toString() } -} - -/** - * An endpoint that represents an implicit varargs array. - * We choose to represent the varargs array as a single endpoint, rather than as multiple endpoints. - * - * This avoids the problem of having to deal with redundant endpoints downstream. - * - * In order to be able to distinguish between varargs endpoints and regular endpoints, we export the `isVarargsArray` - * meta data field in the extraction queries. - */ -class ImplicitVarargsArray extends CallArgument, TImplicitVarargsArray { - int idx; - - ImplicitVarargsArray() { this = TImplicitVarargsArray(call, arg, idx) } - - override string getMaDInput() { result = "Argument[" + idx + "]" } - - override Top asTop() { result = call } -} - -/** - * An endpoint that represents a method call. The `ReturnValue` of a method call - * may be a source. - */ -class MethodReturnValue extends ApplicationModeEndpoint, TMethodReturnValue { - MethodCall call; - - MethodReturnValue() { this = TMethodReturnValue(call) } - - override Callable getCallable() { result = call.getCallee().getSourceDeclaration() } - - override string getMaDInput() { none() } - - override string getMaDOutput() { result = "ReturnValue" } - - override Top asTop() { result = call } - - override DataFlow::Node asNode() { result.asExpr() = call } - - override string toString() { result = call.toString() } -} - -/** - * An endpoint that represents a parameter of an overridden method that may be - * a source. - */ -class OverriddenParameter extends ApplicationModeEndpoint, TOverriddenParameter { - Parameter p; - Method overriddenMethod; - - OverriddenParameter() { this = TOverriddenParameter(p, overriddenMethod) } - - override Callable getCallable() { - // NB: we're returning the overridden callable here. This means that the - // candidate model will be about the overridden method, not the overriding - // method. This is a more general model, that also applies to other - // subclasses of the overridden class. - result = overriddenMethod.getSourceDeclaration() - } - - private int getArgIndex() { p.getCallable().getParameter(result) = p } - - override string getMaDInput() { none() } - - override string getMaDOutput() { result = "Parameter[" + this.getArgIndex() + "]" } - - override Top asTop() { result = p } - - override DataFlow::Node asNode() { result.(DataFlow::ParameterNode).asParameter() = p } - - override string toString() { result = p.toString() } -} - -/** - * A candidates implementation. - * - * Some important notes: - * - This mode is using arguments as endpoints. - * - We use the `CallContext` (the surrounding call expression) as related location. - */ -module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig { - // for documentation of the implementations here, see the QLDoc in the CandidateSig signature module. - class Endpoint = ApplicationModeEndpoint; - - class EndpointType = AutomodelEndpointTypes::EndpointType; - - class SinkType = AutomodelEndpointTypes::SinkType; - - class SourceType = AutomodelEndpointTypes::SourceType; - - class RelatedLocation = Location::Top; - - class RelatedLocationType = JavaRelatedLocationType; - - // Sanitizers are currently not modeled in MaD. TODO: check if this has large negative impact. - predicate isSanitizer(Endpoint e, EndpointType t) { - exists(t) and - AutomodelJavaUtil::isUnexploitableType([ - // for most endpoints, we can get the type from the node - e.asNode().getType(), - // but not for calls to void methods, where we need to go via the AST - e.asTop().(Expr).getType() - ]) - or - t instanceof AutomodelEndpointTypes::PathInjectionSinkType and - e.asNode() instanceof PathSanitizer::PathInjectionSanitizer - } - - RelatedLocation asLocation(Endpoint e) { result = e.asTop() } - - predicate isKnownKind = AutomodelJavaUtil::isKnownKind/2; - - predicate isSink(Endpoint e, string kind, string provenance) { - exists( - string package, string type, boolean subtypes, string name, string signature, string ext, - string input - | - sinkSpec(e, package, type, subtypes, name, signature, ext, input) and - ExternalFlow::sinkModel(package, type, subtypes, name, [signature, ""], ext, input, kind, - provenance, _) - ) - or - isCustomSink(e, kind) and provenance = "custom-sink" - } - - predicate isSource(Endpoint e, string kind, string provenance) { - exists( - string package, string type, boolean subtypes, string name, string signature, string ext, - string output - | - sourceSpec(e, package, type, subtypes, name, signature, ext, output) and - ExternalFlow::sourceModel(package, type, subtypes, name, [signature, ""], ext, output, kind, - provenance, _) - ) - } - - predicate isNeutral(Endpoint e) { - exists(string package, string type, string name, string signature, string endpointType | - sinkSpec(e, package, type, _, name, signature, _, _) and - endpointType = "sink" - or - sourceSpec(e, package, type, _, name, signature, _, _) and - endpointType = "source" - | - ExternalFlow::neutralModel(package, type, name, [signature, ""], endpointType, _) - ) - } - - /** - * Holds if the endpoint concerns a callable with the given package, type, name and signature. - * - * If `subtypes` is `false`, only the exact callable is considered. If `true`, the callable and - * all its overrides are considered. - */ - additional predicate endpointCallable( - Endpoint e, string package, string type, boolean subtypes, string name, string signature - ) { - exists(Callable c | - c = e.getCallable() and subtypes in [true, false] - or - e.getCallable().(Method).getSourceDeclaration().overrides+(c) and subtypes = true - | - c.hasQualifiedName(package, type, name) and - signature = ExternalFlow::paramsString(c) - ) - } - - additional predicate sinkSpec( - Endpoint e, string package, string type, boolean subtypes, string name, string signature, - string ext, string input - ) { - endpointCallable(e, package, type, subtypes, name, signature) and - ext = "" and - input = e.getMaDInput() - } - - additional predicate sourceSpec( - Endpoint e, string package, string type, boolean subtypes, string name, string signature, - string ext, string output - ) { - endpointCallable(e, package, type, subtypes, name, signature) and - ext = "" and - output = e.getMaDOutput() - } - - /** - * Gets the related location for the given endpoint. - * - * The only related location we model is the the call expression surrounding to - * which the endpoint is either argument or qualifier (known as the call context). - */ - RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType type) { - type = CallContext() and - result = e.(CallArgument).getCall() - or - type = MethodDoc() and - result = e.getCallable().(Documentable).getJavadoc() - or - type = ClassDoc() and - result = e.getCallable().getDeclaringType().(Documentable).getJavadoc() - } -} - -/** - * Contains endpoints that are defined in QL code rather than as a MaD model. Ideally this predicate - * should be empty. - */ -private predicate isCustomSink(Endpoint e, string kind) { - e.asNode() instanceof QueryInjectionSink and kind = "sql" -} - -module CharacteristicsImpl = - SharedCharacteristics::SharedCharacteristics; - -class EndpointCharacteristic = CharacteristicsImpl::EndpointCharacteristic; - -class Endpoint = ApplicationCandidatesImpl::Endpoint; - -/* - * Predicates that are used to surface prompt examples and candidates for classification with an ML model. - */ - -/** - * A MetadataExtractor that extracts metadata for application mode. - */ -class ApplicationModeMetadataExtractor extends string { - ApplicationModeMetadataExtractor() { this = "ApplicationModeMetadataExtractor" } - - predicate hasMetadata( - Endpoint e, string package, string type, string subtypes, string name, string signature, - string input, string output, string isVarargsArray, string alreadyAiModeled, - string extensibleType - ) { - exists(Callable callable | e.getCallable() = callable | - (if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and - (if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and - package = callable.getDeclaringType().getPackage().getName() and - // we're using the erased types because the MaD convention is to not specify type parameters. - // Whether something is or isn't a sink doesn't usually depend on the type parameters. - type = callable.getDeclaringType().getErasure().(RefType).getNestedName() and - subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and - name = callable.getName() and - signature = ExternalFlow::paramsString(callable) and - ( - if e instanceof ImplicitVarargsArray - then isVarargsArray = "true" - else isVarargsArray = "false" - ) and - extensibleType = e.getExtensibleType() - ) and - ( - not CharacteristicsImpl::isModeled(e, _, extensibleType, _) and alreadyAiModeled = "" - or - CharacteristicsImpl::isModeled(e, _, extensibleType, alreadyAiModeled) - ) - } -} - -/** - * Holds if the given `endpoint` should be considered a candidate for the `extensibleType`. - * - * The other parameters record various other properties of interest. - */ -predicate isCandidate( - Endpoint endpoint, string package, string type, string subtypes, string name, string signature, - string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled -) { - CharacteristicsImpl::isCandidate(endpoint, _) and - not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u | - u.appliesToEndpoint(endpoint) - ) and - any(ApplicationModeMetadataExtractor meta) - .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargs, - alreadyAiModeled, extensibleType) and - // If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a - // candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's - // already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We - // assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink - // types, and we don't need to reexamine it. - alreadyAiModeled.matches(["", "%ai-%"]) and - AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature) -} - -/** - * Holds if the given `endpoint` is a negative example for the `extensibleType` - * because of the `characteristic`. - * - * The other parameters record various other properties of interest. - */ -predicate isNegativeExample( - Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package, - string type, string subtypes, string name, string signature, string input, string output, - string isVarargsArray, string extensibleType -) { - characteristic.appliesToEndpoint(endpoint) and - // the node is known not to be an endpoint of any appropriate type - forall(AutomodelEndpointTypes::EndpointType tp | - tp = CharacteristicsImpl::getAPotentialType(endpoint) - | - characteristic.hasImplications(tp, false, _) - ) and - // the lowest confidence across all endpoint types should be at least highConfidence - confidence = - min(float c | - characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c) - ) and - confidence >= SharedCharacteristics::highConfidence() and - any(ApplicationModeMetadataExtractor meta) - .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, - isVarargsArray, _, extensibleType) and - // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes - // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here. - not exists(EndpointCharacteristic characteristic2, float confidence2 | - characteristic2 != characteristic - | - characteristic2.appliesToEndpoint(endpoint) and - confidence2 >= SharedCharacteristics::maximalConfidence() and - characteristic2 - .hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2) - ) -} - -/** - * Holds if the given `endpoint` is a positive example for the `endpointType`. - * - * The other parameters record various other properties of interest. - */ -predicate isPositiveExample( - Endpoint endpoint, string endpointType, string package, string type, string subtypes, string name, - string signature, string input, string output, string isVarargsArray, string extensibleType -) { - any(ApplicationModeMetadataExtractor meta) - .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, - isVarargsArray, _, extensibleType) and - CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and - exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext())) -} - -/* - * EndpointCharacteristic classes that are specific to Automodel for Java. - */ - -/** - * A negative characteristic that indicates that parameters of an is-style boolean method should not be considered sinks. - * - * A sink is highly unlikely to be exploitable if its callable's name starts with `is` and the callable has a boolean return - * type (e.g. `isDirectory`). These kinds of calls normally do only checks, and appear before the proper call that does - * the dangerous/interesting thing, so we want the latter to be modeled as the sink. - * - * TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks - */ -private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic { - UnexploitableIsCharacteristic() { this = "argument of is-style boolean method" } - - override predicate appliesToEndpoint(Endpoint e) { - e.getCallable().getName().matches("is%") and - e.getCallable().getReturnType() instanceof BooleanType and - not ApplicationCandidatesImpl::isSink(e, _, _) - } -} - -/** - * A negative characteristic that indicates that parameters of an existence-checking boolean method should not be - * considered sinks. - * - * A sink is highly unlikely to be exploitable if its callable's name is `exists` or `notExists` and the callable has a - * boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the - * dangerous/interesting thing, so we want the latter to be modeled as the sink. - */ -private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic { - UnexploitableExistsCharacteristic() { this = "argument of existence-checking boolean method" } - - override predicate appliesToEndpoint(Endpoint e) { - exists(Callable callable | callable = e.getCallable() | - callable.getName().toLowerCase() = ["exists", "notexists"] and - callable.getReturnType() instanceof BooleanType - ) - } -} - -/** - * A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks, - * and its return value should not be considered a source. - */ -private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic -{ - ExceptionCharacteristic() { this = "argument/result of exception-related method" } - - override predicate appliesToEndpoint(Endpoint e) { - e.getCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable and - ( - e.getExtensibleType() = "sinkModel" and - not ApplicationCandidatesImpl::isSink(e, _, _) - or - e.getExtensibleType() = "sourceModel" and - not ApplicationCandidatesImpl::isSource(e, _, _) and - e.getMaDOutput() = "ReturnValue" - ) - } -} - -/** - * A negative characteristic that indicates that an endpoint is a MaD taint step. MaD modeled taint steps are global, - * so they are not sinks for any query. Non-MaD taint steps might be specific to a particular query, so we don't - * filter those out. - */ -private class IsMaDTaintStepCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic { - IsMaDTaintStepCharacteristic() { this = "taint step" } - - override predicate appliesToEndpoint(Endpoint e) { - FlowSummaryImpl::Private::Steps::summaryThroughStepValue(e.asNode(), _, _) - or - FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(e.asNode(), _, _) - or - FlowSummaryImpl::Private::Steps::summaryGetterStep(e.asNode(), _, _, _) - or - FlowSummaryImpl::Private::Steps::summarySetterStep(e.asNode(), _, _, _) - } -} - -/** - * A call to a method that's known locally will not be considered as a candidate to model. - * - * The reason is that we would expect data/taint flow into the method implementation to uncover - * any sinks that are present there. - */ -private class LocalCall extends CharacteristicsImpl::UninterestingToModelCharacteristic { - LocalCall() { this = "local call" } - - override predicate appliesToEndpoint(Endpoint e) { - e.(CallArgument).getCallable().fromSource() - or - e.(MethodReturnValue).getCallable().fromSource() - } -} - -/** - * A characteristic that marks endpoints as uninteresting to model, according to the Java ModelExclusions module. - */ -private class ExcludedFromModeling extends CharacteristicsImpl::UninterestingToModelCharacteristic { - ExcludedFromModeling() { this = "excluded from modeling" } - - override predicate appliesToEndpoint(Endpoint e) { - ModelExclusions::isUninterestingForModels(e.getCallable()) - } -} - -/** - * A negative characteristic that filters out non-public methods. Non-public methods are not interesting to include in - * the standard Java modeling, because they cannot be called from outside the package. - */ -private class NonPublicMethodCharacteristic extends CharacteristicsImpl::UninterestingToModelCharacteristic -{ - NonPublicMethodCharacteristic() { this = "non-public method" } - - override predicate appliesToEndpoint(Endpoint e) { - exists(Callable c | c = e.getCallable() | not c.isPublic()) - } -} - -/** - * A negative characteristic that indicates that an endpoint is a non-sink argument to a method whose sinks have already - * been modeled _manually_. This is restricted to manual sinks only, because only during the manual process do we have - * the expectation that all sinks present in a method have been considered. - * - * WARNING: These endpoints should not be used as negative samples for training, because some sinks may have been missed - * when the method was modeled. Specifically, as we start using ATM to merge in new declarations, we can be less sure - * that a method with one argument modeled as a MaD sink has also had its remaining arguments manually reviewed. The - * ML model might have predicted argument 0 of some method to be a sink but not argument 1, when in fact argument 1 is - * also a sink. - */ -private class OtherArgumentToModeledMethodCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic -{ - OtherArgumentToModeledMethodCharacteristic() { - this = "other argument to a method that has already been modeled manually" - } - - override predicate appliesToEndpoint(Endpoint e) { - not ApplicationCandidatesImpl::isSink(e, _, _) and - exists(CallArgument otherSink | - ApplicationCandidatesImpl::isSink(otherSink, _, "manual") and - e.(CallArgument).getCall() = otherSink.getCall() and - e != otherSink - ) - } -} - -/** - * Holds if the type of the given expression is annotated with `@FunctionalInterface`. - */ -predicate hasFunctionalInterfaceType(Expr e) { - exists(RefType tp | tp = e.getType().getErasure() | - tp.getAnAssociatedAnnotation().getType().hasQualifiedName("java.lang", "FunctionalInterface") - ) -} - -/** - * A characteristic that marks functional expression as likely not sinks. - * - * These expressions may well _contain_ sinks, but rarely are sinks themselves. - */ -private class FunctionValueCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic { - FunctionValueCharacteristic() { this = "function value" } - - override predicate appliesToEndpoint(Endpoint e) { - exists(Expr expr | expr = e.asNode().asExpr() | - expr instanceof FunctionalExpr or hasFunctionalInterfaceType(expr) - ) - } -} - -/** - * A negative characteristic that indicates that an endpoint is not a `to` node for any known taint step. Such a node - * cannot be tainted, because taint can't flow into it. - * - * WARNING: These endpoints should not be used as negative samples for training, because they may include sinks for - * which our taint tracking modeling is incomplete. - */ -private class CannotBeTaintedCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic -{ - CannotBeTaintedCharacteristic() { this = "cannot be tainted" } - - override predicate appliesToEndpoint(Endpoint e) { not this.isKnownOutNodeForStep(e) } - - /** - * Holds if the node `n` is known as the predecessor in a modeled flow step. - */ - private predicate isKnownOutNodeForStep(Endpoint e) { - e.asNode().asExpr() instanceof Call or // we just assume flow in that case - TaintTracking::localTaintStep(_, e.asNode()) or - FlowSummaryImpl::Private::Steps::summaryThroughStepValue(_, e.asNode(), _) or - FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(_, e.asNode(), _) or - FlowSummaryImpl::Private::Steps::summaryGetterStep(_, _, e.asNode(), _) or - FlowSummaryImpl::Private::Steps::summarySetterStep(_, _, e.asNode(), _) - } -} diff --git a/java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql b/java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql deleted file mode 100644 index a3fa8b9b46fa..000000000000 --- a/java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql +++ /dev/null @@ -1,81 +0,0 @@ -/** - * Surfaces the endpoints that are not already known to be sinks, and are therefore used as candidates for - * classification with an ML model. - * - * Note: This query does not actually classify the endpoints using the model. - * - * @name Automodel candidates (application mode) - * @description A query to extract automodel candidates in application mode. - * @kind problem - * @problem.severity recommendation - * @id java/ml/extract-automodel-application-candidates - * @tags internal extract automodel application-mode candidates - */ - -import java -private import AutomodelApplicationModeCharacteristics -private import AutomodelJavaUtil - -/** - * Gets a sample of endpoints (of at most `limit` samples) with the given method signature. - * - * The main purpose of this helper predicate is to avoid selecting too many candidates, as this may - * cause the SARIF file to exceed the maximum size limit. - */ -bindingset[limit] -private Endpoint getSampleForSignature( - int limit, string package, string type, string subtypes, string name, string signature, - string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled -) { - exists(int n, int num_endpoints, ApplicationModeMetadataExtractor meta | - num_endpoints = - count(Endpoint e | - meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs, - alreadyAiModeled, extensibleType) - ) - | - result = - rank[n](Endpoint e, Location loc | - loc = e.asTop().getLocation() and - meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs, - alreadyAiModeled, extensibleType) - | - e - order by - loc.getFile().getAbsolutePath(), loc.getStartLine(), loc.getStartColumn(), - loc.getEndLine(), loc.getEndColumn() - ) and - // To avoid selecting samples that are too close together (as the ranking above goes by file - // path first), we select `limit` evenly spaced samples from the ranked list of endpoints. By - // default this would always include the first sample, so we add a random-chosen prime offset - // to the first sample index, and reduce modulo the number of endpoints. - // Finally, we add 1 to the result, as ranking results in a 1-indexed relation. - n = 1 + (([0 .. limit - 1] * (num_endpoints / limit).floor() + 46337) % num_endpoints) - ) -} - -from - Endpoint endpoint, DollarAtString package, DollarAtString type, DollarAtString subtypes, - DollarAtString name, DollarAtString signature, DollarAtString input, DollarAtString output, - DollarAtString isVarargsArray, DollarAtString alreadyAiModeled, DollarAtString extensibleType -where - isCandidate(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray, - extensibleType, alreadyAiModeled) and - endpoint = - getSampleForSignature(9, package, type, subtypes, name, signature, input, output, - isVarargsArray, extensibleType, alreadyAiModeled) -select endpoint.asNode(), - "Related locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", // - CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", // - CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", // - CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", // - package, "package", // - type, "type", // - subtypes, "subtypes", // - name, "name", // method name - signature, "signature", // - input, "input", // - output, "output", // - isVarargsArray, "isVarargsArray", // - alreadyAiModeled, "alreadyAiModeled", // - extensibleType, "extensibleType" diff --git a/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql b/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql deleted file mode 100644 index a399c413fa4a..000000000000 --- a/java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Surfaces endpoints that are non-sinks with high confidence, for use as negative examples in the prompt. - * - * @name Negative examples (application mode) - * @kind problem - * @problem.severity recommendation - * @id java/ml/extract-automodel-application-negative-examples - * @tags internal extract automodel application-mode negative examples - */ - -private import java -private import AutomodelApplicationModeCharacteristics -private import AutomodelEndpointTypes -private import AutomodelJavaUtil - -/** - * Gets a sample of endpoints (of at most `limit` samples) for which the given characteristic applies. - * - * The main purpose of this helper predicate is to avoid selecting too many samples, as this may - * cause the SARIF file to exceed the maximum size limit. - */ -bindingset[limit] -Endpoint getSampleForCharacteristic(EndpointCharacteristic c, int limit) { - exists(int n, int num_endpoints | num_endpoints = count(Endpoint e | c.appliesToEndpoint(e)) | - result = - rank[n](Endpoint e, Location loc | - loc = e.asTop().getLocation() and c.appliesToEndpoint(e) - | - e - order by - loc.getFile().getAbsolutePath(), loc.getStartLine(), loc.getStartColumn(), - loc.getEndLine(), loc.getEndColumn() - ) and - // To avoid selecting samples that are too close together (as the ranking above goes by file - // path first), we select `limit` evenly spaced samples from the ranked list of endpoints. By - // default this would always include the first sample, so we add a random-chosen prime offset - // to the first sample index, and reduce modulo the number of endpoints. - // Finally, we add 1 to the result, as ranking results in a 1-indexed relation. - n = 1 + (([0 .. limit - 1] * (num_endpoints / limit).floor() + 46337) % num_endpoints) - ) -} - -from - Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message, - DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name, - DollarAtString signature, DollarAtString input, DollarAtString output, - DollarAtString isVarargsArray, DollarAtString extensibleType -where - endpoint = getSampleForCharacteristic(characteristic, 100) and - isNegativeExample(endpoint, characteristic, confidence, package, type, subtypes, name, signature, - input, output, isVarargsArray, extensibleType) and - message = characteristic -select endpoint.asNode(), - message + "\nrelated locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", // - CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", // - CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", // - CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", // - package, "package", // - type, "type", // - subtypes, "subtypes", // - name, "name", // - signature, "signature", // - input, "input", // - output, "output", // - isVarargsArray, "isVarargsArray", // - extensibleType, "extensibleType" diff --git a/java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql b/java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql deleted file mode 100644 index faf49b73fc14..000000000000 --- a/java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Surfaces endpoints that are sinks with high confidence, for use as positive examples in the prompt. - * - * @name Positive examples (application mode) - * @kind problem - * @problem.severity recommendation - * @id java/ml/extract-automodel-application-positive-examples - * @tags internal extract automodel application-mode positive examples - */ - -private import AutomodelApplicationModeCharacteristics -private import AutomodelEndpointTypes -private import AutomodelJavaUtil - -from - Endpoint endpoint, EndpointType endpointType, ApplicationModeMetadataExtractor meta, - DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name, - DollarAtString signature, DollarAtString input, DollarAtString output, - DollarAtString isVarargsArray, DollarAtString extensibleType -where - isPositiveExample(endpoint, endpointType, package, type, subtypes, name, signature, input, output, - isVarargsArray, extensibleType) -select endpoint.asNode(), - endpointType + "\nrelated locations: $@, $@, $@." + - "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", // - CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()), "CallContext", // - CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", // - CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", // - package, "package", // - type, "type", // - subtypes, "subtypes", // - name, "name", // - signature, "signature", // - input, "input", // - output, "output", // - isVarargsArray, "isVarargsArray", // - extensibleType, "extensibleType" diff --git a/java/ql/automodel/src/AutomodelCandidateFilter.yml b/java/ql/automodel/src/AutomodelCandidateFilter.yml deleted file mode 100644 index c945ae3206fe..000000000000 --- a/java/ql/automodel/src/AutomodelCandidateFilter.yml +++ /dev/null @@ -1,5 +0,0 @@ -extensions: - - addsTo: - pack: codeql/java-automodel-queries - extensible: automodelCandidateFilter - data: [] diff --git a/java/ql/automodel/src/AutomodelCountGeneratedSinks.ql b/java/ql/automodel/src/AutomodelCountGeneratedSinks.ql deleted file mode 100644 index 475e3753810b..000000000000 --- a/java/ql/automodel/src/AutomodelCountGeneratedSinks.ql +++ /dev/null @@ -1,19 +0,0 @@ -/** - * @name Number of instances of each sink model - * @description Counts the number of instances of `ai-generated` sink models. - * @kind table - * @id java/ml/metrics-count-instances-per-sink-model - * @tags internal automodel metrics - */ - -private import java -private import AutomodelAlertSinkUtil - -from int instanceCount, SinkModel s -where - instanceCount = s.getInstanceCount() and - instanceCount > 0 and - s.getProvenance() = "ai-generated" -select instanceCount, s.getPackage() as package, s.getType() as type, s.getSubtypes() as subtypes, - s.getName() as name, s.getSignature() as signature, s.getInput() as input, s.getExt() as ext, - s.getKind() as kind, s.getProvenance() as provenance order by instanceCount desc diff --git a/java/ql/automodel/src/AutomodelEndpointTypes.qll b/java/ql/automodel/src/AutomodelEndpointTypes.qll deleted file mode 100644 index f4f7bc8eb7b0..000000000000 --- a/java/ql/automodel/src/AutomodelEndpointTypes.qll +++ /dev/null @@ -1,82 +0,0 @@ -/** - * For internal use only. - * - * Defines the set of classes that endpoint scoring models can predict. Endpoint scoring models must - * only predict classes defined within this file. This file is the source of truth for the integer - * representation of each of these classes. - */ - -/** A class that can be predicted by a classifier. */ -abstract class EndpointType extends string { - /** - * Holds when the string matches the name of the sink / source type. - */ - bindingset[this] - EndpointType() { any() } - - /** - * Gets the name of the sink/source kind for this endpoint type as used in models-as-data. - * - * See https://github.com/github/codeql/blob/44213f0144fdd54bb679ca48d68b28dcf820f7a8/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll#LL353C11-L357C31 - * for sink types, and https://github.com/github/codeql/blob/44213f0144fdd54bb679ca48d68b28dcf820f7a8/java/ql/lib/semmle/code/java/dataflow/ExternalFlow.qll#L365 - * for source types. - */ - final string getKind() { result = this } -} - -/** A class for sink types that can be predicted by a classifier. */ -abstract class SinkType extends EndpointType { - bindingset[this] - SinkType() { any() } -} - -/** A sink relevant to the SQL injection query */ -class SqlInjectionSinkType extends SinkType { - SqlInjectionSinkType() { this = "sql-injection" } -} - -/** A sink relevant to the tainted path injection query. */ -class PathInjectionSinkType extends SinkType { - PathInjectionSinkType() { this = "path-injection" } -} - -/** A sink relevant to the SSRF query. */ -class RequestForgerySinkType extends SinkType { - RequestForgerySinkType() { this = "request-forgery" } -} - -/** A sink relevant to the command injection query. */ -class CommandInjectionSinkType extends SinkType { - CommandInjectionSinkType() { this = "command-injection" } -} - -/** A sink relevant to file storage. */ -class FileContentStoreSinkType extends SinkType { - FileContentStoreSinkType() { this = "file-content-store" } -} - -/** A sink relevant to HTML injection. */ -class HtmlInjectionSinkType extends SinkType { - HtmlInjectionSinkType() { this = "html-injection" } -} - -/** A sink relevant to LDAP injection. */ -class LdapInjectionSinkType extends SinkType { - LdapInjectionSinkType() { this = "ldap-injection" } -} - -/** A sink relevant to URL redirection. */ -class UrlRedirectionSinkType extends SinkType { - UrlRedirectionSinkType() { this = "url-redirection" } -} - -/** A class for source types that can be predicted by a classifier. */ -abstract class SourceType extends EndpointType { - bindingset[this] - SourceType() { any() } -} - -/** A source of remote data. */ -class RemoteSourceType extends SourceType { - RemoteSourceType() { this = "remote" } -} diff --git a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll b/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll deleted file mode 100644 index 7f385a41d1ea..000000000000 --- a/java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll +++ /dev/null @@ -1,507 +0,0 @@ -/** - * For internal use only. - */ - -private import java -private import semmle.code.Location as Location -private import semmle.code.java.dataflow.DataFlow -private import semmle.code.java.dataflow.TaintTracking -private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow -private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl -private import semmle.code.java.security.ExternalAPIs as ExternalAPIs -private import semmle.code.java.Expr as Expr -private import semmle.code.java.security.QueryInjection -private import semmle.code.java.security.RequestForgery -private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclusions -private import AutomodelJavaUtil as AutomodelJavaUtil -import AutomodelSharedCharacteristics as SharedCharacteristics -import AutomodelEndpointTypes as AutomodelEndpointTypes - -newtype JavaRelatedLocationType = - MethodDoc() or - ClassDoc() - -newtype TFrameworkModeEndpoint = - TExplicitParameter(Parameter p) { - AutomodelJavaUtil::isFromSource(p) and - not AutomodelJavaUtil::isUnexploitableType(p.getType()) - } or - TQualifier(Callable c) { AutomodelJavaUtil::isFromSource(c) and not c instanceof Constructor } or - TReturnValue(Callable c) { - AutomodelJavaUtil::isFromSource(c) and - c instanceof Constructor - or - AutomodelJavaUtil::isFromSource(c) and - c instanceof Method and - not AutomodelJavaUtil::isUnexploitableType(c.getReturnType()) - } or - TOverridableParameter(Method m, Parameter p) { - AutomodelJavaUtil::isFromSource(p) and - not AutomodelJavaUtil::isUnexploitableType(p.getType()) and - p.getCallable() = m and - m instanceof ModelExclusions::ModelApi and - AutomodelJavaUtil::isOverridable(m) - } or - TOverridableQualifier(Method m) { - AutomodelJavaUtil::isFromSource(m) and - m instanceof ModelExclusions::ModelApi and - AutomodelJavaUtil::isOverridable(m) - } - -/** - * A framework mode endpoint. - */ -abstract class FrameworkModeEndpoint extends TFrameworkModeEndpoint { - /** - * Gets the input (if any) for this endpoint, eg.: `Argument[0]`. - * - * For endpoints that are source candidates, this will be `none()`. - */ - abstract string getMaDInput(); - - /** - * Gets the output (if any) for this endpoint, eg.: `ReturnValue`. - * - * For endpoints that are sink candidates, this will be `none()`. - */ - abstract string getMaDOutput(); - - /** - * Returns the name of the parameter of the endpoint. - */ - abstract string getParamName(); - - /** - * Returns the callable that contains the endpoint. - */ - abstract Callable getCallable(); - - abstract Top asTop(); - - abstract string getExtensibleType(); - - string toString() { result = this.asTop().toString() } - - Location getLocation() { result = this.asTop().getLocation() } -} - -class ExplicitParameterEndpoint extends FrameworkModeEndpoint, TExplicitParameter { - Parameter param; - - ExplicitParameterEndpoint() { this = TExplicitParameter(param) and param.fromSource() } - - override string getMaDInput() { result = "Argument[" + param.getPosition() + "]" } - - override string getMaDOutput() { none() } - - override string getParamName() { result = param.getName() } - - override Callable getCallable() { result = param.getCallable() } - - override Top asTop() { result = param } - - override string getExtensibleType() { result = "sinkModel" } -} - -class QualifierEndpoint extends FrameworkModeEndpoint, TQualifier { - Callable callable; - - QualifierEndpoint() { - this = TQualifier(callable) and not callable.isStatic() and callable.fromSource() - } - - override string getMaDInput() { result = "Argument[this]" } - - override string getMaDOutput() { none() } - - override string getParamName() { result = "this" } - - override Callable getCallable() { result = callable } - - override Top asTop() { result = callable } - - override string getExtensibleType() { result = "sinkModel" } -} - -class ReturnValue extends FrameworkModeEndpoint, TReturnValue { - Callable callable; - - ReturnValue() { this = TReturnValue(callable) and callable.fromSource() } - - override string getMaDInput() { none() } - - override string getMaDOutput() { result = "ReturnValue" } - - override string getParamName() { none() } - - override Callable getCallable() { result = callable } - - override Top asTop() { result = callable } - - override string getExtensibleType() { result = "sourceModel" } -} - -class OverridableParameter extends FrameworkModeEndpoint, TOverridableParameter { - Method method; - Parameter param; - - OverridableParameter() { this = TOverridableParameter(method, param) } - - override string getMaDInput() { none() } - - override string getMaDOutput() { result = "Parameter[" + param.getPosition() + "]" } - - override string getParamName() { result = param.getName() } - - override Callable getCallable() { result = method } - - override Top asTop() { result = param } - - override string getExtensibleType() { result = "sourceModel" } -} - -class OverridableQualifier extends FrameworkModeEndpoint, TOverridableQualifier { - Method m; - - OverridableQualifier() { this = TOverridableQualifier(m) } - - override string getMaDInput() { none() } - - override string getMaDOutput() { result = "Parameter[this]" } - - override string getParamName() { result = "this" } - - override Callable getCallable() { result = m } - - override Top asTop() { result = m } - - override string getExtensibleType() { result = "sourceModel" } -} - -/** - * A candidates implementation for framework mode. - * - * Some important notes: - * - This mode is using parameters as endpoints. - * - Sink- and neutral-information is being used from MaD models. - * - When available, we use method- and class-java-docs as related locations. - */ -module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig { - // for documentation of the implementations here, see the QLDoc in the CandidateSig signature module. - class Endpoint = FrameworkModeEndpoint; - - class EndpointType = AutomodelEndpointTypes::EndpointType; - - class SinkType = AutomodelEndpointTypes::SinkType; - - class SourceType = AutomodelEndpointTypes::SourceType; - - class RelatedLocation = Location::Top; - - class RelatedLocationType = JavaRelatedLocationType; - - // Sanitizers are currently not modeled in MaD. TODO: check if this has large negative impact. - predicate isSanitizer(Endpoint e, EndpointType t) { none() } - - RelatedLocation asLocation(Endpoint e) { result = e.asTop() } - - predicate isKnownKind = AutomodelJavaUtil::isKnownKind/2; - - predicate isSink(Endpoint e, string kind, string provenance) { - exists( - string package, string type, boolean subtypes, string name, string signature, string ext, - string input - | - sinkSpec(e, package, type, subtypes, name, signature, ext, input) and - ExternalFlow::sinkModel(package, type, subtypes, name, [signature, ""], ext, input, kind, - provenance, _) - ) - } - - predicate isSource(Endpoint e, string kind, string provenance) { - exists( - string package, string type, boolean subtypes, string name, string signature, string ext, - string output - | - sourceSpec(e, package, type, subtypes, name, signature, ext, output) and - ExternalFlow::sourceModel(package, type, subtypes, name, [signature, ""], ext, output, kind, - provenance, _) - ) - } - - predicate isNeutral(Endpoint e) { - exists(string package, string type, string name, string signature, string endpointType | - sinkSpec(e, package, type, _, name, signature, _, _) and - endpointType = "sink" - or - sourceSpec(e, package, type, _, name, signature, _, _) and - endpointType = "source" - | - ExternalFlow::neutralModel(package, type, name, [signature, ""], endpointType, _) - ) - } - - /** - * Holds if the endpoint concerns a callable with the given package, type, name and signature. - * - * If `subtypes` is `false`, only the exact callable is considered. If `true`, the callable and - * all its overrides are considered. - */ - additional predicate endpointCallable( - Endpoint e, string package, string type, boolean subtypes, string name, string signature - ) { - exists(Callable c | - c = e.getCallable() and subtypes in [true, false] - or - e.getCallable().(Method).getSourceDeclaration().overrides+(c) and subtypes = true - | - c.hasQualifiedName(package, type, name) and - signature = ExternalFlow::paramsString(c) - ) - } - - additional predicate sinkSpec( - Endpoint e, string package, string type, boolean subtypes, string name, string signature, - string ext, string input - ) { - endpointCallable(e, package, type, subtypes, name, signature) and - ext = "" and - input = e.getMaDInput() - } - - additional predicate sourceSpec( - Endpoint e, string package, string type, boolean subtypes, string name, string signature, - string ext, string output - ) { - endpointCallable(e, package, type, subtypes, name, signature) and - ext = "" and - output = e.getMaDOutput() - } - - /** - * Gets the related location for the given endpoint. - * - * Related locations can be JavaDoc comments of the class or the method. - */ - RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType type) { - type = MethodDoc() and - result = e.getCallable().(Documentable).getJavadoc() - or - type = ClassDoc() and - result = e.getCallable().getDeclaringType().(Documentable).getJavadoc() - } -} - -module CharacteristicsImpl = SharedCharacteristics::SharedCharacteristics; - -class EndpointCharacteristic = CharacteristicsImpl::EndpointCharacteristic; - -class Endpoint = FrameworkCandidatesImpl::Endpoint; - -/* - * Predicates that are used to surface prompt examples and candidates for classification with an ML model. - */ - -/** - * A MetadataExtractor that extracts metadata for framework mode. - */ -class FrameworkModeMetadataExtractor extends string { - FrameworkModeMetadataExtractor() { this = "FrameworkModeMetadataExtractor" } - - predicate hasMetadata( - Endpoint e, string package, string type, string subtypes, string name, string signature, - string input, string output, string parameterName, string alreadyAiModeled, - string extensibleType - ) { - exists(Callable callable | e.getCallable() = callable | - (if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and - (if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and - package = callable.getDeclaringType().getPackage().getName() and - // we're using the erased types because the MaD convention is to not specify type parameters. - // Whether something is or isn't a sink doesn't usually depend on the type parameters. - type = callable.getDeclaringType().getErasure().(RefType).getNestedName() and - subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and - name = callable.getName() and - signature = ExternalFlow::paramsString(callable) and - (if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and - e.getExtensibleType() = extensibleType - ) and - ( - not CharacteristicsImpl::isModeled(e, _, extensibleType, _) and alreadyAiModeled = "" - or - CharacteristicsImpl::isModeled(e, _, extensibleType, alreadyAiModeled) - ) - } -} - -/** - * Holds if the given `endpoint` should be considered a candidate for the `extensibleType`. - * - * The other parameters record various other properties of interest. - */ -predicate isCandidate( - Endpoint endpoint, string package, string type, string subtypes, string name, string signature, - string input, string output, string parameterName, string extensibleType, string alreadyAiModeled -) { - CharacteristicsImpl::isCandidate(endpoint, _) and - not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u | - u.appliesToEndpoint(endpoint) - ) and - any(FrameworkModeMetadataExtractor meta) - .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName, - alreadyAiModeled, extensibleType) and - // If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a - // candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's - // already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We - // assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink - // types, and we don't need to reexamine it. - alreadyAiModeled.matches(["", "%ai-%"]) and - AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature) -} - -/** - * Holds if the given `endpoint` is a negative example for the `extensibleType` - * because of the `characteristic`. - * - * The other parameters record various other properties of interest. - */ -predicate isNegativeExample( - Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package, - string type, string subtypes, string name, string signature, string input, string output, - string parameterName, string extensibleType -) { - characteristic.appliesToEndpoint(endpoint) and - // the node is known not to be an endpoint of any appropriate type - forall(AutomodelEndpointTypes::EndpointType tp | - tp = CharacteristicsImpl::getAPotentialType(endpoint) - | - characteristic.hasImplications(tp, false, _) - ) and - // the lowest confidence across all endpoint types should be at least highConfidence - confidence = - min(float c | - characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c) - ) and - confidence >= SharedCharacteristics::highConfidence() and - any(FrameworkModeMetadataExtractor meta) - .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName, - _, extensibleType) and - // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes - // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here. - not exists(EndpointCharacteristic characteristic2, float confidence2 | - characteristic2 != characteristic - | - characteristic2.appliesToEndpoint(endpoint) and - confidence2 >= SharedCharacteristics::maximalConfidence() and - characteristic2 - .hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2) - ) -} - -/** - * Holds if the given `endpoint` is a positive example for the `endpointType`. - * - * The other parameters record various other properties of interest. - */ -predicate isPositiveExample( - Endpoint endpoint, string endpointType, string package, string type, string subtypes, string name, - string signature, string input, string output, string parameterName, string extensibleType -) { - any(FrameworkModeMetadataExtractor meta) - .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, parameterName, - _, extensibleType) and - CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) -} - -/* - * EndpointCharacteristic classes that are specific to Automodel for Java. - */ - -/** - * A negative characteristic that indicates that parameters of an is-style boolean method should not be considered sinks, - * and its return value should not be considered a source. - * - * A sink is highly unlikely to be exploitable if its callable's name starts with `is` and the callable has a boolean return - * type (e.g. `isDirectory`). These kinds of calls normally do only checks, and appear before the proper call that does - * the dangerous/interesting thing, so we want the latter to be modeled as the sink. - * - * TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks - */ -private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic -{ - UnexploitableIsCharacteristic() { this = "argument of is-style boolean method" } - - override predicate appliesToEndpoint(Endpoint e) { - e.getCallable().getName().matches("is%") and - e.getCallable().getReturnType() instanceof BooleanType and - ( - e.getExtensibleType() = "sinkModel" and - not FrameworkCandidatesImpl::isSink(e, _, _) - or - e.getExtensibleType() = "sourceModel" and - not FrameworkCandidatesImpl::isSource(e, _, _) and - e.getMaDOutput() = "ReturnValue" - ) - } -} - -/** - * A negative characteristic that indicates that parameters of an existence-checking boolean method should not be - * considered sinks, and its return value should not be considered a source. - * - * A sink is highly unlikely to be exploitable if its callable's name is `exists` or `notExists` and the callable has a - * boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the - * dangerous/interesting thing, so we want the latter to be modeled as the sink. - */ -private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic -{ - UnexploitableExistsCharacteristic() { this = "argument of existence-checking boolean method" } - - override predicate appliesToEndpoint(Endpoint e) { - exists(Callable callable | - callable = e.getCallable() and - callable.getName().toLowerCase() = ["exists", "notexists"] and - callable.getReturnType() instanceof BooleanType - | - e.getExtensibleType() = "sinkModel" and - not FrameworkCandidatesImpl::isSink(e, _, _) - or - e.getExtensibleType() = "sourceModel" and - not FrameworkCandidatesImpl::isSource(e, _, _) and - e.getMaDOutput() = "ReturnValue" - ) - } -} - -/** - * A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks, - * and its return value should not be considered a source. - */ -private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic -{ - ExceptionCharacteristic() { this = "argument/result of exception-related method" } - - override predicate appliesToEndpoint(Endpoint e) { - e.getCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable and - ( - e.getExtensibleType() = "sinkModel" and - not FrameworkCandidatesImpl::isSink(e, _, _) - or - e.getExtensibleType() = "sourceModel" and - not FrameworkCandidatesImpl::isSource(e, _, _) and - e.getMaDOutput() = "ReturnValue" - ) - } -} - -/** - * A characteristic that limits candidates to parameters of methods that are recognized as `ModelApi`, iow., APIs that - * are considered worth modeling. - */ -private class NotAModelApi extends CharacteristicsImpl::UninterestingToModelCharacteristic { - NotAModelApi() { this = "not a model API" } - - override predicate appliesToEndpoint(Endpoint e) { - not e.getCallable() instanceof ModelExclusions::ModelApi - } -} diff --git a/java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql b/java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql deleted file mode 100644 index 83683b4e78f5..000000000000 --- a/java/ql/automodel/src/AutomodelFrameworkModeExtractCandidates.ql +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Surfaces the endpoints that are not already known to be sinks, and are therefore used as candidates for - * classification with an ML model. - * - * Note: This query does not actually classify the endpoints using the model. - * - * @name Automodel candidates (framework mode) - * @description A query to extract automodel candidates in framework mode. - * @kind problem - * @problem.severity recommendation - * @id java/ml/extract-automodel-framework-candidates - * @tags internal extract automodel framework-mode candidates - */ - -private import AutomodelFrameworkModeCharacteristics -private import AutomodelJavaUtil - -from - Endpoint endpoint, DollarAtString package, DollarAtString type, DollarAtString subtypes, - DollarAtString name, DollarAtString signature, DollarAtString input, DollarAtString output, - DollarAtString parameterName, DollarAtString alreadyAiModeled, DollarAtString extensibleType -where - isCandidate(endpoint, package, type, subtypes, name, signature, input, output, parameterName, - extensibleType, alreadyAiModeled) -select endpoint, - "Related locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", // - CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", // - CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", // - package, "package", // - type, "type", // - subtypes, "subtypes", // - name, "name", // - signature, "signature", // - input, "input", // - output, "output", // - parameterName, "parameterName", // - alreadyAiModeled, "alreadyAiModeled", // - extensibleType, "extensibleType" diff --git a/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql b/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql deleted file mode 100644 index 05e5951b0610..000000000000 --- a/java/ql/automodel/src/AutomodelFrameworkModeExtractNegativeExamples.ql +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Surfaces endpoints that are non-sinks with high confidence, for use as negative examples in the prompt. - * - * @name Negative examples (framework mode) - * @kind problem - * @problem.severity recommendation - * @id java/ml/extract-automodel-framework-negative-examples - * @tags internal extract automodel framework-mode negative examples - */ - -private import AutomodelFrameworkModeCharacteristics -private import AutomodelEndpointTypes -private import AutomodelJavaUtil - -from - Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, - DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name, - DollarAtString signature, DollarAtString input, DollarAtString output, - DollarAtString parameterName, DollarAtString extensibleType -where - isNegativeExample(endpoint, characteristic, confidence, package, type, subtypes, name, signature, - input, output, parameterName, extensibleType) -select endpoint, - characteristic + "\nrelated locations: $@, $@." + - "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", // - CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", // - CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", // - package, "package", // - type, "type", // - subtypes, "subtypes", // - name, "name", // - signature, "signature", // - input, "input", // - output, "output", // - parameterName, "parameterName", // - extensibleType, "extensibleType" diff --git a/java/ql/automodel/src/AutomodelFrameworkModeExtractPositiveExamples.ql b/java/ql/automodel/src/AutomodelFrameworkModeExtractPositiveExamples.ql deleted file mode 100644 index 7cb023949ed9..000000000000 --- a/java/ql/automodel/src/AutomodelFrameworkModeExtractPositiveExamples.ql +++ /dev/null @@ -1,34 +0,0 @@ -/** - * Surfaces endpoints that are sinks with high confidence, for use as positive examples in the prompt. - * - * @name Positive examples (framework mode) - * @kind problem - * @problem.severity recommendation - * @id java/ml/extract-automodel-framework-positive-examples - * @tags internal extract automodel framework-mode positive examples - */ - -private import AutomodelFrameworkModeCharacteristics -private import AutomodelEndpointTypes -private import AutomodelJavaUtil - -from - Endpoint endpoint, EndpointType endpointType, DollarAtString package, DollarAtString type, - DollarAtString subtypes, DollarAtString name, DollarAtString signature, DollarAtString input, - DollarAtString output, DollarAtString parameterName, DollarAtString extensibleType -where - isPositiveExample(endpoint, endpointType, package, type, subtypes, name, signature, input, output, - parameterName, extensibleType) -select endpoint, - endpointType + "\nrelated locations: $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", // - CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, MethodDoc()), "MethodDoc", // - CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, ClassDoc()), "ClassDoc", // - package, "package", // - type, "type", // - subtypes, "subtypes", // - name, "name", // - signature, "signature", // - input, "input", // - output, "output", // - parameterName, "parameterName", // - extensibleType, "extensibleType" diff --git a/java/ql/automodel/src/AutomodelJavaUtil.qll b/java/ql/automodel/src/AutomodelJavaUtil.qll deleted file mode 100644 index 368fb172483b..000000000000 --- a/java/ql/automodel/src/AutomodelJavaUtil.qll +++ /dev/null @@ -1,111 +0,0 @@ -private import java -private import AutomodelEndpointTypes as AutomodelEndpointTypes - -/** - * A helper class to represent a string value that can be returned by a query using $@ notation. - * - * It extends `string`, but adds a mock `hasLocationInfo` method that returns the string itself as the file name. - * - * Use this, when you want to return a string value from a query using $@ notation - the string value - * will be included in the sarif file. - * - * - * Background information on `hasLocationInfo`: - * https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/#providing-location-information - */ -class DollarAtString extends string { - bindingset[this] - DollarAtString() { any() } - - bindingset[this] - predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) { - path = this and sl = 1 and sc = 1 and el = 1 and ec = 1 - } -} - -/** - * Holds for all combinations of MaD kinds (`kind`) and their human readable - * descriptions. - */ -predicate isKnownKind(string kind, AutomodelEndpointTypes::EndpointType type) { - kind = type.getKind() -} - -/** - * By convention, the subtypes property of the MaD declaration should only be - * true when there _can_ exist any subtypes with a different implementation. - * - * It would technically be ok to always use the value 'true', but this would - * break convention. - */ -pragma[nomagic] -boolean considerSubtypes(Callable callable) { - if - callable.isStatic() or - callable.getDeclaringType().isStatic() or - callable.isFinal() or - callable.getDeclaringType().isFinal() - then result = false - else result = true -} - -/** - * Holds if the given package, type, name and signature is a candidate for automodeling. - * - * This predicate is extensible, so that different endpoints can be selected at runtime. - */ -extensible predicate automodelCandidateFilter( - string package, string type, string name, string signature -); - -/** - * Holds if the given package, type, name and signature is a candidate for automodeling. - * - * This relies on an extensible predicate, and if that is not supplied then - * all endpoints are considered candidates. - */ -bindingset[package, type, name, signature] -predicate includeAutomodelCandidate(string package, string type, string name, string signature) { - not automodelCandidateFilter(_, _, _, _) or - automodelCandidateFilter(package, type, name, signature) -} - -/** - * Holds if the given program element corresponds to a piece of source code, - * that is, it is not compiler-generated. - * - * Note: This is a stricter check than `Element::fromSource`, which simply - * checks whether the element is in a source file as opposed to a JAR file. - * There can be compiler-generated elements in source files (especially for - * Kotlin), which we also want to exclude. - */ -predicate isFromSource(Element e) { - // from a source file (not a JAR) - e.fromSource() and - // not explicitly marked as compiler-generated - not e.isCompilerGenerated() and - // does not have a dummy location - not e.hasLocationInfo(_, 0, 0, 0, 0) -} - -/** - * Holds if taint cannot flow through the given type (because it is a numeric - * type or some other type with a fixed set of values). - */ -predicate isUnexploitableType(Type tp) { - tp instanceof PrimitiveType or - tp instanceof BoxedType or - tp instanceof NumberType or - tp instanceof VoidType -} - -/** - * Holds if the given method can be overridden, that is, it is not final, - * static, or private. - */ -predicate isOverridable(Method m) { - not m.getDeclaringType().isFinal() and - not m.isFinal() and - not m.isStatic() and - not m.isPrivate() -} diff --git a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll b/java/ql/automodel/src/AutomodelSharedCharacteristics.qll deleted file mode 100644 index 273c5d30dec9..000000000000 --- a/java/ql/automodel/src/AutomodelSharedCharacteristics.qll +++ /dev/null @@ -1,412 +0,0 @@ -float maximalConfidence() { result = 1.0 } - -float highConfidence() { result = 0.9 } - -float mediumConfidence() { result = 0.6 } - -/** - * A specification of how to instantiate the shared characteristics for a given candidate class. - * - * The `CandidateSig` implementation specifies a type to use for Endpoints (eg., `ParameterNode`), as well as a type - * to label endpoint classes (the `EndpointType`). One of the endpoint classes needs to be a 'negative' class, meaning - * "not any of the other known endpoint types". - */ -signature module CandidateSig { - /** - * An endpoint is a potential candidate for modeling. This will typically be bound to the language's - * DataFlow node class, or a subtype thereof. - */ - class Endpoint { - /** - * Gets the kind of this endpoint, either "sourceModel" or "sinkModel". - */ - string getExtensibleType(); - - /** - * Gets a string representation of this endpoint. - */ - string toString(); - } - - /** - * A related location for an endpoint. This will typically be bound to the supertype of all AST nodes (eg., `Top`). - */ - class RelatedLocation; - - /** - * A label for a related location. - * - * Eg., method-doc, class-doc, etc. - */ - class RelatedLocationType; - - /** - * An endpoint type considered by this specification. - */ - class EndpointType extends string; - - /** - * A sink endpoint type considered by this specification. - */ - class SinkType extends EndpointType; - - /** - * A source endpoint type considered by this specification. - */ - class SourceType extends EndpointType; - - /** - * Gets the endpoint as a location. - * - * This is a utility function to convert an endpoint to its corresponding location. - */ - RelatedLocation asLocation(Endpoint e); - - /** - * Defines what MaD kinds are known, and what endpoint type they correspond to. - */ - predicate isKnownKind(string kind, EndpointType type); - - /** - * Holds if `e` is a flow sanitizer, and has type `t`. - */ - predicate isSanitizer(Endpoint e, EndpointType t); - - /** - * Holds if `e` is a sink with the label `kind`, and provenance `provenance`. - */ - predicate isSink(Endpoint e, string kind, string provenance); - - /** - * Holds if `e` is a source with the label `kind`, and provenance `provenance`. - */ - predicate isSource(Endpoint e, string kind, string provenance); - - /** - * Holds if `e` is not a source or sink of any kind. - */ - predicate isNeutral(Endpoint e); - - /** - * Gets a related location. - * - * A related location is a source code location that may hold extra information about an endpoint that can be useful - * to the machine learning model. - * - * For example, a related location for a method call may be the documentation comment of a method. - */ - RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType name); -} - -/** - * A set of shared characteristics for a given candidate class. - * - * This module is language-agnostic, although the `CandidateSig` module will be language-specific. - * - * The language specific implementation can also further extend the behavior of this module by adding additional - * implementations of endpoint characteristics exported by this module. - */ -module SharedCharacteristics { - predicate isSink = Candidate::isSink/3; - - predicate isNeutral = Candidate::isNeutral/1; - - predicate isModeled(Candidate::Endpoint e, string kind, string extensibleKind, string provenance) { - Candidate::isSink(e, kind, provenance) and extensibleKind = "sinkModel" - or - Candidate::isSource(e, kind, provenance) and extensibleKind = "sourceModel" - } - - /** - * Holds if `endpoint` is modeled as `endpointType`. - */ - predicate isKnownAs( - Candidate::Endpoint endpoint, Candidate::EndpointType endpointType, - EndpointCharacteristic characteristic - ) { - // If the list of characteristics includes positive indicators with maximal confidence for this class, then it's a - // known sink for the class. - characteristic.appliesToEndpoint(endpoint) and - characteristic.hasImplications(endpointType, true, maximalConfidence()) - } - - /** - * Gets a potential type of this endpoint to make sure that sources are - * associated with source types and sinks with sink types. - */ - Candidate::EndpointType getAPotentialType(Candidate::Endpoint endpoint) { - endpoint.getExtensibleType() = "sourceModel" and - result instanceof Candidate::SourceType - or - endpoint.getExtensibleType() = "sinkModel" and - result instanceof Candidate::SinkType - } - - /** - * Holds if the given `endpoint` should be considered as a candidate for type `endpointType`, - * and classified by the ML model. - * - * A candidate is an endpoint that cannot be excluded from `endpointType` based on its characteristics. - */ - predicate isCandidate(Candidate::Endpoint endpoint, Candidate::EndpointType endpointType) { - endpointType = getAPotentialType(endpoint) and - not exists(getAnExcludingCharacteristic(endpoint, endpointType)) - } - - /** - * Gets the related location of `e` with name `name`, if it exists. - * Otherwise, gets the candidate itself. - */ - Candidate::RelatedLocation getRelatedLocationOrCandidate( - Candidate::Endpoint e, Candidate::RelatedLocationType type - ) { - if exists(Candidate::getRelatedLocation(e, type)) - then result = Candidate::getRelatedLocation(e, type) - else result = Candidate::asLocation(e) - } - - /** - * Gets a characteristics that disbar `endpoint` from being a candidate for `endpointType` - * with at least medium confidence. - */ - EndpointCharacteristic getAnExcludingCharacteristic( - Candidate::Endpoint endpoint, Candidate::EndpointType endpointType - ) { - result.appliesToEndpoint(endpoint) and - exists(float confidence | - confidence >= mediumConfidence() and - result.hasImplications(endpointType, false, confidence) - ) - } - - /** - * A set of characteristics that a particular endpoint might have. This set of characteristics is used to make decisions - * about whether to include the endpoint in the training set and with what kind, as well as whether to score the - * endpoint at inference time. - */ - abstract class EndpointCharacteristic extends string { - /** - * Holds for the string that is the name of the characteristic. This should describe some property of an endpoint - * that is meaningful for determining whether it's a sink, and if so, of which sink type. - */ - bindingset[this] - EndpointCharacteristic() { any() } - - /** - * Holds for endpoints that have this characteristic. - */ - abstract predicate appliesToEndpoint(Candidate::Endpoint n); - - /** - * This predicate describes what the characteristic tells us about an endpoint. - * - * Params: - * endpointType: The sink/source type. - * isPositiveIndicator: If true, this characteristic indicates that this endpoint _is_ a member of the class; if - * false, it indicates that it _isn't_ a member of the class. - * confidence: A float in [0, 1], which tells us how strong an indicator this characteristic is for the endpoint - * belonging / not belonging to the given class. A confidence near zero means this characteristic is a very weak - * indicator of whether or not the endpoint belongs to the class. A confidence of 1 means that all endpoints with - * this characteristic definitively do/don't belong to the class. - */ - abstract predicate hasImplications( - Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence - ); - - /** Indicators with confidence at or above this threshold are considered to be high-confidence indicators. */ - final float getHighConfidenceThreshold() { result = 0.8 } - } - - /** - * A high-confidence characteristic that indicates that an endpoint is a sink of a specified type. These endpoints can - * be used as positive samples for training or for a few-shot prompt. - */ - abstract class SinkCharacteristic extends EndpointCharacteristic { - bindingset[this] - SinkCharacteristic() { any() } - - abstract Candidate::EndpointType getSinkType(); - - final override predicate hasImplications( - Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence - ) { - endpointType = this.getSinkType() and - isPositiveIndicator = true and - confidence = maximalConfidence() - } - } - - /** - * A high-confidence characteristic that indicates that an endpoint is a source of a specified type. These endpoints can - * be used as positive samples for training or for a few-shot prompt. - */ - abstract class SourceCharacteristic extends EndpointCharacteristic { - bindingset[this] - SourceCharacteristic() { any() } - - abstract Candidate::EndpointType getSourceType(); - - final override predicate hasImplications( - Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence - ) { - endpointType = this.getSourceType() and - isPositiveIndicator = true and - confidence = maximalConfidence() - } - } - - /** - * A high-confidence characteristic that indicates that an endpoint is not a sink of any type. These endpoints can be - * used as negative samples for training or for a few-shot prompt. - */ - abstract class NotASinkCharacteristic extends EndpointCharacteristic { - bindingset[this] - NotASinkCharacteristic() { any() } - - override predicate hasImplications( - Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence - ) { - endpointType instanceof Candidate::SinkType and - isPositiveIndicator = false and - confidence = highConfidence() - } - } - - /** - * A high-confidence characteristic that indicates that an endpoint is not a source of any type. These endpoints can be - * used as negative samples for training or for a few-shot prompt. - */ - abstract class NotASourceCharacteristic extends EndpointCharacteristic { - bindingset[this] - NotASourceCharacteristic() { any() } - - override predicate hasImplications( - Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence - ) { - endpointType instanceof Candidate::SourceType and - isPositiveIndicator = false and - confidence = highConfidence() - } - } - - /** - * A high-confidence characteristic that indicates that an endpoint is neither a source nor a sink of any type. - */ - abstract class NeitherSourceNorSinkCharacteristic extends NotASinkCharacteristic, - NotASourceCharacteristic - { - bindingset[this] - NeitherSourceNorSinkCharacteristic() { any() } - - final override predicate hasImplications( - Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence - ) { - NotASinkCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence) or - NotASourceCharacteristic.super.hasImplications(endpointType, isPositiveIndicator, confidence) - } - } - - /** - * A medium-confidence characteristic that indicates that an endpoint is unlikely to be a sink of any type. These - * endpoints can be excluded from scoring at inference time, both to save time and to avoid false positives. They should - * not, however, be used as negative samples for training or for a few-shot prompt, because they may include a small - * number of sinks. - */ - abstract class LikelyNotASinkCharacteristic extends EndpointCharacteristic { - bindingset[this] - LikelyNotASinkCharacteristic() { any() } - - override predicate hasImplications( - Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence - ) { - endpointType instanceof Candidate::SinkType and - isPositiveIndicator = false and - confidence = mediumConfidence() - } - } - - /** - * A characteristic that indicates not necessarily that an endpoint is not a sink, but rather that it is not a sink - * that's interesting to model in the standard Java libraries. These filters should be removed when extracting sink - * candidates within a user's codebase for customized modeling. - * - * These endpoints should not be used as negative samples for training or for a few-shot prompt, because they are not - * necessarily non-sinks. - */ - abstract class UninterestingToModelCharacteristic extends EndpointCharacteristic { - bindingset[this] - UninterestingToModelCharacteristic() { any() } - - override predicate hasImplications( - Candidate::EndpointType endpointType, boolean isPositiveIndicator, float confidence - ) { - endpointType instanceof Candidate::SinkType and - isPositiveIndicator = false and - confidence = mediumConfidence() - } - } - - /** - * Contains default implementations that are derived solely from the `CandidateSig` implementation. - */ - private module DefaultCharacteristicImplementations { - /** - * Endpoints identified as sinks by the `CandidateSig` implementation are sinks with maximal confidence. - */ - private class KnownSinkCharacteristic extends SinkCharacteristic { - string madKind; - Candidate::EndpointType endpointType; - string provenance; - - KnownSinkCharacteristic() { - Candidate::isKnownKind(madKind, endpointType) and - // bind "this" to a unique string differing from that of the SinkType classes - this = madKind + "_" + provenance + "_characteristic" and - Candidate::isSink(_, madKind, provenance) - } - - override predicate appliesToEndpoint(Candidate::Endpoint e) { - Candidate::isSink(e, madKind, provenance) - } - - override Candidate::EndpointType getSinkType() { result = endpointType } - } - - private class KnownSourceCharacteristic extends SourceCharacteristic { - string madKind; - Candidate::EndpointType endpointType; - string provenance; - - KnownSourceCharacteristic() { - Candidate::isKnownKind(madKind, endpointType) and - // bind "this" to a unique string differing from that of the SinkType classes - this = madKind + "_" + provenance + "_characteristic" and - Candidate::isSource(_, madKind, provenance) - } - - override predicate appliesToEndpoint(Candidate::Endpoint e) { - Candidate::isSource(e, madKind, provenance) - } - - override Candidate::EndpointType getSourceType() { result = endpointType } - } - - /** - * A negative characteristic that indicates that an endpoint was manually modeled as a neutral model. - */ - private class NeutralModelCharacteristic extends NeitherSourceNorSinkCharacteristic { - NeutralModelCharacteristic() { this = "known non-sink" } - - override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isNeutral(e) } - } - - /** - * A negative characteristic that indicates that an endpoint is a sanitizer, and thus not a source. - */ - private class IsSanitizerCharacteristic extends NotASourceCharacteristic { - IsSanitizerCharacteristic() { this = "known sanitizer" } - - override predicate appliesToEndpoint(Candidate::Endpoint e) { Candidate::isSanitizer(e, _) } - } - } -} diff --git a/java/ql/automodel/src/AutomodelSinkModelMrvaQueries.ql b/java/ql/automodel/src/AutomodelSinkModelMrvaQueries.ql deleted file mode 100644 index ed61ccfbbfdc..000000000000 --- a/java/ql/automodel/src/AutomodelSinkModelMrvaQueries.ql +++ /dev/null @@ -1,62 +0,0 @@ -/** - * This file contains query predicates for use when gathering metrics at scale using Multi Repo - * Variant Analysis. - */ - -private import java -private import AutomodelAlertSinkUtil - -/** - * Holds if `alertCount` is the number of alerts for the query with ID `queryId` for which the - * sinks correspond to the given `ai-generated` sink model. - */ -query predicate sinkModelCountPerQuery( - string queryId, int alertCount, string package, string type, boolean subtypes, string name, - string signature, string input, string ext, string kind, string provenance -) { - exists(SinkModel s | - sinkModelTallyPerQuery(queryId, alertCount, s) and - s.getProvenance() = "ai-generated" and - s.getPackage() = package and - s.getType() = type and - s.getSubtypes() = subtypes and - s.getName() = name and - s.getSignature() = signature and - s.getInput() = input and - s.getExt() = ext and - s.getKind() = kind and - s.getProvenance() = provenance - ) -} - -/** - * Holds if `instanceCount` is the number of instances corresponding to the given `ai-generated` - * sink model (as identified by the `package`, `name`, `input`, etc.). - */ -query predicate instanceCount( - int instanceCount, string package, string type, boolean subtypes, string name, string signature, - string input, string ext, string kind, string provenance -) { - exists(SinkModel s | - instanceCount = s.getInstanceCount() and - instanceCount > 0 and - s.getProvenance() = "ai-generated" and - s.getPackage() = package and - s.getType() = type and - s.getSubtypes() = subtypes and - s.getName() = name and - s.getSignature() = signature and - s.getInput() = input and - s.getExt() = ext and - s.getKind() = kind and - s.getProvenance() = provenance - ) -} - -// MRVA requires a select clause, so we repurpose it to tell us which query predicates had results. -from string hadResults -where - sinkModelCountPerQuery(_, _, _, _, _, _, _, _, _, _, _) and hadResults = "sinkModelCountPerQuery" - or - instanceCount(_, _, _, _, _, _, _, _, _, _) and hadResults = "instanceCount" -select hadResults diff --git a/java/ql/automodel/src/change-notes/2024-11-19-drop-automodel.md b/java/ql/automodel/src/change-notes/2024-11-19-drop-automodel.md new file mode 100644 index 000000000000..2b554d6de204 --- /dev/null +++ b/java/ql/automodel/src/change-notes/2024-11-19-drop-automodel.md @@ -0,0 +1,4 @@ +--- +category: breaking +--- +* Dropped the Java Automodel queries. diff --git a/java/ql/automodel/src/codeql-pack.release.yml b/java/ql/automodel/src/codeql-pack.release.yml deleted file mode 100644 index 56a2fb388725..000000000000 --- a/java/ql/automodel/src/codeql-pack.release.yml +++ /dev/null @@ -1,2 +0,0 @@ ---- -lastReleaseVersion: 1.0.11 diff --git a/java/ql/automodel/src/qlpack.yml b/java/ql/automodel/src/qlpack.yml deleted file mode 100644 index 4acf2219db30..000000000000 --- a/java/ql/automodel/src/qlpack.yml +++ /dev/null @@ -1,10 +0,0 @@ -name: codeql/java-automodel-queries -version: 1.0.12-dev -groups: - - java - - automodel -dependencies: - codeql/java-all: ${workspace} -dataExtensions: - - AutomodelCandidateFilter.yml -warnOnImplicitThis: true \ No newline at end of file diff --git a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractionTests.expected b/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractionTests.expected deleted file mode 100644 index 8ec8033d086e..000000000000 --- a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractionTests.expected +++ /dev/null @@ -1,2 +0,0 @@ -testFailures -failures diff --git a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractionTests.ql b/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractionTests.ql deleted file mode 100644 index b7e1efc45325..000000000000 --- a/java/ql/automodel/test/AutomodelApplicationModeExtraction/AutomodelApplicationModeExtractionTests.ql +++ /dev/null @@ -1,35 +0,0 @@ -import java -import AutomodelApplicationModeCharacteristics as Characteristics -import AutomodelExtractionTests - -module TestHelper implements TestHelperSig { - Location getEndpointLocation(Characteristics::Endpoint endpoint) { - result = endpoint.asTop().getLocation() - } - - predicate isCandidate( - Characteristics::Endpoint endpoint, string name, string signature, string input, string output, - string extensibleType - ) { - Characteristics::isCandidate(endpoint, _, _, _, name, signature, input, output, _, - extensibleType, _) - } - - predicate isPositiveExample( - Characteristics::Endpoint endpoint, string endpointType, string name, string signature, - string input, string output, string extensibleType - ) { - Characteristics::isPositiveExample(endpoint, endpointType, _, _, _, name, signature, input, - output, _, extensibleType) - } - - predicate isNegativeExample( - Characteristics::Endpoint endpoint, string name, string signature, string input, string output, - string extensibleType - ) { - Characteristics::isNegativeExample(endpoint, _, _, _, _, _, name, signature, input, output, _, - extensibleType) - } -} - -import MakeTest> diff --git a/java/ql/automodel/test/AutomodelApplicationModeExtraction/PluginImpl.java b/java/ql/automodel/test/AutomodelApplicationModeExtraction/PluginImpl.java deleted file mode 100644 index b0f3482a7322..000000000000 --- a/java/ql/automodel/test/AutomodelApplicationModeExtraction/PluginImpl.java +++ /dev/null @@ -1,8 +0,0 @@ -import hudson.Plugin; - -public class PluginImpl extends Plugin { - @Override - public void configure(String name, String value) { // $ sourceModelCandidate=configure(String,String):Parameter[0] sourceModelCandidate=configure(String,String):Parameter[1] - // ... - } -} diff --git a/java/ql/automodel/test/AutomodelApplicationModeExtraction/Test.java b/java/ql/automodel/test/AutomodelApplicationModeExtraction/Test.java deleted file mode 100644 index 9691cf86c150..000000000000 --- a/java/ql/automodel/test/AutomodelApplicationModeExtraction/Test.java +++ /dev/null @@ -1,112 +0,0 @@ -package com.github.codeql.test; - -import java.io.InputStream; -import java.io.PrintWriter; -import java.nio.file.CopyOption; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.Supplier; -import java.io.File; -import java.io.FileFilter; -import java.nio.file.FileVisitOption; -import java.net.URLConnection; -import java.util.concurrent.FutureTask; - -class Test { - public static void main(String[] args) throws Exception { - AtomicReference reference = new AtomicReference<>(); // uninteresting (parameterless constructor) - reference.set( // $ sinkModelCandidate=set(Object):Argument[this] - args[0] // $ negativeSinkExample=set(Object):Argument[0] // modeled as a flow step - ); // not a source candidate (return type is void) - } - - public static void callSupplier(Supplier supplier) { - supplier.get(); // not a source candidate (lambda flow) - } - - public static void copyFiles(Path source, Path target, CopyOption option) throws Exception { - Files.copy( - source, // $ positiveSinkExample=copy(Path,Path,CopyOption[]):Argument[0](path-injection) - target, // $ positiveSinkExample=copy(Path,Path,CopyOption[]):Argument[1](path-injection) - option // no candidate (not modeled, but source and target are modeled) - ); // $ sourceModelCandidate=copy(Path,Path,CopyOption[]):ReturnValue - } - - public static InputStream getInputStream(Path openPath) throws Exception { - return Files.newInputStream( - openPath // $ sinkModelCandidate=newInputStream(Path,OpenOption[]):Argument[0] positiveSinkExample=newInputStream(Path,OpenOption[]):Argument[0](path-injection) // sink candidate because "only" ai-modeled, and useful as a candidate in regression testing - ); // $ sourceModelCandidate=newInputStream(Path,OpenOption[]):ReturnValue - } - - public static InputStream getInputStream(String openPath, String otherPath) throws Exception { - return Test.getInputStream( // the call is not a source candidate (argument to local call) - Paths.get( - openPath, // $ negativeSinkExample=get(String,String[]):Argument[0] // modeled as a flow step - otherPath - ) // $ sourceModelCandidate=get(String,String[]):ReturnValue negativeSinkExample=get(String,String[]):Argument[1] - ); - } - - public static int compareFiles(File f1, File f2) { - return f1.compareTo( // $ negativeSinkExample=compareTo(File):Argument[this] - f2 // $ negativeSinkExample=compareTo(File):Argument[0] // modeled as not a sink - ); // not a source candidate (return type is int) - } - - public static void FilesWalkExample(Path p, FileVisitOption o) throws Exception { - Files.walk( - p, // $ negativeSinkExample=walk(Path,FileVisitOption[]):Argument[0] // modeled as a flow step - o, // the implicit varargs array is a candidate, annotated on the last line of the call - o // not a candidate (only the first arg corresponding to a varargs array - // is extracted) - ); // $ sourceModelCandidate=walk(Path,FileVisitOption[]):ReturnValue sinkModelCandidate=walk(Path,FileVisitOption[]):Argument[1] - } - - public static void WebSocketExample(URLConnection c) throws Exception { - c.getInputStream(); // $ sinkModelCandidate=getInputStream():Argument[this] positiveSourceExample=getInputStream():ReturnValue(remote) // not a source candidate (manual modeling) - c.connect(); // $ sinkModelCandidate=connect():Argument[this] // not a source candidate (return type is void) - } - - public static void fileFilterExample(File f, FileFilter ff) { - f.listFiles( // $ sinkModelCandidate=listFiles(FileFilter):Argument[this] - ff - ); // $ sourceModelCandidate=listFiles(FileFilter):ReturnValue - } -} - -class OverrideTest extends Exception { - public void printStackTrace(PrintWriter writer) { // $ sourceModelCandidate=printStackTrace(PrintWriter):Parameter[0] - return; - } - -} - -class TaskUtils { - public FutureTask getTask() { - FutureTask ft = new FutureTask(() -> { - // ^-- no sink candidate for the `this` qualifier of a constructor - return 42; - }); - return ft; - } -} - -class MoreTests { - public static void FilesListExample(Path p) throws Exception { - Files.list( - Files.createDirectories( - p // $ positiveSinkExample=createDirectories(Path,FileAttribute[]):Argument[0](path-injection) - ) // $ sourceModelCandidate=createDirectories(Path,FileAttribute[]):ReturnValue negativeSinkExample=list(Path):Argument[0] // modeled as a flow step - ); // $ sourceModelCandidate=list(Path):ReturnValue - - Files.delete( - p // $ sinkModelCandidate=delete(Path):Argument[0] positiveSinkExample=delete(Path):Argument[0](path-injection) - ); // not a source candidate (return type is void) - - Files.deleteIfExists( - p // $ sinkModelCandidate=deleteIfExists(Path):Argument[0] positiveSinkExample=deleteIfExists(Path):Argument[0](path-injection) - ); // not a source candidate (return type is boolean) - } -} \ No newline at end of file diff --git a/java/ql/automodel/test/AutomodelApplicationModeExtraction/hudson/Plugin.java b/java/ql/automodel/test/AutomodelApplicationModeExtraction/hudson/Plugin.java deleted file mode 100644 index 3ad79abb8df1..000000000000 --- a/java/ql/automodel/test/AutomodelApplicationModeExtraction/hudson/Plugin.java +++ /dev/null @@ -1,7 +0,0 @@ -package hudson; - -/** Plugin doc */ -public class Plugin { - /** Configure method doc */ - public void configure(String name, String value) {} -} diff --git a/java/ql/automodel/test/AutomodelExtractionTests.qll b/java/ql/automodel/test/AutomodelExtractionTests.qll deleted file mode 100644 index fbc407c67f05..000000000000 --- a/java/ql/automodel/test/AutomodelExtractionTests.qll +++ /dev/null @@ -1,77 +0,0 @@ -import java -import TestUtilities.InlineExpectationsTest -import AutomodelSharedCharacteristics - -signature module TestHelperSig { - Location getEndpointLocation(Candidate::Endpoint e); - - predicate isCandidate( - Candidate::Endpoint e, string name, string signature, string input, string output, - string extensibleType - ); - - predicate isPositiveExample( - Candidate::Endpoint e, string endpointType, string name, string signature, string input, - string output, string extensibleType - ); - - predicate isNegativeExample( - Candidate::Endpoint e, string name, string signature, string input, string output, - string extensibleType - ); -} - -module Extraction TestHelper> implements TestSig { - string getARelevantTag() { - result in [ - "sourceModelCandidate", "sinkModelCandidate", // a candidate source/sink - "positiveSourceExample", "positiveSinkExample", // a known source/sink - "negativeSourceExample", "negativeSinkExample" // a known non-source/non-sink - ] - } - - /** - * If `extensibleType` is `sourceModel` then the result is `ifSource`, if it - * is `sinkModel` then the result is `ifSink`. - */ - bindingset[extensibleType, ifSource, ifSink] - private string ifSource(string extensibleType, string ifSource, string ifSink) { - extensibleType = "sourceModel" and result = ifSource - or - extensibleType = "sinkModel" and result = ifSink - } - - additional predicate selectEndpoint( - Candidate::Endpoint endpoint, string name, string signature, string input, string output, - string extensibleType, string tag, string suffix - ) { - TestHelper::isCandidate(endpoint, name, signature, input, output, extensibleType) and - tag = ifSource(extensibleType, "sourceModelCandidate", "sinkModelCandidate") and - suffix = "" - or - TestHelper::isNegativeExample(endpoint, name, signature, input, output, extensibleType) and - tag = "negative" + ifSource(extensibleType, "Source", "Sink") + "Example" and - suffix = "" - or - exists(string endpointType | - TestHelper::isPositiveExample(endpoint, endpointType, name, signature, input, output, - extensibleType) and - tag = "positive" + ifSource(extensibleType, "Source", "Sink") + "Example" and - suffix = "(" + endpointType + ")" - ) - } - - predicate hasActualResult(Location location, string element, string tag, string value) { - exists( - Candidate::Endpoint endpoint, string name, string signature, string input, string output, - string extensibleType, string suffix - | - selectEndpoint(endpoint, name, signature, input, output, extensibleType, tag, suffix) - | - TestHelper::getEndpointLocation(endpoint) = location and - endpoint.toString() = element and - // for source models only the output is relevant, and vice versa for sink models - value = name + signature + ":" + ifSource(extensibleType, output, input) + suffix - ) - } -} diff --git a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractionTests.expected b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractionTests.expected deleted file mode 100644 index 8ec8033d086e..000000000000 --- a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractionTests.expected +++ /dev/null @@ -1,2 +0,0 @@ -testFailures -failures diff --git a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractionTests.ql b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractionTests.ql deleted file mode 100644 index 0d5e86118705..000000000000 --- a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/AutomodelFrameworkModeExtractionTests.ql +++ /dev/null @@ -1,35 +0,0 @@ -import java -import AutomodelFrameworkModeCharacteristics as Characteristics -import AutomodelExtractionTests - -module TestHelper implements TestHelperSig { - Location getEndpointLocation(Characteristics::Endpoint endpoint) { - result = endpoint.asTop().getLocation() - } - - predicate isCandidate( - Characteristics::Endpoint endpoint, string name, string signature, string input, string output, - string extensibleType - ) { - Characteristics::isCandidate(endpoint, _, _, _, name, signature, input, output, _, - extensibleType, _) - } - - predicate isPositiveExample( - Characteristics::Endpoint endpoint, string endpointType, string name, string signature, - string input, string output, string extensibleType - ) { - Characteristics::isPositiveExample(endpoint, endpointType, _, _, _, name, signature, input, - output, _, extensibleType) - } - - predicate isNegativeExample( - Characteristics::Endpoint endpoint, string name, string signature, string input, string output, - string extensibleType - ) { - Characteristics::isNegativeExample(endpoint, _, _, _, _, _, name, signature, input, output, _, - extensibleType) - } -} - -import MakeTest> diff --git a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/com/github/codeql/test/MyWriter.java b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/com/github/codeql/test/MyWriter.java deleted file mode 100644 index 62bd773cc2e2..000000000000 --- a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/com/github/codeql/test/MyWriter.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.github.codeql.test; - -public class MyWriter extends java.io.Writer { - @Override - public void write(char[] cbuf, int off, int len) { // $ sinkModelCandidate=write(char[],int,int):Argument[this] positiveSinkExample=write(char[],int,int):Argument[0](file-content-store) sourceModelCandidate=write(char[],int,int):Parameter[this] sourceModelCandidate=write(char[],int,int):Parameter[0] - } - - @Override - public void close() { // $ sinkModelCandidate=close():Argument[this] sourceModelCandidate=close():Parameter[this] - } - - @Override - public void flush() { // $ sinkModelCandidate=flush():Argument[this] sourceModelCandidate=flush():Parameter[this] - } -} diff --git a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/com/github/codeql/test/NonPublicClass.java b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/com/github/codeql/test/NonPublicClass.java deleted file mode 100644 index b106d3da5940..000000000000 --- a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/com/github/codeql/test/NonPublicClass.java +++ /dev/null @@ -1,10 +0,0 @@ -package com.github.codeql.test; - -/** - * No candidates in this class, as it's not public! - */ -class NonPublicClass { - public void noCandidates(String here) { - System.out.println(here); - } -} diff --git a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/com/github/codeql/test/PublicClass.java b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/com/github/codeql/test/PublicClass.java deleted file mode 100644 index 79fabff0664b..000000000000 --- a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/com/github/codeql/test/PublicClass.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.github.codeql.test; - -public class PublicClass { - public void stuff(String arg) { // $ sinkModelCandidate=stuff(String):Argument[this] sourceModelCandidate=stuff(String):Parameter[this] sinkModelCandidate=stuff(String):Argument[0] sourceModelCandidate=stuff(String):Parameter[0] // source candidates because it is an overrideable method - System.out.println(arg); - } - - public static void staticStuff(String arg) { // $ sinkModelCandidate=staticStuff(String):Argument[0] // `arg` is not a source candidate (not overrideabe); `this` is not a candidate (static method) - System.out.println(arg); - } - - protected void nonPublicStuff(String arg) { // $ sinkModelCandidate=nonPublicStuff(String):Argument[this] sourceModelCandidate=nonPublicStuff(String):Parameter[this] sinkModelCandidate=nonPublicStuff(String):Argument[0] sourceModelCandidate=nonPublicStuff(String):Parameter[0] - System.out.println(arg); - } - - void packagePrivateStuff(String arg) { // no candidates because the method is not public - System.out.println(arg); - } - - public PublicClass(Object input) { // $ sourceModelCandidate=PublicClass(Object):ReturnValue sinkModelCandidate=PublicClass(Object):Argument[0] // `this` is not a candidate because it is a constructor - } - - // `input` and `input` are source candidates, but not sink candidates (is-style method) - public Boolean isIgnored(Object input) { // $ negativeSinkExample=isIgnored(Object):Argument[this] sourceModelCandidate=isIgnored(Object):Parameter[this] negativeSinkExample=isIgnored(Object):Argument[0] sourceModelCandidate=isIgnored(Object):Parameter[0] - return false; - } -} diff --git a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/com/github/codeql/test/PublicInterface.java b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/com/github/codeql/test/PublicInterface.java deleted file mode 100644 index d4f80b3c6988..000000000000 --- a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/com/github/codeql/test/PublicInterface.java +++ /dev/null @@ -1,9 +0,0 @@ -package com.github.codeql.test; - -public interface PublicInterface { - public int stuff(String arg); // $ sinkModelCandidate=stuff(String):Argument[this] sourceModelCandidate=stuff(String):Parameter[this] sinkModelCandidate=stuff(String):Argument[0] sourceModelCandidate=stuff(String):Parameter[0] // result is _not_ a source candidate source (primitive return type) - - public static void staticStuff(String arg) { // $ sinkModelCandidate=staticStuff(String):Argument[0] // not a source candidate (static method) - System.out.println(arg); - } -} diff --git a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/java/io/File.java b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/java/io/File.java deleted file mode 100644 index 8bfe83e23397..000000000000 --- a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/java/io/File.java +++ /dev/null @@ -1,13 +0,0 @@ -package java.io; - -public class File { - public int compareTo( // $ negativeSinkExample=compareTo(File):Argument[this] sourceModelCandidate=compareTo(File):Parameter[this] // modeled as neutral for sinks - File pathname // $ negativeSinkExample=compareTo(File):Argument[0] sourceModelCandidate=compareTo(File):Parameter[0] // modeled as neutral for sinks - ) { - return 0; - } - - public boolean setLastModified(long time) { // $ sinkModelCandidate=setLastModified(long):Argument[this] sourceModelCandidate=setLastModified(long):Parameter[this] // time is not a candidate (primitive type) - return false; - } // return value is not a source candidate because it's a primitive -} diff --git a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/java/nio/file/Files.java b/java/ql/automodel/test/AutomodelFrameworkModeExtraction/java/nio/file/Files.java deleted file mode 100644 index a833ba5f6e20..000000000000 --- a/java/ql/automodel/test/AutomodelFrameworkModeExtraction/java/nio/file/Files.java +++ /dev/null @@ -1,31 +0,0 @@ -package java.nio.file; - -import java.io.InputStream; -import java.io.FileInputStream; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.io.IOException; -import java.io.OutputStream; -import java.nio.file.OpenOption; - -public class Files { - public static void copy( // method result is not a candidate source (void) - Path source, // $ positiveSinkExample=copy(Path,OutputStream):Argument[0](path-injection) // manual model exists - OutputStream out // $ sinkModelCandidate=copy(Path,OutputStream):Argument[1] - /* NB: may be worthwhile to implement the - same behavior as in application mode where out would not be a - candidate because there already is a model for another parameter of - the same method and we assume that methods are always modeled - completely. - */ - ) throws IOException { - // ... - } - - public static InputStream newInputStream( // $ sourceModelCandidate=newInputStream(Path,OpenOption[]):ReturnValue - Path openPath, // $ positiveSinkExample=newInputStream(Path,OpenOption[]):Argument[0](path-injection) sinkModelCandidate=newInputStream(Path,OpenOption[]):Argument[0] // known sink, but still a candidate (ai-modeled, and useful as a candidate in regression testing) - OpenOption... options // $ sinkModelCandidate=newInputStream(Path,OpenOption[]):Argument[1] - ) throws IOException { - return new FileInputStream(openPath.toFile()); - } -} diff --git a/java/ql/automodel/test/change-notes/2024-05-23-Version1.md b/java/ql/automodel/test/change-notes/2024-05-23-Version1.md deleted file mode 100644 index 5840e51017be..000000000000 --- a/java/ql/automodel/test/change-notes/2024-05-23-Version1.md +++ /dev/null @@ -1,4 +0,0 @@ ---- -category: breaking ---- -* CodeQL package management is now generally available, and all GitHub-produced CodeQL packages have had their version numbers increased to 1.0.0. diff --git a/java/ql/automodel/test/qlpack.yml b/java/ql/automodel/test/qlpack.yml deleted file mode 100644 index 46138d9435c0..000000000000 --- a/java/ql/automodel/test/qlpack.yml +++ /dev/null @@ -1,13 +0,0 @@ -name: codeql/java-automodel-tests -version: 1.0.0-dev -groups: - - java - - automodel - - test -dependencies: - codeql/java-all: ${workspace} - codeql/java-automodel-queries: ${workspace} - codeql/java-tests: ${workspace} -extractor: java -tests: . -warnOnImplicitThis: true \ No newline at end of file