Skip to content

Commit

Permalink
Merge pull request #29 from ajinabraham/speed
Browse files Browse the repository at this point in the history
libsast 2.0.0 bump
  • Loading branch information
ajinabraham authored Aug 30, 2023
2 parents 6f34ce3 + af2f1c9 commit 4e6111b
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 45 deletions.
2 changes: 1 addition & 1 deletion libsast/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
__title__ = 'libsast'
__authors__ = 'Ajin Abraham'
__copyright__ = 'Copyright 2020 Ajin Abraham, OpenSecurity'
__version__ = '1.5.3'
__version__ = '2.0.0'
__version_info__ = tuple(int(i) for i in __version__.split('.'))
__all__ = [
'Scanner',
Expand Down
51 changes: 38 additions & 13 deletions libsast/core_matcher/choice_matcher.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# -*- coding: utf_8 -*-
"""Choice Matcher."""
from pathlib import Path
from multiprocessing import (
Pool,
)

from libsast.core_matcher.helpers import (
get_rules,
Expand Down Expand Up @@ -31,6 +34,7 @@ def scan(self, paths: list) -> dict:
if not (self.scan_rules and paths):
return
self.validate_rules()
choice_args = []
if self.show_progress:
pbar = common.ProgressBar('Choice Match', len(self.scan_rules))
self.scan_rules = pbar.progrees_loop(self.scan_rules)
Expand All @@ -39,7 +43,15 @@ def scan(self, paths: list) -> dict:
if rule['type'] != 'code' and self.alternative_path:
# Scan only alternative path
scan_paths = [Path(self.alternative_path)]
self.choice_matcher(scan_paths, rule)
choice_args.append((scan_paths, rule))

# Multiprocess Pool
with Pool() as pool:
results = pool.starmap(
self.choice_matcher,
choice_args,
chunksize=10)
self.add_finding(results)
return self.findings

def validate_rules(self):
Expand All @@ -66,6 +78,7 @@ def validate_rules(self):

def choice_matcher(self, scan_paths, rule):
"""Run a Single Choice Matcher rule on all files."""
results = []
try:
matches = set()
all_matches = set()
Expand All @@ -89,22 +102,34 @@ def choice_matcher(self, scan_paths, rule):
elif isinstance(match, list):
# or, and
matches.add(match[0])
self.add_finding(rule, matches, all_matches)
results.append({
'rule': rule,
'matches': matches,
'all_matches': all_matches,
})
except Exception:
raise exceptions.RuleProcessingError('Rule processing error.')
return results

def add_finding(self, rule, matches, all_matches):
def add_finding(self, results):
"""Add Choice Findings."""
if all_matches:
selection = rule['selection'].format(list(all_matches))
elif matches:
select = rule['choice'][min(matches)][1]
selection = rule['selection'].format(select)
elif rule.get('else'):
selection = rule['selection'].format(rule['else'])
else:
return
self.findings[rule['id']] = self.get_meta(rule, selection)
for res_list in results:
if not res_list:
continue
for match_dict in res_list:
all_matches = match_dict['all_matches']
matches = match_dict['matches']
rule = match_dict['rule']
if all_matches:
selection = rule['selection'].format(list(all_matches))
elif matches:
select = rule['choice'][min(matches)][1]
selection = rule['selection'].format(select)
elif rule.get('else'):
selection = rule['selection'].format(rule['else'])
else:
continue
self.findings[rule['id']] = self.get_meta(rule, selection)

def get_meta(self, rule, selection):
"""Get Finding Meta."""
Expand Down
80 changes: 50 additions & 30 deletions libsast/core_matcher/pattern_matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
"""Pattern Matcher."""
from copy import deepcopy
from operator import itemgetter
from multiprocessing import (
Pool,
)

from libsast.core_matcher.helpers import (
get_rules,
Expand Down Expand Up @@ -35,16 +38,22 @@ def scan(self, paths: list) -> dict:
if self.show_progress:
pbar = common.ProgressBar('Pattern Match', len(paths))
paths = pbar.progrees_loop(paths)
files_to_scan = set()
for sfile in paths:
ext = sfile.suffix.lower()
if self.exts and ext not in self.exts:
if self.exts and sfile.suffix.lower() not in self.exts:
continue
if sfile.stat().st_size / 1000 / 1000 > 5:
# Skip scanning files greater than 5 MB
print(f'Skipping large file {sfile.as_posix()}')
continue
data = sfile.read_text('utf-8', 'ignore')
self.pattern_matcher(data, sfile, ext)
files_to_scan.add(sfile)
# Multiprocess Pool
with Pool() as pool:
results = pool.map(
self.pattern_matcher,
files_to_scan,
chunksize=10)
self.add_finding(results)
return self.findings

def validate_rules(self):
Expand All @@ -68,9 +77,11 @@ def validate_rules(self):
f' Available matchers are {supported}',
)

def pattern_matcher(self, data, file_path, ext):
def pattern_matcher(self, file_path):
"""Static Analysis Pattern Matcher."""
results = []
try:
data = file_path.read_text('utf-8', 'ignore')
for rule in self.scan_rules:
case = rule.get('input_case')
if case == 'lower':
Expand All @@ -79,7 +90,7 @@ def pattern_matcher(self, data, file_path, ext):
tmp_data = data.upper()
else:
tmp_data = data
if ext in ('.html', '.xml'):
if file_path.suffix.lower() in ('.html', '.xml'):
fmt_data = strip_comments2(tmp_data)
else:
fmt_data = strip_comments(tmp_data)
Expand All @@ -88,31 +99,40 @@ def pattern_matcher(self, data, file_path, ext):
fmt_data,
rule)
if matches:
self.add_finding(file_path, rule, matches)
results.append({
'file': file_path.as_posix(),
'rule': rule,
'matches': matches,
})
except Exception:
raise exceptions.RuleProcessingError('Rule processing error.')
return results

def add_finding(self, file_path, rule, matches):
def add_finding(self, results):
"""Add Code Analysis Findings."""
for match in matches:
crule = deepcopy(rule)
file_details = {
'file_path': file_path.as_posix(),
'match_string': match[0],
'match_position': match[1],
'match_lines': match[2],
}
if rule['id'] in self.findings:
self.findings[rule['id']]['files'].append(file_details)
else:
metadata = crule.get('metadata', {})
metadata['description'] = crule['message']
metadata['severity'] = crule['severity']
self.findings[rule['id']] = {
'files': [file_details],
'metadata': metadata,
}
to_sort = self.findings[rule['id']]['files']
self.findings[rule['id']]['files'] = sorted(
to_sort,
key=itemgetter('file_path', 'match_string', 'match_lines'))
for res_list in results:
if not res_list:
continue
for match_dict in res_list:
rule = match_dict['rule']
for match in match_dict['matches']:
crule = deepcopy(rule)
file_details = {
'file_path': match_dict['file'],
'match_string': match[0],
'match_position': match[1],
'match_lines': match[2],
}
if rule['id'] in self.findings:
self.findings[rule['id']]['files'].append(file_details)
else:
metadata = crule.get('metadata', {})
metadata['description'] = crule['message']
metadata['severity'] = crule['severity']
self.findings[rule['id']] = {
'files': [file_details],
'metadata': metadata,
}
self.findings[rule['id']]['files'] = sorted(
self.findings[rule['id']]['files'],
key=itemgetter('file_path', 'match_string', 'match_lines'))
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "libsast"
version = "1.5.3"
version = "2.0.0"
description = "A generic SAST library built on top of semgrep and regex"
keywords = ["libsast", "SAST", "Python SAST", "SAST API", "Regex SAST", "Pattern Matcher"]
authors = ["Ajin Abraham <[email protected]>"]
Expand Down

0 comments on commit 4e6111b

Please sign in to comment.