Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions cycode/cli/apps/scan/code_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from cycode.cli.console import console
from cycode.cli.exceptions import custom_exceptions
from cycode.cli.exceptions.handle_scan_errors import handle_scan_exception
from cycode.cli.files_collector.excluder import exclude_irrelevant_documents_to_scan
from cycode.cli.files_collector.excluder import excluder
from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip
from cycode.cli.files_collector.path_documents import get_relevant_documents
from cycode.cli.files_collector.repository_documents import (
Expand Down Expand Up @@ -56,8 +56,8 @@ def scan_sca_pre_commit(ctx: typer.Context, repo_path: str) -> None:
progress_bar_section=ScanProgressBarSection.PREPARE_LOCAL_FILES,
repo_path=repo_path,
)
git_head_documents = exclude_irrelevant_documents_to_scan(scan_type, git_head_documents)
pre_committed_documents = exclude_irrelevant_documents_to_scan(scan_type, pre_committed_documents)
git_head_documents = excluder.exclude_irrelevant_documents_to_scan(scan_type, git_head_documents)
pre_committed_documents = excluder.exclude_irrelevant_documents_to_scan(scan_type, pre_committed_documents)
sca_code_scanner.perform_pre_hook_range_scan_actions(repo_path, git_head_documents, pre_committed_documents)
scan_commit_range_documents(
ctx,
Expand All @@ -77,8 +77,8 @@ def scan_sca_commit_range(ctx: typer.Context, path: str, commit_range: str) -> N
from_commit_documents, to_commit_documents = get_commit_range_modified_documents(
progress_bar, ScanProgressBarSection.PREPARE_LOCAL_FILES, path, from_commit_rev, to_commit_rev
)
from_commit_documents = exclude_irrelevant_documents_to_scan(scan_type, from_commit_documents)
to_commit_documents = exclude_irrelevant_documents_to_scan(scan_type, to_commit_documents)
from_commit_documents = excluder.exclude_irrelevant_documents_to_scan(scan_type, from_commit_documents)
to_commit_documents = excluder.exclude_irrelevant_documents_to_scan(scan_type, to_commit_documents)
sca_code_scanner.perform_pre_commit_range_scan_actions(
path, from_commit_documents, from_commit_rev, to_commit_documents, to_commit_rev
)
Expand Down Expand Up @@ -288,7 +288,7 @@ def scan_commit_range(
{'path': path, 'commit_range': commit_range, 'commit_id': commit_id},
)

documents_to_scan.extend(exclude_irrelevant_documents_to_scan(scan_type, commit_documents_to_scan))
documents_to_scan.extend(excluder.exclude_irrelevant_documents_to_scan(scan_type, commit_documents_to_scan))

logger.debug('List of commit ids to scan, %s', {'commit_ids': commit_ids_to_scan})
logger.debug('Starting to scan commit range (it may take a few minutes)')
Expand Down
4 changes: 2 additions & 2 deletions cycode/cli/apps/scan/pre_commit/pre_commit_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from cycode.cli import consts
from cycode.cli.apps.scan.code_scanner import get_scan_parameters, scan_documents, scan_sca_pre_commit
from cycode.cli.files_collector.excluder import exclude_irrelevant_documents_to_scan
from cycode.cli.files_collector.excluder import excluder
from cycode.cli.files_collector.repository_documents import (
get_diff_file_content,
get_diff_file_path,
Expand Down Expand Up @@ -45,5 +45,5 @@ def pre_commit_command(
progress_bar.update(ScanProgressBarSection.PREPARE_LOCAL_FILES)
documents_to_scan.append(Document(get_path_by_os(get_diff_file_path(file)), get_diff_file_content(file)))

documents_to_scan = exclude_irrelevant_documents_to_scan(scan_type, documents_to_scan)
documents_to_scan = excluder.exclude_irrelevant_documents_to_scan(scan_type, documents_to_scan)
scan_documents(ctx, documents_to_scan, get_scan_parameters(ctx), is_git_diff=True)
4 changes: 2 additions & 2 deletions cycode/cli/apps/scan/repository/repository_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from cycode.cli import consts
from cycode.cli.apps.scan.code_scanner import get_scan_parameters, scan_documents
from cycode.cli.exceptions.handle_scan_errors import handle_scan_exception
from cycode.cli.files_collector.excluder import exclude_irrelevant_documents_to_scan
from cycode.cli.files_collector.excluder import excluder
from cycode.cli.files_collector.repository_documents import get_git_repository_tree_file_entries
from cycode.cli.files_collector.sca.sca_code_scanner import perform_pre_scan_documents_actions
from cycode.cli.logger import logger
Expand Down Expand Up @@ -57,7 +57,7 @@ def repository_command(
)
)

documents_to_scan = exclude_irrelevant_documents_to_scan(scan_type, documents_to_scan)
documents_to_scan = excluder.exclude_irrelevant_documents_to_scan(scan_type, documents_to_scan)

perform_pre_scan_documents_actions(ctx, scan_type, documents_to_scan)

Expand Down
9 changes: 8 additions & 1 deletion cycode/cli/apps/scan/scan_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
ISSUE_DETECTED_STATUS_CODE,
NO_ISSUES_STATUS_CODE,
)
from cycode.cli.files_collector.excluder import excluder
from cycode.cli.utils import scan_utils
from cycode.cli.utils.get_api_client import get_scan_cycode_client
from cycode.cli.utils.sentry import add_breadcrumb
Expand Down Expand Up @@ -138,13 +139,19 @@ def scan_command(

ctx.obj['show_secret'] = show_secret
ctx.obj['soft_fail'] = soft_fail
ctx.obj['client'] = get_scan_cycode_client(ctx)
ctx.obj['scan_type'] = scan_type
ctx.obj['sync'] = sync
ctx.obj['severity_threshold'] = severity_threshold
ctx.obj['monitor'] = monitor
ctx.obj['report'] = report

scan_client = get_scan_cycode_client(ctx)
ctx.obj['client'] = scan_client

remote_scan_config = scan_client.get_scan_configuration_safe(scan_type)
if remote_scan_config:
excluder.apply_scan_config(str(scan_type), remote_scan_config)

if export_type and export_file:
console_printer = ctx.obj['console_printer']
console_printer.enable_recording(export_type, export_file)
Expand Down
235 changes: 124 additions & 111 deletions cycode/cli/files_collector/excluder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,36 +10,12 @@
if TYPE_CHECKING:
from cycode.cli.models import Document
from cycode.cli.utils.progress_bar import BaseProgressBar, ProgressBarSection
from cycode.cyclient import models


# Module-level logger; the exclusion checks in this module report through it via logger.debug().
logger = get_logger('File Excluder')


def exclude_irrelevant_files(
    progress_bar: 'BaseProgressBar', progress_bar_section: 'ProgressBarSection', scan_type: str, filenames: list[str]
) -> list[str]:
    """Filter ``filenames`` down to the files that are relevant for ``scan_type``.

    The progress bar is advanced once per inspected file, whether or not the file is kept.
    The input order is preserved in the returned list.
    """
    kept = []
    for candidate in filenames:
        progress_bar.update(progress_bar_section)
        if not _is_relevant_file_to_scan(scan_type, candidate):
            continue
        kept.append(candidate)

    # The sub-path cache is not needed once filtering is done; clear it to free up memory.
    is_sub_path.cache_clear()

    return kept


def exclude_irrelevant_documents_to_scan(scan_type: str, documents_to_scan: list['Document']) -> list['Document']:
    """Return the documents from ``documents_to_scan`` that are relevant for ``scan_type``, in their original order."""
    logger.debug('Excluding irrelevant documents to scan')

    return [doc for doc in documents_to_scan if _is_relevant_document_to_scan(scan_type, doc.path, doc.content)]


def _is_subpath_of_cycode_configuration_folder(filename: str) -> bool:
return (
is_sub_path(configuration_manager.global_config_file_manager.get_config_directory_path(), filename)
Expand All @@ -63,43 +39,6 @@ def _does_document_exceed_max_size_limit(content: str) -> bool:
return get_content_size(content) > consts.FILE_MAX_SIZE_LIMIT_IN_BYTES


def _is_relevant_file_to_scan(scan_type: str, filename: str) -> bool:
    """Decide whether the file at ``filename`` should be scanned for ``scan_type``.

    The checks run in a fixed order; the first one that fails rejects the file.
    """
    if _is_subpath_of_cycode_configuration_folder(filename):
        logger.debug(
            'The file is irrelevant because it is in the Cycode configuration directory, %s',
            {'filename': filename, 'configuration_directory': consts.CYCODE_CONFIGURATION_DIRECTORY},
        )
        return False

    if _is_path_configured_in_exclusions(scan_type, filename):
        logger.debug('The file is irrelevant because its path is in the ignore paths list, %s', {'filename': filename})
        return False

    extension_supported = _is_file_extension_supported(scan_type, filename)
    if not extension_supported:
        logger.debug(
            'The file is irrelevant because its extension is not supported, %s',
            {'scan_type': scan_type, 'filename': filename},
        )
        return False

    if is_binary_file(filename):
        logger.debug('The file is irrelevant because it is a binary file, %s', {'filename': filename})
        return False

    is_sca_scan = scan_type == consts.SCA_SCAN_TYPE

    # The size limit applies to every scan type except SCA.
    if not is_sca_scan and _does_file_exceed_max_size_limit(filename):
        size_details = {
            'max_file_size': consts.FILE_MAX_SIZE_LIMIT_IN_BYTES,
            'file_size': get_file_size(filename),
            'filename': filename,
        }
        logger.debug('The file is irrelevant because it has exceeded the maximum size limit, %s', size_details)
        return False

    # SCA scans have one extra, SCA-specific relevance check.
    if is_sca_scan and not _is_file_relevant_for_sca_scan(filename):
        return False

    return True


def _is_file_relevant_for_sca_scan(filename: str) -> bool:
if any(sca_excluded_path in filename for sca_excluded_path in consts.SCA_EXCLUDED_PATHS):
logger.debug(
Expand All @@ -110,52 +49,126 @@ def _is_file_relevant_for_sca_scan(filename: str) -> bool:
return True


def _is_relevant_document_to_scan(scan_type: str, filename: str, content: str) -> bool:
    """Decide whether an in-memory document should be scanned for ``scan_type``.

    ``filename`` drives the path and extension checks; ``content`` drives the binary and size checks.
    The first failing check rejects the document.
    """
    if _is_subpath_of_cycode_configuration_folder(filename):
        logger.debug(
            'The document is irrelevant because it is in the Cycode configuration directory, %s',
            {'filename': filename, 'configuration_directory': consts.CYCODE_CONFIGURATION_DIRECTORY},
        )
        return False

    if _is_path_configured_in_exclusions(scan_type, filename):
        logger.debug(
            'The document is irrelevant because its path is in the ignore paths list, %s', {'filename': filename}
        )
        return False

    extension_supported = _is_file_extension_supported(scan_type, filename)
    if not extension_supported:
        logger.debug(
            'The document is irrelevant because its extension is not supported, %s',
            {'scan_type': scan_type, 'filename': filename},
        )
        return False

    if is_binary_content(content):
        logger.debug('The document is irrelevant because it is a binary file, %s', {'filename': filename})
        return False

    # The size limit is skipped for SCA scans; everything else must fit within it.
    if scan_type == consts.SCA_SCAN_TYPE or not _does_document_exceed_max_size_limit(content):
        return True

    size_details = {
        'max_document_size': consts.FILE_MAX_SIZE_LIMIT_IN_BYTES,
        'document_size': get_content_size(content),
        'filename': filename,
    }
    logger.debug('The document is irrelevant because it has exceeded the maximum size limit, %s', size_details)
    return False


def _is_file_extension_supported(scan_type: str, filename: str) -> bool:
    """Check the lower-cased ``filename`` against the extension rules of ``scan_type``.

    IaC and SCA scans accept only their supported suffixes; any other scan type accepts
    everything except the suffixes on the secret-scan ignore list.
    """
    lowered = filename.lower()

    if scan_type == consts.IAC_SCAN_TYPE:
        supported_suffixes = consts.IAC_SCAN_SUPPORTED_FILES
    elif scan_type == consts.SCA_SCAN_TYPE:
        supported_suffixes = consts.SCA_CONFIGURATION_SCAN_SUPPORTED_FILES
    else:
        is_ignored = lowered.endswith(consts.SECRET_SCAN_FILE_EXTENSIONS_TO_IGNORE)
        return not is_ignored

    return lowered.endswith(supported_suffixes)
class Excluder:
    """Decide which files and documents are relevant for a given scan type.

    Extension rules are kept per scan type. An allow-list (``_scannable_extensions``) takes
    precedence over a deny-list (``_non_scannable_extensions``); a scan type with neither accepts
    every extension. The allow-list can be overridden at runtime with ``apply_scan_config``.
    """

    def __init__(self) -> None:
        # scan type -> file name suffixes; when present, only files ending with one of them are scanned
        self._scannable_extensions: dict[str, tuple[str, ...]] = {
            consts.IAC_SCAN_TYPE: consts.IAC_SCAN_SUPPORTED_FILES,
            consts.SCA_SCAN_TYPE: consts.SCA_CONFIGURATION_SCAN_SUPPORTED_FILES,
        }
        # scan type -> file name suffixes to skip; consulted only when the scan type has no allow-list
        self._non_scannable_extensions: dict[str, tuple[str, ...]] = {
            consts.SECRET_SCAN_TYPE: consts.SECRET_SCAN_FILE_EXTENSIONS_TO_IGNORE,
        }

    def apply_scan_config(self, scan_type: str, scan_config: 'models.ScanConfiguration') -> None:
        """Replace the allow-list of ``scan_type`` with the extensions from ``scan_config``.

        An empty or missing ``scannable_extensions`` leaves the current rules untouched.
        """
        if scan_config.scannable_extensions:
            # File names are lower-cased before matching (see _is_file_extension_supported), so the
            # configured suffixes must be lower-cased too or mixed-case entries could never match.
            self._scannable_extensions[scan_type] = tuple(
                extension.lower() for extension in scan_config.scannable_extensions
            )

    def _is_file_extension_supported(self, scan_type: str, filename: str) -> bool:
        """Return whether ``filename`` passes the extension rules of ``scan_type`` (case-insensitive)."""
        filename = filename.lower()

        scannable_extensions = self._scannable_extensions.get(scan_type)
        if scannable_extensions:
            return filename.endswith(scannable_extensions)

        non_scannable_extensions = self._non_scannable_extensions.get(scan_type)
        if non_scannable_extensions:
            return not filename.endswith(non_scannable_extensions)

        # No rules registered for this scan type: every extension is accepted
        return True

    def _is_relevant_file_to_scan_common(self, scan_type: str, filename: str) -> bool:
        """Run the path-based checks shared by on-disk files and in-memory documents."""
        if _is_subpath_of_cycode_configuration_folder(filename):
            logger.debug(
                'The document is irrelevant because it is in the Cycode configuration directory, %s',
                {'filename': filename, 'configuration_directory': consts.CYCODE_CONFIGURATION_DIRECTORY},
            )
            return False

        if _is_path_configured_in_exclusions(scan_type, filename):
            logger.debug(
                'The document is irrelevant because its path is in the ignore paths list, %s', {'filename': filename}
            )
            return False

        if not self._is_file_extension_supported(scan_type, filename):
            logger.debug(
                'The document is irrelevant because its extension is not supported, %s',
                {'scan_type': scan_type, 'filename': filename},
            )
            return False

        return True

    def _is_relevant_file_to_scan(self, scan_type: str, filename: str) -> bool:
        """Return whether the on-disk file ``filename`` should be scanned for ``scan_type``."""
        if not self._is_relevant_file_to_scan_common(scan_type, filename):
            return False

        if is_binary_file(filename):
            logger.debug('The file is irrelevant because it is a binary file, %s', {'filename': filename})
            return False

        # The size limit is not applied to SCA scans
        if scan_type != consts.SCA_SCAN_TYPE and _does_file_exceed_max_size_limit(filename):
            logger.debug(
                'The file is irrelevant because it has exceeded the maximum size limit, %s',
                {
                    'max_file_size': consts.FILE_MAX_SIZE_LIMIT_IN_BYTES,
                    'file_size': get_file_size(filename),
                    'filename': filename,
                },
            )
            return False

        # SCA scans have one extra, SCA-specific relevance check
        return not (scan_type == consts.SCA_SCAN_TYPE and not _is_file_relevant_for_sca_scan(filename))

    def _is_relevant_document_to_scan(self, scan_type: str, filename: str, content: str) -> bool:
        """Return whether the in-memory document (``filename`` + ``content``) should be scanned."""
        if not self._is_relevant_file_to_scan_common(scan_type, filename):
            return False

        if is_binary_content(content):
            logger.debug('The document is irrelevant because it is a binary file, %s', {'filename': filename})
            return False

        # The size limit is not applied to SCA scans
        if scan_type != consts.SCA_SCAN_TYPE and _does_document_exceed_max_size_limit(content):
            logger.debug(
                'The document is irrelevant because it has exceeded the maximum size limit, %s',
                {
                    'max_document_size': consts.FILE_MAX_SIZE_LIMIT_IN_BYTES,
                    'document_size': get_content_size(content),
                    'filename': filename,
                },
            )
            return False

        return True

    def exclude_irrelevant_files(
        self,
        progress_bar: 'BaseProgressBar',
        progress_bar_section: 'ProgressBarSection',
        scan_type: str,
        filenames: list[str],
    ) -> list[str]:
        """Return the entries of ``filenames`` that are relevant for ``scan_type``.

        The progress bar is advanced once per inspected file, whether or not the file is kept.
        """
        relevant_files = []
        for filename in filenames:
            progress_bar.update(progress_bar_section)
            if self._is_relevant_file_to_scan(scan_type, filename):
                relevant_files.append(filename)

        is_sub_path.cache_clear()  # free up memory

        return relevant_files

    def exclude_irrelevant_documents_to_scan(
        self, scan_type: str, documents_to_scan: list['Document']
    ) -> list['Document']:
        """Return the documents that are relevant for ``scan_type``, preserving their order."""
        logger.debug('Excluding irrelevant documents to scan')

        return [
            document
            for document in documents_to_scan
            if self._is_relevant_document_to_scan(scan_type, document.path, document.content)
        ]


# Shared module-level instance: the scan commands import this object and call its methods, and
# scan_command applies the remote scan configuration to it via apply_scan_config().
excluder = Excluder()
Loading