diff --git a/pre-commit-src/pre-commit b/pre-commit-src/pre-commit --- a/pre-commit-src/pre-commit +++ b/pre-commit-src/pre-commit @@ -6,54 +6,54 @@ set -Eeuo pipefail # Allow user input during commit exec < /dev/tty templates_content='{}' templates_names=() unique_json_files=() unique_filterlists_to_include=() -all_domains_variables='[]' +all_domains_variables_in_json_files='[]' all_domains_variables_names='{}' all_json_files_contents='{}' variables_in_json_files='{}' -variables_in_filterlists='{}' +variables_in_included_filterlists='{}' all_domain_variables_matches_in_filterlists='[]' last_error='' testing=false error_handler() { local exit_code=$? local line_number=$1 echo "Error: Script failed with exit code $exit_code at line $line_number" - if [ "$BASH_COMMAND" = "return 1" ]; then + if [ "${BASH_COMMAND:-}" = "return 1" ]; then echo -e "Last error message:\n$last_error" else echo -e "\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n" echo "THIS ERROR SHOULDN'T HAPPEN, PLEASE REPORT IT TO AFB TEAM OR KRIS" echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n" - echo "Last executed command: $BASH_COMMAND" + echo "Last executed command: ${BASH_COMMAND:-}" fi - if [ "$testing" = true ]; then + if [ "$testing" = true ] && [ "${BASH_COMMAND:-}" = "return 1" ]; then exit 0 else exit $exit_code fi } # Set up trap to catch errors and invoke the error handler trap 'error_handler $LINENO' ERR check_git_status() { local status=$(git status) - if grep -q "Changes not staged for commit" <<< $status || grep -q "Untracked files" <<< $status; then + local git_status=$(grep -s "Changes not staged for commit" <<< $status || grep -s "Untracked files" <<< $status || echo '') + if ! [ -z "$git_status" ]; then read -p "There are changes not staged for commit. The script will check only the staged version. Do you want to continue? (y/n): " choice case "$choice" in - # Echo empty line y|Y ) echo "";; n|N ) echo "Aborting."; exit 1;; * ) echo "Invalid choice. Aborting."; exit 1;; esac fi } check_if_jq_is_installed() { @@ -89,21 +89,19 @@ get_staged_version_of_a_file() { parse_template_data() { local template="$1" local -n file_data_nameref=$2 local staged_template get_staged_version_of_a_file "$template" staged_template # Extract lines starting with %domainsVariables exceptionrules: and process them with jq - local json_files_in_template - json_files_in_template=$(grep "^%domainsVariables exceptionrules:" <<< "$staged_template" | sed 's/^%domainsVariables exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))') || json_files_in_template="[]" + local json_files_in_template=$(grep "^%domainsVariables exceptionrules:" <<< "$staged_template" | sed 's/^%domainsVariables exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))' || echo '[]') # Extract lines starting with %include exceptionrules: and process them with jq - local included_filterlists_files_in_template - included_filterlists_files_in_template=$(grep "^%include exceptionrules:" <<< "$staged_template" | sed 's/^%include exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))') || included_filterlists_files_in_template="[]" + local included_filterlists_files_in_template=$(grep "^%include exceptionrules:" <<< "$staged_template" | sed 's/^%include exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))' || echo '[]') if [ "$included_filterlists_files_in_template" = "[]" ]; then last_error="ERROR: There is no list included in template $template_name" return 1 fi # Create a JSON object with the template name as the key and the extracted domainsVariables and include data as values file_data_nameref=$(jq -n --arg template "$template" --argjson json_files_in_template "$json_files_in_template" --argjson included_filterlists_files_in_template "$included_filterlists_files_in_template" ' @@ -122,45 +120,55 @@ update_templates_content() { } update_unique_json_files() { local template_name="$1" local file_data="$2" # Extract the list of domainsVariables files from the template data local json_files_list=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< "$file_data") for json_file in $json_files_list; do - if ! grep -qwF "$json_file" <<< "${unique_json_files[@]}"; then + local matches_in_unique_json_files=$(grep -swF "$json_file" <<< "${unique_json_files[@]}" || echo '') + if [ -z "$matches_in_unique_json_files" ]; then unique_json_files+=("$json_file") fi done } update_unique_includes() { local template_name="$1" local file_data="$2" # Extract the list of included files from the template data local included_files_list=$(jq -r --arg file "$template_name" '.[$file].include[]' <<< "$file_data") for included_filterlist in $included_files_list; do - if ! grep -qwF "$included_filterlist" <<< "${unique_filterlists_to_include[@]}"; then + local matches_in_unique_filterlists=$(grep -swF "$included_filterlist" <<< "${unique_filterlists_to_include[@]}" || echo '') + if [ -z "$matches_in_unique_filterlists" ]; then unique_filterlists_to_include+=("$included_filterlist") fi done } check_if_valid_json() { local json_file_path="$1" if ! jq -e . >/dev/null 2>&1 <<< "$(cat "$json_file_path")"; then last_error="ERROR: Invalid JSON content in $json_file_path" return 1 fi } +get_domains_variables_names_incl_duplicates() { + local json_file_content="$1" + local -n domains_variables_names_incl_duplicated_nameref=$2 + + # If jq would be used the duplicates would be automatically removed, therefore I used perl + domains_variables_names_incl_duplicated_nameref=$(perl -0777 -ne 'print "$1\n" while /"([^"]+?)"(?=[\s\r\n]*:)/g' <<< $json_file_content) +} check_if_correct_domains_variables_json_structure() { local json_file_content="$1" + local json_file_path="$2" # Check if the JSON structure is valid and matches the expected format: # { "variable1": ["domain1", "domain2" (...)], "variable2": ["domain1", "domain3" (...)], (...)} if ! echo $json_file_content | jq -e ' type == "object" and ([keys[] as $k | .[$k] | type == "array" and all(.[]; type == "string")] | all) ' >/dev/null 2>&1; then last_error="ERROR: JSON structure is invalid in $json_file_path" @@ -199,128 +207,147 @@ check_if_duplicated_domains() { } check_if_correct_domain() { local json_file_path="$1" local domains_variable_name="$2" local domains="$3" for domain in $domains; do # Check if the domain matches the expected pattern - if ! echo "$domain" | grep -qP "^(?:(?:(?!-)[A-Za-z0-9-]{1,63}(? 0))')" '$all_domains_variables + $domains_variables_names | unique') + # Merge the new domains variables into the existing all_domains_variables_in_json_files array + all_domains_variables_in_json_files=$(jq -n --argjson all_domains_variables_in_json_files "$all_domains_variables_in_json_files" --argjson domains_variables_names "$(echo "$domains_variables_names" | jq -R -s 'split("\n") | map(select(length > 0))')" '$all_domains_variables_in_json_files + $domains_variables_names | unique') # Add the JSON file content to the all_json_files_contents object all_json_files_contents=$(echo "$all_json_files_contents" | jq --arg json_file_path "$json_file_path" --argjson json_file_content "$json_file_content" '.[$json_file_path] = $json_file_content') # Update the variables_in_json_files object with the keys from the JSON file variables_in_json_files=$(echo "$variables_in_json_files" | jq --arg key "$json_file_path" --argjson value "$(echo "$all_json_files_contents" | jq 'to_entries | map({key: .key, value: (.value | keys)}) | from_entries' | jq --arg key "$json_file_path" '.[$key]')" '. * {($key): $value}') } check_if_duplicated_domains_variable_name_between_files() { local json_file_path="$1" local domains_variables_names="$2" for domains_variable_name in $domains_variables_names; do - # Check if the domains variable name exists in the all_domains_variables array - if jq -e --arg name "$domains_variable_name" 'index($name) != null' <<< "$all_domains_variables" >/dev/null 2>&1; then + # Check if the domains variable name exists in the all_domains_variables_in_json_files array + if jq -e --arg name "$domains_variable_name" 'index($name) != null' <<< "$all_domains_variables_in_json_files" >/dev/null 2>&1; then # If a duplicate is found, identify the file containing the duplicate and report an error local file_with_duplicate=$(jq -r --arg name "$domains_variable_name" ' to_entries | map(select(.value | has($name))) | .[0].key ' <<< "$all_json_files_contents") last_error="ERROR: Duplicate domains variable found in $json_file_path and $file_with_duplicate file: $domains_variable_name" return 1 fi done } -find_domain_variables_in_filterlist() { +find_domain_variables_syntax_in_filterlist() { local filterlist_content="$1" local filterlist_path="$2" local -n all_lines_with_domain_variables_in_filterlist_nameref=$3 # Find lines containing domain variables in the filterlist - all_lines_with_domain_variables_in_filterlist_nameref=$(grep -P '%<\{.*\}>%' <<< "$filterlist_content") || all_lines_with_domain_variables_in_filterlist_nameref='' + # The regex to find them is simpler than in filterlist delivery to also catch domains variables in the wrong place + # without starting with a complex regex. The full regex is in one of the next steps + all_lines_with_domain_variables_in_filterlist_nameref=$(grep -P '%<\{.*\}>%' <<< "$filterlist_content" || echo '') } +check_for_simiar_to_domain_variable() { + local filterlist_content="$1" + local filterlist_path="$2" + + local regex_for_missing_beginning='([^%]|^)<\{|%[^<]?\{|%<[^{]' + local regex_for_missing_ending='[^}]>%|\}[^>]?%|\}>([^%]|$)' + local full_regex_for_missing_character="($regex_for_missing_beginning).*($regex_for_missing_ending|\}>%)|($regex_for_missing_beginning|%<\{).*($regex_for_missing_ending)" + local lines_with_similar_to_domain_variable + + lines_with_similar_to_domain_variable=$(grep -P "$full_regex_for_missing_character" <<< "$filterlist_content" || echo '') + if [ -n "$lines_with_similar_to_domain_variable" ]; then + last_error="ERROR: Found a line in $filterlist_path that is similar to a domain variable, but it's not a domain variable:\n\n" + last_error+="$lines_with_similar_to_domain_variable" + return 1 + fi + } + process_filters() { local all_lines_with_domain_variables_in_filterlist="$1" local -n domains_variables_collected_from_filterlist_nameref="$2" domains_variables_collected_from_filterlist_nameref='[]' for filter in $all_lines_with_domain_variables_in_filterlist; do # Extract the domain variable from the filter local domains_variable_match=$(grep -oP '(?<=%<\{).*?(?=\}>%)' <<< "$filter") if [ "$(echo "$domains_variable_match" | wc -l)" -gt 1 ]; then last_error="ERROR: More than 2 domain variables found in filter: $filter" return 1 fi # Ensure the domain variable is correctly formatted in the filter - local true_matches=$(grep -P '(%<{(\w+)}>%(?:,~?[a-zA-Z0-9*.~-]+)*#[?@$]?#)|([,$]domain=(?:[a-zA-Z0-9*.~-]+\|)*%<{(\w+)}>%)' <<< "$filter") + local true_matches + true_matches=$(grep -P '(%<{(\w+)}>%(?:,~?[a-zA-Z0-9*.~-]+)*#[?@$]?#)|([,$]domain=(?:[a-zA-Z0-9*.~-]+\|)*%<{(\w+)}>%)' <<< "$filter") if [ -z "$true_matches" ]; then last_error="ERROR: Domain variable added in a wrong way in filter: $filter" return 1 fi domains_variables_collected_from_filterlist_nameref=$(jq --arg domains_variable_match "$domains_variable_match" '. + [$domains_variable_match]' <<< "$domains_variables_collected_from_filterlist_nameref") done } update_matches_and_variables() { local domains_variables_collected_from_filterlist="$1" local file_path="$2" # Update the list of all domain variable matches in filterlists all_domain_variables_matches_in_filterlists=$(jq -n --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" --argjson matches "$domains_variables_collected_from_filterlist" '$all_domain_variables_matches_in_filterlists + $matches | unique') - # Update the variables_in_filterlists object with the matches from the current filterlist - variables_in_filterlists=$(jq --arg key "$file_path" --argjson value "$domains_variables_collected_from_filterlist" '.[$key] = $value' <<< "$variables_in_filterlists") + # Update the variables_in_included_filterlists object with the matches from the current filterlist + variables_in_included_filterlists=$(jq --arg key "$file_path" --argjson value "$domains_variables_collected_from_filterlist" '.[$key] = $value' <<< "$variables_in_included_filterlists") } extract_domains_variables_in_included_filterlists() { + local template_name="$1" local -n domains_variables_in_included_filterlists_nameref=$2 # Extract the list of included filterlists from the template local included_filterlists=$(jq -r --arg template_name "$template_name" '.[$template_name].include[]' <<< "$templates_content") domains_variables_in_included_filterlists_nameref=() for included_filterlist in $included_filterlists; do # Extract the domain variables from each included filterlist - local domains_variables=$(jq -r --arg key "$included_filterlist" '.[$key][]' <<< "$variables_in_filterlists") + local domains_variables=$(jq -r --arg key "$included_filterlist" '.[$key][]' <<< "$variables_in_included_filterlists") for domain_variable in $domains_variables; do domains_variables_in_included_filterlists_nameref+=("$domain_variable") done done - } extract_domains_variables_in_included_json_files() { local template_name="$1" local -n domains_variables_in_included_json_files_nameref=$2 # Extract the list of included JSON files from the template local included_json_files=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< "$templates_content") @@ -331,17 +358,16 @@ extract_domains_variables_in_included_js local domains_variables=$(jq -r --arg key "$included_json_file" '.[$key][]' <<< "$variables_in_json_files") for domain_variable in $domains_variables; do domains_variables_in_included_json_files_nameref+=("$domain_variable") done done } check_domain_variables_in_filterlists() { - local template_name="$1" local domains_variables_in_included_filterlists=() local domains_variables_in_included_json_files=() # When for example $2 was empty, then the array had one element with empty string if [ -n "$2" ]; then domains_variables_in_included_filterlists=($2) fi @@ -368,26 +394,27 @@ check_domain_variables_in_filterlists() last_error+="which wasn't found in any of the domains variables files included in that template:\n\n" last_error+="$included_json_files" return 1 fi done } check_if_domains_variables_are_identical_in_lists_and_jsons() { - if [ "$all_domains_variables" != "$all_domain_variables_matches_in_filterlists" ]; then + if [ "$all_domains_variables_in_json_files" != "$all_domain_variables_matches_in_filterlists" ]; then last_error="Error: the domain variables in domain-variables file and the filter list are not the same\n" last_error+="Extra variables in domain-variables files:\n" - last_error+="$(jq -n --argjson all_domains_variables "$all_domains_variables" --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" '$all_domains_variables - $all_domain_variables_matches_in_filterlists')\n" + last_error+="$(jq -n --argjson all_domains_variables_in_json_files "$all_domains_variables_in_json_files" --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" '$all_domains_variables_in_json_files - $all_domain_variables_matches_in_filterlists')\n" last_error+="Extra variables in filter lists:\n" - last_error+=$(jq -n --argjson all_domains_variables "$all_domains_variables" --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" '$all_domain_variables_matches_in_filterlists - $all_domains_variables') + last_error+=$(jq -n --argjson all_domains_variables_in_json_files "$all_domains_variables_in_json_files" --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" '$all_domain_variables_matches_in_filterlists - $all_domains_variables_in_json_files') return 1 fi } + main() { check_git_status check_if_jq_is_installed for template_name in *.txt; do templates_names+=("$template_name") # To avoid creating a subshell, the variable is passed as a reference to parse_template_data function # That helps with the error handling and allows to use global variables local file_data @@ -399,35 +426,35 @@ main() { update_unique_json_files "$template_name" "$file_data" update_unique_includes "$template_name" "$file_data" done for domains_variables_path in ${unique_json_files[@]}; do check_if_file_exists "$domains_variables_path" local staged_domains_variables_file get_staged_version_of_a_file "$domains_variables_path" staged_domains_variables_file - check_if_correct_domains_variables_json_structure "$staged_domains_variables_file" + check_if_correct_domains_variables_json_structure "$staged_domains_variables_file" "$domains_variables_path" + local domains_variables_names_incl_duplicates + get_domains_variables_names_incl_duplicates "$staged_domains_variables_file" domains_variables_names_incl_duplicates + + check_if_correct_domains_variables "$domains_variables_path" "$domains_variables_names_incl_duplicates" "$staged_domains_variables_file" - # If jq would be used the duplicates would be automatically removed, therefore I used perl - local domains_variables_names=$(perl -0777 -ne 'print "$1\n" while /"([^"]+?)"(?=[\s\r\n]*:)/g' <<< $staged_domains_variables_file) - check_if_correct_domains_variables "$domains_variables_path" "$domains_variables_names" "$staged_domains_variables_file" - check_if_duplicated_domains_variable_name_between_files "$domains_variables_path" "$domains_variables_names" - update_domains_variables_data "$domains_variables_names" "$domains_variables_path" + check_if_duplicated_domains_variable_name_between_files "$domains_variables_path" "$domains_variables_names_incl_duplicates" + update_domains_variables_data "$domains_variables_names_incl_duplicates" "$domains_variables_path" done for filterlist_path in ${unique_filterlists_to_include[@]}; do check_if_file_exists "$filterlist_path" local filterlist_content get_staged_version_of_a_file "$filterlist_path" filterlist_content + check_for_simiar_to_domain_variable "$filterlist_content" "$filterlist_path" + local all_lines_with_domain_variables_in_filterlist - # This regex check is simpler than in filterlist delivery to also catch domains variables in the wrong place - # without starting with a complex regex. The full regex is in one of the next steps - - find_domain_variables_in_filterlist "$filterlist_content" "$filterlist_path" all_lines_with_domain_variables_in_filterlist + find_domain_variables_syntax_in_filterlist "$filterlist_content" "$filterlist_path" all_lines_with_domain_variables_in_filterlist if [ -z "$all_lines_with_domain_variables_in_filterlist" ]; then # In case of lack of matches, the value of all_lines_with_domain_variables_in_filterlist should have just # a message to show. local domains_variables_collected_from_filterlist='[]' else local domains_variables_collected_from_filterlist process_filters "$all_lines_with_domain_variables_in_filterlist" domains_variables_collected_from_filterlist @@ -463,39 +490,44 @@ check_unit_tests() { return $exit_status else last_error="Unit tests passed successfully" fi } check_pre_commit_files() { pre_commit_git_status=$(git status :pre-commit-src/pre-commit) - - if grep -q "Changes not staged for commit" <<< $pre_commit_git_status || grep -q "Untracked files" <<< $pre_commit_git_status; then + logs_for_unstaged_changes_in_pre_commit=$(grep -s "Changes not staged for commit" <<< $pre_commit_git_status || grep -s "Untracked files" <<< $pre_commit_git_status || echo '') + if ! [ -z "$logs_for_unstaged_changes_in_pre_commit" ]; then last_error="Unstaged changes detected in pre-commit file. Stage pre-commit changes before continuing." return 1 fi pre_commit_tests_git_status=$(git status :pre-commit-src/tests/pre-commit-tests.sh) - if grep -q "Changes not staged for commit" <<< $pre_commit_tests_git_status || grep -q "Untracked files" <<< $pre_commit_tests_git_status; then - last_error="Unstaged changes detected in pre-commit-tests file. Stage pre-commit changes before continuing." + logs_for_unstaged_changes_in_pre_commit_tests=$(grep -s "Changes not staged for commit" <<< $pre_commit_tests_git_status || grep -s "Untracked files" <<< $pre_commit_tests_git_status || echo '') + if ! [ -z "$logs_for_unstaged_changes_in_pre_commit_tests" ]; then + last_error="Unstaged changes detected in pre-commit-tests file. Stage pre-commit tests changes before continuing." return 1 fi - if grep -q "Changes to be committed" <<< $pre_commit_tests_git_status || grep -q "Changes to be committed" <<< $pre_commit_git_status; then + + # Only if something changed in pre commit or pre commit tests the unit tests should be run + logs_for_commited_changes_in_pre_commit_or_test=$(grep -s "Changes to be committed" <<< $pre_commit_tests_git_status || grep -s "Changes to be committed" <<< $pre_commit_git_status || echo '') + if ! [ -z "$logs_for_commited_changes_in_pre_commit_or_test" ]; then check_unit_tests fi } # For testing purposes only if the script has no arguments or the argument is main the process should run # thanks to that the script can be tested without running the main function if [ -z "${1:-}" ] || [ "$1" = "main" ]; then main check_pre_commit_files + echo "Pre-commit checks passed successfully. Double check if there were no error messages above this message before pushing" exit 0 elif [ "$1" = "--load-only" ]; then testing=true echo "Script loaded successfully" else "$@" fi