Improved pre-commit check
authoreyeokg <k.galczynski@eyeo.com>
Wed, 30 Oct 2024 16:46:48 +0100
changeset 25706 4cbb4d574255
parent 25705 d7bf4e5f34aa
child 25707 2a19e3a985b6
Improved pre-commit check
exceptionrules/domains-variables/domains-variables2.json
pre-commit
--- a/exceptionrules/domains-variables/domains-variables2.json
+++ b/exceptionrules/domains-variables/domains-variables2.json
@@ -1,10 +1,6 @@
 {
     "bbbba": [
         "uponit1.com",
         "uponit2.com"
-    ],
-    "aaaaa": [
-        "taboola1.com",
-        "taboola2.com"
     ]
 }
\ No newline at end of file
--- a/pre-commit
+++ b/pre-commit
@@ -1,72 +1,123 @@
 #!/bin/bash
 
 # Improve error handling, option e is not picked because it's expected for functions
 # to return non-0 statuses.
 set -Eeuo pipefail
 
+# Allow user input during commit
+exec < /dev/tty
+
 templates_content='{}'
 templates_names=()
 unique_json_files=()
 unique_filterlists_to_include=()
 all_domains_variables='[]'
 all_domains_variables_names='{}'
 all_json_files_contents='{}'
 variables_in_json_files='{}'
 variables_in_filterlists='{}'
 all_domain_variables_matches_in_filterlists='[]'
 last_error=''
+testing=false
 
 error_handler() {
     local exit_code=$?
     local line_number=$1
     echo "Error: Script failed with exit code $exit_code at line $line_number"
     if [ "$BASH_COMMAND" = "return 1" ]; then
         echo -e "Last error message:\n$last_error"
     else
         echo -e "\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
         echo "THIS ERROR SHOULDN'T HAPPEN, PLEASE REPORT IT TO AFB TEAM OR KRIS"
         echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n"
         echo "Last executed command: $BASH_COMMAND"
     fi
-
-    exit $exit_code
+    if [ "$testing" = true ]; then
+        exit 0
+    else
+        exit $exit_code
+    fi
 }
 
 # Set up trap to catch errors and invoke the error handler
-trap 'error_handler $LINENO $BASH_COMMAND' ERR
+trap 'error_handler $LINENO' ERR
+
+check_git_status() {
+    local status=$(git status)
+
+    if grep -q "Changes not staged for commit" <<< $status || grep -q "Untracked files" <<< $status; then
+        read -p "There are changes not staged for commit. The script will check only the staged version. Do you want to continue? (y/n): " choice
+        case "$choice" in 
+            # Echo empty line
+            y|Y ) echo "";;
+            n|N ) echo "Aborting."; exit 1;;
+            * ) echo "Invalid choice. Aborting."; exit 1;;
+        esac
+    fi
+}
 
 check_if_jq_is_installed() {
     if ! command -v jq &>/dev/null; then
         last_error="ERROR: jq is not installed. Please install jq to continue."
         return 1
     fi
 }
 
+check_if_file_exists() {
+    local file_path="$1"
+    if ! [ -f "$file_path" ]; then
+        last_error="ERROR: File $file_path does not exist"
+        return 1
+    fi
+}
+
+get_staged_version_of_a_file() {
+    local file_name="$1"
+
+    # Using name reference in order to not create subshells for each assignment and allow to use global variables
+    # and global error handling
+    local -n staged_file_content_nameref=$2
+
+    check_if_file_exists "$file_name"
+    if git show :"$file_name" >/dev/null 2>&1; then
+        staged_file_content_nameref="$(git show :"$file_name")"
+    else
+        last_error="ERROR: File $file_name was requested by a template but it's not tracked neither staged."
+        return 1
+    fi
+}
 parse_template_data() {
     local template="$1"
+    local -n file_data_nameref=$2
+    local staged_template
+
+    get_staged_version_of_a_file "$template" staged_template
+
     # Extract lines starting with %domainsVariables exceptionrules: and process them with jq
-    local json_files_in_template=$(grep "^%domainsVariables exceptionrules:" "$template" | sed 's/^%domainsVariables exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))')
+    local json_files_in_template
+    json_files_in_template=$(grep "^%domainsVariables exceptionrules:" <<< "$staged_template" | sed 's/^%domainsVariables exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))') || json_files_in_template="[]"
     # Extract lines starting with %include exceptionrules: and process them with jq
-    local included_filterlists_files_in_template=$(grep "^%include exceptionrules:" "$template" | sed 's/^%include exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))')
+    local included_filterlists_files_in_template
+    included_filterlists_files_in_template=$(grep "^%include exceptionrules:" <<< "$staged_template" | sed 's/^%include exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))') || included_filterlists_files_in_template="[]"
 
-    if [ $(jq length <<< "$included_filterlists_files_in_template") -eq 0 ]; then
+    if [ "$included_filterlists_files_in_template" = "[]" ]; then
         last_error="ERROR: There is no list included in template $template_name"
         return 1
     fi
 
     # Create a JSON object with the template name as the key and the extracted domainsVariables and include data as values
-    jq -n --arg template "$template" --argjson json_files_in_template "$json_files_in_template" --argjson included_filterlists_files_in_template "$included_filterlists_files_in_template" '
+    file_data_nameref=$(jq -n --arg template "$template" --argjson json_files_in_template "$json_files_in_template" --argjson included_filterlists_files_in_template "$included_filterlists_files_in_template" '
         {
             $template: {
                 "domainsVariables": $json_files_in_template,
                 "include": $included_filterlists_files_in_template
             }
-        }'
+        }')
 }
 
 update_templates_content() {
     local file_template="$1"
     # Merge the new template data into the existing templates_content JSON
     templates_content=$(jq -n --argjson templates_content "$templates_content" --argjson file_template "$file_template" '$templates_content + $file_template')
 }
 
@@ -90,40 +141,32 @@ update_unique_includes() {
 
     for included_filterlist in $included_files_list; do
         if ! grep -qwF "$included_filterlist" <<< "${unique_filterlists_to_include[@]}"; then
             unique_filterlists_to_include+=("$included_filterlist")
         fi
     done
 }
 
-check_if_file_exists() {
-    local file_path="$1"
-    if ! [ -f "$file_path" ]; then
-        last_error="ERROR: File $file_path does not exist"
-        return 1
-    fi
-}
-
 check_if_valid_json() {
     local json_file_path="$1"
     if ! jq -e . >/dev/null 2>&1 <<< "$(cat "$json_file_path")"; then
         last_error="ERROR: Invalid JSON content in $json_file_path"
         return 1
     fi
 }
 
 check_if_correct_domains_variables_json_structure() {
-    local json_file_path="$1"
+    local json_file_content="$1"
     # Check if the JSON structure is valid and matches the expected format:
     # { "variable1": ["domain1", "domain2" (...)], "variable2": ["domain1", "domain3" (...)], (...)}
-    if ! jq -e '
+    if ! echo $json_file_content | jq -e '
         type == "object" and
         ([keys[] as $k | .[$k] | type == "array" and all(.[]; type == "string")] | all)
-        ' "$json_file_path"  >/dev/null 2>&1;
+        '   >/dev/null 2>&1;
     then
         last_error="ERROR: JSON structure is invalid in $json_file_path"
         return 1
     fi
 }
 
 check_if_duplicated_domains_variable_name_in_single_file() {
     local json_file_path="$1"
@@ -166,23 +209,24 @@ check_if_correct_domain() {
             return 1
         fi
     done
 }
 
 check_if_correct_domains_variables() {
     local json_file_path="$1"
     local domains_variables_names="$2"
-
+    local json_file_content="$3"
     check_if_duplicated_domains_variable_name_in_single_file "$json_file_path" "$domains_variables_names"
 
     for domains_variable_name in $domains_variables_names; do
         check_if_valid_domains_variable_name "$domains_variable_name"
         # Extract the value associated with the domains variable name from the JSON file
-        local domains=$(jq -r --arg key "$domains_variable_name" '.[$key][]' "$json_file_path")
+        local domains=$(jq -r --arg key "$domains_variable_name" '.[$key][]' <<< "$json_file_content")
+
         check_if_duplicated_domains "$json_file_path" "$domains_variable_name" "$domains"
         check_if_correct_domain "$json_file_path" "$domains_variable_name" "$domains"
     done
 }
 
 update_domains_variables_data() {
     local domains_variables_names="$1"
     local json_file_path="$2"
@@ -213,96 +257,91 @@ check_if_duplicated_domains_variable_nam
             return 1
         fi
     done
 }
 
 find_domain_variables_in_filterlist() {
     local filterlist_content="$1"
     local filterlist_path="$2"
-    
+    local -n all_lines_with_domain_variables_in_filterlist_nameref=$3
     # Find lines containing domain variables in the filterlist
-    local all_lines_with_domain_variables_in_filterlist=$(grep -P '%<\{.*\}>%' <<< "$filterlist_content") || {
-        variables_in_filterlists=$(jq --arg key "$filterlist_path" --argjson value "[]" '.[$key] = $value' <<< "$variables_in_filterlists")
-        echo "No domains variables found in $filterlist_path"
-        return 0
-    }
-    echo -n "$all_lines_with_domain_variables_in_filterlist"
+    all_lines_with_domain_variables_in_filterlist_nameref=$(grep -P '%<\{.*\}>%' <<< "$filterlist_content") || all_lines_with_domain_variables_in_filterlist_nameref=''
 }
 
 process_filters() {
     local all_lines_with_domain_variables_in_filterlist="$1"
-    local collected_matches='[]'
+    local -n domains_variables_collected_from_filterlist_nameref="$2"
+    domains_variables_collected_from_filterlist_nameref='[]'
 
     for filter in $all_lines_with_domain_variables_in_filterlist; do
         # Extract the domain variable from the filter
         local domains_variable_match=$(grep -oP '(?<=%<\{).*?(?=\}>%)' <<< "$filter")
         if [ "$(echo "$domains_variable_match" | wc -l)" -gt 1 ]; then
             last_error="ERROR: More than 2 domain variables found in filter: $filter"
             return 1
         fi
         # Ensure the domain variable is correctly formatted in the filter
         local true_matches=$(grep -P '(%<{(\w+)}>%(?:,~?[a-zA-Z0-9*.~-]+)*#[?@$]?#)|([,$]domain=(?:[a-zA-Z0-9*.~-]+\|)*%<{(\w+)}>%)' <<< "$filter")
 
         if [ -z "$true_matches" ]; then
             last_error="ERROR: Domain variable added in a wrong way in filter: $filter"
             return 1
         fi
-        collected_matches=$(jq --arg domains_variable_match "$domains_variable_match" '. + [$domains_variable_match]' <<< "$collected_matches")
+        domains_variables_collected_from_filterlist_nameref=$(jq --arg domains_variable_match "$domains_variable_match" '. + [$domains_variable_match]' <<< "$domains_variables_collected_from_filterlist_nameref")
     done
-
-    # To avoid \n at the end
-    echo -n "$collected_matches"
 }
 
 update_matches_and_variables() {
-    local collected_matches="$1"
+    local domains_variables_collected_from_filterlist="$1"
     local file_path="$2"
     
     # Update the list of all domain variable matches in filterlists
-    all_domain_variables_matches_in_filterlists=$(jq -n --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" --argjson matches "$collected_matches" '$all_domain_variables_matches_in_filterlists + $matches | unique')
+    all_domain_variables_matches_in_filterlists=$(jq -n --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" --argjson matches "$domains_variables_collected_from_filterlist" '$all_domain_variables_matches_in_filterlists + $matches | unique')
+
     # Update the variables_in_filterlists object with the matches from the current filterlist
-    variables_in_filterlists=$(jq --arg key "$file_path" --argjson value "$collected_matches" '.[$key] = $value' <<< "$variables_in_filterlists")
+    variables_in_filterlists=$(jq --arg key "$file_path" --argjson value "$domains_variables_collected_from_filterlist" '.[$key] = $value' <<< "$variables_in_filterlists")
 }
 
 extract_domains_variables_in_included_filterlists() {
     local template_name="$1"
+    local -n domains_variables_in_included_filterlists_nameref=$2
     # Extract the list of included filterlists from the template
     local included_filterlists=$(jq -r --arg template_name "$template_name" '.[$template_name].include[]' <<< "$templates_content")
-    local domains_variables_in_included_filterlists=()
+    domains_variables_in_included_filterlists_nameref=()
 
     for included_filterlist in $included_filterlists; do
         # Extract the domain variables from each included filterlist
         local domains_variables=$(jq -r --arg key "$included_filterlist" '.[$key][]' <<< "$variables_in_filterlists")
         for domain_variable in $domains_variables; do
-            domains_variables_in_included_filterlists+=("$domain_variable")
+            domains_variables_in_included_filterlists_nameref+=("$domain_variable")
         done
     done
 
-    echo "${domains_variables_in_included_filterlists[@]}"
 }
 
 extract_domains_variables_in_included_json_files() {
     local template_name="$1"
+    local -n domains_variables_in_included_json_files_nameref=$2
+
     # Extract the list of included JSON files from the template
     local included_json_files=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< "$templates_content")
-    local domains_variables_in_included_json_files=()
+    domains_variables_in_included_json_files_nameref=()
 
     for included_json_file in $included_json_files; do
         # Extract the domain variables from each included JSON file
         local domains_variables=$(jq -r --arg key "$included_json_file" '.[$key][]' <<< "$variables_in_json_files")
         for domain_variable in $domains_variables; do
-            domains_variables_in_included_json_files+=("$domain_variable")
+            domains_variables_in_included_json_files_nameref+=("$domain_variable")
         done
     done
-
-    echo "${domains_variables_in_included_json_files[@]}"
 }
 
 check_domain_variables_in_filterlists() {
+
     local template_name="$1"
     local domains_variables_in_included_filterlists=()
     local domains_variables_in_included_json_files=()
 
     # When for example $2 was empty, then the array had one element with empty string
     if [ -n "$2" ]; then
         domains_variables_in_included_filterlists=($2)
     fi
@@ -320,17 +359,17 @@ check_domain_variables_in_filterlists() 
             if [ "$domain_variable_in_filterlist" = "$domain_variable_in_json_file" ]; then
                 found=true
                 break
             fi
         done
         if ! $found; then
             last_error="Error: One of the filterlists:\n\n"
             last_error+="$included_filterlists\n\n"
-            last_error+="included in the template $template_name contain a domain variable $domain_variable_in_filterlist"
+            last_error+="included in the template $template_name contain a domain variable $domain_variable_in_filterlist "
             last_error+="which wasn't found in any of the domains variables files included in that template:\n\n"
             last_error+="$included_json_files"
             return 1
         fi
     done
 }
 
 check_if_domains_variables_are_identical_in_lists_and_jsons() {
@@ -340,72 +379,95 @@ check_if_domains_variables_are_identical
         last_error+="$(jq -n --argjson all_domains_variables "$all_domains_variables" --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" '$all_domains_variables - $all_domain_variables_matches_in_filterlists')\n"
         last_error+="Extra variables in filter lists:\n"
         last_error+=$(jq -n --argjson all_domains_variables "$all_domains_variables" --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" '$all_domain_variables_matches_in_filterlists - $all_domains_variables')
         return 1
     fi
 }
 
 main() {
+    check_git_status
     check_if_jq_is_installed
     for template_name in *.txt; do
-    echo $template_name
         templates_names+=("$template_name")
-        # local variable is declared before to be able to capture exit status of the function
+        # To avoid creating a subshell, the variable is passed as a reference to parse_template_data function
+        # That helps with the error handling and allows to use global variables
         local file_data
         # Parse data from the template
-        file_data=$(parse_template_data "$template_name")
-        if [ $? -ne 0 ]; then
-            last_error="$file_data"
-            return 1
-        fi
+        parse_template_data "$template_name" file_data
         # Update the templates_content JSON with the data from the file
         update_templates_content "$file_data"
 
         update_unique_json_files "$template_name" "$file_data"
         update_unique_includes "$template_name" "$file_data"
     done
 
     for domains_variables_path in ${unique_json_files[@]}; do
         check_if_file_exists "$domains_variables_path"
-        check_if_correct_domains_variables_json_structure "$domains_variables_path"
+        local staged_domains_variables_file
+        get_staged_version_of_a_file "$domains_variables_path" staged_domains_variables_file
+        check_if_correct_domains_variables_json_structure "$staged_domains_variables_file"
 
         # If jq would be used the duplicates would be automatically removed, therefore I used perl
-        local domains_variables_names=$(cat "$domains_variables_path" | perl -0777 -ne 'print "$1\n" while /"([^"]+?)"(?=[\s\r\n]*:)/g')
-        check_if_correct_domains_variables "$domains_variables_path" "$domains_variables_names"
+        local domains_variables_names=$(perl -0777 -ne 'print "$1\n" while /"([^"]+?)"(?=[\s\r\n]*:)/g' <<< $staged_domains_variables_file)
+        check_if_correct_domains_variables "$domains_variables_path" "$domains_variables_names" "$staged_domains_variables_file"
         check_if_duplicated_domains_variable_name_between_files "$domains_variables_path" "$domains_variables_names"
         update_domains_variables_data "$domains_variables_names" "$domains_variables_path"
     done
 
     for filterlist_path in ${unique_filterlists_to_include[@]}; do
         check_if_file_exists "$filterlist_path"
-        local filterlist_content=$(cat "$filterlist_path")
+        local filterlist_content
+        get_staged_version_of_a_file "$filterlist_path" filterlist_content
 
         local all_lines_with_domain_variables_in_filterlist
         # This regex check is simpler than in filterlist delivery to also catch domains variables in the wrong place
         # without starting with a complex regex. The full regex is in one of the next steps
-        all_lines_with_domain_variables_in_filterlist=$(find_domain_variables_in_filterlist "$filterlist_content" "$filterlist_path")
-        if [ "$all_lines_with_domain_variables_in_filterlist" = "No domains variables found in $filterlist_path" ]; then
-            echo "$all_lines_with_domain_variables_in_filterlist"
-            local collected_matches='[]'
+
+        find_domain_variables_in_filterlist "$filterlist_content" "$filterlist_path" all_lines_with_domain_variables_in_filterlist
+
+        if [ -z "$all_lines_with_domain_variables_in_filterlist" ]; then
+            # In case of lack of matches, the value of all_lines_with_domain_variables_in_filterlist should have just
+            # a message to show.
+            local domains_variables_collected_from_filterlist='[]'
         else
-            local collected_matches
-            collected_matches=$(process_filters "$all_lines_with_domain_variables_in_filterlist")
+            local domains_variables_collected_from_filterlist
+            process_filters "$all_lines_with_domain_variables_in_filterlist" domains_variables_collected_from_filterlist
         fi
-        update_matches_and_variables "$collected_matches" "$filterlist_path"
+
+        update_matches_and_variables "$domains_variables_collected_from_filterlist" "$filterlist_path"
     done
 
+    local domains_variables_in_included_filterlists=()
     for template_name in ${templates_names[@]}; do
-        local domains_variables_in_included_filterlists
-        domains_variables_in_included_filterlists=$(extract_domains_variables_in_included_filterlists "$template_name")
+        extract_domains_variables_in_included_filterlists "$template_name" domains_variables_in_included_filterlists
 
-        local domains_variables_in_included_json_files=$(extract_domains_variables_in_included_json_files "$template_name")
-
-        check_domain_variables_in_filterlists "$template_name" "$domains_variables_in_included_filterlists" "$domains_variables_in_included_json_files"
+        local domains_variables_in_included_json_files
+        extract_domains_variables_in_included_json_files "$template_name" domains_variables_in_included_json_files
+        check_domain_variables_in_filterlists "$template_name" "$(echo ${domains_variables_in_included_filterlists[@]})" "$(echo ${domains_variables_in_included_json_files[@]})"
     done
 
     check_if_domains_variables_are_identical_in_lists_and_jsons
 }
 
-main
+# For testing purposes only if the script has no arguments or the argument is main the process should run
+# thanks to that the script can be tested without running the main function
+if [ -z "${1:-}" ] || [ "$1" = "main" ]; then
+    main
+    ./pre-commit-src/tests/pre-commit-tests.sh || exit_code=1
+    function_exit_code=$?
+    if [ $exit_code -ne 1 ]; then
+        exit_code=$function_exit_code
+    fi
 
-echo "All tests passed"
-exit 0
\ No newline at end of file
+    if [ $exit_code -ne 0 ]; then
+        echo "Tests failed with exit code $exit_code"
+    else
+        echo "Tests passed successfully"
+    fi
+    exit $exit_code
+
+elif [ "$1" = "--load-only" ]; then
+    testing=true
+    echo "Script loaded successfully"
+else
+    "$@"
+fi