pre-commit-src/pre-commit
author eyeokg <k.galczynski@eyeo.com>
Thu, 07 Nov 2024 17:08:15 +0100
changeset 25709 ca5e4f738e57
parent 25708 d3198dc01a79
child 25710 b3dddcc5f208
permissions -rwxr-xr-x
Improved pre_commit and tests, removed old pre-commit

#!/bin/bash

# Strict error handling:
#   -E           the ERR trap is inherited by functions, command substitutions and subshells
#   -e           exit when a command fails outside of a condition or an && / || list
#   -u           expanding an unset variable is an error
#   -o pipefail  a pipeline fails when any of its stages fails
# Check functions report a problem by filling last_error and executing `return 1`;
# the ERR trap registered below then calls error_handler, which ends the script.
set -Eeuo pipefail

# Allow user input during commit
exec < /dev/tty

# JSON object: template name -> {"domainsVariables": [...], "include": [...]}
templates_content='{}'
# Names of the templates processed by main
templates_names=()
# Domains variables JSON files referenced by the templates, without repetitions
unique_json_files=()
# Filterlists included by the templates, without repetitions
unique_filterlists_to_include=()
# JSON array with the unique domains variable names found in the JSON files
all_domains_variables_in_json_files='[]'
# Not referenced anywhere else in the visible part of this script
all_domains_variables_names='{}'
# JSON object: JSON file path -> content of that file
all_json_files_contents='{}'
# JSON object: JSON file path -> array with the keys of that file
variables_in_json_files='{}'
# JSON object: filterlist path -> array of domains variable names used in it
variables_in_included_filterlists='{}'
# JSON array with the unique domains variable names found in the filterlists
all_domain_variables_matches_in_filterlists='[]'
# Message printed by error_handler when a check fails with `return 1`
last_error=''
# Set to true by --load-only; error_handler then exits 0 for expected check failures
testing=false

# ERR trap handler: reports the failure and terminates the script.
# Arguments: $1 - line number at which the trap fired
# Globals:   last_error (read), testing (read), BASH_COMMAND (read)
# Exits:     with the status of the failing command, or with 0 when testing=true
#            and the failing command was a literal `return 1`
error_handler() {
    # Has to stay the first command, otherwise $? no longer holds the failing status.
    local exit_code=$?
    local line_number=$1
    local final_status=$exit_code

    echo "Error: Script failed with exit code $exit_code at line $line_number"

    case "${BASH_COMMAND:-}" in
        "return 1")
            # Check functions fail on purpose with `return 1` after filling last_error.
            echo -e "Last error message:\n$last_error"
            if [ "$testing" = true ]; then
                final_status=0
            fi
            ;;
        *)
            # Any other failing command is unexpected.
            echo -e "\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
            echo "THIS ERROR SHOULDN'T HAPPEN, PLEASE REPORT IT TO AFB TEAM OR KRIS"
            echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n"
            echo "Last executed command: ${BASH_COMMAND:-}"
            ;;
    esac

    exit $final_status
}

# Route every unhandled command failure to error_handler. The single quotes delay the
# expansion of $LINENO until the trap fires, so the handler receives the failing line.
trap 'error_handler $LINENO' ERR

# Warns when `git status` reports unstaged or untracked changes, because the checks
# only look at the staged version of the files, and asks whether to continue.
# Arguments: none (the answer is read from standard input)
# Returns:   0 when there is nothing to warn about or the user answers y/Y
# Exits:     1 when the user answers n/N or anything else
check_git_status() {
    local status
    local choice

    # Best effort, as before: a failing `git status` is not reported here.
    status=$(git status) || true

    if [[ "$status" != *"Changes not staged for commit"* && "$status" != *"Untracked files"* ]]; then
        return 0
    fi

    # -r keeps backslashes in the answer literal; choice is local so it does not leak.
    read -r -p "There are changes not staged for commit. The script will check only the staged version. Do you want to continue? (y/n): " choice
    case "$choice" in
        y|Y ) echo "";;
        n|N ) echo "Aborting."; exit 1;;
        * ) echo "Invalid choice. Aborting."; exit 1;;
    esac
}

# Verifies that the jq command is available.
# Globals: last_error (written on failure)
# Returns: 0 when jq is found, 1 otherwise
check_if_jq_is_installed() {
    if command -v jq &>/dev/null; then
        return 0
    fi

    last_error="ERROR: jq is not installed. Please install jq to continue."
    return 1
}

# Verifies that the given path is an existing regular file.
# Arguments: $1 - path to check
# Globals:   last_error (written on failure)
# Returns:   0 when the file exists, 1 otherwise
check_if_file_exists() {
    local candidate="$1"

    if [ -f "$candidate" ]; then
        return 0
    fi

    last_error="ERROR: File $candidate does not exist"
    return 1
}

# Stores the staged (index) version of a file in the variable named by $2.
# Arguments: $1 - path of the file
#            $2 - name of the caller's variable that receives the content
# Globals:   last_error (written on failure)
# Returns:   0 on success, 1 when git cannot show the staged version; a missing
#            working-tree file is reported by check_if_file_exists
get_staged_version_of_a_file() {
    local file_name="$1"

    # A nameref lets the caller receive the content without a command substitution,
    # so last_error and `return 1` take effect in the calling shell, not in a subshell.
    local -n staged_file_content_nameref=$2
    local _staged_blob

    check_if_file_exists "$file_name"

    # ":<path>" addresses the version stored in the index. git is run once and its
    # output captured, instead of once to probe and a second time to read.
    if _staged_blob="$(git show :"$file_name" 2>/dev/null)"; then
        staged_file_content_nameref=$_staged_blob
    else
        last_error="ERROR: File $file_name was requested by a template but it's not tracked neither staged."
        return 1
    fi
}
parse_template_data() {
    local template="$1"
    local -n file_data_nameref=$2
    local staged_template

    get_staged_version_of_a_file "$template" staged_template

    # Extract lines starting with %domainsVariables exceptionrules: and process them with jq
    local json_files_in_template=$(grep "^%domainsVariables exceptionrules:" <<< "$staged_template" | sed 's/^%domainsVariables exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))' || echo '[]')
    # Extract lines starting with %include exceptionrules: and process them with jq
    local included_filterlists_files_in_template=$(grep "^%include exceptionrules:" <<< "$staged_template" | sed 's/^%include exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))' || echo '[]')

    if [ "$included_filterlists_files_in_template" = "[]" ]; then
        last_error="ERROR: There is no list included in template $template_name"
        return 1
    fi

    # Create a JSON object with the template name as the key and the extracted domainsVariables and include data as values
    file_data_nameref=$(jq -n --arg template "$template" --argjson json_files_in_template "$json_files_in_template" --argjson included_filterlists_files_in_template "$included_filterlists_files_in_template" '
        {
            $template: {
                "domainsVariables": $json_files_in_template,
                "include": $included_filterlists_files_in_template
            }
        }')
}

# Adds the data of one template to the global templates_content JSON object.
# Arguments: $1 - JSON object produced for a single template
# Globals:   templates_content (read and written)
update_templates_content() {
    local new_template_entry="$1"

    # `+` on two objects keeps the keys of both; the right-hand side wins on a clash.
    templates_content=$(jq -n \
        --argjson templates_content "$templates_content" \
        --argjson file_template "$new_template_entry" \
        '$templates_content + $file_template')
}

# Appends the domainsVariables files of a template to unique_json_files, skipping
# paths that are already listed.
# Arguments: $1 - template name (key inside the JSON of $2)
#            $2 - JSON object with the data of that template
# Globals:   unique_json_files (read and written)
update_unique_json_files() {
    local template_name="$1"
    local file_data="$2"
    local json_files_list
    local json_file
    local known_file
    local already_listed

    # Extract the list of domainsVariables files from the template data
    json_files_list=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< "$file_data")

    # One path per line: paths are neither split on spaces nor glob-expanded.
    while IFS= read -r json_file; do
        [[ -n "$json_file" ]] || continue

        # Whole-path comparison. A word match would treat "a.json" as already
        # present when only "dir/a.json" is listed.
        already_listed=false
        for known_file in ${unique_json_files[@]+"${unique_json_files[@]}"}; do
            if [[ "$known_file" == "$json_file" ]]; then
                already_listed=true
                break
            fi
        done

        if [[ "$already_listed" == false ]]; then
            unique_json_files+=("$json_file")
        fi
    done <<< "$json_files_list"
}

# Appends the filterlists included by a template to unique_filterlists_to_include,
# skipping paths that are already listed.
# Arguments: $1 - template name (key inside the JSON of $2)
#            $2 - JSON object with the data of that template
# Globals:   unique_filterlists_to_include (read and written)
update_unique_includes() {
    local template_name="$1"
    local file_data="$2"
    local included_files_list
    local included_filterlist
    local known_filterlist
    local already_listed

    # Extract the list of included files from the template data
    included_files_list=$(jq -r --arg file "$template_name" '.[$file].include[]' <<< "$file_data")

    # One path per line: paths are neither split on spaces nor glob-expanded.
    while IFS= read -r included_filterlist; do
        [[ -n "$included_filterlist" ]] || continue

        # Whole-path comparison. A word match would treat "easy.txt" as already
        # present when only "lists/easy.txt" is listed.
        already_listed=false
        for known_filterlist in ${unique_filterlists_to_include[@]+"${unique_filterlists_to_include[@]}"}; do
            if [[ "$known_filterlist" == "$included_filterlist" ]]; then
                already_listed=true
                break
            fi
        done

        if [[ "$already_listed" == false ]]; then
            unique_filterlists_to_include+=("$included_filterlist")
        fi
    done <<< "$included_files_list"
}

# Verifies that a file can be parsed by jq.
# Arguments: $1 - path of the JSON file
# Globals:   last_error (written on failure)
# Returns:   0 when `jq -e .` accepts the content, 1 otherwise
check_if_valid_json() {
    local candidate_path="$1"

    if jq -e . >/dev/null 2>&1 <<< "$(< "$candidate_path")"; then
        return 0
    fi

    last_error="ERROR: Invalid JSON content in $candidate_path"
    return 1
}

# Collects every object key of a JSON text, repeated keys included, one per line, and
# stores the list in the variable named by $2.
# Arguments: $1 - JSON text
#            $2 - name of the caller's variable that receives the list
get_domains_variables_names_incl_duplicates() {
    local json_file_content="$1"
    local -n domains_variables_names_incl_duplicated_nameref=$2

    # jq drops repeated keys while parsing, so the keys are taken with perl instead:
    # every double-quoted string that is followed by optional whitespace and a colon.
    # The here-string is quoted so the text reaches perl unchanged on every bash version.
    domains_variables_names_incl_duplicated_nameref=$(perl -0777 -ne 'print "$1\n" while /"([^"]+?)"(?=[\s\r\n]*:)/g' <<< "$json_file_content")
}
# Verifies that a JSON text is an object whose values are all arrays of strings:
#   { "variable1": ["domain1", "domain2" (...)], "variable2": ["domain1" (...)], (...) }
# Arguments: $1 - JSON text
#            $2 - path of the file the text comes from (used in the error message)
# Globals:   last_error (written on failure)
# Returns:   0 when the structure matches, 1 otherwise
check_if_correct_domains_variables_json_structure() {
    local json_file_content="$1"
    local json_file_path="$2"

    # The content is passed through a quoted here-string. An unquoted `echo` would
    # word-split it and expand glob characters such as the "*" of wildcard domains
    # against the files of the current directory.
    if ! jq -e '
        type == "object" and
        ([keys[] as $k | .[$k] | type == "array" and all(.[]; type == "string")] | all)
        ' <<< "$json_file_content" >/dev/null 2>&1
    then
        last_error="ERROR: JSON structure is invalid in $json_file_path"
        return 1
    fi
}

# Fails when the same domains variable name is listed more than once for one file.
# Arguments: $1 - path of the JSON file (used in the error message)
#            $2 - domains variable names, one per line
# Globals:   last_error (written on failure)
# Returns:   0 when all names are distinct, 1 otherwise
check_if_duplicated_domains_variable_name_in_single_file() {
    local json_file_path="$1"
    local domains_variables_names="$2"
    local repeated_names

    # uniq -d prints one copy of every line that occurs more than once in sorted input.
    repeated_names=$(sort <<< "$domains_variables_names" | uniq -d) || true

    if [ -z "$repeated_names" ]; then
        return 0
    fi

    last_error="ERROR: Duplicate domains_variables_names found in $json_file_path"
    return 1
}

# Verifies that a domains variable name is non-empty and made only of alphanumeric
# characters and underscores.
# Arguments: $1 - name to check
# Globals:   last_error (written on failure)
# Returns:   0 when the name is valid, 1 otherwise
check_if_valid_domains_variable_name() {
    local domains_variable_name="$1"

    # Valid means: not empty and no character outside [[:alnum:]_] anywhere.
    if [[ -n "$domains_variable_name" && "$domains_variable_name" != *[![:alnum:]_]* ]]; then
        return 0
    fi

    last_error="ERROR: Invalid domains variable name: $domains_variable_name, only alphanumeric characters and underscores are allowed"
    return 1
}

# Fails when the domains of one domains variable contain the same entry more than
# once; the comparison ignores letter case.
# Arguments: $1 - path of the JSON file (used in the error message)
#            $2 - domains variable name
#            $3 - the domains of that variable, one per line
# Globals:   last_error (written on failure)
# Returns:   0 when all domains are distinct, 1 otherwise
check_if_duplicated_domains() {
    local json_file_path="$1"
    local key="$2"
    local value="$3"
    local duplicated_domains

    # uniq only compares neighbouring lines, so the sort has to ignore case as well
    # (-f). A case-sensitive sort can place another domain between "B.com" and
    # "b.com", which hides the duplicate.
    duplicated_domains=$(sort -f <<< "$value" | uniq -di)

    # Value of domainsVariable is an array of strings representing domains,
    # therefore the error message mentions domains.
    if [ -n "$duplicated_domains" ]; then
        last_error="ERROR: There are duplicated domains in $key in file $json_file_path:\n"
        last_error+="$duplicated_domains"
        return 1
    fi
}

# Verifies that every domain of a domains variable is either a regular domain name
# or a domain whose last label is the wildcard "*".
# Arguments: $1 - path of the JSON file (used in the error message)
#            $2 - domains variable name (used in the error message)
#            $3 - the domains of that variable, one per line
# Globals:   last_error (written on failure)
# Returns:   0 when all domains are valid, 1 on the first invalid one
check_if_correct_domain() {
    local json_file_path="$1"
    local domains_variable_name="$2"
    local domains="$3"
    local domain
    local domain_pattern='^(?:(?:(?!-)[A-Za-z0-9-]{1,63}(?<!-)\.)*(?:[A-Za-z]{2,})$)|(?:^(?:(?!-)[A-Za-z0-9-]{1,63}(?<!-)\.)+\*)$'

    # Each line is checked as a whole. Looping over the unquoted list would split a
    # domain such as "ab cd.com" into two valid-looking words and would expand the
    # "*" of wildcard domains against the files of the current directory.
    while IFS= read -r domain; do
        [[ -n "$domain" ]] || continue

        # Check if the domain matches the expected pattern
        if ! grep -qsP -- "$domain_pattern" <<< "$domain"; then
            last_error="ERROR: Invalid domain in $domains_variable_name: $domain in file $json_file_path"
            return 1
        fi
    done <<< "$domains"

    return 0
}

# Runs all per-variable checks for one domains variables file: repeated names, name
# syntax, repeated domains and domain syntax.
# Arguments: $1 - path of the JSON file (used in error messages)
#            $2 - domains variable names of that file, one per line
#            $3 - JSON text of that file
# The individual checks are not guarded: a failing one stops the function only when
# errexit is active, as it is in this script.
check_if_correct_domains_variables() {
    local json_file_path="$1"
    local domains_variables_names="$2"
    local json_file_content="$3"
    local domains_variable_name
    local domains

    check_if_duplicated_domains_variable_name_in_single_file "$json_file_path" "$domains_variables_names"

    # One name per line. Splitting on whitespace would turn an invalid name such as
    # "my var" into the two valid names "my" and "var".
    while IFS= read -r domains_variable_name; do
        [[ -n "$domains_variable_name" ]] || continue

        check_if_valid_domains_variable_name "$domains_variable_name"
        # Extract the value associated with the domains variable name from the JSON file
        domains=$(jq -r --arg key "$domains_variable_name" '.[$key][]' <<< "$json_file_content")

        check_if_duplicated_domains "$json_file_path" "$domains_variable_name" "$domains"
        check_if_correct_domain "$json_file_path" "$domains_variable_name" "$domains"
    done <<< "$domains_variables_names"
}

# Records the data of one domains variables file in the global bookkeeping.
# Arguments: $1 - domains variable names of the file, one per line
#            $2 - path of the JSON file
# Globals:   all_domains_variables_in_json_files, all_json_files_contents,
#            variables_in_json_files (all read and written)
# NOTE(review): the content is read from the file at $2 on disk, not from the staged
# version the checks elsewhere in this script use - confirm that this is intended.
update_domains_variables_data() {
    local domains_variables_names="$1"
    local json_file_path="$2"
    local json_file_content
    local names_as_json
    local keys_of_this_file

    json_file_content=$(jq -c . < "$json_file_path") || true

    # Merge the new domains variables into the existing all_domains_variables_in_json_files array
    names_as_json=$(jq -R -s 'split("\n") | map(select(length > 0))' <<< "$domains_variables_names") || true
    all_domains_variables_in_json_files=$(jq -n \
        --argjson all_domains_variables_in_json_files "$all_domains_variables_in_json_files" \
        --argjson domains_variables_names "$names_as_json" \
        '$all_domains_variables_in_json_files + $domains_variables_names | unique')

    # Add the JSON file content to the all_json_files_contents object
    all_json_files_contents=$(jq \
        --arg json_file_path "$json_file_path" \
        --argjson json_file_content "$json_file_content" \
        '.[$json_file_path] = $json_file_content' <<< "$all_json_files_contents")

    # Update the variables_in_json_files object with the keys from the JSON file
    keys_of_this_file=$(jq 'to_entries | map({key: .key, value: (.value | keys)}) | from_entries' <<< "$all_json_files_contents" \
        | jq --arg key "$json_file_path" '.[$key]') || true
    variables_in_json_files=$(jq \
        --arg key "$json_file_path" \
        --argjson value "$keys_of_this_file" \
        '. * {($key): $value}' <<< "$variables_in_json_files")
}

# Fails when a domains variable name of the given file was already collected from a
# previously processed file.
# Arguments: $1 - path of the JSON file being checked (used in the error message)
#            $2 - domains variable names of that file
# Globals:   all_domains_variables_in_json_files (read), all_json_files_contents (read),
#            last_error (written on failure)
# Returns:   0 when no name is shared, 1 on the first shared name
check_if_duplicated_domains_variable_name_between_files() {
    local json_file_path="$1"
    local domains_variables_names="$2"
    local file_with_duplicate

    for domains_variable_name in $domains_variables_names; do
        # Names that were not collected before need no further attention.
        if ! jq -e --arg name "$domains_variable_name" 'index($name) != null' <<< "$all_domains_variables_in_json_files" >/dev/null 2>&1; then
            continue
        fi

        # Identify the first recorded file that has the name as a key and report it.
        file_with_duplicate=$(jq -r --arg name "$domains_variable_name" '
            to_entries | map(select(.value | has($name))) | .[0].key
            ' <<< "$all_json_files_contents") || true
        last_error="ERROR: Duplicate domains variable found in $json_file_path and $file_with_duplicate file: $domains_variable_name"
        return 1
    done
}

# Stores, in the variable named by $3, every line of a filterlist that contains the
# domain variable syntax %<{...}>%; the variable is left empty when no line matches.
# Arguments: $1 - filterlist text
#            $2 - filterlist path (not used by this function)
#            $3 - name of the caller's variable that receives the matching lines
find_domain_variables_syntax_in_filterlist() {
    local filterlist_content="$1"
    local filterlist_path="$2"
    local -n all_lines_with_domain_variables_in_filterlist_nameref=$3

    # This pattern is deliberately looser than the one used for filterlist delivery,
    # so that variables placed in a wrong position are caught too; the strict pattern
    # is applied in a later step. A grep without matches is not an error here.
    all_lines_with_domain_variables_in_filterlist_nameref=$(grep -P '%<\{.*\}>%' <<< "$filterlist_content") || true
}

# Fails when a filterlist contains a line that looks like the domain variable syntax
# %<{...}>% with a character missing or misplaced at its beginning or its end.
# Arguments: $1 - filterlist text
#            $2 - filterlist path (used in the error message)
# Globals:   last_error (written on failure)
# Returns:   0 when no such line exists, 1 otherwise
check_for_simiar_to_domain_variable() {
    local filterlist_content="$1"
    local filterlist_path="$2"

    # Broken opening marker, e.g. "<{" without "%", "%{" or "%<" without "{".
    local broken_opening='([^%]|^)<\{|%[^<]?\{|%<[^{]'
    # Broken closing marker, e.g. ">%" without "}", "}%" or "}>" without "%".
    local broken_closing='[^}]>%|\}[^>]?%|\}>([^%]|$)'
    # A line is suspicious when at least one of the two markers is broken.
    local suspicious_line_regex="($broken_opening).*($broken_closing|\}>%)|($broken_opening|%<\{).*($broken_closing)"
    local suspicious_lines

    suspicious_lines=$(grep -P "$suspicious_line_regex" <<< "$filterlist_content" || echo '')
    if [ -z "$suspicious_lines" ]; then
        return 0
    fi

    last_error="ERROR: Found a line in $filterlist_path that is similar to a domain variable, but it's not a domain variable:\n\n"
    last_error+="$suspicious_lines"
    return 1
}

# Validates every filter that uses a domain variable and stores the names of the
# variables, as a JSON array, in the variable named by $2.
# Arguments: $1 - filters containing %<{...}>%, one per line
#            $2 - name of the caller's variable that receives the JSON array
# Globals:   last_error (written on failure)
# Returns:   1 when a filter holds more than one domain variable or holds it in a
#            position the placement pattern does not allow
process_filters() {
    local all_lines_with_domain_variables_in_filterlist="$1"
    local -n domains_variables_collected_from_filterlist_nameref="$2"
    local filter
    local domains_variable_match
    # Allowed positions: in front of a "#...#" separator (optionally followed by more
    # domains), or as the last entry of a "domain=" option.
    local placement_pattern='(%<{(\w+)}>%(?:,~?[a-zA-Z0-9*.~-]+)*#[?@$]?#)|([,$]domain=(?:[a-zA-Z0-9*.~-]+\|)*%<{(\w+)}>%)'

    domains_variables_collected_from_filterlist_nameref='[]'

    # Filters are processed line by line. Filters may contain spaces and glob
    # characters, so looping over the unquoted list would cut them into pieces and
    # expand them against the files of the current directory.
    while IFS= read -r filter; do
        [[ -n "$filter" ]] || continue

        # Extract the domain variable from the filter
        domains_variable_match=$(grep -oP '(?<=%<\{).*?(?=\}>%)' <<< "$filter" || true)
        if [ "$(echo "$domains_variable_match" | wc -l)" -gt 1 ]; then
            last_error="ERROR: More than 2 domain variables found in filter: $filter"
            return 1
        fi

        # Ensure the domain variable is correctly formatted in the filter. grep runs
        # as a condition: as a plain assignment, a non-matching grep would fire the
        # ERR trap before the message below could be set.
        if ! grep -qP -- "$placement_pattern" <<< "$filter"; then
            last_error="ERROR: Domain variable added in a wrong way in filter: $filter"
            return 1
        fi

        domains_variables_collected_from_filterlist_nameref=$(jq --arg domains_variable_match "$domains_variable_match" '. + [$domains_variable_match]' <<< "$domains_variables_collected_from_filterlist_nameref")
    done <<< "$all_lines_with_domain_variables_in_filterlist"
}

# Records the domain variables found in one filterlist in the global bookkeeping.
# Arguments: $1 - JSON array with the variable names found in the filterlist
#            $2 - path of the filterlist
# Globals:   all_domain_variables_matches_in_filterlists,
#            variables_in_included_filterlists (both read and written)
update_matches_and_variables() {
    local names_found_in_filterlist="$1"
    local filterlist_key="$2"

    # Update the list of all domain variable matches in filterlists
    all_domain_variables_matches_in_filterlists=$(jq -n \
        --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" \
        --argjson matches "$names_found_in_filterlist" \
        '$all_domain_variables_matches_in_filterlists + $matches | unique')

    # Update the variables_in_included_filterlists object with the matches from the current filterlist
    variables_in_included_filterlists=$(jq \
        --arg key "$filterlist_key" \
        --argjson value "$names_found_in_filterlist" \
        '.[$key] = $value' <<< "$variables_in_included_filterlists")
}

# Fills the array named by $2 with the domain variables used by all filterlists that
# the given template includes.
# Arguments: $1 - template name
#            $2 - name of the caller's array that receives the variable names
# Globals:   templates_content (read), variables_in_included_filterlists (read)
extract_domains_variables_in_included_filterlists() {
    local template_name="$1"
    local -n domains_variables_in_included_filterlists_nameref=$2
    local filterlists_of_template
    local variables_of_filterlist

    # Extract the list of included filterlists from the template
    filterlists_of_template=$(jq -r --arg template_name "$template_name" '.[$template_name].include[]' <<< "$templates_content") || true
    domains_variables_in_included_filterlists_nameref=()

    for included_filterlist in $filterlists_of_template; do
        # Extract the domain variables from each included filterlist
        variables_of_filterlist=$(jq -r --arg key "$included_filterlist" '.[$key][]' <<< "$variables_in_included_filterlists") || true
        for domain_variable in $variables_of_filterlist; do
            domains_variables_in_included_filterlists_nameref+=("$domain_variable")
        done
    done
}

# Fills the array named by $2 with the domains variables defined by all JSON files
# that the given template references.
# Arguments: $1 - template name
#            $2 - name of the caller's array that receives the variable names
# Globals:   templates_content (read), variables_in_json_files (read)
extract_domains_variables_in_included_json_files() {
    local template_name="$1"
    local -n domains_variables_in_included_json_files_nameref=$2
    local json_files_of_template
    local variables_of_json_file

    # Extract the list of included JSON files from the template
    json_files_of_template=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< "$templates_content") || true
    domains_variables_in_included_json_files_nameref=()

    for included_json_file in $json_files_of_template; do
        # Extract the domain variables from each included JSON file
        variables_of_json_file=$(jq -r --arg key "$included_json_file" '.[$key][]' <<< "$variables_in_json_files") || true
        for domain_variable in $variables_of_json_file; do
            domains_variables_in_included_json_files_nameref+=("$domain_variable")
        done
    done
}

# Fails when a filterlist included by a template uses a domain variable that none of
# the domains variables files of the same template defines.
# Arguments: $1 - template name
#            $2 - space-separated variable names used by the included filterlists
#            $3 - space-separated variable names defined by the included JSON files
# Globals:   templates_content (read), last_error (written on failure)
# Returns:   0 when every used variable is defined, 1 on the first undefined one
check_domain_variables_in_filterlists() {
    local template_name="$1"
    local domains_variables_in_included_filterlists=()
    local domains_variables_in_included_json_files=()
    local included_filterlists
    local included_json_files
    local is_defined

    # An empty argument has to stay an empty array instead of becoming an array that
    # holds one empty string.
    [ -z "$2" ] || domains_variables_in_included_filterlists=($2)
    [ -z "$3" ] || domains_variables_in_included_json_files=($3)

    # The filterlists and JSON files of the template are only needed for the error message.
    included_filterlists=$(jq -r --arg template_name "$template_name" '.[$template_name].include[]' <<< "$templates_content") || true
    included_json_files=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< "$templates_content") || true

    # NOTE(review): both expansions stay unquoted to keep the existing word splitting.
    for domain_variable_in_filterlist in ${domains_variables_in_included_filterlists[@]}; do
        is_defined=false
        for domain_variable_in_json_file in ${domains_variables_in_included_json_files[@]}; do
            if [ "$domain_variable_in_filterlist" = "$domain_variable_in_json_file" ]; then
                is_defined=true
                break
            fi
        done

        if [ "$is_defined" = false ]; then
            last_error="Error: One of the filterlists:\n\n"
            last_error+="$included_filterlists\n\n"
            last_error+="included in the template $template_name contain a domain variable $domain_variable_in_filterlist "
            last_error+="which wasn't found in any of the domains variables files included in that template:\n\n"
            last_error+="$included_json_files"
            return 1
        fi
    done
}

# Fails when the set of domain variables defined in the JSON files differs from the
# set used in the filterlists, and reports the difference in both directions.
# Globals: all_domains_variables_in_json_files (read),
#          all_domain_variables_matches_in_filterlists (read),
#          last_error (written on failure)
# Returns: 0 when both JSON arrays are textually identical, 1 otherwise
check_if_domains_variables_are_identical_in_lists_and_jsons() {
    if [ "$all_domains_variables_in_json_files" = "$all_domain_variables_matches_in_filterlists" ]; then
        return 0
    fi

    # Both jq calls receive the same two arrays and differ only in the subtraction order.
    local -a both_sets=(
        -n
        --argjson all_domains_variables_in_json_files "$all_domains_variables_in_json_files"
        --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists"
    )

    last_error="Error: the domain variables in domain-variables file and the filter list are not the same\n"
    last_error+="Extra variables in domain-variables files:\n"
    last_error+="$(jq "${both_sets[@]}" '$all_domains_variables_in_json_files - $all_domain_variables_matches_in_filterlists')\n"
    last_error+="Extra variables in filter lists:\n"
    last_error+=$(jq "${both_sets[@]}" '$all_domain_variables_matches_in_filterlists - $all_domains_variables_in_json_files')
    return 1
}


# Runs all checks of the pre-commit hook:
#   1. parse every *.txt template of the current directory,
#   2. validate every domains variables JSON file referenced by the templates,
#   3. validate every filterlist included by the templates,
#   4. verify per template that each variable used in its filterlists is defined in
#      its JSON files,
#   5. verify that the variables defined and the variables used are the same set.
# A failing check ends the script through the ERR trap, so no status is inspected here.
main() {
    local template_name
    # Passed to the helpers by name; they fill them through namerefs. This avoids a
    # subshell, which keeps the error handling and the global variables usable.
    local file_data
    local domains_variables_path
    local staged_domains_variables_file
    local domains_variables_names_incl_duplicates
    local filterlist_path
    local filterlist_content
    local all_lines_with_domain_variables_in_filterlist
    local domains_variables_collected_from_filterlist
    local domains_variables_in_included_filterlists=()
    local domains_variables_in_included_json_files=()

    check_git_status
    check_if_jq_is_installed

    for template_name in *.txt; do
        templates_names+=("$template_name")
        # Parse data from the template
        parse_template_data "$template_name" file_data
        # Update the templates_content JSON with the data from the file
        update_templates_content "$file_data"

        update_unique_json_files "$template_name" "$file_data"
        update_unique_includes "$template_name" "$file_data"
    done

    # Array elements are expanded quoted so that a path is never split or glob-expanded.
    for domains_variables_path in "${unique_json_files[@]}"; do
        check_if_file_exists "$domains_variables_path"
        get_staged_version_of_a_file "$domains_variables_path" staged_domains_variables_file
        check_if_correct_domains_variables_json_structure "$staged_domains_variables_file" "$domains_variables_path"
        get_domains_variables_names_incl_duplicates "$staged_domains_variables_file" domains_variables_names_incl_duplicates

        check_if_correct_domains_variables "$domains_variables_path" "$domains_variables_names_incl_duplicates" "$staged_domains_variables_file"

        check_if_duplicated_domains_variable_name_between_files "$domains_variables_path" "$domains_variables_names_incl_duplicates"
        update_domains_variables_data "$domains_variables_names_incl_duplicates" "$domains_variables_path"
    done

    for filterlist_path in "${unique_filterlists_to_include[@]}"; do
        check_if_file_exists "$filterlist_path"
        get_staged_version_of_a_file "$filterlist_path" filterlist_content

        check_for_simiar_to_domain_variable "$filterlist_content" "$filterlist_path"

        find_domain_variables_syntax_in_filterlist "$filterlist_content" "$filterlist_path" all_lines_with_domain_variables_in_filterlist

        # A filterlist without any domain variable contributes an empty JSON array.
        domains_variables_collected_from_filterlist='[]'
        if [ -n "$all_lines_with_domain_variables_in_filterlist" ]; then
            process_filters "$all_lines_with_domain_variables_in_filterlist" domains_variables_collected_from_filterlist
        fi

        update_matches_and_variables "$domains_variables_collected_from_filterlist" "$filterlist_path"
    done

    for template_name in "${templates_names[@]}"; do
        extract_domains_variables_in_included_filterlists "$template_name" domains_variables_in_included_filterlists
        extract_domains_variables_in_included_json_files "$template_name" domains_variables_in_included_json_files

        # The check expects both lists as single space-separated strings; "[*]" joins
        # the elements without the detour through echo, and "-" keeps an empty array
        # from being reported as unset.
        check_domain_variables_in_filterlists "$template_name" "${domains_variables_in_included_filterlists[*]-}" "${domains_variables_in_included_json_files[*]-}"
    done

    check_if_domains_variables_are_identical_in_lists_and_jsons
}

# Runs the unit tests of the pre-commit script.
# Globals: last_error (written; holds the outcome in both cases)
# Returns: 0 when the tests pass, 1 when they fail
check_unit_tests() {
    local tests_exit_code=0

    # The status has to be captured inside the || branch: after the whole
    # `command || assignment` list, $? is always 0.
    ./pre-commit-src/tests/pre-commit-tests.sh || tests_exit_code=$?

    if [ "$tests_exit_code" -ne 0 ]; then
        last_error="Unit tests failed with exit code $tests_exit_code"
        # error_handler prints last_error only when the failing command is the
        # literal text `return 1`, so no variable is used here.
        return 1
    fi

    last_error="Unit tests passed successfully"
}

# Makes sure the pre-commit script and its tests have no unstaged changes, and runs
# the unit tests when a change to either of them is staged.
# Globals: last_error (written on failure)
# Returns: 1 when `git status` reports unstaged or untracked changes for one of the
#          two files; otherwise the result of check_unit_tests, or 0 when the tests
#          did not have to run
check_pre_commit_files() {
    # All working variables are local so that nothing leaks into the global scope.
    local pre_commit_git_status
    local pre_commit_tests_git_status

    pre_commit_git_status=$(git status :pre-commit-src/pre-commit)
    if [[ "$pre_commit_git_status" == *"Changes not staged for commit"* || "$pre_commit_git_status" == *"Untracked files"* ]]; then
        last_error="Unstaged changes detected in pre-commit file. Stage pre-commit changes before continuing."
        return 1
    fi

    pre_commit_tests_git_status=$(git status :pre-commit-src/tests/pre-commit-tests.sh)
    if [[ "$pre_commit_tests_git_status" == *"Changes not staged for commit"* || "$pre_commit_tests_git_status" == *"Untracked files"* ]]; then
        last_error="Unstaged changes detected in pre-commit-tests file. Stage pre-commit tests changes before continuing."
        return 1
    fi

    # Only if something changed in pre commit or pre commit tests the unit tests should be run
    if [[ "$pre_commit_tests_git_status" == *"Changes to be committed"* || "$pre_commit_git_status" == *"Changes to be committed"* ]]; then
        check_unit_tests
    fi
}


# Entry point.
#   no argument or "main"  run all checks, as the git hook does
#   --load-only            only define the functions and switch to testing mode
#   anything else          treat the arguments as a function name plus its arguments,
#                          which lets the tests call single functions without main
case "${1:-}" in
    ''|main)
        main
        check_pre_commit_files

        echo "Pre-commit checks passed successfully. Double check if there were no error messages above this message before pushing"
        exit 0
        ;;
    --load-only)
        testing=true
        echo "Script loaded successfully"
        ;;
    *)
        "$@"
        ;;
esac