--- a/pre-commit
+++ b/pre-commit
@@ -1,40 +1,411 @@
-#!/bin/sh
+#!/bin/bash
+
+# Strict mode: -E lets the ERR trap fire inside functions and subshells, -e aborts on the
+# first unhandled failure, -u rejects unset variables and pipefail surfaces pipeline errors.
+set -Eeuo pipefail
+
+# Shared state filled in by main() and the helper functions below.
+# JSON object: template name -> {"domainsVariables": [...], "include": [...]}
+templates_content='{}'
+# Every template file name visited by main()
+templates_names=()
+# Distinct domains-variables JSON paths referenced by the templates
+unique_json_files=()
+# Distinct filter list paths referenced by the templates
+unique_filterlists_to_include=()
+# JSON array with the domains variable names collected from all JSON files
+all_domains_variables='[]'
+# NOTE(review): not referenced anywhere else in the visible part of this script
+all_domains_variables_names='{}'
+# JSON object: JSON file path -> parsed content of that file
+all_json_files_contents='{}'
+# JSON object: JSON file path -> array of the variable names (keys) it defines
+variables_in_json_files='{}'
+# JSON object: filter list path -> array of the variable names used in it
+variables_in_filterlists='{}'
+# JSON array with the variable names found across all filter lists
+all_domain_variables_matches_in_filterlists='[]'
+# Message set by a check right before it does `return 1`; printed by error_handler
+last_error=''
+
+# ERR trap handler: reports where the script failed and exits with the failing status.
+# Arguments: $1 - line number supplied by the trap
+# Globals:   last_error (read), BASH_COMMAND (read)
+error_handler() {
+  # Must stay first so $? still holds the status of the failing command
+  local status=$?
+  local failed_line=$1
+  echo "Error: Script failed with exit code $status at line $failed_line"
+  case "$BASH_COMMAND" in
+    'return 1')
+      # A check function bailed out on purpose and left its message in last_error
+      echo -e "Last error message:\n$last_error"
+      ;;
+    *)
+      # Anything else is an unexpected failure
+      echo -e "\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
+      echo "THIS ERROR SHOULDN'T HAPPEN, PLEASE REPORT IT TO AFB TEAM OR KRIS"
+      echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n"
+      echo "Last executed command: $BASH_COMMAND"
+      ;;
+  esac
+
+  exit "$status"
+}
-if ! [ -f "./exceptionrules/domains-variables.json" ]; then
- echo "File does not exist"
- exit 1
-fi
-staged_domains_variables=$(git show :./exceptionrules/domains-variables.json 2>/dev/null)
+# Route every failing command to error_handler. The expansions are quoted so the text of
+# the failing command is passed as one argument and is never word-split or glob-expanded.
+trap 'error_handler "$LINENO" "$BASH_COMMAND"' ERR
+
+# Verifies that the jq executable can be found.
+# Globals: last_error (written on failure)
+# Returns: 0 when jq is available, 1 otherwise
+check_if_jq_is_installed() {
+  command -v jq &>/dev/null && return 0
+  last_error="ERROR: jq is not installed. Please install jq to continue."
+  return 1
+}
+
+# Extracts the domains-variables files and filter lists referenced by a template.
+# Arguments: $1 - path to the template file
+# Globals:   last_error (written on failure)
+# Outputs:   JSON object {<template>: {"domainsVariables": [...], "include": [...]}} on stdout
+# Returns:   1 when the template has no "%include exceptionrules:" line
+parse_template_data() {
+  local template="$1"
+  local json_files_in_template included_filterlists_files_in_template
+  # grep exits non-zero when nothing matches, which is legitimate here, so only the grep
+  # stage is tolerated under pipefail; sed and jq failures still propagate.
+  json_files_in_template=$(
+    { grep "^%domainsVariables exceptionrules:" "$template" || true; } \
+      | sed 's/^%domainsVariables exceptionrules://; s/%$//' \
+      | jq -R -s 'split("\n") | map(select(length > 0))'
+  )
+  included_filterlists_files_in_template=$(
+    { grep "^%include exceptionrules:" "$template" || true; } \
+      | sed 's/^%include exceptionrules://; s/%$//' \
+      | jq -R -s 'split("\n") | map(select(length > 0))'
+  )
-if ! [ -n "$staged_domains_variables" ]; then
- echo "No changes in domains variables"
- exit 0
-fi
+  if [ "$(jq length <<< "$included_filterlists_files_in_template")" -eq 0 ]; then
+    # Use this function's own argument rather than a variable of the caller
+    last_error="ERROR: There is no list included in template $template"
+    return 1
+  fi
+
+  # ($template) is the documented jq form for using a variable's value as an object key
+  jq -n --arg template "$template" \
+    --argjson json_files_in_template "$json_files_in_template" \
+    --argjson included_filterlists_files_in_template "$included_filterlists_files_in_template" '
+    {
+      ($template): {
+        "domainsVariables": $json_files_in_template,
+        "include": $included_filterlists_files_in_template
+      }
+    }'
+}
+
+# Adds the data of one parsed template to the global templates_content object.
+# Arguments: $1 - JSON object produced for a single template
+# Globals:   templates_content (read and written)
+update_templates_content() {
+  local file_template="$1"
+  # Feed the accumulated object in as input and add the new template's keys to it
+  templates_content=$(jq --argjson file_template "$file_template" '. + $file_template' <<< "$templates_content")
+}
-if ! jq -e . >/dev/null 2>&1 <<<"$staged_domains_variables"; then
- echo "Error: Invalid JSON content"
- exit 1
-fi
+# Appends the domains-variables files of one template to unique_json_files, skipping
+# paths that are already listed.
+# Arguments: $1 - template name (key inside $2)
+#            $2 - JSON object holding that template's data
+# Globals:   unique_json_files (read and written)
+update_unique_json_files() {
+  local template_name="$1"
+  local file_data="$2"
+  local json_file known_file already_listed
+
+  # One path per line: no word splitting and no glob expansion of the paths
+  while IFS= read -r json_file; do
+    [[ -n "$json_file" ]] || continue
+    already_listed=false
+    # Exact comparison, so "a.json" is not mistaken for "dir/a.json". The ${var+...}
+    # form keeps an empty array safe under `set -u` on older bash releases.
+    for known_file in ${unique_json_files[@]+"${unique_json_files[@]}"}; do
+      if [[ "$known_file" == "$json_file" ]]; then
+        already_listed=true
+        break
+      fi
+    done
+    if [[ "$already_listed" == false ]]; then
+      unique_json_files+=("$json_file")
+    fi
+  done < <(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< "$file_data")
+}
+
+# Appends the filter lists included by one template to unique_filterlists_to_include,
+# skipping paths that are already listed.
+# Arguments: $1 - template name (key inside $2)
+#            $2 - JSON object holding that template's data
+# Globals:   unique_filterlists_to_include (read and written)
+update_unique_includes() {
+  local template_name="$1"
+  local file_data="$2"
+  local included_files_list included_filterlist known_filterlist already_listed
+  # Extract the list of included files from the template data, one path per line
+  included_files_list=$(jq -r --arg file "$template_name" '.[$file].include[]' <<< "$file_data")
-object=$(jq '.' <<< "$staged_domains_variables")
-keys=$(jq 'keys' <<< "$object")
+  # Line-based iteration: no word splitting and no glob expansion of the paths
+  while IFS= read -r included_filterlist; do
+    [[ -n "$included_filterlist" ]] || continue
+    already_listed=false
+    # Exact comparison, so "a.txt" is not mistaken for "dir/a.txt". The ${var+...}
+    # form keeps an empty array safe under `set -u` on older bash releases.
+    for known_filterlist in ${unique_filterlists_to_include[@]+"${unique_filterlists_to_include[@]}"}; do
+      if [[ "$known_filterlist" == "$included_filterlist" ]]; then
+        already_listed=true
+        break
+      fi
+    done
+    if [[ "$already_listed" == false ]]; then
+      unique_filterlists_to_include+=("$included_filterlist")
+    fi
+  done <<< "$included_files_list"
+}
+
+# Checks that the given path is an existing regular file.
+# Arguments: $1 - path to check
+# Globals:   last_error (written on failure)
+# Returns:   0 when the file exists, 1 otherwise
+check_if_file_exists() {
+  local file_path="$1"
+  [[ -f "$file_path" ]] && return 0
+  last_error="ERROR: File $file_path does not exist"
+  return 1
+}
+
+# Checks that the content of a file is accepted by `jq -e .`.
+# Arguments: $1 - path to the JSON file
+# Globals:   last_error (written on failure)
+# Returns:   0 when jq accepts the content, 1 otherwise
+check_if_valid_json() {
+  local json_file_path="$1"
+  jq -e . >/dev/null 2>&1 <<< "$(cat "$json_file_path")" && return 0
+  last_error="ERROR: Invalid JSON content in $json_file_path"
+  return 1
+}
-for key in $(jq -r '.[]' <<< "$keys"); do
- if ! [[ "$key" =~ ^[[:alnum:]_]+$ ]]; then
- echo "invalid key: $key, only alphanumeric characters and underscores are allowed"
- invalid=true
+# Checks that a JSON file is an object whose values are all arrays of strings:
+# { "variable1": ["domain1", "domain2" (...)], "variable2": ["domain1", "domain3" (...)], (...)}
+# Arguments: $1 - path to the JSON file
+# Globals:   last_error (written on failure)
+# Returns:   0 when the structure matches, 1 otherwise (including unparsable files)
+check_if_correct_domains_variables_json_structure() {
+  local json_file_path="$1"
+  local shape_filter='
+    type == "object" and
+    ([keys[] as $k | .[$k] | type == "array" and all(.[]; type == "string")] | all)
+  '
+  jq -e "$shape_filter" "$json_file_path" >/dev/null 2>&1 && return 0
+  last_error="ERROR: JSON structure is invalid in $json_file_path"
+  return 1
+}
+
+# Fails when the same variable name appears more than once in the name list of one file.
+# Arguments: $1 - path of the JSON file (used in the message only)
+#            $2 - newline-separated variable names
+# Globals:   last_error (written on failure)
+check_if_duplicated_domains_variable_name_in_single_file() {
+  local json_file_path="$1"
+  local domains_variables_names="$2"
+  local repeated_names
+  # uniq -d prints one line per name that occurs at least twice in the sorted input
+  repeated_names="$(echo "$domains_variables_names" | sort | uniq -d)"
+  if [[ -n "$repeated_names" ]]; then
+    last_error="ERROR: Duplicate domains_variables_names found in $json_file_path"
+    return 1
 fi
- elements=$(jq -r ".[\"$key\"][]" <<< "$object")
- for element in $elements; do
- if ! [[ "$element" =~ ^[[:alnum:]*][[:alnum:]*.-]+$ ]]; then
- echo "invalid domain in $key: $element"
- invalid=true
+}
+
+# Accepts only names made of alphanumeric characters and underscores.
+# Arguments: $1 - variable name to validate
+# Globals:   last_error (written on failure)
+# Returns:   0 for a valid name, 1 otherwise
+check_if_valid_domains_variable_name() {
+  local domains_variable_name="$1"
+  [[ "$domains_variable_name" =~ ^[[:alnum:]_]+$ ]] && return 0
+  last_error="ERROR: Invalid domains variable name: $domains_variable_name, only alphanumeric characters and underscores are allowed"
+  return 1
+}
+
+# Fails when a domain appears more than once (ignoring case) in one domains variable.
+# Arguments: $1 - path of the JSON file (used in the message only)
+#            $2 - name of the domains variable
+#            $3 - newline-separated domains of that variable
+# Globals:   last_error (written on failure; lists the duplicated domains)
+check_if_duplicated_domains() {
+  local json_file_path="$1"
+  local key="$2"
+  local value="$3"
+  local duplicated_domains
+  # uniq only compares adjacent lines, so the sort must ignore case as well (-f);
+  # a plain sort in the C locale puts "A.com" and "a.com" far apart.
+  duplicated_domains=$(sort -f <<< "$value" | uniq -di)
+  if [[ -n "$duplicated_domains" ]]; then
+    last_error="ERROR: There are duplicated domains in $key in file $json_file_path:\n"
+    last_error+="$duplicated_domains"
+    return 1
+  fi
+}
+
+# Validates every domain of a domains variable against the expected domain pattern.
+# Arguments: $1 - path of the JSON file (used in the message only)
+#            $2 - name of the domains variable
+#            $3 - whitespace-separated domains
+# Globals:   last_error (written on failure)
+# Returns:   1 on the first domain that does not match
+check_if_correct_domain() {
+  local json_file_path="$1"
+  local domains_variable_name="$2"
+  local domains="$3"
+  local domain
+  local -a domain_list=()
+  # Split on whitespace without pathname expansion: a valid wildcard domain such as
+  # "example.*" must not be replaced by matching file names from the working directory.
+  # read returns non-zero at end of input, hence the "|| true".
+  IFS=$' \t\n' read -r -d '' -a domain_list <<< "$domains" || true
+  for domain in ${domain_list[@]+"${domain_list[@]}"}; do
+    # Check if the domain matches the expected pattern
+    if ! grep -qP "^(?:(?:(?!-)[A-Za-z0-9-]{1,63}(?<!-)\.)*(?:[A-Za-z]{2,})$)|(?:^(?:(?!-)[A-Za-z0-9-]{1,63}(?<!-)\.)+\*)$" <<< "$domain"; then
+      last_error="ERROR: Invalid domain in $domains_variable_name: $domain in file $json_file_path"
+      return 1
 fi
 done
-done
+}
+
+# Runs all per-file checks on the domains variables of one JSON file.
+# Arguments: $1 - path to the JSON file
+#            $2 - newline-separated variable names extracted from that file
+# A failing check is expected to abort the script through `set -e` / the ERR trap.
+check_if_correct_domains_variables() {
+  local json_file_path="$1"
+  local domains_variables_names="$2"
+  local domains_variable_name domains
+
+  check_if_duplicated_domains_variable_name_in_single_file "$json_file_path" "$domains_variables_names"
+
+  # Read one name per line so that a name containing spaces or glob characters reaches
+  # the name validation intact instead of being split into harmless-looking words.
+  # File descriptor 3 keeps the loop input away from commands that read stdin.
+  while IFS= read -r -u 3 domains_variable_name; do
+    [[ -n "$domains_variable_name" ]] || continue
+    check_if_valid_domains_variable_name "$domains_variable_name"
+    # Extract the domains of this variable; a key jq cannot find yields an empty list
+    domains=$(jq -r --arg key "$domains_variable_name" '.[$key] // [] | .[]' "$json_file_path")
+    check_if_duplicated_domains "$json_file_path" "$domains_variable_name" "$domains"
+    check_if_correct_domain "$json_file_path" "$domains_variable_name" "$domains"
+  done 3<<< "$domains_variables_names"
+}
+
+# Records the variables of one JSON file in the global bookkeeping objects.
+# Arguments: $1 - newline-separated variable names of the file
+#            $2 - path to the JSON file
+# Globals:   all_domains_variables, all_json_files_contents, variables_in_json_files
+#            (all read and written)
+update_domains_variables_data() {
+  local domains_variables_names="$1"
+  local json_file_path="$2"
+  local json_file_content=$(jq -c . < "$json_file_path")
+  # Turn the newline-separated names into a JSON array, dropping empty lines
+  local names_as_json=$(jq -R -s 'split("\n") | map(select(length > 0))' <<< "$domains_variables_names")
+
+  # Append the names to the global array, keeping it de-duplicated
+  all_domains_variables=$(jq -n --argjson all_domains_variables "$all_domains_variables" --argjson domains_variables_names "$names_as_json" '$all_domains_variables + $domains_variables_names | unique')
+
+  # Store the parsed file content under its path
+  all_json_files_contents=$(jq --arg json_file_path "$json_file_path" --argjson json_file_content "$json_file_content" '.[$json_file_path] = $json_file_content' <<< "$all_json_files_contents")
+
+  # Store the keys of this file under its path; the content is the value stored just above
+  variables_in_json_files=$(jq --arg key "$json_file_path" --argjson json_file_content "$json_file_content" '. * {($key): ($json_file_content | keys)}' <<< "$variables_in_json_files")
+}
+
+# Fails when a variable name of the current file was already collected from another file.
+# Arguments: $1 - path of the JSON file being checked
+#            $2 - whitespace-separated variable names of that file
+# Globals:   all_domains_variables, all_json_files_contents (read), last_error (written)
+check_if_duplicated_domains_variable_name_between_files() {
+  local json_file_path="$1"
+  local domains_variables_names="$2"
+  for domains_variable_name in $domains_variables_names; do
+    # Names that are not in the global array yet are fine
+    jq -e --arg name "$domains_variable_name" 'index($name) != null' <<< "$all_domains_variables" >/dev/null 2>&1 || continue
+
+    # Look up the first already-processed file that defines the same name
+    local file_with_duplicate=$(jq -r --arg name "$domains_variable_name" '
+      to_entries | map(select(.value | has($name))) | .[0].key
+    ' <<< "$all_json_files_contents")
+    last_error="ERROR: Duplicate domains variable found in $json_file_path and $file_with_duplicate file: $domains_variable_name"
+    return 1
+  done
+}
+
+# Prints the lines of a filter list that contain a %<{...}>% domains variable.
+# Arguments: $1 - content of the filter list
+#            $2 - path of the filter list (used as key and in messages)
+# Globals:   variables_in_filterlists (written when nothing is found), last_error
+# Outputs:   the matching lines without a trailing newline, or the sentinel line
+#            "No domains variables found in <path>" when there is none
+# Returns:   1 when grep itself fails (for example when -P is not supported)
+find_domain_variables_in_filterlist() {
+  local filterlist_content="$1"
+  local filterlist_path="$2"
+  local all_lines_with_domain_variables_in_filterlist
+  local grep_status=0
+
+  # Declaration and assignment are separate: `local var=$(...)` always succeeds, which
+  # would hide grep's exit status and make the "nothing found" branch unreachable.
+  all_lines_with_domain_variables_in_filterlist=$(grep -P '%<\{.*\}>%' <<< "$filterlist_content") || grep_status=$?
+
+  # Status 1 means "no match"; anything higher is a real grep error
+  if (( grep_status > 1 )); then
+    last_error="ERROR: grep failed while looking for domains variables in $filterlist_path"
+    return 1
+  fi
+  if (( grep_status == 1 )); then
+    variables_in_filterlists=$(jq --arg key "$filterlist_path" --argjson value "[]" '.[$key] = $value' <<< "$variables_in_filterlists")
+    echo "No domains variables found in $filterlist_path"
+    return 0
+  fi
+  printf '%s' "$all_lines_with_domain_variables_in_filterlist"
+}
+
+# Validates every filter that contains a domains variable and collects the variable names.
+# Arguments: $1 - newline-separated filters containing a %<{...}>% placeholder
+# Globals:   last_error (written on failure)
+# Outputs:   JSON array with one variable name per filter, without a trailing newline
+# Returns:   1 when a filter holds more than one variable or uses it in a wrong position
+process_filters() {
+  local all_lines_with_domain_variables_in_filterlist="$1"
+  local collected_matches='[]'
+  local filter domains_variable_match
+
+  # One filter per line: filters may contain spaces and glob characters, so they must
+  # not be word-split or pathname-expanded.
+  while IFS= read -r filter; do
+    [[ -n "$filter" ]] || continue
+    # Extract the name(s) between %<{ and }>%; grep -o prints one match per line
+    domains_variable_match=$(grep -oP '(?<=%<\{).*?(?=\}>%)' <<< "$filter" || true)
+    if [ "$(wc -l <<< "$domains_variable_match")" -gt 1 ]; then
+      last_error="ERROR: More than one domain variable found in filter: $filter"
+      return 1
+    fi
+    # Ensure the domain variable is correctly formatted in the filter
+    if ! grep -qP '(%<{(\w+)}>%(?:,~?[a-zA-Z0-9*.~-]+)*#[?@$]?#)|([,$]domain=(?:[a-zA-Z0-9*.~-]+\|)*%<{(\w+)}>%)' <<< "$filter"; then
+      last_error="ERROR: Domain variable added in a wrong way in filter: $filter"
+      return 1
+    fi
+    collected_matches=$(jq --arg domains_variable_match "$domains_variable_match" '. + [$domains_variable_match]' <<< "$collected_matches")
+  done <<< "$all_lines_with_domain_variables_in_filterlist"
+
+  # To avoid \n at the end
+  printf '%s' "$collected_matches"
+}
+
+# Stores the variable names found in one filter list in the global bookkeeping data.
+# Arguments: $1 - JSON array of variable names found in the filter list
+#            $2 - path of the filter list
+# Globals:   all_domain_variables_matches_in_filterlists, variables_in_filterlists
+#            (both read and written)
+update_matches_and_variables() {
+  local collected_matches="$1"
+  local file_path="$2"
+
+  # Extend the global, de-duplicated array of names seen in any filter list
+  all_domain_variables_matches_in_filterlists=$(jq --argjson matches "$collected_matches" '. + $matches | unique' <<< "$all_domain_variables_matches_in_filterlists")
+  # Remember which names belong to this particular filter list
+  variables_in_filterlists=$(jq -n --argjson current "$variables_in_filterlists" --arg key "$file_path" --argjson value "$collected_matches" '$current | .[$key] = $value')
+}
+
+# Prints the domains variables used by the filter lists that a template includes.
+# Arguments: $1 - template name (key of templates_content)
+# Globals:   templates_content, variables_in_filterlists (read)
+# Outputs:   the variable names on one line, separated by spaces (empty line if none)
+extract_domains_variables_in_included_filterlists() {
+  local template_name="$1"
+  local included_filterlists included_filterlist domains_variables domain_variable
+  local domains_variables_in_included_filterlists=()
+  # Extract the list of included filterlists from the template, one path per line
+  included_filterlists=$(jq -r --arg template_name "$template_name" '.[$template_name].include[]' <<< "$templates_content")
+
+  # Line-based iteration keeps paths containing spaces in one piece
+  while IFS= read -r included_filterlist; do
+    [[ -n "$included_filterlist" ]] || continue
+    # A filter list without an entry simply contributes no variables
+    domains_variables=$(jq -r --arg key "$included_filterlist" '.[$key] // [] | .[]' <<< "$variables_in_filterlists")
+    while IFS= read -r domain_variable; do
+      [[ -n "$domain_variable" ]] || continue
+      domains_variables_in_included_filterlists+=("$domain_variable")
+    done <<< "$domains_variables"
+  done <<< "$included_filterlists"
+
+  # [*] joins with a space; the "-" default keeps an empty array safe under `set -u`
+  printf '%s\n' "${domains_variables_in_included_filterlists[*]-}"
+}
+
+# Prints the domains variables defined by the JSON files that a template references.
+# Arguments: $1 - template name (key of templates_content)
+# Globals:   templates_content, variables_in_json_files (read)
+# Outputs:   the variable names on one line, separated by spaces (empty line if none)
+extract_domains_variables_in_included_json_files() {
+  local template_name="$1"
+  local included_json_files included_json_file domains_variables domain_variable
+  local domains_variables_in_included_json_files=()
+  # Extract the list of included JSON files from the template, one path per line
+  included_json_files=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< "$templates_content")
-if [ "$invalid" = true ]; then
- exit 1
-fi
+  # Line-based iteration keeps paths containing spaces in one piece
+  while IFS= read -r included_json_file; do
+    [[ -n "$included_json_file" ]] || continue
+    # A JSON file without an entry simply contributes no variables
+    domains_variables=$(jq -r --arg key "$included_json_file" '.[$key] // [] | .[]' <<< "$variables_in_json_files")
+    while IFS= read -r domain_variable; do
+      [[ -n "$domain_variable" ]] || continue
+      domains_variables_in_included_json_files+=("$domain_variable")
+    done <<< "$domains_variables"
+  done <<< "$included_json_files"
+
+  # [*] joins with a space; the "-" default keeps an empty array safe under `set -u`
+  printf '%s\n' "${domains_variables_in_included_json_files[*]-}"
+}
+
+# Checks that every domains variable used by a template's filter lists is defined in one
+# of the domains-variables files referenced by the same template.
+# Arguments: $1 - template name (key of templates_content)
+#            $2 - whitespace-separated variable names used in the included filter lists
+#            $3 - whitespace-separated variable names defined in the included JSON files
+# Globals:   templates_content (read), last_error (written on failure)
+# Returns:   1 on the first variable that has no definition
+check_domain_variables_in_filterlists() {
+  local template_name="$1"
+  local domains_variables_in_included_filterlists=()
+  local domains_variables_in_included_json_files=()
+  local included_filterlists included_json_files
+  local domain_variable_in_filterlist domain_variable_in_json_file found
+
+  # Split on whitespace without pathname expansion. An empty argument leaves the array
+  # empty; read returns non-zero at end of input, hence the "|| true".
+  if [ -n "${2-}" ]; then
+    IFS=$' \t\n' read -r -d '' -a domains_variables_in_included_filterlists <<< "$2" || true
+  fi
+  if [ -n "${3-}" ]; then
+    IFS=$' \t\n' read -r -d '' -a domains_variables_in_included_json_files <<< "$3" || true
+  fi
+
+  # Extract the list of included filterlists and JSON files from the template
+  included_filterlists=$(jq -r --arg template_name "$template_name" '.[$template_name].include[]' <<< "$templates_content")
+  included_json_files=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< "$templates_content")
+
+  # The ${var+...} form keeps empty arrays safe under `set -u` on older bash releases
+  for domain_variable_in_filterlist in ${domains_variables_in_included_filterlists[@]+"${domains_variables_in_included_filterlists[@]}"}; do
+    found=false
+    for domain_variable_in_json_file in ${domains_variables_in_included_json_files[@]+"${domains_variables_in_included_json_files[@]}"}; do
+      if [ "$domain_variable_in_filterlist" = "$domain_variable_in_json_file" ]; then
+        found=true
+        break
+      fi
+    done
+    if [[ "$found" == false ]]; then
+      last_error="Error: One of the filterlists:\n\n"
+      last_error+="$included_filterlists\n\n"
+      # The trailing space separates the variable name from the rest of the sentence
+      last_error+="included in the template $template_name contain a domain variable $domain_variable_in_filterlist "
+      last_error+="which wasn't found in any of the domains variables files included in that template:\n\n"
+      last_error+="$included_json_files"
+      return 1
+    fi
+  done
+}
+
+# Fails when the set of variables defined in the JSON files differs from the set of
+# variables used in the filter lists.
+# Globals: all_domains_variables, all_domain_variables_matches_in_filterlists (read),
+#          last_error (written on failure; lists the extra names on both sides)
+check_if_domains_variables_are_identical_in_lists_and_jsons() {
+  # Both values are compared as text
+  [ "$all_domains_variables" != "$all_domain_variables_matches_in_filterlists" ] || return 0
+
+  local only_in_json_files only_in_filterlists
+  only_in_json_files=$(jq -n --argjson all_domains_variables "$all_domains_variables" --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" '$all_domains_variables - $all_domain_variables_matches_in_filterlists')
+  only_in_filterlists=$(jq -n --argjson all_domains_variables "$all_domains_variables" --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" '$all_domain_variables_matches_in_filterlists - $all_domains_variables')
+
+  last_error="Error: the domain variables in domain-variables file and the filter list are not the same\n"
+  last_error+="Extra variables in domain-variables files:\n${only_in_json_files}\n"
+  last_error+="Extra variables in filter lists:\n${only_in_filterlists}"
+  return 1
+}
-exit 0
+# Runs all checks: parses every *.txt template in the current directory, validates the
+# referenced domains-variables JSON files and filter lists, and cross-checks that the
+# variables used and the variables defined agree.
+# Globals: templates_names and the other bookkeeping globals (written via the helpers),
+#          last_error (written when a captured helper fails)
+# Returns: 1 when a helper whose output is captured reports an error; other failing
+#          checks are expected to abort the script through `set -e` / the ERR trap.
+main() {
+  local template_name file_data
+  local domains_variables_path domains_variables_names
+  local filterlist_path filterlist_content
+  local all_lines_with_domain_variables_in_filterlist collected_matches
+  local domains_variables_in_included_filterlists domains_variables_in_included_json_files
+
+  check_if_jq_is_installed
+  for template_name in *.txt; do
+    echo "$template_name"
+    templates_names+=("$template_name")
+    # The helper runs in a subshell, so the last_error it sets would be lost. On failure
+    # the subshell prints that message instead, and it is captured and re-published here.
+    # A `$?` test after a plain assignment is never reached under `set -e`.
+    if ! file_data=$(parse_template_data "$template_name" || { printf '%s' "$last_error"; exit 1; }); then
+      last_error="${file_data:-ERROR: Could not parse template $template_name}"
+      return 1
+    fi
+    # Update the templates_content JSON with the data from the file
+    update_templates_content "$file_data"
+
+    update_unique_json_files "$template_name" "$file_data"
+    update_unique_includes "$template_name" "$file_data"
+  done
+
+  # The ${var+...} form keeps empty arrays safe under `set -u` on older bash releases
+  for domains_variables_path in ${unique_json_files[@]+"${unique_json_files[@]}"}; do
+    check_if_file_exists "$domains_variables_path"
+    check_if_correct_domains_variables_json_structure "$domains_variables_path"
+
+    # jq would silently drop duplicated keys, so the raw key names are extracted with perl
+    domains_variables_names=$(perl -0777 -ne 'print "$1\n" while /"([^"]+?)"(?=[\s\r\n]*:)/g' "$domains_variables_path")
+    check_if_correct_domains_variables "$domains_variables_path" "$domains_variables_names"
+    check_if_duplicated_domains_variable_name_between_files "$domains_variables_path" "$domains_variables_names"
+    update_domains_variables_data "$domains_variables_names" "$domains_variables_path"
+  done
+
+  for filterlist_path in ${unique_filterlists_to_include[@]+"${unique_filterlists_to_include[@]}"}; do
+    check_if_file_exists "$filterlist_path"
+    filterlist_content=$(<"$filterlist_path")
+
+    # This regex check is simpler than in filterlist delivery to also catch domains variables in the wrong place
+    # without starting with a complex regex. The full regex is in one of the next steps
+    if ! all_lines_with_domain_variables_in_filterlist=$(find_domain_variables_in_filterlist "$filterlist_content" "$filterlist_path" || { printf '%s' "$last_error"; exit 1; }); then
+      last_error="${all_lines_with_domain_variables_in_filterlist:-ERROR: Could not scan filter list $filterlist_path}"
+      return 1
+    fi
+    if [ "$all_lines_with_domain_variables_in_filterlist" = "No domains variables found in $filterlist_path" ]; then
+      echo "$all_lines_with_domain_variables_in_filterlist"
+      collected_matches='[]'
+    elif ! collected_matches=$(process_filters "$all_lines_with_domain_variables_in_filterlist" || { printf '%s' "$last_error"; exit 1; }); then
+      last_error="${collected_matches:-ERROR: Could not process the filters of $filterlist_path}"
+      return 1
+    fi
+    update_matches_and_variables "$collected_matches" "$filterlist_path"
+  done
+
+  for template_name in ${templates_names[@]+"${templates_names[@]}"}; do
+    domains_variables_in_included_filterlists=$(extract_domains_variables_in_included_filterlists "$template_name")
+    domains_variables_in_included_json_files=$(extract_domains_variables_in_included_json_files "$template_name")
+
+    check_domain_variables_in_filterlists "$template_name" "$domains_variables_in_included_filterlists" "$domains_variables_in_included_json_files"
+  done
+
+  check_if_domains_variables_are_identical_in_lists_and_jsons
+}
+
+# Entry point. The success message below is only reached when main returns 0; a failing
+# check is expected to leave the script through the ERR trap (error_handler) first.
+main
+
+echo "All tests passed"
+exit 0
\ No newline at end of file