# HG changeset patch # User eyeokg # Date 1730304536 -3600 # Node ID 2a19e3a985b6f33402045af74d06577f7f5d49af # Parent 4cbb4d57425545dccdf473cd56dc15bfe86e551d Moved pre-commit files and added first test diff --git a/.gitignore b/.gitignore deleted file mode 100644 --- a/.gitignore +++ /dev/null @@ -1,1 +0,0 @@ -pre-commit \ No newline at end of file diff --git a/README.md b/README.md --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ # Exceptionrules This is a project which contains filter rules in order for Acceptable Ads program to work ***IMPORTANT:*** After cloning the project, you need to run the following command in order to add the pre-commit hook: ```sh -ln -s -f "$(pwd)/pre-commit" "$(pwd)/.git/hooks/pre-commit" +ln -s -f "$(pwd)/pre-commit-src/pre-commit" "$(pwd)/.git/hooks/pre-commit" ``` If you are Windows user, you might need additional bash shell for pre-commit to work. For example the one used in [git for windows](https://gitforwindows.org/) should be sufficient. When you add files/change their names remember to add it to template files and inform AFB team. \ No newline at end of file diff --git a/pre-commit-src/pre-commit b/pre-commit-src/pre-commit new file mode 100755 --- /dev/null +++ b/pre-commit-src/pre-commit @@ -0,0 +1,490 @@ +#!/bin/bash + +# Improve error handling, option e is not picked because it's expected for functions +# to return non-0 statuses. +set -Eeuo pipefail + +# Allow user input during commit +exec < /dev/tty + +templates_content='{}' +templates_names=() +unique_json_files=() +unique_filterlists_to_include=() +all_domains_variables='[]' +all_domains_variables_names='{}' +all_json_files_contents='{}' +variables_in_json_files='{}' +variables_in_filterlists='{}' +all_domain_variables_matches_in_filterlists='[]' +last_error='' +testing=false + +error_handler() { + local exit_code=$? 
+ local line_number=$1 + echo "Error: Script failed with exit code $exit_code at line $line_number" + if [ "$BASH_COMMAND" = "return 1" ]; then + echo -e "Last error message:\n$last_error" + else + echo -e "\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n" + echo "THIS ERROR SHOULDN'T HAPPEN, PLEASE REPORT IT TO AFB TEAM OR KRIS" + echo -e "\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n" + echo "Last executed command: $BASH_COMMAND" + fi + if [ "$testing" = true ]; then + exit 0 + else + exit $exit_code + fi +} + +# Set up trap to catch errors and invoke the error handler +trap 'error_handler $LINENO' ERR + +check_git_status() { + local status=$(git status) + + if grep -q "Changes not staged for commit" <<< $status || grep -q "Untracked files" <<< $status; then + read -p "There are changes not staged for commit. The script will check only the staged version. Do you want to continue? (y/n): " choice + case "$choice" in + # Echo empty line + y|Y ) echo "";; + n|N ) echo "Aborting."; exit 1;; + * ) echo "Invalid choice. Aborting."; exit 1;; + esac + fi +} + +check_if_jq_is_installed() { + if ! command -v jq &>/dev/null; then + last_error="ERROR: jq is not installed. Please install jq to continue." + return 1 + fi +} + +check_if_file_exists() { + local file_path="$1" + if ! [ -f "$file_path" ]; then + last_error="ERROR: File $file_path does not exist" + return 1 + fi +} + +get_staged_version_of_a_file() { + local file_name="$1" + + # Using name reference in order to not create subshells for each assignment and allow to use global variables + # and global error handling + local -n staged_file_content_nameref=$2 + + check_if_file_exists "$file_name" + if git show :"$file_name" >/dev/null 2>&1; then + staged_file_content_nameref="$(git show :"$file_name")" + else + last_error="ERROR: File $file_name was requested by a template but it's not tracked neither staged." 
+        return 1
+    fi
+}
+parse_template_data() { # $1: template file path; $2: name of the caller's variable that receives the JSON
+    local template="$1"
+    local -n file_data_nameref=$2 # nameref: the assignment at the end writes into the caller's variable
+    local staged_template
+
+    get_staged_version_of_a_file "$template" staged_template
+
+    # Extract lines starting with %domainsVariables exceptionrules: and process them with jq
+    local json_files_in_template
+    json_files_in_template=$(grep "^%domainsVariables exceptionrules:" <<< "$staged_template" | sed 's/^%domainsVariables exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))') || json_files_in_template="[]"
+    # Extract lines starting with %include exceptionrules: and process them with jq
+    local included_filterlists_files_in_template
+    included_filterlists_files_in_template=$(grep "^%include exceptionrules:" <<< "$staged_template" | sed 's/^%include exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))') || included_filterlists_files_in_template="[]"
+
+    if [ "$included_filterlists_files_in_template" = "[]" ]; then
+        last_error="ERROR: There is no list included in template $template" # use the parameter, not a variable of the caller
+        return 1
+    fi
+
+    # Create a JSON object with the template name as the key and the extracted domainsVariables and include data as values
+    file_data_nameref=$(jq -n --arg template "$template" --argjson json_files_in_template "$json_files_in_template" --argjson included_filterlists_files_in_template "$included_filterlists_files_in_template" '
+    {
+        $template: {
+            "domainsVariables": $json_files_in_template,
+            "include": $included_filterlists_files_in_template
+        }
+    }')
+}
+
+update_templates_content() {
+    local file_template="$1"
+    # Merge the new template data into the existing templates_content JSON
+    templates_content=$(jq -n --argjson templates_content "$templates_content" --argjson file_template "$file_template" '$templates_content + $file_template')
+}
+
+update_unique_json_files() {
+    local template_name="$1"
+    local file_data="$2"
+    # Extract the list of domainsVariables files from the template data
+    local
json_files_list=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< "$file_data")
+    for json_file in $json_files_list; do
+        if ! grep -qxF -- "$json_file" <<< "$(printf '%s\n' "${unique_json_files[@]}")"; then # -x: exact entry match, one entry per line
+            unique_json_files+=("$json_file")
+        fi
+    done
+}
+
+update_unique_includes() { # $1: template name (key in $2); $2: template data JSON
+    local template_name="$1"
+    local file_data="$2"
+    # Extract the list of included files from the template data
+    local included_files_list=$(jq -r --arg file "$template_name" '.[$file].include[]' <<< "$file_data")
+
+    for included_filterlist in $included_files_list; do
+        if ! grep -qxF -- "$included_filterlist" <<< "$(printf '%s\n' "${unique_filterlists_to_include[@]}")"; then # exact match, so "a.txt" is not hidden by "dir/a.txt"
+            unique_filterlists_to_include+=("$included_filterlist")
+        fi
+    done
+}
+
+check_if_valid_json() {
+    local json_file_path="$1"
+    if ! jq -e . >/dev/null 2>&1 <<< "$(cat "$json_file_path")"; then
+        last_error="ERROR: Invalid JSON content in $json_file_path"
+        return 1
+    fi
+}
+
+check_if_correct_domains_variables_json_structure() { # $1: JSON text; $2 (optional): file path used in the error message
+    local json_file_content="$1"
+    local json_file_path="${2:-${json_file_path:-${domains_variables_path:-unknown file}}}" # never unbound under "set -u"
+    # Check if the JSON structure is valid and matches the expected format:
+    # { "variable1": ["domain1", "domain2" (...)], "variable2": ["domain1", "domain3" (...)], (...)}
+    if ! jq -e '
+        type == "object" and
+        ([keys[] as $k | .[$k] | type == "array" and all(.[]; type == "string")] | all)
+    ' <<< "$json_file_content" >/dev/null 2>&1; then
+        last_error="ERROR: JSON structure is invalid in $json_file_path"
+        return 1
+    fi
+}
+
+check_if_duplicated_domains_variable_name_in_single_file() {
+    local json_file_path="$1"
+    local domains_variables_names="$2"
+    if [ "$(echo "$domains_variables_names" | sort | uniq -d)" ]; then
+        last_error="ERROR: Duplicate domains_variables_names found in $json_file_path"
+        return 1
+    fi
+}
+
+check_if_valid_domains_variable_name() {
+    local domains_variable_name="$1"
+    if !
[[ "$domains_variable_name" =~ ^[[:alnum:]_]+$ ]]; then + last_error="ERROR: Invalid domains variable name: $domains_variable_name, only alphanumeric characters and underscores are allowed" + return 1 + fi +} + +check_if_duplicated_domains() { + local json_file_path="$1" + local key="$2" + local value="$3" + # Value of domainsVariable is an array of strings representing domains, + # therefore the error message mentions domains. + if [ -n "$(echo "$value" | sort | uniq -di)" ]; then + last_error="ERROR: There are duplicated domains in $key in file $json_file_path:\n" + last_error+="$(echo "$value" | sort | uniq -di)" + return 1 + fi +} + +check_if_correct_domain() { + local json_file_path="$1" + local domains_variable_name="$2" + local domains="$3" + for domain in $domains; do + # Check if the domain matches the expected pattern + if ! echo "$domain" | grep -qP "^(?:(?:(?!-)[A-Za-z0-9-]{1,63}(? 0))')" '$all_domains_variables + $domains_variables_names | unique') + + # Add the JSON file content to the all_json_files_contents object + all_json_files_contents=$(echo "$all_json_files_contents" | jq --arg json_file_path "$json_file_path" --argjson json_file_content "$json_file_content" '.[$json_file_path] = $json_file_content') + + # Update the variables_in_json_files object with the keys from the JSON file + variables_in_json_files=$(echo "$variables_in_json_files" | jq --arg key "$json_file_path" --argjson value "$(echo "$all_json_files_contents" | jq 'to_entries | map({key: .key, value: (.value | keys)}) | from_entries' | jq --arg key "$json_file_path" '.[$key]')" '. 
* {($key): $value}')
+}
+
+check_if_duplicated_domains_variable_name_between_files() {
+    local json_file_path="$1"
+    local domains_variables_names="$2"
+    for domains_variable_name in $domains_variables_names; do
+
+        # Check if the domains variable name exists in the all_domains_variables array
+        if jq -e --arg name "$domains_variable_name" 'index($name) != null' <<< "$all_domains_variables" >/dev/null 2>&1; then
+            # If a duplicate is found, identify the file containing the duplicate and report an error
+            local file_with_duplicate=$(jq -r --arg name "$domains_variable_name" '
+                to_entries | map(select(.value | has($name))) | .[0].key
+            ' <<< "$all_json_files_contents")
+            last_error="ERROR: Duplicate domains variable found in $json_file_path and $file_with_duplicate file: $domains_variable_name"
+            return 1
+        fi
+    done
+}
+
+find_domain_variables_in_filterlist() {
+    local filterlist_content="$1"
+    local filterlist_path="$2"
+    local -n all_lines_with_domain_variables_in_filterlist_nameref=$3
+    # Find lines containing domain variables in the filterlist
+    all_lines_with_domain_variables_in_filterlist_nameref=$(grep -P '%<\{.*\}>%' <<< "$filterlist_content") || all_lines_with_domain_variables_in_filterlist_nameref=''
+}
+
+process_filters() { # $1: newline-separated filter lines; $2: name of the caller's variable that receives a JSON array
+    local all_lines_with_domain_variables_in_filterlist="$1"
+    local -n domains_variables_collected_from_filterlist_nameref="$2"
+    domains_variables_collected_from_filterlist_nameref='[]'
+    local IFS=$'\n' # split the list below on newlines only, so a filter that contains spaces stays one item
+    for filter in $all_lines_with_domain_variables_in_filterlist; do
+        # Extract the domain variable from the filter
+        local domains_variable_match=$(grep -oP '(?<=%<\{).*?(?=\}>%)' <<< "$filter")
+        if [ "$(echo "$domains_variable_match" | wc -l)" -gt 1 ]; then
+            last_error="ERROR: More than one domain variable found in filter: $filter"
+            return 1
+        fi
+        # Ensure the domain variable is correctly formatted in the filter
+        local true_matches=$(grep -P '(%<{(\w+)}>%(?:,~?[a-zA-Z0-9*.~-]+)*#[?@$]?#)|([,$]domain=(?:[a-zA-Z0-9*.~-]+\|)*%<{(\w+)}>%)' <<<
"$filter") + + if [ -z "$true_matches" ]; then + last_error="ERROR: Domain variable added in a wrong way in filter: $filter" + return 1 + fi + domains_variables_collected_from_filterlist_nameref=$(jq --arg domains_variable_match "$domains_variable_match" '. + [$domains_variable_match]' <<< "$domains_variables_collected_from_filterlist_nameref") + done +} + +update_matches_and_variables() { + local domains_variables_collected_from_filterlist="$1" + local file_path="$2" + + # Update the list of all domain variable matches in filterlists + all_domain_variables_matches_in_filterlists=$(jq -n --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" --argjson matches "$domains_variables_collected_from_filterlist" '$all_domain_variables_matches_in_filterlists + $matches | unique') + + # Update the variables_in_filterlists object with the matches from the current filterlist + variables_in_filterlists=$(jq --arg key "$file_path" --argjson value "$domains_variables_collected_from_filterlist" '.[$key] = $value' <<< "$variables_in_filterlists") +} + +extract_domains_variables_in_included_filterlists() { + local template_name="$1" + local -n domains_variables_in_included_filterlists_nameref=$2 + # Extract the list of included filterlists from the template + local included_filterlists=$(jq -r --arg template_name "$template_name" '.[$template_name].include[]' <<< "$templates_content") + domains_variables_in_included_filterlists_nameref=() + + for included_filterlist in $included_filterlists; do + # Extract the domain variables from each included filterlist + local domains_variables=$(jq -r --arg key "$included_filterlist" '.[$key][]' <<< "$variables_in_filterlists") + for domain_variable in $domains_variables; do + domains_variables_in_included_filterlists_nameref+=("$domain_variable") + done + done + +} + +extract_domains_variables_in_included_json_files() { + local template_name="$1" + local -n 
domains_variables_in_included_json_files_nameref=$2 + + # Extract the list of included JSON files from the template + local included_json_files=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< "$templates_content") + domains_variables_in_included_json_files_nameref=() + + for included_json_file in $included_json_files; do + # Extract the domain variables from each included JSON file + local domains_variables=$(jq -r --arg key "$included_json_file" '.[$key][]' <<< "$variables_in_json_files") + for domain_variable in $domains_variables; do + domains_variables_in_included_json_files_nameref+=("$domain_variable") + done + done +} + +check_domain_variables_in_filterlists() { + + local template_name="$1" + local domains_variables_in_included_filterlists=() + local domains_variables_in_included_json_files=() + + # When for example $2 was empty, then the array had one element with empty string + if [ -n "$2" ]; then + domains_variables_in_included_filterlists=($2) + fi + if [ -n "$3" ]; then + domains_variables_in_included_json_files=($3) + fi + + # Extract the list of included filterlists and JSON files from the template + local included_filterlists=$(jq -r --arg template_name "$template_name" '.[$template_name].include[]' <<< "$templates_content") + local included_json_files=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< "$templates_content") + + for domain_variable_in_filterlist in ${domains_variables_in_included_filterlists[@]}; do + local found=false + for domain_variable_in_json_file in ${domains_variables_in_included_json_files[@]}; do + if [ "$domain_variable_in_filterlist" = "$domain_variable_in_json_file" ]; then + found=true + break + fi + done + if ! 
$found; then + last_error="Error: One of the filterlists:\n\n" + last_error+="$included_filterlists\n\n" + last_error+="included in the template $template_name contain a domain variable $domain_variable_in_filterlist " + last_error+="which wasn't found in any of the domains variables files included in that template:\n\n" + last_error+="$included_json_files" + return 1 + fi + done +} + +check_if_domains_variables_are_identical_in_lists_and_jsons() { + if [ "$all_domains_variables" != "$all_domain_variables_matches_in_filterlists" ]; then + last_error="Error: the domain variables in domain-variables file and the filter list are not the same\n" + last_error+="Extra variables in domain-variables files:\n" + last_error+="$(jq -n --argjson all_domains_variables "$all_domains_variables" --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" '$all_domains_variables - $all_domain_variables_matches_in_filterlists')\n" + last_error+="Extra variables in filter lists:\n" + last_error+=$(jq -n --argjson all_domains_variables "$all_domains_variables" --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" '$all_domain_variables_matches_in_filterlists - $all_domains_variables') + return 1 + fi +} + +main() { + check_git_status + check_if_jq_is_installed + for template_name in *.txt; do + templates_names+=("$template_name") + # To avoid creating a subshell, the variable is passed as a reference to parse_template_data function + # That helps with the error handling and allows to use global variables + local file_data + # Parse data from the template + parse_template_data "$template_name" file_data + # Update the templates_content JSON with the data from the file + update_templates_content "$file_data" + + update_unique_json_files "$template_name" "$file_data" + update_unique_includes "$template_name" "$file_data" + done + + for domains_variables_path in ${unique_json_files[@]}; do + 
check_if_file_exists "$domains_variables_path" + local staged_domains_variables_file + get_staged_version_of_a_file "$domains_variables_path" staged_domains_variables_file + check_if_correct_domains_variables_json_structure "$staged_domains_variables_file" + + # If jq would be used the duplicates would be automatically removed, therefore I used perl + local domains_variables_names=$(perl -0777 -ne 'print "$1\n" while /"([^"]+?)"(?=[\s\r\n]*:)/g' <<< $staged_domains_variables_file) + check_if_correct_domains_variables "$domains_variables_path" "$domains_variables_names" "$staged_domains_variables_file" + check_if_duplicated_domains_variable_name_between_files "$domains_variables_path" "$domains_variables_names" + update_domains_variables_data "$domains_variables_names" "$domains_variables_path" + done + + for filterlist_path in ${unique_filterlists_to_include[@]}; do + check_if_file_exists "$filterlist_path" + local filterlist_content + get_staged_version_of_a_file "$filterlist_path" filterlist_content + + local all_lines_with_domain_variables_in_filterlist + # This regex check is simpler than in filterlist delivery to also catch domains variables in the wrong place + # without starting with a complex regex. The full regex is in one of the next steps + + find_domain_variables_in_filterlist "$filterlist_content" "$filterlist_path" all_lines_with_domain_variables_in_filterlist + + if [ -z "$all_lines_with_domain_variables_in_filterlist" ]; then + # In case of lack of matches, the value of all_lines_with_domain_variables_in_filterlist should have just + # a message to show. 
+ local domains_variables_collected_from_filterlist='[]' + else + local domains_variables_collected_from_filterlist + process_filters "$all_lines_with_domain_variables_in_filterlist" domains_variables_collected_from_filterlist + fi + + update_matches_and_variables "$domains_variables_collected_from_filterlist" "$filterlist_path" + done + + local domains_variables_in_included_filterlists=() + for template_name in ${templates_names[@]}; do + extract_domains_variables_in_included_filterlists "$template_name" domains_variables_in_included_filterlists + + local domains_variables_in_included_json_files + extract_domains_variables_in_included_json_files "$template_name" domains_variables_in_included_json_files + check_domain_variables_in_filterlists "$template_name" "$(echo ${domains_variables_in_included_filterlists[@]})" "$(echo ${domains_variables_in_included_json_files[@]})" + done + + check_if_domains_variables_are_identical_in_lists_and_jsons +} + +# For testing purposes only if the script has no arguments or the argument is main the process should run +# thanks to that the script can be tested without running the main function +if [ -z "${1:-}" ] || [ "$1" = "main" ]; then + main + + # Check if this file has unstaged changes: + pre_commit_git_status=$(git status :pre-commit-src/pre-commit) + + if grep -q "Changes not staged for commit" <<< $pre_commit_git_status || grep -q "Untracked files" <<< $pre_commit_git_status; then + echo "Unstaged changes detected in pre-commit file. Stage pre-commit changes before continuing." + exit 1 + fi + + pre_commit_tests_git_status=$(git status :pre-commit-src/tests/pre-commit-tests.sh) + if grep -q "Changes not staged for commit" <<< $pre_commit_tests_git_status || grep -q "Untracked files" <<< $pre_commit_tests_git_status; then + echo "Unstaged changes detected in pre-commit-tests file. Stage pre-commit changes before continuing." + exit 1 + fi + + # Run unit tests. 
The workaround with exit_code is meant to avoid failed test to trigger error_handling
+    # "cmd || var=$?" records the real status of the test run; a command on the
+    # left of "||" triggers neither errexit nor the ERR trap. Reading $? on the
+    # following line would be too late: by then it holds the status of the
+    # whole "||" list, which is 0 whether or not the tests failed.
+    exit_code=0
+    ./pre-commit-src/tests/pre-commit-tests.sh || exit_code=$?
+
+    if [ "$exit_code" -ne 0 ]; then
+        echo "Tests failed with exit code $exit_code"
+    else
+        echo "Tests passed successfully"
+    fi
+    exit "$exit_code"
+
+elif [ "$1" = "--load-only" ]; then
+    testing=true
+    echo "Script loaded successfully"
+else
+    "$@"
+fi
diff --git a/pre-commit-src/tests/files/.gitkeep b/pre-commit-src/tests/files/.gitkeep
new file mode 100644
diff --git a/pre-commit-src/tests/pre-commit-tests.sh b/pre-commit-src/tests/pre-commit-tests.sh
new file mode 100755
--- /dev/null
+++ b/pre-commit-src/tests/pre-commit-tests.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# Test script for pre-commit
+
+# Load the pre-commit script
+source ./pre-commit-src/pre-commit --load-only
+
+score=0
+total=0
+
+# Test function for check_if_file_exists
+test_check_if_file_exists() {
+    echo "Running test_check_if_file_exists..."
+
+    local exit_code=0
+    # Test with a non-existent file
+    # "|| exit_code=$?" captures the failure status in place: a command on the
+    # left of "||" does not trigger errexit or the ERR trap that the sourced
+    # pre-commit script installs (in testing mode that trap would exit 0).
+    check_if_file_exists "./pre-commit-src/tests/files/not_existing_file.dummy" || exit_code=$?
+
+    if [ "$exit_code" -ne 0 ]; then
+        echo "Test passed: non-existent file"
+        score=$((score+1))
+    else
+        echo "Test failed: non-existent file"
+    fi
+    total=$((total+1))
+
+    # Test with an existing file
+    touch "./pre-commit-src/tests/files/not_existing_file.dummy"
+    exit_code=0
+    check_if_file_exists "./pre-commit-src/tests/files/not_existing_file.dummy" || exit_code=$?
+    rm "./pre-commit-src/tests/files/not_existing_file.dummy"
+    if [ "$exit_code" -eq 0 ]; then
+        echo "Test passed: existent file"
+        score=$((score+1))
+    else
+        echo "Test failed: existent file"
+    fi
+    total=$((total+1))
+}
+test_check_if_file_exists
+
+echo "All tests completed."
+
+# Final report: exit 1 unless the number of passed tests equals the number of tests run.
+if ! [ "$score" -eq "$total" ]; then
+    echo "$((total-score)) tests from pre-commit-tests.sh failed out of $total in total."
+    exit 1
+fi
+echo "All tests from pre-commit-tests.sh passed."
+exit 0
\ No newline at end of file