# HG changeset patch # User KrzGalcz # Date 1731506995 -3600 # Node ID 2bb339143f0158cc50fdd28bc523cd6c20600e03 # Parent b3dddcc5f20896f3a5fb0d86616eef6caf7fe878 Added OS specific code and performance improvements diff --git a/README.md b/README.md --- a/README.md +++ b/README.md @@ -1,13 +1,39 @@ # Exceptionrules This is a project which contains filter rules in order for Acceptable Ads program to work ***IMPORTANT:*** After cloning the project, you need to run the following command in order to add the pre-commit hook: +## Windows +Windows: turn on Developer Mode is settings->system->for developers and run +```sh +export MSYS=winsymlinks:nativestrict +``` + +```sh +wsl --update +``` + +```sh +wsl --install -d Ubuntu-24.04 +``` + +```sh + wsl -s Ubuntu-24.04 +``` + +```sh +wsl sudo apt update +``` + +```sh +wsl sudo apt install jq +``` + ```sh ln -s -f "$(pwd)/pre-commit-src/pre-commit" "$(pwd)/.git/hooks/pre-commit" ``` -If you are Windows user, you might need additional bash shell for pre-commit to work. For example the one used in [git for windows](https://gitforwindows.org/) should be sufficient. +When you add files/change their names remember to add it to template files and inform AFB team. -When you add files/change their names remember to add it to template files and inform AFB team. \ No newline at end of file +MAC update bash `brew install bash` diff --git a/pre-commit-src/pre-commit b/pre-commit-src/pre-commit --- a/pre-commit-src/pre-commit +++ b/pre-commit-src/pre-commit @@ -2,29 +2,55 @@ # Improve error handling, option e is not picked because it's expected for functions # to return non-0 statuses. set -Eeuo pipefail # Allow user input during commit exec < /dev/tty +# Allow modifying aliases +shopt -s expand_aliases + templates_content='{}' templates_names=() unique_json_files=() unique_filterlists_to_include=() all_domains_variables_in_json_files='[]' all_domains_variables_names='{}' all_json_files_contents='{}' variables_in_json_files='{}' variables_in_included_filterlists='{}' all_domain_variables_matches_in_filterlists='[]' last_error='' +unstaged_files='' testing=false +# When using Windows there is a problem with return character, therefore there is a need for extra --binary option. Look https://jqlang.github.io/jq/manual/ +# In general running this script directly within windows is very slow, so use of WSL2 is recommended but in case of some tests performed in windows, those lines are still needed. +if [ $OSTYPE = 'msys' ] || [ $OSTYPE = 'cygwin' ]; then + alias jq='jq -b' +else + alias jq='command jq' +fi + +# Mac grep doesn't have Perl regex, therefor it needs to be checked with another method +if [ $OSTYPE = 'darwin'* ]; then + alias grep='ggrep' +else + alias grep='command grep' +fi + +# In case on WSL (Windows) it's faster to invoke git.exe directly from Windows (since the files are in the windows FS). +if [ -n $(echo $WSL_DISTRO_NAME || echo '') ]; then + alias git='git.exe' +else + alias git='command git' +fi + error_handler() { local exit_code=$? local line_number=$1 echo "Error: Script failed with exit code $exit_code at line $line_number" if [ "${BASH_COMMAND:-}" = "return 1" ]; then echo -e "Last error message:\n$last_error" else echo -e "\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n" @@ -37,71 +63,101 @@ error_handler() { else exit $exit_code fi } # Set up trap to catch errors and invoke the error handler trap 'error_handler $LINENO' ERR -check_git_status() { - local status=$(git status) +get_unstaged_files() { + unstaged_files=$(git ls-files . --exclude-standard --others -m) + local git_exit_code=$? - local git_status=$(grep -s "Changes not staged for commit" <<< $status || grep -s "Untracked files" <<< $status || echo '') - if ! [ -z "$git_status" ]; then + if [ $git_exit_code -ne 0 ]; then + last_error="Wrong setting of git, be sure that you run git as the same user as you cloned repo" + return 1 + fi + + if [ -n "$unstaged_files" ]; then read -p "There are changes not staged for commit. The script will check only the staged version. Do you want to continue? (y/n): " choice case "$choice" in y|Y ) echo "";; n|N ) echo "Aborting."; exit 1;; * ) echo "Invalid choice. Aborting."; exit 1;; esac fi } +get_staged_files() { + local -n staged_files_nameref=$1 + staged_files_nameref=$(git diff --name-only --cached) +} + check_if_jq_is_installed() { if ! command -v jq &>/dev/null; then last_error="ERROR: jq is not installed. Please install jq to continue." return 1 fi } check_if_file_exists() { local file_path="$1" if ! [ -f "$file_path" ]; then last_error="ERROR: File $file_path does not exist" return 1 fi } +check_if_unstaged_changes() { + local file_path=$1 + local -n answer_nameref=$2 + check_if_file_exists "$file_path" + local git_status=$(git status $file_path) + local lines_indicating_changes=$(grep -s "Changes not staged for commit" <<< $git_status || grep -s "Untracked files:" <<< $git_status || echo '') + + if [ -n "$lines_indicating_changes" ]; then + answer_nameref=true + else + answer_nameref=false + fi +} + get_staged_version_of_a_file() { local file_name="$1" # Using name reference in order to not create subshells for each assignment and allow to use global variables # and global error handling local -n staged_file_content_nameref=$2 check_if_file_exists "$file_name" - if git show :"$file_name" >/dev/null 2>&1; then - staged_file_content_nameref="$(git show :"$file_name")" + local matched_unstaged_file=$(grep -x "$file_name" <<< "$unstaged_files" || echo '') + + if [ -n "$matched_unstaged_file" ]; then + staged_file_content_nameref=$(git show ":$file_name" || echo '') else + staged_file_content_nameref=$(cat "$file_name") + fi + + if [ -z "$staged_file_content_nameref" ]; then last_error="ERROR: File $file_name was requested by a template but it's not tracked neither staged." return 1 fi } parse_template_data() { local template="$1" local -n file_data_nameref=$2 local staged_template get_staged_version_of_a_file "$template" staged_template # Extract lines starting with %domainsVariables exceptionrules: and process them with jq - local json_files_in_template=$(grep "^%domainsVariables exceptionrules:" <<< $staged_template | sed 's/^%domainsVariables exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))' || echo '[]') + local json_files_in_template=$(grep "^%domainsVariables exceptionrules:" <<< $staged_template | sed 's/^%domainsVariables exceptionrules://; s/%$//' | jq -Rs 'split("\n") | map(select(length > 0))' || echo '[]') # Extract lines starting with %include exceptionrules: and process them with jq - local included_filterlists_files_in_template=$(grep "^%include exceptionrules:" <<< $staged_template | sed 's/^%include exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))' || echo '[]') + local included_filterlists_files_in_template=$(grep "^%include exceptionrules:" <<< $staged_template | sed 's/^%include exceptionrules://; s/%$//' | jq -Rs 'split("\n") | map(select(length > 0))' || echo '[]') if [ "$included_filterlists_files_in_template" = "[]" ]; then last_error="ERROR: There is no list included in template $template_name" return 1 fi # Create a JSON object with the template name as the key and the extracted domainsVariables and include data as values file_data_nameref=$(jq -n --arg template "$template" --argjson json_files_in_template "$json_files_in_template" --argjson included_filterlists_files_in_template "$included_filterlists_files_in_template" ' @@ -206,26 +262,20 @@ check_if_duplicated_domains() { fi } check_if_correct_domain() { local json_file_path="$1" local domains_variable_name="$2" local domains="$3" # Preselect candidates for invalid domains, then check them - local candidates=$(grep -vP "^[a-zA-Z0-9]{1,63}\.[a-zA-Z]{2,24}$" <<< $domains || echo '') - local strong_candidates=$(grep -vP "^((([A-Za-z0-9][A-Za-z0-9-]{0,61})?[A-Za-z0-9]\.)+([A-Za-z]{2,63}|\*|xn--[a-zA-Z0-9-]{1,59}) )+$" <<< $candidates || echo '') - if [ -n "$strong_candidates" ]; then - for domain in $strong_candidates; do - # Check if the domain matches the expected pattern - if ! [[ $domain =~ ^(([A-Za-z0-9][A-Za-z0-9-]{0,61})?[A-Za-z0-9]\.)+([A-Za-z]{2,63}|\*|xn--[a-zA-Z0-9-]{1,59})$ ]]; then - last_error="ERROR: Invalid domain in $domains_variable_name: $domain in file $json_file_path" - return 1 - fi - done + local candidates=$(grep -vP "^(([A-Za-z0-9][A-Za-z0-9-]{0,61})?[A-Za-z0-9]\.)+([A-Za-z]{2,63}|\*|xn--[a-zA-Z0-9-]{1,59})$" <<< $domains || echo '') + if [ -n "$candidates" ]; then + last_error="ERROR: Invalid domain in $domains_variable_name: $candidates in file $json_file_path" + return 1 fi } check_if_correct_domains_variables() { local json_file_path="$1" local domains_variables_names="$2" local json_file_content="$3" check_if_duplicated_domains_variable_name_in_single_file "$json_file_path" "$domains_variables_names" @@ -240,17 +290,17 @@ check_if_correct_domains_variables() { } update_domains_variables_data() { local domains_variables_names="$1" local json_file_path="$2" local json_file_content="$3" # Merge the new domains variables into the existing all_domains_variables_in_json_files array - all_domains_variables_in_json_files=$(jq -n --argjson all_domains_variables_in_json_files "$all_domains_variables_in_json_files" --argjson domains_variables_names "$(echo "$domains_variables_names" | jq -R -s 'split("\n") | map(select(length > 0))')" '$all_domains_variables_in_json_files + $domains_variables_names | unique') + all_domains_variables_in_json_files=$(jq -n --argjson all_domains_variables_in_json_files "$all_domains_variables_in_json_files" --argjson domains_variables_names "$(echo "$domains_variables_names" | jq -Rs 'split("\n") | map(select(length > 0))')" '$all_domains_variables_in_json_files + $domains_variables_names | unique') # Adding all the json files content to the variable was causing "Too big variable" error # Therefore, the content is saved to a temporary file and then loaded back to the variable all_json_files_temp_file=$(mktemp) echo "$all_json_files_contents" > "$all_json_files_temp_file" json_file_content_temp=$(mktemp) echo "$json_file_content" > "$json_file_content_temp" @@ -345,22 +395,21 @@ update_matches_and_variables() { variables_in_included_filterlists=$(jq --arg key "$file_path" --argjson value "$domains_variables_collected_from_filterlist" '.[$key] = $value' <<< $variables_in_included_filterlists) } extract_domains_variables_in_included_filterlists() { local template_name="$1" local -n domains_variables_in_included_filterlists_nameref=$2 # Extract the list of included filterlists from the template - local included_filterlists=$(jq -r --arg template_name "$template_name" '.[$template_name].include[]' <<< $templates_content) + local included_filterlists=$(jq -r --arg template_name "$template_name" '.[$template_name].include[]' <<< "$templates_content") domains_variables_in_included_filterlists_nameref=() - for included_filterlist in $included_filterlists; do # Extract the domain variables from each included filterlist - local domains_variables=$(jq -r --arg key "$included_filterlist" '.[$key][]' <<< $variables_in_included_filterlists) + local domains_variables=$(jq -r --arg included_filterlist "$included_filterlist" '.[$included_filterlist][]' <<< "$variables_in_included_filterlists") for domain_variable in $domains_variables; do domains_variables_in_included_filterlists_nameref+=("$domain_variable") done done } extract_domains_variables_in_included_json_files() { local template_name="$1" @@ -423,48 +472,46 @@ check_if_domains_variables_are_identical last_error+="Extra variables in filter lists:\n" last_error+=$(jq -n --argjson all_domains_variables_in_json_files "$all_domains_variables_in_json_files" --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" '$all_domain_variables_matches_in_filterlists - $all_domains_variables_in_json_files') return 1 fi } main() { - check_git_status + get_unstaged_files check_if_jq_is_installed for template_name in *.txt; do templates_names+=("$template_name") # To avoid creating a subshell, the variable is passed as a reference to parse_template_data function # That helps with the error handling and allows to use global variables local file_data # Parse data from the template parse_template_data "$template_name" file_data # Update the templates_content JSON with the data from the file update_templates_content "$file_data" update_unique_json_files "$template_name" "$file_data" update_unique_includes "$template_name" "$file_data" done for domains_variables_path in ${unique_json_files[@]}; do - check_if_file_exists "$domains_variables_path" local staged_domains_variables_file get_staged_version_of_a_file "$domains_variables_path" staged_domains_variables_file check_if_correct_domains_variables_json_structure "$staged_domains_variables_file" "$domains_variables_path" local domains_variables_names_incl_duplicates get_domains_variables_names_incl_duplicates "$staged_domains_variables_file" domains_variables_names_incl_duplicates check_if_correct_domains_variables "$domains_variables_path" "$domains_variables_names_incl_duplicates" "$staged_domains_variables_file" check_if_duplicated_domains_variable_name_between_files "$domains_variables_path" "$domains_variables_names_incl_duplicates" update_domains_variables_data "$domains_variables_names_incl_duplicates" "$domains_variables_path" "$staged_domains_variables_file" done for filterlist_path in ${unique_filterlists_to_include[@]}; do - check_if_file_exists "$filterlist_path" local filterlist_content get_staged_version_of_a_file "$filterlist_path" filterlist_content check_for_similar_to_domain_variable "$filterlist_content" "$filterlist_path" local all_lines_with_domain_variables_in_filterlist find_domain_variables_syntax_in_filterlist "$filterlist_content" "$filterlist_path" all_lines_with_domain_variables_in_filterlist if [ -z "$all_lines_with_domain_variables_in_filterlist" ]; then @@ -507,46 +554,64 @@ check_unit_tests() { last_error+="To check the details run 'pre-commit-src/tests/pre-commit-tests.sh'" return $exit_status else last_error="Unit tests passed successfully" fi } check_pre_commit_files() { - pre_commit_git_status=$(git status :pre-commit-src/pre-commit) - logs_for_unstaged_changes_in_pre_commit=$(grep -s "Changes not staged for commit" <<< $pre_commit_git_status || grep -s "Untracked files" <<< $pre_commit_git_status || echo '') - if ! [ -z "$logs_for_unstaged_changes_in_pre_commit" ]; then + local staged_files + get_staged_files staged_files + + local pre_commit_path='pre-commit-src/pre-commit' + check_if_file_exists "$pre_commit_path" + + local matched_pre_commit_in_unstaged=$(grep -x "$pre_commit_path" <<< "$unstaged_files" || echo '') + if [ -n "$matched_pre_commit_in_unstaged" ]; then last_error="Unstaged changes detected in pre-commit file. Stage pre-commit changes before continuing." return 1 fi - pre_commit_tests_git_status=$(git status :pre-commit-src/tests/pre-commit-tests.sh) - logs_for_unstaged_changes_in_pre_commit_tests=$(grep -s "Changes not staged for commit" <<< $pre_commit_tests_git_status || grep -s "Untracked files" <<< $pre_commit_tests_git_status || echo '') - if ! [ -z "$logs_for_unstaged_changes_in_pre_commit_tests" ]; then - last_error="Unstaged changes detected in pre-commit-tests file. Stage pre-commit tests changes before continuing." + local pre_commit_script_path='pre-commit-src/pre-commit-script.sh' + check_if_file_exists "$pre_commit_script_path" + local matched_pre_commit_script_in_unstaged='' # This file will go with the next commit. To better show what changed since the last push I decided to first push it without name change $(grep -x "$pre_commit_script_path" <<< "$unstaged_files" || echo '') + if [ -n "$matched_pre_commit_script_in_unstaged" ]; then + last_error="Unstaged changes detected in pre-commit-script.sh file. Stage pre-commit-script changes before continuing." return 1 fi + pre_commit_tests_path='pre-commit-src/tests/pre-commit-tests.sh' + check_if_file_exists "$pre_commit_tests_path" + local matched_pre_commit_tests_in_unstaged=$(grep -x "$pre_commit_tests_path" <<< "$unstaged_files" || echo '') + if [ -n "$matched_pre_commit_tests_in_unstaged" ]; then + last_error="Unstaged changes detected in pre-commit-tests file. Stage pre-commit tests changes before continuing." + return 1 + fi # Only if something changed in pre commit or pre commit tests the unit tests should be run - logs_for_commited_changes_in_pre_commit_or_test=$(grep -s "Changes to be committed" <<< $pre_commit_tests_git_status || grep -s "Changes to be committed" <<< $pre_commit_git_status || echo '') - if ! [ -z "$logs_for_commited_changes_in_pre_commit_or_test" ]; then + if [ -z "$matched_pre_commit_in_unstaged" + "$matched_pre_commit_script_in_unstaged" "$matched_pre_commit_tests_in_unstaged" ]; then check_unit_tests fi } - # For testing purposes only if the script has no arguments or the argument is main the process should run # thanks to that the script can be tested without running the main function if [ -z "${1:-}" ]; then echo 'Running pre-commit checks...' main check_pre_commit_files echo "Pre-commit checks passed successfully. Double check if there were no error messages above this message before pushing" + unalias jq + unalias grep + unalias git exit 0 elif [ "$1" = "--load-only" ]; then testing=true echo "Script loaded successfully" else - "$@" + echo Wrong option provided "$@" + unalias jq + unalias grep + unalias git + exit 1 fi diff --git a/pre-commit-src/tests/pre-commit-tests.sh b/pre-commit-src/tests/pre-commit-tests.sh --- a/pre-commit-src/tests/pre-commit-tests.sh +++ b/pre-commit-src/tests/pre-commit-tests.sh @@ -6,17 +6,17 @@ error_handler() { local exit_code=$? local line_number=$1 echo "Error on line $line_number. Exit code: $exit_code" exit $exit_code } trap 'error_handler $LINENO' ERR # Load the pre-commit script -source ./pre-commit-src/pre-commit --load-only +. ./pre-commit-src/pre-commit-script.sh --load-only score=0 total=0 mode="${1:-}" # Function to check if a file exists check_if_file_exists() { @@ -495,20 +495,25 @@ test_check_if_duplicated_domains_variabl test_find_find_domain_variables_syntax_in_filterlist test_process_filters test_update_matches_and_variables test_extract_domains_variables_in_included_filterlists test_extract_domains_variables_in_included_json_files test_check_domain_variables_in_filterlists test_check_if_domains_variables_are_identical_in_lists_and_jsons +# Remove aliases created in pre-commit-script.sh +unalias jq +unalias grep +unalias git + echo -e '\n-------------------------------------------\n' if [ $score -eq $total ]; then if [ "$mode" != '--no-verbose' ]; then echo 'All tests from pre-commit-tests.sh passed.' fi exit 0 else if [ "$mode" != '--no-verbose' ]; then echo "$((total-score)) tests from pre-commit-tests.sh failed out of $total in total." fi exit 1 -fi \ No newline at end of file +fi