Added OS specific code and performance improvements
author KrzGalcz <k.galczynski@eyeo.com>
Wed, 13 Nov 2024 15:09:55 +0100
changeset 25711 2bb339143f01
parent 25710 b3dddcc5f208
child 25712 990d3ed1b36b
Added OS specific code and performance improvements
README.md
pre-commit-src/pre-commit
pre-commit-src/tests/pre-commit-tests.sh
--- a/README.md
+++ b/README.md
@@ -1,13 +1,39 @@
 # Exceptionrules
 
 This is a project which contains filter rules in order for Acceptable Ads program to work
 
 ***IMPORTANT:*** After cloning the project, you need to run the following command in order to add the pre-commit hook:
 
+## Windows
+Turn on Developer Mode in Settings -> System -> For developers, then run the following commands:
+```sh
+export MSYS=winsymlinks:nativestrict
+```
+
+```sh
+wsl --update
+```
+
+```sh
+wsl --install -d Ubuntu-24.04
+```
+
+```sh
+ wsl -s Ubuntu-24.04
+```
+
+```sh
+wsl sudo apt update
+```
+
+```sh
+wsl sudo apt install jq 
+```
+
 ```sh
 ln -s -f "$(pwd)/pre-commit-src/pre-commit" "$(pwd)/.git/hooks/pre-commit"
 ```
 
-If you are Windows user, you might need additional bash shell for pre-commit to work. For example the one used in [git for windows](https://gitforwindows.org/) should be sufficient.
+When you add files/change their names remember to add it to template files and inform AFB team.
 
-When you add files/change their names remember to add it to template files and inform AFB team.
\ No newline at end of file
+On macOS, update bash and install GNU grep (the hook calls it as `ggrep`): `brew install bash grep`
--- a/pre-commit-src/pre-commit
+++ b/pre-commit-src/pre-commit
@@ -2,29 +2,55 @@
 
 # Improve error handling, option e is not picked because it's expected for functions
 # to return non-0 statuses.
 set -Eeuo pipefail
 
 # Allow user input during commit
 exec < /dev/tty
 
+# Enable alias expansion (bash disables it by default in non-interactive shells); the jq/grep/git aliases below rely on it
+shopt -s expand_aliases
+
 templates_content='{}'
 templates_names=()
 unique_json_files=()
 unique_filterlists_to_include=()
 all_domains_variables_in_json_files='[]'
 all_domains_variables_names='{}'
 all_json_files_contents='{}'
 variables_in_json_files='{}'
 variables_in_included_filterlists='{}'
 all_domain_variables_matches_in_filterlists='[]'
 last_error=''
+unstaged_files=''
 testing=false
 
+# On Windows (MSYS/Cygwin) a native jq turns newlines into CRLF, so the extra --binary (-b) option is needed. See https://jqlang.github.io/jq/manual/
+# Running this script natively on Windows is very slow, so WSL2 is recommended; these lines are kept for tests that are still run directly on Windows.
+if [[ "$OSTYPE" == 'msys' || "$OSTYPE" == 'cygwin' ]]; then
+    alias jq='jq -b'
+else
+    alias jq='command jq'
+fi
+
+# macOS ships a grep without Perl regex support (-P), so GNU grep (ggrep) is used there; [[ ]] is required for the darwin* pattern match
+if [[ "$OSTYPE" == darwin* ]]; then
+    alias grep='ggrep'
+else
+    alias grep='command grep'
+fi
+
+# On WSL (Windows) it's faster to invoke git.exe directly from Windows (since the files are in the Windows FS). ${VAR:-} keeps 'set -u' happy outside WSL.
+if [ -n "${WSL_DISTRO_NAME:-}" ]; then
+    alias git='git.exe'
+else
+    alias git='command git'
+fi
+
 error_handler() {
     local exit_code=$?
     local line_number=$1
     echo "Error: Script failed with exit code $exit_code at line $line_number"
     if [ "${BASH_COMMAND:-}" = "return 1" ]; then
         echo -e "Last error message:\n$last_error"
     else
         echo -e "\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
@@ -37,71 +63,101 @@ error_handler() {
     else
         exit $exit_code
     fi
 }
 
 # Set up trap to catch errors and invoke the error handler
 trap 'error_handler $LINENO' ERR
 
-check_git_status() {
-    local status=$(git status)
+get_unstaged_files() {
+    local git_exit_code=0
+    unstaged_files=$(git ls-files . --exclude-standard --others -m) || git_exit_code=$?
 
-    local git_status=$(grep -s "Changes not staged for commit" <<< $status || grep -s "Untracked files" <<< $status || echo '')
-    if ! [ -z "$git_status" ]; then
+    if [ "$git_exit_code" -ne 0 ]; then
+        last_error="Wrong setting of git, be sure that you run git as the same user as you cloned repo"
+        return 1
+    fi
+
+    if [ -n "$unstaged_files" ]; then
         read -p "There are changes not staged for commit. The script will check only the staged version. Do you want to continue? (y/n): " choice
         case "$choice" in 
             y|Y ) echo "";;
             n|N ) echo "Aborting."; exit 1;;
             * ) echo "Invalid choice. Aborting."; exit 1;;
         esac
     fi
 }
 
+get_staged_files() {
+  local -n staged_files_nameref=$1
+  staged_files_nameref=$(git diff --name-only --cached)
+}
+
 check_if_jq_is_installed() {
-    if ! command -v jq &>/dev/null; then
+    if ! type -P jq &>/dev/null; then
         last_error="ERROR: jq is not installed. Please install jq to continue."
         return 1
     fi
 }
 
 check_if_file_exists() {
     local file_path="$1"
     if ! [ -f "$file_path" ]; then
         last_error="ERROR: File $file_path does not exist"
         return 1
     fi
 }
 
+check_if_unstaged_changes() {
+    local file_path=$1
+    local -n answer_nameref=$2
+    check_if_file_exists "$file_path"
+    local git_status=$(git status -- "$file_path")
+    local lines_indicating_changes=$(grep -s "Changes not staged for commit" <<< "$git_status" || grep -s "Untracked files:" <<< "$git_status" || echo '')
+
+    if [ -n "$lines_indicating_changes" ]; then
+        answer_nameref=true
+    else
+        answer_nameref=false
+    fi
+}
+
 get_staged_version_of_a_file() {
     local file_name="$1"
 
     # Using name reference in order to not create subshells for each assignment and allow to use global variables
     # and global error handling
     local -n staged_file_content_nameref=$2
 
     check_if_file_exists "$file_name"
-    if git show :"$file_name" >/dev/null 2>&1; then
-        staged_file_content_nameref="$(git show :"$file_name")"
+    local matched_unstaged_file=$(grep -Fx -- "$file_name" <<< "$unstaged_files" || echo '')
+
+    if [ -n "$matched_unstaged_file" ]; then
+        staged_file_content_nameref=$(git show ":$file_name" || echo '')
     else
+        staged_file_content_nameref=$(cat "$file_name")
+    fi
+
+    if [ -z "$staged_file_content_nameref" ]; then
         last_error="ERROR: File $file_name was requested by a template but it's not tracked neither staged."
         return 1
     fi
 }
 parse_template_data() {
     local template="$1"
     local -n file_data_nameref=$2
     local staged_template
 
     get_staged_version_of_a_file "$template" staged_template
 
     # Extract lines starting with %domainsVariables exceptionrules: and process them with jq
-    local json_files_in_template=$(grep "^%domainsVariables exceptionrules:" <<< $staged_template | sed 's/^%domainsVariables exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))' || echo '[]')
+    local json_files_in_template=$(grep "^%domainsVariables exceptionrules:" <<< $staged_template | sed 's/^%domainsVariables exceptionrules://; s/%$//' | jq -Rs 'split("\n") | map(select(length > 0))' || echo '[]')
     # Extract lines starting with %include exceptionrules: and process them with jq
-    local included_filterlists_files_in_template=$(grep "^%include exceptionrules:" <<< $staged_template | sed 's/^%include exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))' || echo '[]')
+    local included_filterlists_files_in_template=$(grep "^%include exceptionrules:" <<< $staged_template | sed 's/^%include exceptionrules://; s/%$//' | jq -Rs 'split("\n") | map(select(length > 0))' || echo '[]')
 
     if [ "$included_filterlists_files_in_template" = "[]" ]; then
         last_error="ERROR: There is no list included in template $template_name"
         return 1
     fi
 
     # Create a JSON object with the template name as the key and the extracted domainsVariables and include data as values
     file_data_nameref=$(jq -n --arg template "$template" --argjson json_files_in_template "$json_files_in_template" --argjson included_filterlists_files_in_template "$included_filterlists_files_in_template" '
@@ -206,26 +262,20 @@ check_if_duplicated_domains() {
     fi
 }
 
 check_if_correct_domain() {
     local json_file_path="$1"
     local domains_variable_name="$2"
     local domains="$3"
     # Preselect candidates for invalid domains, then check them 
-    local candidates=$(grep -vP "^[a-zA-Z0-9]{1,63}\.[a-zA-Z]{2,24}$" <<< $domains || echo '')
-    local strong_candidates=$(grep -vP "^((([A-Za-z0-9][A-Za-z0-9-]{0,61})?[A-Za-z0-9]\.)+([A-Za-z]{2,63}|\*|xn--[a-zA-Z0-9-]{1,59}) )+$" <<< $candidates || echo '')
-    if [ -n "$strong_candidates" ]; then
-        for domain in $strong_candidates; do
-            # Check if the domain matches the expected pattern
-            if ! [[ $domain =~ ^(([A-Za-z0-9][A-Za-z0-9-]{0,61})?[A-Za-z0-9]\.)+([A-Za-z]{2,63}|\*|xn--[a-zA-Z0-9-]{1,59})$ ]]; then
-                last_error="ERROR: Invalid domain in $domains_variable_name: $domain in file $json_file_path"
-                return 1
-            fi
-        done
+    local candidates=$(grep -vP "^(([A-Za-z0-9][A-Za-z0-9-]{0,61})?[A-Za-z0-9]\.)+([A-Za-z]{2,63}|\*|xn--[a-zA-Z0-9-]{1,59})$" <<< $domains || echo '')
+    if [ -n "$candidates" ]; then
+      last_error="ERROR: Invalid domain in $domains_variable_name: $candidates in file $json_file_path"
+      return 1
     fi
 }
 
 check_if_correct_domains_variables() {
     local json_file_path="$1"
     local domains_variables_names="$2"
     local json_file_content="$3"
     check_if_duplicated_domains_variable_name_in_single_file "$json_file_path" "$domains_variables_names"
@@ -240,17 +290,17 @@ check_if_correct_domains_variables() {
 }
 
 update_domains_variables_data() {
     local domains_variables_names="$1"
     local json_file_path="$2"
     local json_file_content="$3"
 
     # Merge the new domains variables into the existing all_domains_variables_in_json_files array
-    all_domains_variables_in_json_files=$(jq -n --argjson all_domains_variables_in_json_files "$all_domains_variables_in_json_files" --argjson domains_variables_names "$(echo "$domains_variables_names" | jq -R -s 'split("\n") | map(select(length > 0))')" '$all_domains_variables_in_json_files + $domains_variables_names | unique')
+    all_domains_variables_in_json_files=$(jq -n --argjson all_domains_variables_in_json_files "$all_domains_variables_in_json_files" --argjson domains_variables_names "$(echo "$domains_variables_names" | jq -Rs 'split("\n") | map(select(length > 0))')" '$all_domains_variables_in_json_files + $domains_variables_names | unique')
 
     # Adding all the json files content to the variable was causing "Too big variable" error
     # Therefore, the content is saved to a temporary file and then loaded back to the variable
 
     all_json_files_temp_file=$(mktemp)
     echo "$all_json_files_contents" > "$all_json_files_temp_file"
     json_file_content_temp=$(mktemp)
     echo "$json_file_content" > "$json_file_content_temp"
@@ -345,22 +395,21 @@ update_matches_and_variables() {
     variables_in_included_filterlists=$(jq --arg key "$file_path" --argjson value "$domains_variables_collected_from_filterlist" '.[$key] = $value' <<< $variables_in_included_filterlists)
 }
 
 extract_domains_variables_in_included_filterlists() {
 
     local template_name="$1"
     local -n domains_variables_in_included_filterlists_nameref=$2
     # Extract the list of included filterlists from the template
-    local included_filterlists=$(jq -r --arg template_name "$template_name" '.[$template_name].include[]' <<< $templates_content)
+    local included_filterlists=$(jq -r --arg template_name "$template_name" '.[$template_name].include[]' <<< "$templates_content")
     domains_variables_in_included_filterlists_nameref=()
-
     for included_filterlist in $included_filterlists; do
         # Extract the domain variables from each included filterlist
-        local domains_variables=$(jq -r --arg key "$included_filterlist" '.[$key][]' <<< $variables_in_included_filterlists)
+        local domains_variables=$(jq -r --arg included_filterlist "$included_filterlist" '.[$included_filterlist][]' <<< "$variables_in_included_filterlists")
         for domain_variable in $domains_variables; do
             domains_variables_in_included_filterlists_nameref+=("$domain_variable")
         done
     done
 }
 
 extract_domains_variables_in_included_json_files() {
     local template_name="$1"
@@ -423,48 +472,46 @@ check_if_domains_variables_are_identical
         last_error+="Extra variables in filter lists:\n"
         last_error+=$(jq -n --argjson all_domains_variables_in_json_files "$all_domains_variables_in_json_files" --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" '$all_domain_variables_matches_in_filterlists - $all_domains_variables_in_json_files')
         return 1
     fi
 }
 
 
 main() {
-    check_git_status
+    get_unstaged_files
     check_if_jq_is_installed
     for template_name in *.txt; do
         templates_names+=("$template_name")
         # To avoid creating a subshell, the variable is passed as a reference to parse_template_data function
         # That helps with the error handling and allows to use global variables
         local file_data
         # Parse data from the template
         parse_template_data "$template_name" file_data
         # Update the templates_content JSON with the data from the file
         update_templates_content "$file_data"
 
         update_unique_json_files "$template_name" "$file_data"
         update_unique_includes "$template_name" "$file_data"
     done
 
     for domains_variables_path in ${unique_json_files[@]}; do
-        check_if_file_exists "$domains_variables_path"
         local staged_domains_variables_file
         get_staged_version_of_a_file "$domains_variables_path" staged_domains_variables_file
         check_if_correct_domains_variables_json_structure "$staged_domains_variables_file" "$domains_variables_path"
         local domains_variables_names_incl_duplicates
         get_domains_variables_names_incl_duplicates "$staged_domains_variables_file" domains_variables_names_incl_duplicates
 
         check_if_correct_domains_variables "$domains_variables_path" "$domains_variables_names_incl_duplicates" "$staged_domains_variables_file"
 
         check_if_duplicated_domains_variable_name_between_files "$domains_variables_path" "$domains_variables_names_incl_duplicates"
         update_domains_variables_data "$domains_variables_names_incl_duplicates" "$domains_variables_path" "$staged_domains_variables_file"
     done
 
     for filterlist_path in ${unique_filterlists_to_include[@]}; do
-        check_if_file_exists "$filterlist_path"
         local filterlist_content
         get_staged_version_of_a_file "$filterlist_path" filterlist_content
 
         check_for_similar_to_domain_variable  "$filterlist_content" "$filterlist_path"
         local all_lines_with_domain_variables_in_filterlist
         find_domain_variables_syntax_in_filterlist "$filterlist_content" "$filterlist_path" all_lines_with_domain_variables_in_filterlist
 
         if [ -z "$all_lines_with_domain_variables_in_filterlist" ]; then
@@ -507,46 +554,64 @@ check_unit_tests() {
         last_error+="To check the details run 'pre-commit-src/tests/pre-commit-tests.sh'"
         return $exit_status
     else
         last_error="Unit tests passed successfully"
     fi
 }
 
 check_pre_commit_files() {
-    pre_commit_git_status=$(git status :pre-commit-src/pre-commit)
-    logs_for_unstaged_changes_in_pre_commit=$(grep -s "Changes not staged for commit" <<< $pre_commit_git_status || grep -s "Untracked files" <<< $pre_commit_git_status || echo '')
-    if ! [ -z "$logs_for_unstaged_changes_in_pre_commit" ]; then
+    local staged_files
+    get_staged_files staged_files
+
+    local pre_commit_path='pre-commit-src/pre-commit'
+    check_if_file_exists "$pre_commit_path"
+
+    local matched_pre_commit_in_unstaged=$(grep -Fx -- "$pre_commit_path" <<< "$unstaged_files" || echo '')
+    if [ -n "$matched_pre_commit_in_unstaged" ]; then
         last_error="Unstaged changes detected in pre-commit file. Stage pre-commit changes before continuing."
         return 1
     fi
 
-    pre_commit_tests_git_status=$(git status :pre-commit-src/tests/pre-commit-tests.sh)
-    logs_for_unstaged_changes_in_pre_commit_tests=$(grep -s "Changes not staged for commit" <<< $pre_commit_tests_git_status || grep -s "Untracked files" <<< $pre_commit_tests_git_status || echo '')
-    if ! [ -z "$logs_for_unstaged_changes_in_pre_commit_tests" ]; then
-        last_error="Unstaged changes detected in pre-commit-tests file. Stage pre-commit tests changes before continuing."
+    local pre_commit_script_path='pre-commit-src/pre-commit-script.sh'
+    check_if_file_exists "$pre_commit_script_path"
+    local matched_pre_commit_script_in_unstaged='' # This file will go with the next commit. To better show what changed since the last push I decided to first push it without name change $(grep -x "$pre_commit_script_path" <<< "$unstaged_files" || echo '')
+    if [ -n "$matched_pre_commit_script_in_unstaged" ]; then
+        last_error="Unstaged changes detected in pre-commit-script.sh file. Stage pre-commit-script changes before continuing."
         return 1
     fi
 
+    local pre_commit_tests_path='pre-commit-src/tests/pre-commit-tests.sh'
+    check_if_file_exists "$pre_commit_tests_path"
+    local matched_pre_commit_tests_in_unstaged=$(grep -Fx -- "$pre_commit_tests_path" <<< "$unstaged_files" || echo '')
+    if [ -n "$matched_pre_commit_tests_in_unstaged" ]; then
+        last_error="Unstaged changes detected in pre-commit-tests file. Stage pre-commit tests changes before continuing."
+        return 1
+    fi
     # Only if something changed in pre commit or pre commit tests the unit tests should be run
-    logs_for_commited_changes_in_pre_commit_or_test=$(grep -s "Changes to be committed" <<< $pre_commit_tests_git_status || grep -s "Changes to be committed" <<< $pre_commit_git_status || echo '')
-    if ! [ -z "$logs_for_commited_changes_in_pre_commit_or_test" ]; then
+    if grep -qxE 'pre-commit-src/(pre-commit|pre-commit-script\.sh|tests/pre-commit-tests\.sh)' <<< "$staged_files"; then
         check_unit_tests
     fi
 }
 
-
 # For testing purposes only if the script has no arguments or the argument is main the process should run
 # thanks to that the script can be tested without running the main function
 if [ -z "${1:-}" ]; then
     echo 'Running pre-commit checks...'
     main
     check_pre_commit_files
 
     echo "Pre-commit checks passed successfully. Double check if there were no error messages above this message before pushing"
+    unalias jq
+    unalias grep
+    unalias git
     exit 0
 
 elif [ "$1" = "--load-only" ]; then
     testing=true
     echo "Script loaded successfully"
 else
-    "$@"
+    echo Wrong option provided "$@"
+    unalias jq
+    unalias grep
+    unalias git
+    exit 1
 fi
--- a/pre-commit-src/tests/pre-commit-tests.sh
+++ b/pre-commit-src/tests/pre-commit-tests.sh
@@ -6,17 +6,17 @@ error_handler() {
     local exit_code=$?
     local line_number=$1
     echo "Error on line $line_number. Exit code: $exit_code"
     exit $exit_code
 }
 trap 'error_handler $LINENO' ERR
 
 # Load the pre-commit script
-source ./pre-commit-src/pre-commit --load-only
+. ./pre-commit-src/pre-commit-script.sh --load-only
 
 score=0
 total=0
 
 mode="${1:-}"
 
 # Function to check if a file exists
 check_if_file_exists() {
@@ -495,20 +495,25 @@ test_check_if_duplicated_domains_variabl
 test_find_find_domain_variables_syntax_in_filterlist
 test_process_filters
 test_update_matches_and_variables
 test_extract_domains_variables_in_included_filterlists
 test_extract_domains_variables_in_included_json_files
 test_check_domain_variables_in_filterlists
 test_check_if_domains_variables_are_identical_in_lists_and_jsons
 
+# Remove aliases created in pre-commit-script.sh
+unalias jq
+unalias grep
+unalias git
+
 echo -e '\n-------------------------------------------\n'
 if [ $score -eq $total ]; then
     if [ "$mode" != '--no-verbose' ]; then
         echo 'All tests from pre-commit-tests.sh passed.'
     fi
     exit 0
 else
     if [ "$mode" != '--no-verbose' ]; then
         echo "$((total-score)) tests from pre-commit-tests.sh failed out of $total in total."
     fi
     exit 1
-fi
\ No newline at end of file
+fi