pre-commit-src/pre-commit
changeset 25710 b3dddcc5f208
parent 25709 ca5e4f738e57
child 25711 2bb339143f01
--- a/pre-commit-src/pre-commit
+++ b/pre-commit-src/pre-commit
@@ -89,19 +89,19 @@ get_staged_version_of_a_file() {
 parse_template_data() {
     local template="$1"
     local -n file_data_nameref=$2
     local staged_template
 
     get_staged_version_of_a_file "$template" staged_template
 
     # Extract lines starting with %domainsVariables exceptionrules: and process them with jq
-    local json_files_in_template=$(grep "^%domainsVariables exceptionrules:" <<< "$staged_template" | sed 's/^%domainsVariables exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))' || echo '[]')
+    local json_files_in_template=$(grep "^%domainsVariables exceptionrules:" <<< $staged_template | sed 's/^%domainsVariables exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))' || echo '[]')
     # Extract lines starting with %include exceptionrules: and process them with jq
-    local included_filterlists_files_in_template=$(grep "^%include exceptionrules:" <<< "$staged_template" | sed 's/^%include exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))' || echo '[]')
+    local included_filterlists_files_in_template=$(grep "^%include exceptionrules:" <<< $staged_template | sed 's/^%include exceptionrules://; s/%$//' | jq -R -s 'split("\n") | map(select(length > 0))' || echo '[]')
 
     if [ "$included_filterlists_files_in_template" = "[]" ]; then
         last_error="ERROR: There is no list included in template $template_name"
         return 1
     fi
 
     # Create a JSON object with the template name as the key and the extracted domainsVariables and include data as values
     file_data_nameref=$(jq -n --arg template "$template" --argjson json_files_in_template "$json_files_in_template" --argjson included_filterlists_files_in_template "$included_filterlists_files_in_template" '
@@ -118,42 +118,42 @@ update_templates_content() {
     # Merge the new template data into the existing templates_content JSON
     templates_content=$(jq -n --argjson templates_content "$templates_content" --argjson file_template "$file_template" '$templates_content + $file_template')
 }
 
 update_unique_json_files() {
     local template_name="$1"
     local file_data="$2"
     # Extract the list of domainsVariables files from the template data
-    local json_files_list=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< "$file_data")
+    local json_files_list=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< $file_data)
     for json_file in $json_files_list; do
-        local matches_in_unique_json_files=$(grep -swF "$json_file" <<< "${unique_json_files[@]}" || echo '')
+        local matches_in_unique_json_files=$(grep -swF "$json_file" <<< ${unique_json_files[@]} || echo '')
         if [ -z "$matches_in_unique_json_files" ]; then
             unique_json_files+=("$json_file")
         fi
     done
 }
 
 update_unique_includes() {
     local template_name="$1"
     local file_data="$2"
     # Extract the list of included files from the template data
-    local included_files_list=$(jq -r --arg file "$template_name" '.[$file].include[]' <<< "$file_data")
+    local included_files_list=$(jq -r --arg file "$template_name" '.[$file].include[]' <<< $file_data)
 
     for included_filterlist in $included_files_list; do
-        local matches_in_unique_filterlists=$(grep -swF "$included_filterlist" <<< "${unique_filterlists_to_include[@]}" || echo '')
+        local matches_in_unique_filterlists=$(grep -swF "$included_filterlist" <<< ${unique_filterlists_to_include[@]} || echo '')
         if [ -z "$matches_in_unique_filterlists" ]; then
             unique_filterlists_to_include+=("$included_filterlist")
         fi
     done
 }
 
 check_if_valid_json() {
     local json_file_path="$1"
-    if ! jq -e . >/dev/null 2>&1 <<< "$(cat "$json_file_path")"; then
+    if ! jq -e . >/dev/null 2>&1 <<< $(cat "$json_file_path"); then
         last_error="ERROR: Invalid JSON content in $json_file_path"
         return 1
     fi
 }
 
 get_domains_variables_names_incl_duplicates() {
     local json_file_content="$1"
     local -n domains_variables_names_incl_duplicated_nameref=$2
@@ -205,162 +205,179 @@ check_if_duplicated_domains() {
         return 1
     fi
 }
 
 check_if_correct_domain() {
     local json_file_path="$1"
     local domains_variable_name="$2"
     local domains="$3"
-    for domain in $domains; do
-        # Check if the domain matches the expected pattern
-        if [ -z $(grep -sP "^(?:(?:(?!-)[A-Za-z0-9-]{1,63}(?<!-)\.)*(?:[A-Za-z]{2,})$)|(?:^(?:(?!-)[A-Za-z0-9-]{1,63}(?<!-)\.)+\*)$" <<< "$domain") ]; then
-            last_error="ERROR: Invalid domain in $domains_variable_name: $domain in file $json_file_path"
-            return 1
-        fi
-    done
+    # Preselect candidates for invalid domains with grep, then check each remaining candidate individually
+    local candidates=$(grep -vP "^[a-zA-Z0-9]{1,63}\.[a-zA-Z]{2,24}$" <<< $domains || echo '')
+    local strong_candidates=$(grep -vP "^((([A-Za-z0-9][A-Za-z0-9-]{0,61})?[A-Za-z0-9]\.)+([A-Za-z]{2,63}|\*|xn--[a-zA-Z0-9-]{1,59}) )+$" <<< $candidates || echo '')
+    if [ -n "$strong_candidates" ]; then
+        for domain in $strong_candidates; do
+            # Check if the domain matches the expected pattern
+            if ! [[ $domain =~ ^(([A-Za-z0-9][A-Za-z0-9-]{0,61})?[A-Za-z0-9]\.)+([A-Za-z]{2,63}|\*|xn--[a-zA-Z0-9-]{1,59})$ ]]; then
+                last_error="ERROR: Invalid domain in $domains_variable_name: $domain in file $json_file_path"
+                return 1
+            fi
+        done
+    fi
 }
 
 check_if_correct_domains_variables() {
     local json_file_path="$1"
     local domains_variables_names="$2"
     local json_file_content="$3"
     check_if_duplicated_domains_variable_name_in_single_file "$json_file_path" "$domains_variables_names"
     for domains_variable_name in $domains_variables_names; do
         check_if_valid_domains_variable_name "$domains_variable_name"
         # Extract the value associated with the domains variable name from the JSON file
-        local domains=$(jq -r --arg key "$domains_variable_name" '.[$key][]' <<< "$json_file_content")
+        local domains=$(jq -r --arg key "$domains_variable_name" '.[$key][]' <<< $json_file_content)
 
         check_if_duplicated_domains "$json_file_path" "$domains_variable_name" "$domains"
         check_if_correct_domain "$json_file_path" "$domains_variable_name" "$domains"
     done
 }
 
 update_domains_variables_data() {
     local domains_variables_names="$1"
     local json_file_path="$2"
-    local json_file_content=$(jq -c . < "$json_file_path")
-    
+    local json_file_content="$3"
+
     # Merge the new domains variables into the existing all_domains_variables_in_json_files array
     all_domains_variables_in_json_files=$(jq -n --argjson all_domains_variables_in_json_files "$all_domains_variables_in_json_files" --argjson domains_variables_names "$(echo "$domains_variables_names" | jq -R -s 'split("\n") | map(select(length > 0))')" '$all_domains_variables_in_json_files + $domains_variables_names | unique')
 
-    # Add the JSON file content to the all_json_files_contents object
-    all_json_files_contents=$(echo "$all_json_files_contents" | jq --arg json_file_path "$json_file_path" --argjson json_file_content "$json_file_content" '.[$json_file_path] = $json_file_content')
+    # Adding the content of all the JSON files to the variable was causing a "Too big variable" error.
+    # Therefore, the content is written to temporary files that jq reads, and the result is stored back in the variable.
+
+    all_json_files_temp_file=$(mktemp)
+    echo "$all_json_files_contents" > "$all_json_files_temp_file"
+    json_file_content_temp=$(mktemp)
+    echo "$json_file_content" > "$json_file_content_temp"
+
+    all_json_files_contents=$(jq --arg json_file_path "$json_file_path" --slurpfile json_file_content "$json_file_content_temp" '.[$json_file_path] = $json_file_content[0]' "$all_json_files_temp_file")
+
+    rm "$json_file_content_temp"
+    rm "$all_json_files_temp_file"
 
     # Update the variables_in_json_files object with the keys from the JSON file
     variables_in_json_files=$(echo "$variables_in_json_files" | jq --arg key "$json_file_path" --argjson value "$(echo "$all_json_files_contents" | jq 'to_entries | map({key: .key, value: (.value | keys)}) | from_entries' | jq --arg key "$json_file_path" '.[$key]')" '. * {($key): $value}')
 }
 
 check_if_duplicated_domains_variable_name_between_files() {
     local json_file_path="$1"
     local domains_variables_names="$2"
     for domains_variable_name in $domains_variables_names; do
 
         # Check if the domains variable name exists in the all_domains_variables_in_json_files array
-        if jq -e --arg name "$domains_variable_name" 'index($name) != null' <<< "$all_domains_variables_in_json_files" >/dev/null 2>&1; then
+        if jq -e --arg name "$domains_variable_name" 'index($name) != null' <<< $all_domains_variables_in_json_files >/dev/null 2>&1; then
             # If a duplicate is found, identify the file containing the duplicate and report an error
             local file_with_duplicate=$(jq -r --arg name "$domains_variable_name" '
             to_entries | map(select(.value | has($name))) | .[0].key
-            ' <<< "$all_json_files_contents")
+            ' <<< $all_json_files_contents)
             last_error="ERROR: Duplicate domains variable found in $json_file_path and $file_with_duplicate file: $domains_variable_name"
             return 1
         fi
     done
 }
 
 find_domain_variables_syntax_in_filterlist() {
     local filterlist_content="$1"
     local filterlist_path="$2"
     local -n all_lines_with_domain_variables_in_filterlist_nameref=$3
     # Find lines containing domain variables in the filterlist
     # The regex to find them is simpler than in filterlist delivery to also catch domains variables in the wrong place
     # without starting with a complex regex. The full regex is in one of the next steps
-    all_lines_with_domain_variables_in_filterlist_nameref=$(grep -P '%<\{.*\}>%' <<< "$filterlist_content" || echo '')
+    all_lines_with_domain_variables_in_filterlist_nameref=$(grep -P '%<\{.*\}>%' <<< $filterlist_content || echo '')
 }
 
-check_for_simiar_to_domain_variable() {
+check_for_similar_to_domain_variable() {
     local filterlist_content="$1"
     local filterlist_path="$2"
 
-    local regex_for_missing_beginning='([^%]|^)<\{|%[^<]?\{|%<[^{]'
-    local regex_for_missing_ending='[^}]>%|\}[^>]?%|\}>([^%]|$)'
-    local full_regex_for_missing_character="($regex_for_missing_beginning).*($regex_for_missing_ending|\}>%)|($regex_for_missing_beginning|%<\{).*($regex_for_missing_ending)"
-    local lines_with_similar_to_domain_variable
 
-    lines_with_similar_to_domain_variable=$(grep -P "$full_regex_for_missing_character" <<< "$filterlist_content" || echo '')
-    if [ -n "$lines_with_similar_to_domain_variable" ]; then
-        last_error="ERROR: Found a line in $filterlist_path that is similar to a domain variable, but it's not a domain variable:\n\n"
-        last_error+="$lines_with_similar_to_domain_variable"
-        return 1
+    # Precheck to quickly gather only the potential candidates for the full check
+    local weak_candidates=$(grep -P "[%{}<>].\w+.[%{}<>]" <<< $filterlist_content || echo '')
+    local candidates=$(grep -P "[%{}<>].?[%{}<>].*[%{}<>].?[%{}<>]" <<< $weak_candidates || echo '')
+    local strong_candidates=$(grep -oP "([^%{}<>]|^)[%{}<>]+.?[%{}<>]+.*[%{}<>]+.?[%{}<>]+([^%{}<>]|$)" <<< $candidates || echo '')
+    if [ -n "$strong_candidates" ]; then
+        local lines_with_similar_to_domain_variable=$(grep -vP "([|,=]|^)%<\{\w+\}>%([,|#]|$)" <<< $candidates || echo '')
+        if [ -n "$lines_with_similar_to_domain_variable" ]; then
+            last_error="ERROR: Found a line in $filterlist_path that is similar to a domain variable, but it's not a domain variable:\n\n"
+            last_error+="$lines_with_similar_to_domain_variable"
+            return 1
+        fi
     fi
     }
 
 process_filters() {
     local all_lines_with_domain_variables_in_filterlist="$1"
     local -n domains_variables_collected_from_filterlist_nameref="$2"
     domains_variables_collected_from_filterlist_nameref='[]'
 
     for filter in $all_lines_with_domain_variables_in_filterlist; do
         # Extract the domain variable from the filter
-        local domains_variable_match=$(grep -oP '(?<=%<\{).*?(?=\}>%)' <<< "$filter")
+        local domains_variable_match=$(grep -oP '(?<=%<\{).*?(?=\}>%)' <<< $filter)
         if [ "$(echo "$domains_variable_match" | wc -l)" -gt 1 ]; then
             last_error="ERROR: More than 2 domain variables found in filter: $filter"
             return 1
         fi
         # Ensure the domain variable is correctly formatted in the filter
         local true_matches
-        true_matches=$(grep -P '(%<{(\w+)}>%(?:,~?[a-zA-Z0-9*.~-]+)*#[?@$]?#)|([,$]domain=(?:[a-zA-Z0-9*.~-]+\|)*%<{(\w+)}>%)' <<< "$filter")
+        true_matches=$(grep -P '(%<{(\w+)}>%(?:,~?[a-zA-Z0-9*.~-]+)*#[?@$]?#)|([,$]domain=(?:[a-zA-Z0-9*.~-]+\|)*%<{(\w+)}>%)' <<< $filter)
 
         if [ -z "$true_matches" ]; then
             last_error="ERROR: Domain variable added in a wrong way in filter: $filter"
             return 1
         fi
-        domains_variables_collected_from_filterlist_nameref=$(jq --arg domains_variable_match "$domains_variable_match" '. + [$domains_variable_match]' <<< "$domains_variables_collected_from_filterlist_nameref")
+        domains_variables_collected_from_filterlist_nameref=$(jq --arg domains_variable_match "$domains_variable_match" '. + [$domains_variable_match]' <<< $domains_variables_collected_from_filterlist_nameref)
     done
 }
 
 update_matches_and_variables() {
     local domains_variables_collected_from_filterlist="$1"
     local file_path="$2"
     
     # Update the list of all domain variable matches in filterlists
     all_domain_variables_matches_in_filterlists=$(jq -n --argjson all_domain_variables_matches_in_filterlists "$all_domain_variables_matches_in_filterlists" --argjson matches "$domains_variables_collected_from_filterlist" '$all_domain_variables_matches_in_filterlists + $matches | unique')
 
     # Update the variables_in_included_filterlists object with the matches from the current filterlist
-    variables_in_included_filterlists=$(jq --arg key "$file_path" --argjson value "$domains_variables_collected_from_filterlist" '.[$key] = $value' <<< "$variables_in_included_filterlists")
+    variables_in_included_filterlists=$(jq --arg key "$file_path" --argjson value "$domains_variables_collected_from_filterlist" '.[$key] = $value' <<< $variables_in_included_filterlists)
 }
 
 extract_domains_variables_in_included_filterlists() {
 
     local template_name="$1"
     local -n domains_variables_in_included_filterlists_nameref=$2
     # Extract the list of included filterlists from the template
-    local included_filterlists=$(jq -r --arg template_name "$template_name" '.[$template_name].include[]' <<< "$templates_content")
+    local included_filterlists=$(jq -r --arg template_name "$template_name" '.[$template_name].include[]' <<< $templates_content)
     domains_variables_in_included_filterlists_nameref=()
 
     for included_filterlist in $included_filterlists; do
         # Extract the domain variables from each included filterlist
-        local domains_variables=$(jq -r --arg key "$included_filterlist" '.[$key][]' <<< "$variables_in_included_filterlists")
+        local domains_variables=$(jq -r --arg key "$included_filterlist" '.[$key][]' <<< $variables_in_included_filterlists)
         for domain_variable in $domains_variables; do
             domains_variables_in_included_filterlists_nameref+=("$domain_variable")
         done
     done
 }
 
 extract_domains_variables_in_included_json_files() {
     local template_name="$1"
     local -n domains_variables_in_included_json_files_nameref=$2
 
     # Extract the list of included JSON files from the template
-    local included_json_files=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< "$templates_content")
+    local included_json_files=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< $templates_content)
     domains_variables_in_included_json_files_nameref=()
 
     for included_json_file in $included_json_files; do
         # Extract the domain variables from each included JSON file
-        local domains_variables=$(jq -r --arg key "$included_json_file" '.[$key][]' <<< "$variables_in_json_files")
+        local domains_variables=$(jq -r --arg key "$included_json_file" '.[$key][]' <<< $variables_in_json_files)
         for domain_variable in $domains_variables; do
             domains_variables_in_included_json_files_nameref+=("$domain_variable")
         done
     done
 }
 
 check_domain_variables_in_filterlists() {
     local template_name="$1"
@@ -371,18 +388,18 @@ check_domain_variables_in_filterlists() 
     if [ -n "$2" ]; then
         domains_variables_in_included_filterlists=($2)
     fi
     if [ -n "$3" ]; then
         domains_variables_in_included_json_files=($3)
     fi
 
     # Extract the list of included filterlists and JSON files from the template
-    local included_filterlists=$(jq -r --arg template_name "$template_name" '.[$template_name].include[]' <<< "$templates_content")
-    local included_json_files=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< "$templates_content")
+    local included_filterlists=$(jq -r --arg template_name "$template_name" '.[$template_name].include[]' <<< $templates_content)
+    local included_json_files=$(jq -r --arg template_name "$template_name" '.[$template_name].domainsVariables[]' <<< $templates_content)
 
     for domain_variable_in_filterlist in ${domains_variables_in_included_filterlists[@]}; do
         local found=false
         for domain_variable_in_json_file in ${domains_variables_in_included_json_files[@]}; do
             if [ "$domain_variable_in_filterlist" = "$domain_variable_in_json_file" ]; then
                 found=true
                 break
             fi
@@ -429,30 +446,29 @@ main() {
 
     for domains_variables_path in ${unique_json_files[@]}; do
         check_if_file_exists "$domains_variables_path"
         local staged_domains_variables_file
         get_staged_version_of_a_file "$domains_variables_path" staged_domains_variables_file
         check_if_correct_domains_variables_json_structure "$staged_domains_variables_file" "$domains_variables_path"
         local domains_variables_names_incl_duplicates
         get_domains_variables_names_incl_duplicates "$staged_domains_variables_file" domains_variables_names_incl_duplicates
-        
+
         check_if_correct_domains_variables "$domains_variables_path" "$domains_variables_names_incl_duplicates" "$staged_domains_variables_file"
 
         check_if_duplicated_domains_variable_name_between_files "$domains_variables_path" "$domains_variables_names_incl_duplicates"
-        update_domains_variables_data "$domains_variables_names_incl_duplicates" "$domains_variables_path"
+        update_domains_variables_data "$domains_variables_names_incl_duplicates" "$domains_variables_path" "$staged_domains_variables_file"
     done
 
     for filterlist_path in ${unique_filterlists_to_include[@]}; do
         check_if_file_exists "$filterlist_path"
         local filterlist_content
         get_staged_version_of_a_file "$filterlist_path" filterlist_content
 
-        check_for_simiar_to_domain_variable  "$filterlist_content" "$filterlist_path"
-
+        check_for_similar_to_domain_variable  "$filterlist_content" "$filterlist_path"
         local all_lines_with_domain_variables_in_filterlist
         find_domain_variables_syntax_in_filterlist "$filterlist_content" "$filterlist_path" all_lines_with_domain_variables_in_filterlist
 
         if [ -z "$all_lines_with_domain_variables_in_filterlist" ]; then
             # In case of lack of matches, the value of all_lines_with_domain_variables_in_filterlist should have just
             # a message to show.
             local domains_variables_collected_from_filterlist='[]'
         else
@@ -474,24 +490,26 @@ main() {
 
     check_if_domains_variables_are_identical_in_lists_and_jsons
 }
 
 check_unit_tests() {
     local exit_status=0
 
 
-    ./pre-commit-src/tests/pre-commit-tests.sh || exit_status=1
+    ./pre-commit-src/tests/pre-commit-tests.sh '--no-verbose' || exit_status=1
     function_exit_code=$?
     if [ $exit_status -ne 1 ]; then
         exit_status=$function_exit_code
     fi
 
     if [ $exit_status -ne 0 ]; then
         last_error="Unit tests failed with exit code $exit_status"
+        last_error+="\nPlease fix the issues before continuing"
+        last_error+="\nTo check the details run 'pre-commit-src/tests/pre-commit-tests.sh'"  # leading \n keeps this hint on its own line
         return $exit_status
     else
         last_error="Unit tests passed successfully"
     fi
 }
 
 check_pre_commit_files() {
     pre_commit_git_status=$(git status :pre-commit-src/pre-commit)
@@ -513,17 +531,18 @@ check_pre_commit_files() {
     if ! [ -z "$logs_for_commited_changes_in_pre_commit_or_test" ]; then
         check_unit_tests
     fi
 }
 
 
 # For testing purposes only if the script has no arguments or the argument is main the process should run
 # thanks to that the script can be tested without running the main function
-if [ -z "${1:-}" ] || [ "$1" = "main" ]; then
+if [ -z "${1:-}" ]; then
+    echo 'Running pre-commit checks...'
     main
     check_pre_commit_files
 
     echo "Pre-commit checks passed successfully. Double check if there were no error messages above this message before pushing"
     exit 0
 
 elif [ "$1" = "--load-only" ]; then
     testing=true