Add some throttling and verbose output

This commit is contained in:
CactiChameleon9 2024-12-23 22:37:34 +00:00
parent dd21ae23ed
commit 672b265259

View File

@@ -2,7 +2,12 @@
#! nix-shell -i bash -p htmlq #! nix-shell -i bash -p htmlq
search_args=$1 search_args=$1
recipies_per_search=500 throttling=$2
recipies_per_search=50
if [[ -z "$2" ]]; then
throttling=0
fi
update_cache() { update_cache() {
@@ -62,12 +67,20 @@ get_search_urls() {
} }
echo "Welcome! Beginning scrape"
json_results="[]" json_results="[]"
total_search_pages=$(get_number_of_search_pages)
echo
echo "Scraping $total_search_pages search pages"
echo "Each has a max of $recipies_per_search recipes"
# For each of the search pages... # For each of the search pages...
for page in $(seq $(get_number_of_search_pages)); do for page in $(seq $total_search_pages); do
# for page in {1..2}; do # For testing only do a few pages # for page in {1..2}; do # For testing only do a few pages
echo "Starting search page $page..."
# Make an array to store the main ingredients # Make an array to store the main ingredients
declare -A main_ingredients declare -A main_ingredients
@@ -76,9 +89,11 @@ for page in $(seq $(get_number_of_search_pages)); do
urls=$(get_search_urls $page) urls=$(get_search_urls $page)
declare -i count=0 declare -i count=0
for url in $(get_search_urls $page); do for url in $(get_search_urls $page); do
echo "Recipe $count done"
ingredients=$(get_main_ingredients $url) ingredients=$(get_main_ingredients $url)
main_ingredients[$count]=$(echo "$ingredients" | awk '$1=$1' ORS=' - ' | sed 's/ - $//') main_ingredients[$count]=$(echo "$ingredients" | awk '$1=$1' ORS=' - ' | sed 's/ - $//')
count+=1 count+=1
sleep $throttling
done done
# Now process each simple_json from the search page adding in the ingredients # Now process each simple_json from the search page adding in the ingredients