Add some throttling and verbose output

This commit is contained in:
CactiChameleon9 2024-12-23 22:37:34 +00:00
parent dd21ae23ed
commit 672b265259

View File

@ -2,7 +2,12 @@
#! nix-shell -i bash -p htmlq
search_args=$1
recipies_per_search=500
# Upper bound on recipes per search page (reported in the startup banner).
recipies_per_search=50
# Delay handed to `sleep` after each recipe; taken from the second
# argument, falling back to 0 (no pause) when it is missing or empty.
throttling="${2:-0}"
update_cache() {
@ -62,12 +67,20 @@ get_search_urls() {
}
# Greet the user, then work out how many search pages there are so the
# size of the run can be reported before any page is processed.
printf '%s\n' "Welcome! Beginning scrape"
total_search_pages="$(get_number_of_search_pages)"
# Accumulator for the scraped results; starts as an empty JSON array.
json_results='[]'
printf '\n'
printf '%s\n' \
  "Scraping ${total_search_pages} search pages" \
  "Each has a max of ${recipies_per_search} recipes"
# For each of the search pages...
for page in $(seq $(get_number_of_search_pages)); do
for page in $(seq $total_search_pages); do
# for page in {1..2}; do # For testing only do a few pages
echo "Starting search page $page..."
# Make an array to store the main ingredients
declare -A main_ingredients
@ -76,9 +89,11 @@ for page in $(seq $(get_number_of_search_pages)); do
# Fetch this page's recipe URLs once; the loop below reuses the list instead
# of calling get_search_urls a second time for the same page.
urls=$(get_search_urls "$page")
declare -i count=0
# $urls is deliberately unquoted: each whitespace-separated word is one URL.
for url in $urls; do
  ingredients=$(get_main_ingredients "$url")
  # Collapse the ingredient lines into a single " - "-separated string:
  # awk normalises whitespace and emits each line whose first field is
  # truthy, using " - " as the record separator; sed trims the trailing one.
  main_ingredients[$count]=$(printf '%s\n' "$ingredients" | awk '$1=$1' ORS=' - ' | sed 's/ - $//')
  # Report progress only after the recipe has actually been processed.
  echo "Recipe $count done"
  count+=1
  # Pause between recipes; fall back to no delay if throttling is unset or
  # empty so that sleep never fails on a missing operand.
  sleep "${throttling:-0}"
done
# Now process each simple_json from the search page adding in the ingredients