Add throttling between requests and verbose progress output
This commit is contained in:
parent
dd21ae23ed
commit
672b265259
@ -2,7 +2,12 @@
|
||||
#! nix-shell -i bash -p htmlq
|
||||
|
||||
search_args=$1
|
||||
recipies_per_search=500
|
||||
throttling=$2
|
||||
recipies_per_search=50
|
||||
|
||||
if [[ -z "$2" ]]; then
|
||||
throttling=0
|
||||
fi
|
||||
|
||||
|
||||
update_cache() {
|
||||
@ -62,12 +67,20 @@ get_search_urls() {
|
||||
}
|
||||
|
||||
|
||||
echo "Welcome! Beginning scrape"
|
||||
|
||||
json_results="[]"
|
||||
|
||||
total_search_pages=$(get_number_of_search_pages)
|
||||
|
||||
echo
|
||||
echo "Scraping $total_search_pages search pages"
|
||||
echo "Each has a max of $recipies_per_search recipes"
|
||||
|
||||
# For each of the search pages...
|
||||
for page in $(seq $(get_number_of_search_pages)); do
|
||||
for page in $(seq $total_search_pages); do
|
||||
# for page in {1..2}; do # For testing only do a few pages
|
||||
echo "Starting search page $page..."
|
||||
|
||||
# Make an array to store the main ingredients
|
||||
declare -A main_ingredients
|
||||
@ -76,9 +89,11 @@ for page in $(seq $(get_number_of_search_pages)); do
|
||||
urls=$(get_search_urls $page)
|
||||
declare -i count=0
|
||||
for url in $(get_search_urls $page); do
|
||||
echo "Recipe $count done"
|
||||
ingredients=$(get_main_ingredients $url)
|
||||
main_ingredients[$count]=$(echo "$ingredients" | awk '$1=$1' ORS=' - ' | sed 's/ - $//')
|
||||
count+=1
|
||||
sleep $throttling
|
||||
done
|
||||
|
||||
# Now process each simple_json from the search page, adding in the ingredients
|
||||
|
Loading…
x
Reference in New Issue
Block a user