Add some throttling and verbose output
This commit is contained in:
parent
dd21ae23ed
commit
672b265259
@ -2,7 +2,12 @@
|
|||||||
#! nix-shell -i bash -p htmlq
|
#! nix-shell -i bash -p htmlq
|
||||||
|
|
||||||
search_args=$1
|
search_args=$1
|
||||||
recipies_per_search=500
|
throttling=$2
|
||||||
|
recipies_per_search=50
|
||||||
|
|
||||||
|
if [[ -z "$2" ]]; then
|
||||||
|
throttling=0
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
update_cache() {
|
update_cache() {
|
||||||
@ -62,12 +67,20 @@ get_search_urls() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
echo "Welcome! Beginning scrape"
|
||||||
|
|
||||||
json_results="[]"
|
json_results="[]"
|
||||||
|
|
||||||
|
total_search_pages=$(get_number_of_search_pages)
|
||||||
|
|
||||||
|
echo
|
||||||
|
echo "Scraping $total_search_pages search pages"
|
||||||
|
echo "Each has a max of $recipies_per_search recipes"
|
||||||
|
|
||||||
# For each of the search pages...
|
# For each of the search pages...
|
||||||
for page in $(seq $(get_number_of_search_pages)); do
|
for page in $(seq $total_search_pages); do
|
||||||
# for page in {1..2}; do # For testing only do a few pages
|
# for page in {1..2}; do # For testing only do a few pages
|
||||||
|
echo "Starting search page $page..."
|
||||||
|
|
||||||
# Make an array to store the main ingredients
|
# Make an array to store the main ingredients
|
||||||
declare -A main_ingredients
|
declare -A main_ingredients
|
||||||
@ -76,9 +89,11 @@ for page in $(seq $(get_number_of_search_pages)); do
|
|||||||
urls=$(get_search_urls $page)
|
urls=$(get_search_urls $page)
|
||||||
declare -i count=0
|
declare -i count=0
|
||||||
for url in $(get_search_urls $page); do
|
for url in $(get_search_urls $page); do
|
||||||
|
echo "Recipe $count done"
|
||||||
ingredients=$(get_main_ingredients $url)
|
ingredients=$(get_main_ingredients $url)
|
||||||
main_ingredients[$count]=$(echo "$ingredients" | awk '$1=$1' ORS=' - ' | sed 's/ - $//')
|
main_ingredients[$count]=$(echo "$ingredients" | awk '$1=$1' ORS=' - ' | sed 's/ - $//')
|
||||||
count+=1
|
count+=1
|
||||||
|
sleep $throttling
|
||||||
done
|
done
|
||||||
|
|
||||||
# Now process each simple_json from the search page adding in the ingredients
|
# Now process each simple_json from the search page adding in the ingredients
|
||||||
|
Loading…
x
Reference in New Issue
Block a user