Clean up according to shellcheck
This commit is contained in:
parent
125995cba3
commit
a6c872adae
@ -15,20 +15,22 @@ update_cache() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
get_number_of_search_pages() {
|
get_number_of_search_pages() {
|
||||||
local results=$(curl "https://www.bbcgoodfood.com/api/search-frontend/search?limit=1&${search_args}" -s | jq '.searchResults.totalItems')
|
local results
|
||||||
|
results=$(curl "https://www.bbcgoodfood.com/api/search-frontend/search?limit=1&${search_args}" -s | jq '.searchResults.totalItems')
|
||||||
|
|
||||||
if [[ $(($results % recipies_per_search)) == 0 ]]; then
|
if [[ $((results % recipies_per_search)) == 0 ]]; then
|
||||||
echo $(($results / $recipies_per_search))
|
echo $((results / recipies_per_search))
|
||||||
else
|
else
|
||||||
echo $(($results / $recipies_per_search + 1))
|
echo $((results / recipies_per_search + 1))
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
get_main_ingredients() {
|
get_main_ingredients() {
|
||||||
|
local data
|
||||||
if [[ -z "$1" ]]; then
|
if [[ -z "$1" ]]; then
|
||||||
local data=$cache
|
data=$cache
|
||||||
else
|
else
|
||||||
local data=$(curl $1 -s)
|
data=$(curl "$1" -s)
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "$data" | htmlq -p | grep '<a class="link link--styled" data-component="Link" href="/glossary/.*">.*</a>' | sed 's/^.*<a class="link link--styled" data-component="Link" href=".*">//' | sed 's/<\/a>.*//' | sed 's/,//'
|
echo "$data" | htmlq -p | grep '<a class="link link--styled" data-component="Link" href="/glossary/.*">.*</a>' | sed 's/^.*<a class="link link--styled" data-component="Link" href=".*">//' | sed 's/<\/a>.*//' | sed 's/,//'
|
||||||
@ -36,10 +38,11 @@ get_main_ingredients() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
get_page_json() { # Unused - but could be helpful for expanding this out layer
|
get_page_json() { # Unused - but could be helpful for expanding this out layer
|
||||||
|
local data
|
||||||
if [[ -z "$1" ]]; then
|
if [[ -z "$1" ]]; then
|
||||||
local data=$cache
|
data=$cache
|
||||||
else
|
else
|
||||||
local data=$(curl $1 -s)
|
data=$(curl "$1" -s)
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "$data" | htmlq -p | grep '<script data-testid="page-schema" type="application/ld+json">' -A1 | tail -n1 | sed 's/<\/script>//' | jq
|
echo "$data" | htmlq -p | grep '<script data-testid="page-schema" type="application/ld+json">' -A1 | tail -n1 | sed 's/<\/script>//' | jq
|
||||||
@ -48,7 +51,8 @@ get_page_json() { # Unused - but could be helpful for expanding this out layer
|
|||||||
get_simple_search_json() {
|
get_simple_search_json() {
|
||||||
local page=$1
|
local page=$1
|
||||||
|
|
||||||
local data=$(curl "https://www.bbcgoodfood.com/api/search-frontend/search?limit=${recipies_per_search}&page=${page}&${search_args}" -s)
|
local data
|
||||||
|
data=$(curl "https://www.bbcgoodfood.com/api/search-frontend/search?limit=${recipies_per_search}&page=${page}&${search_args}" -s)
|
||||||
|
|
||||||
echo "$data" | jq '.searchResults.items[] | {
|
echo "$data" | jq '.searchResults.items[] | {
|
||||||
title: .title,
|
title: .title,
|
||||||
@ -61,7 +65,8 @@ get_simple_search_json() {
|
|||||||
get_search_urls() {
|
get_search_urls() {
|
||||||
local page=$1
|
local page=$1
|
||||||
|
|
||||||
local data=$(curl "https://www.bbcgoodfood.com/api/search-frontend/search?limit=${recipies_per_search}&page=${page}&${search_args}" -s)
|
local data
|
||||||
|
data=$(curl "https://www.bbcgoodfood.com/api/search-frontend/search?limit=${recipies_per_search}&page=${page}&${search_args}" -s)
|
||||||
|
|
||||||
echo "$data" | jq -r '.searchResults.items[] | "https://www.bbcgoodfood.com\(.url)"'
|
echo "$data" | jq -r '.searchResults.items[] | "https://www.bbcgoodfood.com\(.url)"'
|
||||||
}
|
}
|
||||||
@ -78,7 +83,7 @@ echo "Scraping $total_search_pages search pages"
|
|||||||
echo "Each has a max of $recipies_per_search recipes"
|
echo "Each has a max of $recipies_per_search recipes"
|
||||||
|
|
||||||
# For each of the search pages...
|
# For each of the search pages...
|
||||||
for page in $(seq $total_search_pages); do
|
for page in $(seq "$total_search_pages"); do
|
||||||
# for page in {1..2}; do # For testing only do a few pages
|
# for page in {1..2}; do # For testing only do a few pages
|
||||||
echo "Starting search page $page..."
|
echo "Starting search page $page..."
|
||||||
|
|
||||||
@ -86,21 +91,21 @@ for page in $(seq $total_search_pages); do
|
|||||||
declare -A main_ingredients
|
declare -A main_ingredients
|
||||||
|
|
||||||
# Get the search urls and grab their main ingredients
|
# Get the search urls and grab their main ingredients
|
||||||
urls=$(get_search_urls $page)
|
urls=$(get_search_urls "$page")
|
||||||
declare -i count=0
|
declare -i count=0
|
||||||
for url in $(get_search_urls $page); do
|
for url in $urls; do
|
||||||
echo "Recipe $count done"
|
echo "Recipe $count done"
|
||||||
ingredients=$(get_main_ingredients $url)
|
ingredients=$(get_main_ingredients "$url")
|
||||||
main_ingredients[$count]=$(echo "$ingredients" | awk '$1=$1' ORS=' - ' | sed 's/ - $//')
|
main_ingredients[$count]=$(echo "$ingredients" | awk '$1=$1' ORS=' - ' | sed 's/ - $//')
|
||||||
count+=1
|
count+=1
|
||||||
sleep $throttling
|
sleep "$throttling"
|
||||||
done
|
done
|
||||||
|
|
||||||
# Now process each simple_json from the search page adding in the ingredients
|
# Now process each simple_json from the search page adding in the ingredients
|
||||||
# and then adding it to the final json array
|
# and then adding it to the final json array
|
||||||
count=0
|
count=0
|
||||||
IFS=$'\n'
|
IFS=$'\n'
|
||||||
for result in $(get_simple_search_json $page); do
|
for result in $(get_simple_search_json "$page"); do
|
||||||
ingredients=${main_ingredients[$count]}
|
ingredients=${main_ingredients[$count]}
|
||||||
json=$(echo "$result" | jq ". + {\"ingredients\": \"$ingredients\"}")
|
json=$(echo "$result" | jq ". + {\"ingredients\": \"$ingredients\"}")
|
||||||
json_results=$(echo "$json_results" | jq ". + [$json]")
|
json_results=$(echo "$json_results" | jq ". + [$json]")
|
||||||
@ -111,14 +116,14 @@ for page in $(seq $total_search_pages); do
|
|||||||
done
|
done
|
||||||
|
|
||||||
# Print that final json array
|
# Print that final json array
|
||||||
echo $json_results | jq
|
echo "$json_results" | jq
|
||||||
|
|
||||||
# Save json file
|
# Save json file
|
||||||
echo $json_results | jq > goodFoodData.json
|
echo "$json_results" | jq > goodFoodData.json
|
||||||
|
|
||||||
# Save CSV
|
# Save CSV
|
||||||
echo "Title,Url,Rating,Time,Skill,Ingredients" > goodFoodData.csv
|
echo "Title,Url,Rating,Time,Skill,Ingredients" > goodFoodData.csv
|
||||||
echo $json_results | jq -r '.[] | [
|
echo "$json_results" | jq -r '.[] | [
|
||||||
.title,
|
.title,
|
||||||
.url,
|
.url,
|
||||||
.rating,
|
.rating,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user