Add basic single URL scraping
This commit is contained in:
commit
2cd5440660
30
scrape-all.sh
Normal file
30
scrape-all.sh
Normal file
@ -0,0 +1,30 @@
|
||||
#!/bin/bash
|
||||
|
||||
#######################################
# Download a page and store its body in the global `cache` variable.
# Globals:   cache (written only when the download succeeds)
# Arguments: $1 - URL to fetch
# Outputs:   diagnostics on stderr when the URL is missing or curl fails
# Returns:   0 on success, 2 if no URL was given, otherwise curl's exit status
#######################################
update_cache() {
  local url=${1-}
  if [[ -z "$url" ]]; then
    printf 'update_cache: missing URL argument\n' >&2
    return 2
  fi

  local body
  # -f: treat HTTP errors (4xx/5xx) as failures instead of caching the error
  #     page; -s/-S: no progress meter, but still report errors on stderr;
  # -L: follow redirects so the final page is what gets cached.
  # Download into a temporary first so a failed fetch leaves the previous
  # cache content intact.
  body=$(curl -fsSL -- "$url") || return
  cache=$body
}
|
||||
|
||||
#######################################
# Print ingredient text extracted from a recipe page, one entry per line.
# Two kinds of lines are emitted, in this order:
#   1. the link text of every anchor pointing at /glossary/...
#   2. for every line holding at least two "<!-- -->" markers, the text
#      between the first and the second marker
# Globals:   cache (read when no argument is given)
# Arguments: $1 - optional HTML document; defaults to $cache
# Outputs:   extracted text on stdout
# Returns:   non-zero if htmlq fails, otherwise the status of the last filter
#######################################
get_main_ingredients() {
  local html=${1:-${cache-}}
  local pretty

  # Pretty-print once and reuse the result for both extraction passes.
  pretty=$(printf '%s\n' "$html" | htmlq -p) || return

  # Glossary links. [^"]* keeps the href match inside its own quotes, so
  # markup following the link on the same line cannot be swallowed, and
  # grep -o reports every link even when several share one line.
  printf '%s\n' "$pretty" \
    | grep -o '<a class="link link--styled" data-component="Link" href="/glossary/[^"]*">[^<]*</a>' \
    | sed -e 's/^<a class="link link--styled" data-component="Link" href="[^"]*">//' \
      -e 's/<\/a>$//'

  # Marker-delimited text. Splitting directly on the marker avoids a
  # placeholder character that could collide with the page content.
  printf '%s\n' "$pretty" \
    | awk -F'<!-- -->' 'NF >= 3 { print $2 }'
}
|
||||
|
||||
#######################################
# Print the page's "page-schema" JSON-LD payload, formatted by jq.
# Globals:   cache (read when no argument is given)
# Arguments: $1 - optional HTML document; defaults to $cache
# Outputs:   formatted JSON on stdout; a diagnostic on stderr when the
#            page-schema script block cannot be found
# Returns:   non-zero if htmlq fails or no payload is found, otherwise
#            jq's exit status
#######################################
get_json() {
  local html=${1:-${cache-}}
  local pretty payload

  pretty=$(printf '%s\n' "$html" | htmlq -p) || return

  # Relies on the pretty-printed layout placing the JSON on the line right
  # after the opening <script> tag: take that line and drop the closing tag.
  payload=$(printf '%s\n' "$pretty" \
    | grep -A1 -- '<script data-testid="page-schema" type="application/ld+json">' \
    | tail -n 1 \
    | sed 's/<\/script>//')

  if [[ -z "$payload" ]]; then
    printf 'get_json: no page-schema JSON-LD block found\n' >&2
    return 1
  fi

  # Explicit identity filter: does not depend on jq defaulting the program.
  printf '%s\n' "$payload" | jq .
}
|
||||
|
||||
#######################################
# Entry point: fetch one recipe page, then print its JSON-LD data followed
# by the extracted ingredient lines.
# Arguments: $1 - optional recipe URL; defaults to the vegan banana bread page
# Returns:   exits non-zero when the page cannot be downloaded
#######################################
main() {
  local url=${1:-https://www.bbcgoodfood.com/recipes/vegan-banana-bread}

  # Do not run the parsers against an empty or stale cache.
  if ! update_cache "$url"; then
    printf 'failed to fetch %s\n' "$url" >&2
    exit 1
  fi

  get_json
  get_main_ingredients
}

main "$@"
|
Loading…
x
Reference in New Issue
Block a user