Recipe scraper fixes
This commit is contained in:
@@ -33,9 +33,9 @@
|
||||
A70D7CA12AC73CA800D53DBF /* RecipeEditView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A70D7CA02AC73CA700D53DBF /* RecipeEditView.swift */; };
|
||||
A70D7CA32AC74B3B00D53DBF /* DateExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = A70D7CA22AC74B3B00D53DBF /* DateExtension.swift */; };
|
||||
A74D33BE2AF82AAE00D06555 /* SwiftSoup in Frameworks */ = {isa = PBXBuildFile; productRef = A74D33BD2AF82AAE00D06555 /* SwiftSoup */; };
|
||||
A74D33C32AFCD1C300D06555 /* RecipeScraper.swift in Sources */ = {isa = PBXBuildFile; fileRef = A74D33C22AFCD1C300D06555 /* RecipeScraper.swift */; };
|
||||
A76B8A6F2ADFFA8800096CEC /* SupportedLanguage.swift in Sources */ = {isa = PBXBuildFile; fileRef = A76B8A6E2ADFFA8800096CEC /* SupportedLanguage.swift */; };
|
||||
A76B8A712AE002AE00096CEC /* AlertHandler.swift in Sources */ = {isa = PBXBuildFile; fileRef = A76B8A702AE002AE00096CEC /* AlertHandler.swift */; };
|
||||
A781E7612AF822D000452F6F /* RecipeScraper.swift in Sources */ = {isa = PBXBuildFile; fileRef = A781E7602AF822CF00452F6F /* RecipeScraper.swift */; };
|
||||
A7AEAE642AD5521400135378 /* Localizable.xcstrings in Resources */ = {isa = PBXBuildFile; fileRef = A7AEAE632AD5521400135378 /* Localizable.xcstrings */; };
|
||||
A7F3F8E82ACBFC760076C227 /* KeywordPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A7F3F8E72ACBFC760076C227 /* KeywordPickerView.swift */; };
|
||||
A7F3F8EA2ACC221C0076C227 /* CategoryPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A7F3F8E92ACC221C0076C227 /* CategoryPickerView.swift */; };
|
||||
@@ -88,10 +88,10 @@
|
||||
A703226E2ABB1DD700D7C4ED /* ColorExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ColorExtension.swift; sourceTree = "<group>"; };
|
||||
A70D7CA02AC73CA700D53DBF /* RecipeEditView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RecipeEditView.swift; sourceTree = "<group>"; };
|
||||
A70D7CA22AC74B3B00D53DBF /* DateExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DateExtension.swift; sourceTree = "<group>"; };
|
||||
A74D33BF2AF82CB500D06555 /* Scraper.playground */ = {isa = PBXFileReference; lastKnownFileType = file.playground; path = Scraper.playground; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.swift; };
|
||||
A74D33BF2AF82CB500D06555 /* TestScraper.playground */ = {isa = PBXFileReference; lastKnownFileType = file.playground; path = TestScraper.playground; sourceTree = "<group>"; xcLanguageSpecificationIdentifier = xcode.lang.swift; };
|
||||
A74D33C22AFCD1C300D06555 /* RecipeScraper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RecipeScraper.swift; sourceTree = "<group>"; };
|
||||
A76B8A6E2ADFFA8800096CEC /* SupportedLanguage.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SupportedLanguage.swift; sourceTree = "<group>"; };
|
||||
A76B8A702AE002AE00096CEC /* AlertHandler.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AlertHandler.swift; sourceTree = "<group>"; };
|
||||
A781E7602AF822CF00452F6F /* RecipeScraper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RecipeScraper.swift; sourceTree = "<group>"; };
|
||||
A7AEAE632AD5521400135378 /* Localizable.xcstrings */ = {isa = PBXFileReference; lastKnownFileType = text.json.xcstrings; path = Localizable.xcstrings; sourceTree = "<group>"; };
|
||||
A7F3F8E72ACBFC760076C227 /* KeywordPickerView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = KeywordPickerView.swift; sourceTree = "<group>"; };
|
||||
A7F3F8E92ACC221C0076C227 /* CategoryPickerView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CategoryPickerView.swift; sourceTree = "<group>"; };
|
||||
@@ -255,8 +255,8 @@
|
||||
A781E75F2AF8228100452F6F /* RecipeImport */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
A781E7602AF822CF00452F6F /* RecipeScraper.swift */,
|
||||
A74D33BF2AF82CB500D06555 /* Scraper.playground */,
|
||||
A74D33BF2AF82CB500D06555 /* TestScraper.playground */,
|
||||
A74D33C22AFCD1C300D06555 /* RecipeScraper.swift */,
|
||||
);
|
||||
path = RecipeImport;
|
||||
sourceTree = "<group>";
|
||||
@@ -421,7 +421,7 @@
|
||||
A703226A2ABAF49800D7C4ED /* JSONCoderExtension.swift in Sources */,
|
||||
A703226D2ABAF90D00D7C4ED /* APIController.swift in Sources */,
|
||||
A70171822AA8E71900064C43 /* Nextcloud_Cookbook_iOS_ClientApp.swift in Sources */,
|
||||
A781E7612AF822D000452F6F /* RecipeScraper.swift in Sources */,
|
||||
A74D33C32AFCD1C300D06555 /* RecipeScraper.swift in Sources */,
|
||||
A70171AD2AA8EF4700064C43 /* MainViewModel.swift in Sources */,
|
||||
A76B8A6F2ADFFA8800096CEC /* SupportedLanguage.swift in Sources */,
|
||||
A70171C92AB4CBB400064C43 /* OnboardingView.swift in Sources */,
|
||||
|
||||
@@ -51,6 +51,27 @@
|
||||
<key>orderHint</key>
|
||||
<integer>4</integer>
|
||||
</dict>
|
||||
<key>TestScraper (Playground) 1.xcscheme</key>
|
||||
<dict>
|
||||
<key>isShown</key>
|
||||
<false/>
|
||||
<key>orderHint</key>
|
||||
<integer>8</integer>
|
||||
</dict>
|
||||
<key>TestScraper (Playground) 2.xcscheme</key>
|
||||
<dict>
|
||||
<key>isShown</key>
|
||||
<false/>
|
||||
<key>orderHint</key>
|
||||
<integer>9</integer>
|
||||
</dict>
|
||||
<key>TestScraper (Playground).xcscheme</key>
|
||||
<dict>
|
||||
<key>isShown</key>
|
||||
<false/>
|
||||
<key>orderHint</key>
|
||||
<integer>7</integer>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</plist>
|
||||
|
||||
@@ -2,14 +2,14 @@
|
||||
// RecipeScraper.swift
|
||||
// Nextcloud Cookbook iOS Client
|
||||
//
|
||||
// Created by Vincent Meilinger on 05.11.23.
|
||||
// Created by Vincent Meilinger on 09.11.23.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
import SwiftSoup
|
||||
|
||||
class RecipeScraper {
|
||||
func scrape(url: String) -> RecipeDetail? {
|
||||
func scrape(url: String) throws -> RecipeDetail? {
|
||||
var contents: String? = nil
|
||||
if let url = URL(string: url) {
|
||||
do {
|
||||
@@ -26,42 +26,87 @@ class RecipeScraper {
|
||||
print("ERROR: no contents")
|
||||
exit(1)
|
||||
}
|
||||
let doc = try SwiftSoup.parse(html)
|
||||
|
||||
let doc: Document = try SwiftSoup.parse(html)
|
||||
let elements: Elements = try doc.select("script")
|
||||
for elem in elements.array() {
|
||||
for attr in elem.getAttributes()!.asList() {
|
||||
if attr.getValue() == "application/ld+json" {
|
||||
toDict(elem)
|
||||
guard let dict = toDict(elem) else { continue }
|
||||
return getRecipe(fromDict: dict)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
private func toDict(_ elem: Element) -> [String: Any] {
|
||||
private func toDict(_ elem: Element) -> [String: Any]? {
|
||||
var recipeDict: [String: Any]? = nil
|
||||
do {
|
||||
let jsonString = try elem.html()
|
||||
//print(json)
|
||||
let json = try JSONSerialization.jsonObject(with: jsonString.data(using: .utf8)!, options: .fragmentsAllowed)
|
||||
if let recipe = json as? [String : Any] {
|
||||
return recipe
|
||||
recipeDict = recipe
|
||||
} else if let recipe = (json as! [Any])[0] as? [String : Any] {
|
||||
return recipe
|
||||
recipeDict = recipe
|
||||
}
|
||||
} catch {
|
||||
print("COULD NOT DECODE")
|
||||
print("Unable to decode json")
|
||||
return nil
|
||||
}
|
||||
|
||||
guard let recipeDict = recipeDict else {
|
||||
print("Json is not a dict")
|
||||
return nil
|
||||
}
|
||||
|
||||
if recipeDict["@type"] as? String ?? "" == "Recipe" {
|
||||
return recipeDict
|
||||
} else if (recipeDict["@type"] as? [String] ?? []).contains("Recipe") {
|
||||
return recipeDict
|
||||
} else {
|
||||
print("Json dict is not a recipe ...")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
private func getRecipe(fromDict recipe: Dictionary<String, Any>) {
|
||||
if recipe["@type"] as? String ?? "" == "Recipe" {
|
||||
print(recipe["name"] ?? "No name")
|
||||
print(recipe["recipeIngredient"] ?? "No ingredients")
|
||||
print(recipe["recipeInstruction"] ?? "No instruction")
|
||||
} else if (recipe["@type"] as? [String] ?? []).contains("Recipe") {
|
||||
print(recipe["name"] ?? "No name")
|
||||
}
|
||||
/// Maps a decoded schema.org `Recipe` JSON-LD object onto a `RecipeDetail`.
///
/// Every field falls back to a default (empty string, empty collection, `0`,
/// or "New Recipe" for the name) when the key is missing or has an
/// unexpected type.
///
/// - Parameter recipe: The recipe's JSON object, keyed by property name.
/// - Returns: The populated recipe. This implementation never returns `nil`;
///   the optional return type is kept so existing callers stay compatible.
private func getRecipe(fromDict recipe: Dictionary<String, Any>) -> RecipeDetail? {
    var recipeDetail = RecipeDetail()
    recipeDetail.name = recipe["name"] as? String ?? "New Recipe"
    recipeDetail.recipeCategory = recipe["recipeCategory"] as? String ?? ""
    recipeDetail.keywords = recipe["keywords"] as? String ?? ""
    recipeDetail.description = recipe["description"] as? String ?? ""
    recipeDetail.dateCreated = recipe["dateCreated"] as? String ?? ""
    recipeDetail.dateModified = recipe["dateModified"] as? String ?? ""
    recipeDetail.imageUrl = imageURLString(from: recipe)
    recipeDetail.url = recipe["url"] as? String ?? ""
    recipeDetail.cookTime = recipe["cookTime"] as? String ?? ""
    recipeDetail.prepTime = recipe["prepTime"] as? String ?? ""
    recipeDetail.totalTime = recipe["totalTime"] as? String ?? ""
    recipeDetail.recipeInstructions = stringArrayForKey("recipeInstructions", dict: recipe)
    recipeDetail.recipeYield = servingCount(from: recipe["recipeYield"])
    recipeDetail.recipeIngredient = recipe["recipeIngredient"] as? [String] ?? []
    recipeDetail.tool = recipe["tool"] as? [String] ?? []
    recipeDetail.nutrition = recipe["nutrition"] as? [String: String] ?? [:]

    return recipeDetail
}

/// Returns the recipe's image address, preferring an explicit `imageUrl` entry
/// and otherwise falling back to the schema.org `image` property.
private func imageURLString(from recipe: [String: Any]) -> String {
    if let explicit = recipe["imageUrl"] as? String {
        return explicit
    }
    return firstURLString(in: recipe["image"]) ?? ""
}

/// Finds the first URL string in an `image` value, which may be a plain string,
/// an object carrying a `url` entry, or a list of either.
private func firstURLString(in value: Any?) -> String? {
    switch value {
    case let text as String:
        return text
    case let object as [String: Any]:
        return object["url"] as? String
    case let list as [Any]:
        for item in list {
            if let found = firstURLString(in: item) {
                return found
            }
        }
        return nil
    default:
        return nil
    }
}

/// Extracts a serving count from a `recipeYield` value.
///
/// Accepts a JSON integer, a string containing a number (e.g. "4 servings"),
/// or a list of such values; anything else yields `0`.
private func servingCount(from value: Any?) -> Int {
    switch value {
    case let count as Int:
        return count
    case let text as String:
        // Take the first run of ASCII digits; text without digits yields 0.
        let digits = text
            .drop(while: { !($0.isASCII && $0.isNumber) })
            .prefix(while: { $0.isASCII && $0.isNumber })
        return Int(digits) ?? 0
    case let list as [Any]:
        for item in list {
            let count = servingCount(from: item)
            if count != 0 {
                return count
            }
        }
        return 0
    default:
        return 0
    }
}
|
||||
|
||||
/// Returns the textual entries stored under `key` in `dict`.
///
/// Supported value shapes:
/// - a single string (returned as a one-element array),
/// - a list of strings,
/// - a list of objects whose `text` entry holds the string,
/// - objects without `text` that nest further entries under `itemListElement`,
/// - any mix of the above.
///
/// - Parameters:
///   - key: The property to read, e.g. "recipeInstructions".
///   - dict: The JSON object to read from.
/// - Returns: The collected strings in document order; empty when the key is
///   missing or holds no usable text.
private func stringArrayForKey(_ key: String, dict: Dictionary<String, Any>) -> [String] {
    return textEntries(in: dict[key])
}

/// Recursively collects the strings contained in a JSON value.
private func textEntries(in value: Any?) -> [String] {
    switch value {
    case let text as String:
        return [text]
    case let list as [Any]:
        return list.flatMap { textEntries(in: $0) }
    case let object as [String: Any]:
        if let text = object["text"] as? String {
            return [text]
        }
        // Objects without their own text may group nested entries.
        return textEntries(in: object["itemListElement"])
    default:
        return []
    }
}
|
||||
}
|
||||
|
||||
@@ -5,59 +5,5 @@ import Foundation
|
||||
|
||||
//let url = "https://www.chefkoch.de/rezepte/1385981243676608/Knusprige-Entenbrust.html"
|
||||
let url = "https://www.allrecipes.com/recipe/234620/mascarpone-mashed-potatoes/"
|
||||
var contents: String? = nil
|
||||
if let url = URL(string: url) {
|
||||
do {
|
||||
contents = try String(contentsOf: url)
|
||||
//print(contents)
|
||||
} catch {
|
||||
print("ERROR: Could not load url content.")
|
||||
}
|
||||
|
||||
} else {
|
||||
print("ERROR: Bad url.")
|
||||
}
|
||||
|
||||
guard let html = contents else {
|
||||
print("ERROR: no contents")
|
||||
exit(1)
|
||||
}
|
||||
|
||||
let doc: Document = try SwiftSoup.parse(html)
|
||||
let elements: Elements = try doc.select("script")
|
||||
for elem in elements.array() {
|
||||
for attr in elem.getAttributes()!.asList() {
|
||||
//print(attr.getValue())
|
||||
if attr.getValue() == "application/ld+json" {
|
||||
|
||||
do {
|
||||
let jsonString = try elem.html()
|
||||
//print(json)
|
||||
let json = try JSONSerialization.jsonObject(with: jsonString.data(using: .utf8)!, options: .fragmentsAllowed)
|
||||
if let recipe = json as? [String : Any] {
|
||||
print("1")
|
||||
getRecipe(fromDict: recipe)
|
||||
} else if let recipe = (json as! [Any])[0] as? [String : Any] {
|
||||
print("2")
|
||||
getRecipe(fromDict: recipe)
|
||||
}
|
||||
|
||||
|
||||
} catch {
|
||||
print("COULD NOT DECODE")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func getRecipe(fromDict recipe: Dictionary<String, Any>) {
|
||||
|
||||
if recipe["@type"] as? String ?? "" == "Recipe" {
|
||||
print(recipe["name"] ?? "No name")
|
||||
print(recipe["recipeIngredient"] ?? "No ingredients")
|
||||
print(recipe["recipeInstruction"] ?? "No instruction")
|
||||
} else if (recipe["@type"] as? [String] ?? []).contains("Recipe") {
|
||||
print(recipe["name"] ?? "No name")
|
||||
}
|
||||
}
|
||||
// The scraper class is named `RecipeScraper`.
let scraper = RecipeScraper()
|
||||
|
||||
@@ -0,0 +1,118 @@
|
||||
import SwiftSoup
|
||||
import Foundation
|
||||
|
||||
/// Extracts a recipe from the schema.org JSON-LD metadata embedded in a web page.
class RecipeScraper {
    /// Downloads the page at `url` and converts the first embedded `Recipe`
    /// JSON-LD object into a `RecipeDetail`.
    ///
    /// - Parameter url: Address of the recipe page.
    /// - Returns: The scraped recipe, or `nil` when the URL is malformed, the
    ///   page cannot be loaded, or no recipe metadata is found.
    /// - Throws: Errors raised by SwiftSoup while parsing or querying the HTML.
    func scrape(url: String) throws -> RecipeDetail? {
        guard let pageURL = URL(string: url) else {
            print("ERROR: Bad url.")
            return nil
        }

        let html: String
        do {
            // Synchronous load: this blocks the calling thread until finished.
            html = try String(contentsOf: pageURL)
        } catch {
            // Report the failure to the caller instead of terminating the process.
            print("ERROR: Could not load url content.")
            return nil
        }

        let doc = try SwiftSoup.parse(html)
        let scripts: Elements = try doc.select("script")
        for script in scripts.array() {
            let attributes = script.getAttributes()?.asList() ?? []
            guard attributes.contains(where: { $0.getValue() == "application/ld+json" }) else { continue }
            guard let dict = toDict(script) else { continue }
            return getRecipe(fromDict: dict)
        }
        return nil
    }

    /// Decodes the JSON held by a script element and returns the first object
    /// in it that is typed as a `Recipe`.
    ///
    /// - Parameter elem: A script element expected to contain JSON-LD.
    /// - Returns: The recipe object, or `nil` when the content is not valid
    ///   JSON, contains no JSON object, or contains no recipe.
    private func toDict(_ elem: Element) -> [String: Any]? {
        let json: Any
        do {
            let jsonString = try elem.html()
            json = try JSONSerialization.jsonObject(with: Data(jsonString.utf8), options: .fragmentsAllowed)
        } catch {
            print("Unable to decode json")
            return nil
        }

        let candidates = jsonObjects(in: json)
        guard !candidates.isEmpty else {
            print("Json is not a dict")
            return nil
        }
        guard let recipe = candidates.first(where: isRecipe) else {
            print("Json dict is not a recipe ...")
            return nil
        }
        return recipe
    }

    /// Collects every JSON object reachable from `json`: the value itself,
    /// the members of a top-level list, and the members of any `@graph` list.
    private func jsonObjects(in json: Any) -> [[String: Any]] {
        if let object = json as? [String: Any] {
            let graph = object["@graph"] as? [Any] ?? []
            return [object] + graph.flatMap { jsonObjects(in: $0) }
        }
        if let list = json as? [Any] {
            return list.flatMap { jsonObjects(in: $0) }
        }
        // Fragments (strings, numbers, null) carry no objects.
        return []
    }

    /// Tells whether the object's `@type` is, or includes, "Recipe".
    private func isRecipe(_ object: [String: Any]) -> Bool {
        if let type = object["@type"] as? String {
            return type == "Recipe"
        }
        return (object["@type"] as? [String] ?? []).contains("Recipe")
    }

    /// Maps a decoded schema.org `Recipe` JSON-LD object onto a `RecipeDetail`.
    ///
    /// Every field falls back to a default (empty string, empty collection,
    /// `0`, or "New Recipe" for the name) when the key is missing or has an
    /// unexpected type.
    ///
    /// - Parameter recipe: The recipe's JSON object, keyed by property name.
    /// - Returns: The populated recipe. This implementation never returns
    ///   `nil`; the optional return type is kept for caller compatibility.
    private func getRecipe(fromDict recipe: Dictionary<String, Any>) -> RecipeDetail? {
        var recipeDetail = RecipeDetail()
        recipeDetail.name = recipe["name"] as? String ?? "New Recipe"
        recipeDetail.recipeCategory = recipe["recipeCategory"] as? String ?? ""
        recipeDetail.keywords = recipe["keywords"] as? String ?? ""
        recipeDetail.description = recipe["description"] as? String ?? ""
        recipeDetail.dateCreated = recipe["dateCreated"] as? String ?? ""
        recipeDetail.dateModified = recipe["dateModified"] as? String ?? ""
        recipeDetail.imageUrl = imageURLString(from: recipe)
        recipeDetail.url = recipe["url"] as? String ?? ""
        recipeDetail.cookTime = recipe["cookTime"] as? String ?? ""
        recipeDetail.prepTime = recipe["prepTime"] as? String ?? ""
        recipeDetail.totalTime = recipe["totalTime"] as? String ?? ""
        recipeDetail.recipeInstructions = stringArrayForKey("recipeInstructions", dict: recipe)
        recipeDetail.recipeYield = servingCount(from: recipe["recipeYield"])
        recipeDetail.recipeIngredient = recipe["recipeIngredient"] as? [String] ?? []
        recipeDetail.tool = recipe["tool"] as? [String] ?? []
        recipeDetail.nutrition = recipe["nutrition"] as? [String: String] ?? [:]

        return recipeDetail
    }

    /// Returns the recipe's image address, preferring an explicit `imageUrl`
    /// entry and otherwise falling back to the schema.org `image` property.
    private func imageURLString(from recipe: [String: Any]) -> String {
        if let explicit = recipe["imageUrl"] as? String {
            return explicit
        }
        return firstURLString(in: recipe["image"]) ?? ""
    }

    /// Finds the first URL string in an `image` value, which may be a plain
    /// string, an object carrying a `url` entry, or a list of either.
    private func firstURLString(in value: Any?) -> String? {
        switch value {
        case let text as String:
            return text
        case let object as [String: Any]:
            return object["url"] as? String
        case let list as [Any]:
            for item in list {
                if let found = firstURLString(in: item) {
                    return found
                }
            }
            return nil
        default:
            return nil
        }
    }

    /// Extracts a serving count from a `recipeYield` value.
    ///
    /// Accepts a JSON integer, a string containing a number (e.g.
    /// "4 servings"), or a list of such values; anything else yields `0`.
    private func servingCount(from value: Any?) -> Int {
        switch value {
        case let count as Int:
            return count
        case let text as String:
            // Take the first run of ASCII digits; text without digits yields 0.
            let digits = text
                .drop(while: { !($0.isASCII && $0.isNumber) })
                .prefix(while: { $0.isASCII && $0.isNumber })
            return Int(digits) ?? 0
        case let list as [Any]:
            for item in list {
                let count = servingCount(from: item)
                if count != 0 {
                    return count
                }
            }
            return 0
        default:
            return 0
        }
    }

    /// Returns the textual entries stored under `key` in `dict`.
    ///
    /// Accepts a single string, a list of strings, a list of objects whose
    /// `text` entry holds the string, objects that nest further entries under
    /// `itemListElement`, or any mix of these.
    ///
    /// - Parameters:
    ///   - key: The property to read, e.g. "recipeInstructions".
    ///   - dict: The JSON object to read from.
    /// - Returns: The collected strings in document order; empty when the key
    ///   is missing or holds no usable text.
    private func stringArrayForKey(_ key: String, dict: Dictionary<String, Any>) -> [String] {
        return textEntries(in: dict[key])
    }

    /// Recursively collects the strings contained in a JSON value.
    private func textEntries(in value: Any?) -> [String] {
        switch value {
        case let text as String:
            return [text]
        case let list as [Any]:
            return list.flatMap { textEntries(in: $0) }
        case let object as [String: Any]:
            if let text = object["text"] as? String {
                return [text]
            }
            // Objects without their own text may group nested entries.
            return textEntries(in: object["itemListElement"])
        default:
            return []
        }
    }
}
|
||||
|
||||
|
||||
// Alternative test page:
//let url = "https://www.chefkoch.de/rezepte/1385981243676608/Knusprige-Entenbrust.html"
let url = "https://www.allrecipes.com/recipe/234620/mascarpone-mashed-potatoes/"

let scraper = RecipeScraper()

do {
    // `scrape` returns nil when the page holds no recipe; unwrap so the
    // output is the recipe itself rather than an `Optional` wrapper.
    if let recipe = try scraper.scrape(url: url) {
        print(recipe)
    } else {
        print("No recipe on this website found.")
    }
} catch {
    // A thrown error is a parsing failure, not a missing recipe; report it.
    print("ERROR: Could not parse the page: \(error)")
}
|
||||
@@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<playground version='5.0' target-platform='ios' buildActiveScheme='true' importAppTypes='true'>
|
||||
<timeline fileName='timeline.xctimeline'/>
|
||||
</playground>
|
||||
Reference in New Issue
Block a user