From 03f821c1bac4c95600d88c6923a9145e8e581b4b Mon Sep 17 00:00:00 2001 From: Vicnet <35202538+VincentMeilinger@users.noreply.github.com> Date: Thu, 9 Nov 2023 11:24:27 +0100 Subject: [PATCH] Recipe scraper fixes --- .../project.pbxproj | 12 +- .../xcschemes/xcschememanagement.plist | 21 ++++ .../RecipeImport/RecipeScraper.swift | 79 +++++++++--- .../Scraper.playground/Contents.swift | 56 +-------- .../TestScraper.playground/Contents.swift | 118 ++++++++++++++++++ .../contents.xcplayground | 4 + 6 files changed, 212 insertions(+), 78 deletions(-) create mode 100644 Nextcloud Cookbook iOS Client/RecipeImport/TestScraper.playground/Contents.swift create mode 100644 Nextcloud Cookbook iOS Client/RecipeImport/TestScraper.playground/contents.xcplayground diff --git a/Nextcloud Cookbook iOS Client.xcodeproj/project.pbxproj b/Nextcloud Cookbook iOS Client.xcodeproj/project.pbxproj index 338d783..a6b89a2 100644 --- a/Nextcloud Cookbook iOS Client.xcodeproj/project.pbxproj +++ b/Nextcloud Cookbook iOS Client.xcodeproj/project.pbxproj @@ -33,9 +33,9 @@ A70D7CA12AC73CA800D53DBF /* RecipeEditView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A70D7CA02AC73CA700D53DBF /* RecipeEditView.swift */; }; A70D7CA32AC74B3B00D53DBF /* DateExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = A70D7CA22AC74B3B00D53DBF /* DateExtension.swift */; }; A74D33BE2AF82AAE00D06555 /* SwiftSoup in Frameworks */ = {isa = PBXBuildFile; productRef = A74D33BD2AF82AAE00D06555 /* SwiftSoup */; }; + A74D33C32AFCD1C300D06555 /* RecipeScraper.swift in Sources */ = {isa = PBXBuildFile; fileRef = A74D33C22AFCD1C300D06555 /* RecipeScraper.swift */; }; A76B8A6F2ADFFA8800096CEC /* SupportedLanguage.swift in Sources */ = {isa = PBXBuildFile; fileRef = A76B8A6E2ADFFA8800096CEC /* SupportedLanguage.swift */; }; A76B8A712AE002AE00096CEC /* AlertHandler.swift in Sources */ = {isa = PBXBuildFile; fileRef = A76B8A702AE002AE00096CEC /* AlertHandler.swift */; }; - A781E7612AF822D000452F6F /* RecipeScraper.swift in Sources */ = {isa = PBXBuildFile; fileRef = A781E7602AF822CF00452F6F /* RecipeScraper.swift */; }; A7AEAE642AD5521400135378 /* Localizable.xcstrings in Resources */ = {isa = PBXBuildFile; fileRef = A7AEAE632AD5521400135378 /* Localizable.xcstrings */; }; A7F3F8E82ACBFC760076C227 /* KeywordPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A7F3F8E72ACBFC760076C227 /* KeywordPickerView.swift */; }; A7F3F8EA2ACC221C0076C227 /* CategoryPickerView.swift in Sources */ = {isa = PBXBuildFile; fileRef = A7F3F8E92ACC221C0076C227 /* CategoryPickerView.swift */; }; @@ -88,10 +88,10 @@ A703226E2ABB1DD700D7C4ED /* ColorExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ColorExtension.swift; sourceTree = ""; }; A70D7CA02AC73CA700D53DBF /* RecipeEditView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RecipeEditView.swift; sourceTree = ""; }; A70D7CA22AC74B3B00D53DBF /* DateExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DateExtension.swift; sourceTree = ""; }; - A74D33BF2AF82CB500D06555 /* Scraper.playground */ = {isa = PBXFileReference; lastKnownFileType = file.playground; path = Scraper.playground; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.swift; }; + A74D33BF2AF82CB500D06555 /* TestScraper.playground */ = {isa = PBXFileReference; lastKnownFileType = file.playground; path = TestScraper.playground; sourceTree = ""; xcLanguageSpecificationIdentifier = xcode.lang.swift; }; + A74D33C22AFCD1C300D06555 /* RecipeScraper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RecipeScraper.swift; sourceTree = ""; }; A76B8A6E2ADFFA8800096CEC /* SupportedLanguage.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SupportedLanguage.swift; sourceTree = ""; }; A76B8A702AE002AE00096CEC /* AlertHandler.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AlertHandler.swift; sourceTree = ""; }; - A781E7602AF822CF00452F6F /* RecipeScraper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RecipeScraper.swift; sourceTree = ""; }; A7AEAE632AD5521400135378 /* Localizable.xcstrings */ = {isa = PBXFileReference; lastKnownFileType = text.json.xcstrings; path = Localizable.xcstrings; sourceTree = ""; }; A7F3F8E72ACBFC760076C227 /* KeywordPickerView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = KeywordPickerView.swift; sourceTree = ""; }; A7F3F8E92ACC221C0076C227 /* CategoryPickerView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CategoryPickerView.swift; sourceTree = ""; }; @@ -255,8 +255,8 @@ A781E75F2AF8228100452F6F /* RecipeImport */ = { isa = PBXGroup; children = ( - A781E7602AF822CF00452F6F /* RecipeScraper.swift */, - A74D33BF2AF82CB500D06555 /* Scraper.playground */, + A74D33BF2AF82CB500D06555 /* TestScraper.playground */, + A74D33C22AFCD1C300D06555 /* RecipeScraper.swift */, ); path = RecipeImport; sourceTree = ""; @@ -421,7 +421,7 @@ A703226A2ABAF49800D7C4ED /* JSONCoderExtension.swift in Sources */, A703226D2ABAF90D00D7C4ED /* APIController.swift in Sources */, A70171822AA8E71900064C43 /* Nextcloud_Cookbook_iOS_ClientApp.swift in Sources */, - A781E7612AF822D000452F6F /* RecipeScraper.swift in Sources */, + A74D33C32AFCD1C300D06555 /* RecipeScraper.swift in Sources */, A70171AD2AA8EF4700064C43 /* MainViewModel.swift in Sources */, A76B8A6F2ADFFA8800096CEC /* SupportedLanguage.swift in Sources */, A70171C92AB4CBB400064C43 /* OnboardingView.swift in Sources */, diff --git a/Nextcloud Cookbook iOS Client.xcodeproj/xcuserdata/vincentmeilinger.xcuserdatad/xcschemes/xcschememanagement.plist b/Nextcloud Cookbook iOS Client.xcodeproj/xcuserdata/vincentmeilinger.xcuserdatad/xcschemes/xcschememanagement.plist index e2b77cd..3091979 100644 --- a/Nextcloud Cookbook iOS Client.xcodeproj/xcuserdata/vincentmeilinger.xcuserdatad/xcschemes/xcschememanagement.plist +++ b/Nextcloud Cookbook iOS Client.xcodeproj/xcuserdata/vincentmeilinger.xcuserdatad/xcschemes/xcschememanagement.plist @@ -51,6 +51,27 @@ orderHint 4 + TestScraper (Playground) 1.xcscheme + + isShown + + orderHint + 8 + + TestScraper (Playground) 2.xcscheme + + isShown + + orderHint + 9 + + TestScraper (Playground).xcscheme + + isShown + + orderHint + 7 + diff --git a/Nextcloud Cookbook iOS Client/RecipeImport/RecipeScraper.swift b/Nextcloud Cookbook iOS Client/RecipeImport/RecipeScraper.swift index 4955630..f30fcb4 100644 --- a/Nextcloud Cookbook iOS Client/RecipeImport/RecipeScraper.swift +++ b/Nextcloud Cookbook iOS Client/RecipeImport/RecipeScraper.swift @@ -2,14 +2,14 @@ // RecipeScraper.swift // Nextcloud Cookbook iOS Client // -// Created by Vincent Meilinger on 05.11.23. +// Created by Vincent Meilinger on 09.11.23. // import Foundation import SwiftSoup class RecipeScraper { - func scrape(url: String) -> RecipeDetail? { + func scrape(url: String) throws -> RecipeDetail? { var contents: String? = nil if let url = URL(string: url) { do { @@ -26,42 +26,87 @@ class RecipeScraper { print("ERROR: no contents") exit(1) } + let doc = try SwiftSoup.parse(html) - let doc: Document = try SwiftSoup.parse(html) let elements: Elements = try doc.select("script") for elem in elements.array() { for attr in elem.getAttributes()!.asList() { if attr.getValue() == "application/ld+json" { - toDict(elem) + guard let dict = toDict(elem) else { continue } + return getRecipe(fromDict: dict) } } } + return nil } - private func toDict(_ elem: Element) -> [String: Any] { + private func toDict(_ elem: Element) -> [String: Any]? { + var recipeDict: [String: Any]? = nil do { let jsonString = try elem.html() //print(json) let json = try JSONSerialization.jsonObject(with: jsonString.data(using: .utf8)!, options: .fragmentsAllowed) if let recipe = json as? [String : Any] { - return recipe + recipeDict = recipe } else if let recipe = (json as! [Any])[0] as? [String : Any] { - return recipe - } + recipeDict = recipe + } } catch { - print("COULD NOT DECODE") + print("Unable to decode json") + return nil + } + + guard let recipeDict = recipeDict else { + print("Json is not a dict") + return nil + } + + if recipeDict["@type"] as? String ?? "" == "Recipe" { + return recipeDict + } else if (recipeDict["@type"] as? [String] ?? []).contains("Recipe") { + return recipeDict + } else { + print("Json dict is not a recipe ...") + return nil } } - private func getRecipe(fromDict recipe: Dictionary) { - if recipe["@type"] as? String ?? "" == "Recipe" { - print(recipe["name"] ?? "No name") - print(recipe["recipeIngredient"] ?? "No ingredients") - print(recipe["recipeInstruction"] ?? "No instruction") - } else if (recipe["@type"] as? [String] ?? []).contains("Recipe") { - print(recipe["name"] ?? "No name") - } + private func getRecipe(fromDict recipe: Dictionary) -> RecipeDetail? { + + var recipeDetail = RecipeDetail() + recipeDetail.name = recipe["name"] as? String ?? "New Recipe" + recipeDetail.recipeCategory = recipe["recipeCategory"] as? String ?? "" + recipeDetail.keywords = recipe["keywords"] as? String ?? "" + recipeDetail.description = recipe["description"] as? String ?? "" + recipeDetail.dateCreated = recipe["dateCreated"] as? String ?? "" + recipeDetail.dateModified = recipe["dateModified"] as? String ?? "" + recipeDetail.imageUrl = recipe["imageUrl"] as? String ?? "" + recipeDetail.url = recipe["url"] as? String ?? "" + recipeDetail.cookTime = recipe["cookTime"] as? String ?? "" + recipeDetail.prepTime = recipe["prepTime"] as? String ?? "" + recipeDetail.totalTime = recipe["totalTime"] as? String ?? "" + recipeDetail.recipeInstructions = stringArrayForKey("recipeInstructions", dict: recipe) + recipeDetail.recipeYield = recipe["recipeYield"] as? Int ?? 0 + recipeDetail.recipeIngredient = recipe["recipeIngredient"] as? [String] ?? [] + recipeDetail.tool = recipe["tool"] as? [String] ?? [] + recipeDetail.nutrition = recipe["nutrition"] as? [String:String] ?? [:] + + return recipeDetail } + private func stringArrayForKey(_ key: String, dict: Dictionary) -> [String] { + if let value = dict[key] as? [String] { + return value + } else if let orderedList = dict[key] as? [Any] { + var entries: [String] = [] + for dict in orderedList { + guard let dict = dict as? [String: Any] else { continue } + guard let text = dict["text"] as? String else { continue } + entries.append(text) + } + return entries + } + return [] + } } diff --git a/Nextcloud Cookbook iOS Client/RecipeImport/Scraper.playground/Contents.swift b/Nextcloud Cookbook iOS Client/RecipeImport/Scraper.playground/Contents.swift index 2ca041c..e418b58 100644 --- a/Nextcloud Cookbook iOS Client/RecipeImport/Scraper.playground/Contents.swift +++ b/Nextcloud Cookbook iOS Client/RecipeImport/Scraper.playground/Contents.swift @@ -5,59 +5,5 @@ import Foundation //let url = "https://www.chefkoch.de/rezepte/1385981243676608/Knusprige-Entenbrust.html" let url = "https://www.allrecipes.com/recipe/234620/mascarpone-mashed-potatoes/" -var contents: String? = nil -if let url = URL(string: url) { - do { - contents = try String(contentsOf: url) - //print(contents) - } catch { - print("ERROR: Could not load url content.") - } - -} else { - print("ERROR: Bad url.") -} -guard let html = contents else { - print("ERROR: no contents") - exit(1) -} - -let doc: Document = try SwiftSoup.parse(html) -let elements: Elements = try doc.select("script") -for elem in elements.array() { - for attr in elem.getAttributes()!.asList() { - //print(attr.getValue()) - if attr.getValue() == "application/ld+json" { - - do { - let jsonString = try elem.html() - //print(json) - let json = try JSONSerialization.jsonObject(with: jsonString.data(using: .utf8)!, options: .fragmentsAllowed) - if let recipe = json as? [String : Any] { - print("1") - getRecipe(fromDict: recipe) - } else if let recipe = (json as! [Any])[0] as? [String : Any] { - print("2") - getRecipe(fromDict: recipe) - } - - - } catch { - print("COULD NOT DECODE") - } - } - } -} - - -func getRecipe(fromDict recipe: Dictionary) { - - if recipe["@type"] as? String ?? "" == "Recipe" { - print(recipe["name"] ?? "No name") - print(recipe["recipeIngredient"] ?? "No ingredients") - print(recipe["recipeInstruction"] ?? "No instruction") - } else if (recipe["@type"] as? [String] ?? []).contains("Recipe") { - print(recipe["name"] ?? "No name") - } -} +let scraper = RecipeScaper() diff --git a/Nextcloud Cookbook iOS Client/RecipeImport/TestScraper.playground/Contents.swift b/Nextcloud Cookbook iOS Client/RecipeImport/TestScraper.playground/Contents.swift new file mode 100644 index 0000000..2d92032 --- /dev/null +++ b/Nextcloud Cookbook iOS Client/RecipeImport/TestScraper.playground/Contents.swift @@ -0,0 +1,118 @@ +import SwiftSoup +import Foundation + +class RecipeScraper { + func scrape(url: String) throws -> RecipeDetail? { + var contents: String? = nil + if let url = URL(string: url) { + do { + contents = try String(contentsOf: url) + } catch { + print("ERROR: Could not load url content.") + } + + } else { + print("ERROR: Bad url.") + } + + guard let html = contents else { + print("ERROR: no contents") + exit(1) + } + let doc = try SwiftSoup.parse(html) + + let elements: Elements = try doc.select("script") + for elem in elements.array() { + for attr in elem.getAttributes()!.asList() { + if attr.getValue() == "application/ld+json" { + guard let dict = toDict(elem) else { continue } + return getRecipe(fromDict: dict) + } + } + } + return nil + } + + + private func toDict(_ elem: Element) -> [String: Any]? { + var recipeDict: [String: Any]? = nil + do { + let jsonString = try elem.html() + //print(json) + let json = try JSONSerialization.jsonObject(with: jsonString.data(using: .utf8)!, options: .fragmentsAllowed) + if let recipe = json as? [String : Any] { + recipeDict = recipe + } else if let recipe = (json as! [Any])[0] as? [String : Any] { + recipeDict = recipe + } + } catch { + print("Unable to decode json") + return nil + } + + guard let recipeDict = recipeDict else { + print("Json is not a dict") + return nil + } + + if recipeDict["@type"] as? String ?? "" == "Recipe" { + return recipeDict + } else if (recipeDict["@type"] as? [String] ?? []).contains("Recipe") { + return recipeDict + } else { + print("Json dict is not a recipe ...") + return nil + } + } + + private func getRecipe(fromDict recipe: Dictionary) -> RecipeDetail? { + + var recipeDetail = RecipeDetail() + recipeDetail.name = recipe["name"] as? String ?? "New Recipe" + recipeDetail.recipeCategory = recipe["recipeCategory"] as? String ?? "" + recipeDetail.keywords = recipe["keywords"] as? String ?? "" + recipeDetail.description = recipe["description"] as? String ?? "" + recipeDetail.dateCreated = recipe["dateCreated"] as? String ?? "" + recipeDetail.dateModified = recipe["dateModified"] as? String ?? "" + recipeDetail.imageUrl = recipe["imageUrl"] as? String ?? "" + recipeDetail.url = recipe["url"] as? String ?? "" + recipeDetail.cookTime = recipe["cookTime"] as? String ?? "" + recipeDetail.prepTime = recipe["prepTime"] as? String ?? "" + recipeDetail.totalTime = recipe["totalTime"] as? String ?? "" + recipeDetail.recipeInstructions = stringArrayForKey("recipeInstructions", dict: recipe) + recipeDetail.recipeYield = recipe["recipeYield"] as? Int ?? 0 + recipeDetail.recipeIngredient = recipe["recipeIngredient"] as? [String] ?? [] + recipeDetail.tool = recipe["tool"] as? [String] ?? [] + recipeDetail.nutrition = recipe["nutrition"] as? [String:String] ?? [:] + + return recipeDetail + } + + private func stringArrayForKey(_ key: String, dict: Dictionary) -> [String] { + if let value = dict[key] as? [String] { + return value + } else if let orderedList = dict[key] as? [Any] { + var entries: [String] = [] + for dict in orderedList { + guard let dict = dict as? [String: Any] else { continue } + guard let text = dict["text"] as? String else { continue } + entries.append(text) + } + return entries + } + return [] + } +} + + +//let url = "https://www.chefkoch.de/rezepte/1385981243676608/Knusprige-Entenbrust.html" +let url = "https://www.allrecipes.com/recipe/234620/mascarpone-mashed-potatoes/" + +let scraper = RecipeScraper() + +do { + let recipe = try scraper.scrape(url: url) + print(recipe) +} catch { + print("No recipe on this website found.") +} diff --git a/Nextcloud Cookbook iOS Client/RecipeImport/TestScraper.playground/contents.xcplayground b/Nextcloud Cookbook iOS Client/RecipeImport/TestScraper.playground/contents.xcplayground new file mode 100644 index 0000000..cf026f2 --- /dev/null +++ b/Nextcloud Cookbook iOS Client/RecipeImport/TestScraper.playground/contents.xcplayground @@ -0,0 +1,4 @@ + + + + \ No newline at end of file