|
|
@@ -35,7 +35,7 @@ class ParserController @Inject()(
|
|
|
) { (parser) =>
|
|
|
val doc = _browser.get(url)
|
|
|
val title = doc >> parser.titleExtractor
|
|
|
- val servings = doc >> parser.servingExtractor
|
|
|
+ val servings = (doc >?> parser.servingExtractor).flatten
|
|
|
val prepTime = parser.prepTimeExtractor.flatMap(doc >?> _)
|
|
|
val cookTime = parser.cookTimeExtractor.flatMap(doc >?> _)
|
|
|
val ingredients = doc >> parser.ingredientExtractor
|
|
|
@@ -50,7 +50,7 @@ class ParserController @Inject()(
|
|
|
1.0f,
|
|
|
UnitType.NUMBER,
|
|
|
ingredients.toSeq,
|
|
|
- /* tflucke@[2023-10-26]: Do not pss along the instructions since this
|
|
|
+ /* tflucke@[2023-10-26]: Do not pass along the instructions since this
|
|
|
* could be a violation of the Recipe Author's copyright. */
|
|
|
Nil, //instructions.toSeq,
|
|
|
None,
|
|
|
@@ -71,41 +71,68 @@ class ParserController @Inject()(
|
|
|
("recipetineats.com" -> Parser.recipeTinEats),
|
|
|
("mamalovestocook.com" -> Parser.recipeTinEats),
|
|
|
("soulfullymade.com" -> Parser.recipeTinEats),
|
|
|
+ ("familycookierecipes.com" -> Parser.recipeTinEats),
|
|
|
+ ("familyfreshmeals.com" -> Parser.recipeTinEats),
|
|
|
+ ("handmadefarmhouse.com" -> Parser.recipeTinEats),
|
|
|
("sallysbakingaddiction.com" -> Parser.tastyRecipes),
|
|
|
("darngoodveggies.com" -> Parser.tastyRecipes),
|
|
|
+ ("pickledplum.com" -> Parser.tastyRecipes),
|
|
|
("seriouseats.com" -> Parser.seriousEats),
|
|
|
("greatist.com" -> Parser.greatist),
|
|
|
- ("dimitrasdishes.com" -> Parser.dimitrasDishes)
|
|
|
+ ("dimitrasdishes.com" -> Parser.dimitrasDishes),
|
|
|
+ ("jif.com" -> Parser.jif),
|
|
|
+ ("kingarthurbaking.com" -> Parser.kingArthurBaking)
|
|
|
).get(hostNoWWW)
|
|
|
}
|
|
|
|
|
|
+ /** Resolves a free-text ingredient line to an ingredient id.
+   *
+   * The line is first stripped of every character above 0x7f and of
+   * '!', ':' and '/'. The cleaned line is looked up with `searchFdcIndex`;
+   * only when that lookup yields `None` is `searchSelfIndex` consulted.
+   * A failed lookup is propagated to the caller unchanged.
+   */
private def _guessFoodFromStr(
foodLine: String
): Future[Ingredient.IngredientId] = {
- import gov.usda.nal.fdc.models.DataType._
val foodLineFiltered = foodLine
.filter(_ <= 0x7f)
+ .filterNot(_ == '!')
.filterNot(_ == ':')
.filterNot(_ == '/')
- usdaController.fdc.getFoodsSearch(foodLineFiltered, Seq(
- // Branded,
- Foundation, SRLegacy
- ), pageSize = Some(10))().flatMap({ (fdcResult) =>
- Future.sequence(
- fdcResult.foods.map((food) => foodController.getByFdcId(food.fdcId))
- ).map(_.flatten
- .headOption
- .fold[Ingredient.IngredientId](
- Ingredient.USDAId(fdcResult.foods.head.fdcId)
- )((foodNode) => Ingredient.FoodNodeId(foodNode._id))
- ).transform({
- case Success(x) => Success(x)
- case Failure(x) => println(s"Food lookup failed: $x");Failure(x)
- })
+ // flatMap leaves a failed FDC lookup untouched, so no explicit
+ // Failure branch is needed here.
+ searchFdcIndex(foodLineFiltered).flatMap {
+   case Some(fdcMatch) => Future.successful(fdcMatch)
+   case None => searchSelfIndex(foodLineFiltered)
+ }
+ }
|
|
|
+
|
|
|
+  /** Looks up `foodLine` in the FDC food search.
+    *
+    * The search is restricted to the `Foundation` and `SRLegacy` data types
+    * and is called with `pageSize = Some(10)`.
+    *
+    * @param foodLine the (already cleaned) ingredient text to search for
+    * @return `None` when the search returns no foods. Otherwise `Some` id:
+    *         the `FoodNodeId` of the first returned food for which
+    *         `foodController.getByFdcId` finds a node, or, when none of
+    *         them has one, the `USDAId` of the first returned food.
+    */
+  def searchFdcIndex(foodLine: String): Future[Option[Ingredient.IngredientId]] = {
+    import gov.usda.nal.fdc.models.DataType._
+    usdaController.fdc.getFoodsSearch(foodLine, Seq(
+      // Branded,
+      Foundation, SRLegacy
+    ), pageSize = Some(10))().flatMap({ (searchResult) =>
+      val hits = searchResult.foods
+      hits.headOption match {
+        case None => Future.successful(None)
+        case Some(topHit) =>
+          // Resolve every hit against the local food nodes, in search order.
+          Future.sequence(hits.map((hit) => foodController.getByFdcId(hit.fdcId)))
+            .map({ (localNodes) =>
+              localNodes.flatten.headOption
+                .map[Ingredient.IngredientId]((foodNode) => Ingredient.FoodNodeId(foodNode._id))
+                // No local node for any hit: fall back to the raw USDA id.
+                .orElse(Some(Ingredient.USDAId(topHit.fdcId)))
+            })
+      }
})
}
|
|
|
+
|
|
|
+  /** Looks up `foodLine` among the locally stored food nodes.
+    *
+    * @param foodLine the (already cleaned) ingredient text to search for
+    * @return the `FoodNodeId` of the first node returned by
+    *         `foodController.findByName`; the future fails with a
+    *         `NoSuchElementException` carrying `foodLine` when nothing is
+    *         found. A failure of the lookup itself is propagated unchanged.
+    */
+  def searchSelfIndex(foodLine: String): Future[Ingredient.IngredientId] = {
+    foodController.findByName(foodLine).flatMap { (foodNodes) =>
+      // headOption works for any Seq; the former `Nil` / `::` patterns only
+      // matched a List and would fall through for other Seq implementations.
+      foodNodes.headOption match {
+        case Some(foodNode) =>
+          Future.successful[Ingredient.IngredientId](
+            Ingredient.FoodNodeId(foodNode._id)
+          )
+        case None =>
+          Future.failed[Ingredient.IngredientId](
+            new NoSuchElementException(foodLine)
+          )
+      }
+    }
+  }
|
|
|
}
|
|
|
|
|
|
+
|
|
|
case class Parser(
|
|
|
titleExtractor: HtmlExtractor[Element, String],
|
|
|
servingExtractor: HtmlExtractor[Element, Option[Float]],
|
|
|
@@ -182,14 +209,14 @@ object Parser {
|
|
|
elementList("div.tasty-recipes-ingredients-body > ul > li").map(
|
|
|
_.map({(listItem) => (
|
|
|
((listItem >?> elementList("span"))
|
|
|
- .map(_.last)
|
|
|
+ .flatMap(_.lastOption)
|
|
|
.fold(0.0f)((elm: Element) =>
|
|
|
(elm >?> attr("data-amount"))
|
|
|
.fold(0.0f)(_.toFloat)
|
|
|
)
|
|
|
),
|
|
|
(listItem >?> elementList("span"))
|
|
|
- .map(_.last)
|
|
|
+ .flatMap(_.lastOption)
|
|
|
.fold[MeasureUnit](Gram)((elm: Element) =>
|
|
|
(elm >?> attr("data-unit"))
|
|
|
.flatMap(MeasureUnit.guessUnit _)
|
|
|
@@ -296,6 +323,84 @@ object Parser {
|
|
|
texts("div.mv-create-instructions > ol > li")
|
|
|
)
|
|
|
|
|
|
+  /** Characters that the ingredient parsers rewrite to plain ASCII before
+    * handing a line to `_parseIngredient`: the Unicode vulgar fractions,
+    * the bare "fraction numerator one" sign, and the n-with-tilde.
+    */
+  private val _asciiReplacements: Seq[(String, String)] = Seq(
+    "\u00BD" -> "1/2",
+    "\u00BC" -> "1/4",
+    "\u00BE" -> "3/4",
+    "\u2150" -> "1/7",
+    "\u2151" -> "1/9",
+    "\u2152" -> "1/10",
+    "\u2153" -> "1/3",
+    "\u2154" -> "2/3",
+    "\u2155" -> "1/5",
+    "\u2156" -> "2/5",
+    "\u2157" -> "3/5",
+    "\u2158" -> "4/5",
+    "\u2159" -> "1/6",
+    "\u215A" -> "5/6",
+    "\u215B" -> "1/8",
+    "\u215C" -> "3/8",
+    "\u215D" -> "5/8",
+    "\u215E" -> "7/8",
+    "\u215F" -> "1/",
+    "\u00F1" -> "n"
+  )
+
+  /** Applies every entry of `_asciiReplacements` to `line` and trims the
+    * result. The replacements are literal (no regex involved).
+    */
+  private def _toAsciiIngredientLine(line: String): String =
+    _asciiReplacements
+      .foldLeft(line) { case (acc, (from, to)) => acc.replace(from, to) }
+      .trim
+
+  /** Extracts the detail text of the jif.com "recipe breakdown" step that
+    * contains an element matching `iconSelector`.
+    *
+    * Extraction throws when no step matches (`.head` on an empty list).
+    * NOTE(review): the controller reads these extractors through `>?>`,
+    * which presumably turns that failure into `None` -- confirm.
+    */
+  private def _jifBreakdownDetail(iconSelector: String): HtmlExtractor[Element, String] =
+    elementList("div.recipe-breakdown-step").map(
+      _.filter((step) => (step >?> text(iconSelector)).isDefined)
+        .map(_ >> text("span.recipe-breakdown-detail"))
+        .head
+    )
+
+  val jif = Parser(
+    text("h1.recipe-name"),
+    _jifBreakdownDetail("i.servings").map(_.toFloatOption),
+    Some(_jifBreakdownDetail("i.prep")),
+    Some(_jifBreakdownDetail("i.cook")),
+    texts("div.recipe-ingredients > ul > li")
+      .map(_.map(_toAsciiIngredientLine _))
+      .map(_.map(_parseIngredient _)),
+    texts("div.recipe-directions > ul > li > p")
+  )
+
+  val kingArthurBaking = Parser(
+    text("h1 > span"),
+    text("div.stat__item--yield > span").map(_.toFloatOption),
+    Some(text("div.stat__item--prep > span")),
+    Some(text("div.stat__item--bake > span")),
+    texts("div.ingredient-section > ul > li")
+      .map(_.map(_toAsciiIngredientLine _))
+      .map(_.map(_parseIngredient _)),
+    // Both classes sit on the same div, so they are chained with '.'.
+    // The previous "div.field field--recipe-steps" used a descendant
+    // combinator and looked for a <field--recipe-steps> element instead.
+    texts("div.field.field--recipe-steps > ol > li > p")
+  )
|
|
|
+
|
|
|
private def _parseFraction(fractionLine: String) = {
|
|
|
val fractionPattern = raw"(\d+)/(\d+)[\d-_]*".r
|
|
|
val mixedFractionPattern = raw"(\d+)\w+(\d+)/(\d+)[\d-_]*".r
|