|
|
@@ -39,7 +39,9 @@ class ParserController @Inject()(
|
|
|
val ingredients = doc >> parser.ingredientExtractor
|
|
|
val instructions = doc >> parser.instructionExtractor
|
|
|
|
|
|
- Future.sequence(ingredients.map(_parseIngredient _))
|
|
|
+ Future.sequence(ingredients.map({
|
|
|
+ case (amt, u, line) => _guessFoodFromStr(line).map(Ingredient(_, amt, u))
|
|
|
+ }))
|
|
|
.map((ingredients) => Ok(Json.toJson(RecipeNodeNoId(
|
|
|
title,
|
|
|
servings.getOrElse(1.0f),
|
|
|
@@ -63,35 +65,17 @@ class ParserController @Inject()(
|
|
|
("epicurious.com" -> Parser.epicurious),
|
|
|
("mccormick.com" -> Parser.mccormick),
|
|
|
("recipetineats.com" -> Parser.recipeTinEats),
|
|
|
- ("mamalovestocook.com" -> Parser.recipeTinEats)
|
|
|
+ ("mamalovestocook.com" -> Parser.recipeTinEats),
|
|
|
+ ("sallysbakingaddiction.com" -> Parser.sallysBakingAddiction)
|
|
|
).get(hostNoWWW)
|
|
|
}
|
|
|
|
|
|
- private def _parseIngredient(ingredientLine: String): Future[Ingredient] = {
|
|
|
- val numberPattern = raw"(\d+)[\d-_]*\s(\w+)\s+(.+)".r
|
|
|
- val fractionPattern = raw"(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r
|
|
|
-
|
|
|
- ingredientLine match {
|
|
|
- case numberPattern(amount, unit, rest) =>
|
|
|
- _guessFoodFromStr(rest).map(Ingredient(
|
|
|
- _,
|
|
|
- amount.toFloat,
|
|
|
- MeasureUnit.guessUnit(unit).getOrElse(Count)
|
|
|
- ))
|
|
|
- case fractionPattern(numerator, denominator, unit, rest) =>
|
|
|
- _guessFoodFromStr(rest).map(Ingredient(
|
|
|
- _,
|
|
|
- numerator.toFloat/denominator.toFloat,
|
|
|
- MeasureUnit.guessUnit(unit).getOrElse(Count)
|
|
|
- ))
|
|
|
- case noUnitLine => _guessFoodFromStr(noUnitLine).map(Ingredient(_, 1, Count))
|
|
|
- }
|
|
|
-
|
|
|
- }
|
|
|
-
|
|
|
private def _guessFoodFromStr(foodLine: String): Future[Ingredient.IngredientId] = {
|
|
|
import gov.usda.nal.fdc.models.DataType._
|
|
|
- usdaController.fdc.getFoodsSearch(foodLine.filterNot(_ == '/'), Seq(
|
|
|
+ usdaController.fdc.getFoodsSearch(foodLine
|
|
|
+ .filter(_ <= 0x7f)
|
|
|
+ .filterNot(_ == ':')
|
|
|
+ .filterNot(_ == '/'), Seq(
|
|
|
Foundation, Survey, SRLegacy
|
|
|
), pageSize = Some(10))().flatMap({ (fdcResult) =>
|
|
|
Future.sequence(
|
|
|
@@ -103,7 +87,7 @@ class ParserController @Inject()(
|
|
|
)((foodNode) => Ingredient.FoodNodeId(foodNode._id))
|
|
|
).transform({
|
|
|
case Success(x) => Success(x)
|
|
|
- case Failure(x) => println(foodLine);Failure(x)
|
|
|
+ case Failure(x) => println(s"Food lookup failed: $x");Failure(x)
|
|
|
})
|
|
|
})
|
|
|
}
|
|
|
@@ -114,7 +98,7 @@ case class Parser(
|
|
|
servingExtractor: HtmlExtractor[Element, Option[Float]],
|
|
|
prepTimeExtractor: Option[HtmlExtractor[Element, String]],
|
|
|
cookTimeExtractor: Option[HtmlExtractor[Element, String]],
|
|
|
- ingredientExtractor: HtmlExtractor[Element, Iterable[String]],
|
|
|
+ ingredientExtractor: HtmlExtractor[Element, Iterable[(Float, MeasureUnit, String)]],
|
|
|
instructionExtractor: HtmlExtractor[Element, Iterable[String]],
|
|
|
)
|
|
|
|
|
|
@@ -125,18 +109,20 @@ object Parser {
|
|
|
text(".main-title .count").map(_.toFloatOption),
|
|
|
Some(text(".prep_time .first_content")),
|
|
|
cookTimeExtractor = Some(text(".ingredients .first_content")),
|
|
|
- ingredientExtractor = texts(".recipe-about-list li"),
|
|
|
+ ingredientExtractor = texts(".recipe-about-list li").map(_.map(_parseIngredient _)),
|
|
|
texts(".instructions-main span.para")
|
|
|
)
|
|
|
+
|
|
|
val epicurious = Parser(
|
|
|
text("h1"),
|
|
|
text("""div[data-testid="IngredientList"] > p""")
|
|
|
.map("Yield: \\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
|
|
|
None,
|
|
|
None,
|
|
|
- texts("""div[data-testid="IngredientList"] > div > div"""),
|
|
|
+ texts("""div[data-testid="IngredientList"] > div > div""").map(_.map(_parseIngredient _)),
|
|
|
texts("""div[data-testid="InstructionsWrapper"] > ol > li > p""")
|
|
|
)
|
|
|
+
|
|
|
val recipeTinEats = Parser(
|
|
|
text("h2.wprm-recipe-name"),
|
|
|
text("span.wprm-recipe-servings").map(_.toFloatOption),
|
|
|
@@ -163,9 +149,43 @@ object Parser {
|
|
|
.replaceAll("\u215D", "5/8")
|
|
|
.replaceAll("\u215E", "7/8")
|
|
|
.replaceAll("\u215F", "1/")
|
|
|
- .filter(_ <= 0x7f)
|
|
|
.trim
|
|
|
- )),
|
|
|
+ ))
|
|
|
+ .map(_.map(_parseIngredient _)),
|
|
|
texts("div.wprm-recipe-instruction-text")
|
|
|
)
|
|
|
+
|
|
|
+  // Parser whose selectors all target `tasty-recipes-*` classes; registered for
+  // sallysbakingaddiction.com in the controller's host map.
+  val sallysBakingAddiction = Parser(
+    text("h2.tasty-recipes-title"),
+    // Servings: the first run of digits found in the yield text, if any.
+    text("span.tasty-recipes-yield")
+      .map("\\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
+    Some(text("span.tasty-recipes-prep-time")),
+    Some(text("span.tasty-recipes-cook-time")),
+    elementList("div.tasty-recipes-ingredients-body > ul > li").map(_.map({ (listItem) =>
+      // Amount and unit are read from the LAST <span> of the list item. Look it up once,
+      // and use lastOption so an item whose span list is empty cannot throw.
+      val quantitySpan = (listItem >?> elementList("span")).flatMap(_.lastOption)
+      (
+        // A missing or non-numeric data-amount falls back to 0 instead of throwing.
+        quantitySpan
+          .flatMap((elm: Element) => elm >?> attr("data-amount"))
+          .flatMap(_.toFloatOption)
+          .getOrElse(0.0f),
+        // NOTE(review): no span at all => Gram, span without a recognised data-unit => Count.
+        // Kept exactly as written; confirm that the differing defaults are intentional.
+        quantitySpan.fold[MeasureUnit](Gram)((elm: Element) =>
+          (elm >?> attr("data-unit")).flatMap(MeasureUnit.guessUnit _).getOrElse(Count)),
+        // Food name comes from <strong>; fall back to the whole item text when it is absent
+        // so a single oddly marked-up item does not fail the entire recipe.
+        (listItem >?> text("strong")).getOrElse(listItem.text)
+      )
+    })),
+    texts("div.tasty-recipes-instructions-body > ol > li")
+  )
|
|
|
+
|
|
|
+
|
|
|
+  /**
+   * Splits a free-text ingredient line into `(amount, unit, food text)`.
+   *
+   * Shapes recognised, tried in this order (the whole line must match):
+   *  - mixed number + unit: "1 1/2 cups flour" => (1.5, unit, "flour")
+   *  - whole number + unit: "2 cups flour"     => (2.0, unit, "flour")
+   *  - fraction + unit:     "1/2 cup sugar"    => (0.5, unit, "sugar")
+   *  - whole number only:   "2 eggs"           => (2.0, Count, "eggs")
+   *  - anything else:       the entire line with amount 1 and unit Count
+   *
+   * The unit word is resolved with `MeasureUnit.guessUnit`; an unrecognised word
+   * becomes `Count`. Fractions with a zero denominator are not treated as amounts.
+   *
+   * @param ingredientLine one ingredient line as extracted from the page
+   * @return the parsed amount, the measuring unit and the remaining food description
+   */
+  private def _parseIngredient(ingredientLine: String): (Float, MeasureUnit, String) = {
+    // `[\d\-_]*` skips digits, hyphens and underscores glued to the leading number
+    // (e.g. the "-3" of "2-3 cups"); only the first number is used as the amount.
+    val mixedPattern = raw"(\d+)\s+(\d+)/(\d+)[\d\-_]*\s(\w+)\s+(.+)".r
+    val numberPattern = raw"(\d+)[\d\-_]*\s(\w+)\s+(.+)".r
+    val fractionPattern = raw"(\d+)/(\d+)[\d\-_]*\s(\w+)\s+(.+)".r
+    val countOnlyPattern = raw"(\d+)[\d\-_]*\s+(.+)".r
+
+    def unitOf(word: String): MeasureUnit = MeasureUnit.guessUnit(word).getOrElse(Count)
+
+    ingredientLine match {
+      case mixedPattern(whole, numerator, denominator, unit, rest) if denominator.toFloat != 0f =>
+        (whole.toFloat + numerator.toFloat / denominator.toFloat, unitOf(unit), rest)
+      case numberPattern(amount, unit, rest) =>
+        (amount.toFloat, unitOf(unit), rest)
+      case fractionPattern(numerator, denominator, unit, rest) if denominator.toFloat != 0f =>
+        (numerator.toFloat / denominator.toFloat, unitOf(unit), rest)
+      case countOnlyPattern(amount, rest) =>
+        // A number followed by a single word ("2 eggs") has no unit word to look up.
+        (amount.toFloat, Count, rest)
+      case noUnitLine =>
+        (1.0f, Count, noUnitLine)
+    }
+  }
|
|
|
}
|