Sfoglia il codice sorgente

Added parser for Sally's Baking Addiction.

Thomas Flucke 2 anni fa
parent
commit
571c46867b

+ 51 - 31
server/app/com/weEat/controllers/ParserController.scala

@@ -39,7 +39,9 @@ class ParserController @Inject()(
       val ingredients = doc >> parser.ingredientExtractor
       val instructions = doc >> parser.instructionExtractor
 
-      Future.sequence(ingredients.map(_parseIngredient _))
+      Future.sequence(ingredients.map({
+        case (amt, u, line) => _guessFoodFromStr(line).map(Ingredient(_, amt, u))
+      }))
         .map((ingredients) => Ok(Json.toJson(RecipeNodeNoId(
           title,
           servings.getOrElse(1.0f),
@@ -63,35 +65,17 @@ class ParserController @Inject()(
       ("epicurious.com" -> Parser.epicurious),
       ("mccormick.com" -> Parser.mccormick),
       ("recipetineats.com" -> Parser.recipeTinEats),
-      ("mamalovestocook.com" -> Parser.recipeTinEats)
+      ("mamalovestocook.com" -> Parser.recipeTinEats),
+      ("sallysbakingaddiction.com" -> Parser.sallysBakingAddiction)
     ).get(hostNoWWW)
   }
 
-  private def _parseIngredient(ingredientLine: String): Future[Ingredient] = {
-    val numberPattern = raw"(\d+)[\d-_]*\s(\w+)\s+(.+)".r
-    val fractionPattern = raw"(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r
-
-    ingredientLine match {
-      case numberPattern(amount, unit, rest) =>
-        _guessFoodFromStr(rest).map(Ingredient(
-          _,
-          amount.toFloat,
-          MeasureUnit.guessUnit(unit).getOrElse(Count)
-        ))
-      case fractionPattern(numerator, denominator, unit, rest) =>
-        _guessFoodFromStr(rest).map(Ingredient(
-          _,
-          numerator.toFloat/denominator.toFloat,
-          MeasureUnit.guessUnit(unit).getOrElse(Count)
-        ))
-      case noUnitLine => _guessFoodFromStr(noUnitLine).map(Ingredient(_, 1, Count))
-    }
-
-  }
-
   private def _guessFoodFromStr(foodLine: String): Future[Ingredient.IngredientId] = {
     import gov.usda.nal.fdc.models.DataType._
-    usdaController.fdc.getFoodsSearch(foodLine.filterNot(_ == '/'), Seq(
+    usdaController.fdc.getFoodsSearch(foodLine
+      .filter(_ <= 0x7f)
+      .filterNot(_ == ':')
+      .filterNot(_ == '/'), Seq(
       Foundation, Survey, SRLegacy
     ), pageSize = Some(10))().flatMap({ (fdcResult) =>
       Future.sequence(
@@ -103,7 +87,7 @@ class ParserController @Inject()(
         )((foodNode) => Ingredient.FoodNodeId(foodNode._id))
       ).transform({
         case Success(x) => Success(x)
-        case Failure(x) => println(foodLine);Failure(x)
+        case Failure(x) => println(s"Food lookup failed: $x");Failure(x)
       })
     })
   }
@@ -114,7 +98,7 @@ case class Parser(
   servingExtractor: HtmlExtractor[Element, Option[Float]],
   prepTimeExtractor: Option[HtmlExtractor[Element, String]],
   cookTimeExtractor: Option[HtmlExtractor[Element, String]],
-  ingredientExtractor: HtmlExtractor[Element, Iterable[String]],
+  ingredientExtractor: HtmlExtractor[Element, Iterable[(Float, MeasureUnit, String)]],
   instructionExtractor: HtmlExtractor[Element, Iterable[String]],
 )
 
@@ -125,18 +109,20 @@ object Parser {
     text(".main-title .count").map(_.toFloatOption),
     Some(text(".prep_time .first_content")),
     cookTimeExtractor = Some(text(".ingredients .first_content")),
-    ingredientExtractor = texts(".recipe-about-list li"),
+    ingredientExtractor = texts(".recipe-about-list li").map(_.map(_parseIngredient _)),
     texts(".instructions-main span.para")
   )
+
   val epicurious = Parser(
     text("h1"),
     text("""div[data-testid="IngredientList"] > p""")
       .map("Yield: \\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
     None,
     None,
-    texts("""div[data-testid="IngredientList"] > div > div"""),
+    texts("""div[data-testid="IngredientList"] > div > div""").map(_.map(_parseIngredient _)),
     texts("""div[data-testid="InstructionsWrapper"] > ol > li > p""")
   )
+
   val recipeTinEats = Parser(
     text("h2.wprm-recipe-name"),
     text("span.wprm-recipe-servings").map(_.toFloatOption),
@@ -163,9 +149,43 @@ object Parser {
         .replaceAll("\u215D", "5/8")
         .replaceAll("\u215E", "7/8")
         .replaceAll("\u215F", "1/")
-        .filter(_ <= 0x7f)
         .trim
-      )),
+      ))
+      .map(_.map(_parseIngredient _)),
     texts("div.wprm-recipe-instruction-text")
   )
+
+  /** Parser for sallysbakingaddiction.com, driven by the page's `tasty-recipes-*` markup.
+    *
+    * Unlike the other parsers in this object, ingredient lines are not run through
+    * `_parseIngredient`: for every ingredient `<li>` the amount and unit are read from the
+    * `data-amount` / `data-unit` attributes of the last `<span>` inside the item, and the
+    * food text is taken from the item's `<strong>` element.
+    */
+  val sallysBakingAddiction = Parser(
+    text("h2.tasty-recipes-title"),
+    // Servings: the first run of digits found in the yield text, if any.
+    text("span.tasty-recipes-yield")
+      .map("\\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
+    Some(text("span.tasty-recipes-prep-time")),
+    Some(text("span.tasty-recipes-cook-time")),
+    elementList("div.tasty-recipes-ingredients-body > ul > li").map(_.map({ (listItem) =>
+      // Resolve the quantity span once. `lastOption` (rather than `last`) keeps an item
+      // with an empty span list on the defaults below instead of throwing.
+      val quantitySpan: Option[Element] =
+        (listItem >?> elementList("span")).flatMap(_.lastOption)
+
+      (
+        // Amount: 0 when the span or attribute is missing, or when the attribute is not a
+        // number. `toFloatOption` avoids a NumberFormatException that would otherwise
+        // abort the extraction of the whole recipe.
+        quantitySpan.fold(0.0f)((elm: Element) =>
+          (elm >?> attr("data-amount")).flatMap(_.toFloatOption).getOrElse(0.0f)
+        ),
+        // Unit: Gram when the item has no span at all, Count when the span exists but
+        // carries no recognisable `data-unit`.
+        // NOTE(review): the two defaults differ; confirm this asymmetry is intended.
+        quantitySpan.fold[MeasureUnit](Gram)((elm: Element) =>
+          (elm >?> attr("data-unit")).flatMap(MeasureUnit.guessUnit _).getOrElse(Count)
+        ),
+        // NOTE(review): presumably fails for an item without a <strong> element; confirm
+        // every ingredient on the site has one.
+        listItem >> text("strong")
+      )
+    })),
+    texts("div.tasty-recipes-instructions-body > ol > li")
+  )
+
+
+  /** Splits a free-text ingredient line into `(amount, unit, food text)`.
+    *
+    * Recognised shapes (each regex has to match the whole line):
+    *  - mixed number: `"1 1/2 cups flour"` gives `(1.5, unit, "flour")`
+    *  - whole number: `"2 cups flour"` gives `(2, unit, "flour")`
+    *  - fraction: `"1/2 cup sugar"` gives `(0.5, unit, "sugar")`
+    *
+    * The word following the quantity is always consumed as the unit; when
+    * `MeasureUnit.guessUnit` does not recognise it, the unit is `Count`.
+    * Every other line (for example `"2 eggs"`, `"1.5 cups milk"` or `"salt to taste"`)
+    * is returned unchanged as `(1, Count, line)`.
+    *
+    * @param ingredientLine a single ingredient line as scraped from the page
+    * @return the amount, the measuring unit and the remaining food description
+    */
+  private def _parseIngredient(ingredientLine: String): (Float, MeasureUnit, String) = {
+    // `[\d-_]*` swallows the tail of a range such as "2-3", keeping only the leading number.
+    val mixedPattern = raw"(\d+)\s+(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r
+    val numberPattern = raw"(\d+)[\d-_]*\s(\w+)\s+(.+)".r
+    val fractionPattern = raw"(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r
+
+    def unitOf(word: String): MeasureUnit = MeasureUnit.guessUnit(word).getOrElse(Count)
+
+    ingredientLine match {
+      // Neither of the two patterns below can match "1 1/2 ...", so without this case such
+      // lines fall through to the default branch and lose both amount and unit.
+      case mixedPattern(whole, numerator, denominator, unit, rest) =>
+        (whole.toFloat + numerator.toFloat / denominator.toFloat, unitOf(unit), rest)
+      case numberPattern(amount, unit, rest) =>
+        (amount.toFloat, unitOf(unit), rest)
+      case fractionPattern(numerator, denominator, unit, rest) =>
+        (numerator.toFloat / denominator.toFloat, unitOf(unit), rest)
+      case noUnitLine =>
+        (1, Count, noUnitLine)
+    }
+  }
 }

+ 1 - 0
shared/shared/src/main/scala/com/weEat/shared/models/FoodNode.scala

@@ -32,6 +32,7 @@ sealed trait FoodNodeId extends FoodNode {
   val uid: Identifier
 
   def withId(id: Identifier): FoodNodeId = withId(id, uid)
+  def versionId = _id
 
   lazy val user: Future[User] = UserController.get(uid.toString)()
 }

+ 1 - 0
shared/shared/src/test/scala/com/weEat/shared/models/MeasureUnitTest.scala

@@ -18,6 +18,7 @@ class MeasureUnitTest() extends WordSpec with MustMatchers {
         ("teaspoons" -> TeaspoonUS),
         ("Teaspoon" -> TeaspoonUS),
         ("Teaspoons" -> TeaspoonUS),
+        ("tsp" -> TeaspoonUS),
         ("Whole" -> Count),
         ("individual school container" -> Count),
         ("1 cup" -> CupUS),