소스 검색

Added more custom parsers.

Thomas Flucke 2 년 전
부모
커밋
6099f58f9c
1개의 파일이 변경되었으며, 126개의 추가와 21개의 삭제가 있습니다
  1. 126 21
      server/app/com/weEat/controllers/ParserController.scala

+ 126 - 21
server/app/com/weEat/controllers/ParserController.scala

@@ -35,7 +35,7 @@ class ParserController @Inject()(
     ) { (parser) =>
       val doc = _browser.get(url)
       val title = doc >> parser.titleExtractor
-      val servings = doc >> parser.servingExtractor
+      val servings = (doc >?> parser.servingExtractor).flatten
       val prepTime = parser.prepTimeExtractor.flatMap(doc >?> _)
       val cookTime = parser.cookTimeExtractor.flatMap(doc >?> _)
       val ingredients = doc >> parser.ingredientExtractor
@@ -50,7 +50,7 @@ class ParserController @Inject()(
           1.0f,
           UnitType.NUMBER,
           ingredients.toSeq,
-          /* tflucke@[2023-10-26]: Do not pss along the instructions since this
+          /* tflucke@[2023-10-26]: Do not pass along the instructions since this
            * could be a violation of the Recipe Author's copyright. */
           Nil, //instructions.toSeq,
           None,
@@ -71,41 +71,68 @@ class ParserController @Inject()(
       ("recipetineats.com" -> Parser.recipeTinEats),
       ("mamalovestocook.com" -> Parser.recipeTinEats),
       ("soulfullymade.com" -> Parser.recipeTinEats),
+      ("familycookierecipes.com" -> Parser.recipeTinEats),
+      ("familyfreshmeals.com" -> Parser.recipeTinEats),
+      ("handmadefarmhouse.com" -> Parser.recipeTinEats),
       ("sallysbakingaddiction.com" -> Parser.tastyRecipes),
       ("darngoodveggies.com" -> Parser.tastyRecipes),
+      ("pickledplum.com" -> Parser.tastyRecipes),
       ("seriouseats.com" -> Parser.seriousEats),
       ("greatist.com" -> Parser.greatist),
-      ("dimitrasdishes.com" -> Parser.dimitrasDishes)
+      ("dimitrasdishes.com" -> Parser.dimitrasDishes),
+      ("jif.com" -> Parser.jif),
+      ("kingarthurbaking.com" -> Parser.kingArthurBaking)
     ).get(hostNoWWW)
   }
 
   private def _guessFoodFromStr(
     foodLine: String
   ): Future[Ingredient.IngredientId] = { // resolves one free-text food line to an ingredient id
-    import gov.usda.nal.fdc.models.DataType._
     val foodLineFiltered = foodLine
       .filter(_ <= 0x7f) // keep only characters with code point <= 0x7f (7-bit ASCII)
+      .filterNot(_ == '!') // then strip the punctuation characters '!', ':' and '/'
       .filterNot(_ == ':')
       .filterNot(_ == '/')
-    usdaController.fdc.getFoodsSearch(foodLineFiltered, Seq(
-      // Branded, 
-        Foundation, SRLegacy
-    ), pageSize = Some(10))().flatMap({ (fdcResult) =>
-      Future.sequence(
-        fdcResult.foods.map((food) => foodController.getByFdcId(food.fdcId))
-      ).map(_.flatten
-        .headOption
-        .fold[Ingredient.IngredientId](
-          Ingredient.USDAId(fdcResult.foods.head.fdcId)
-        )((foodNode) => Ingredient.FoodNodeId(foodNode._id))
-      ).transform({
-        case Success(x) => Success(x)
-        case Failure(x) => println(s"Food lookup failed: $x");Failure(x)
-      })
+    searchFdcIndex(foodLineFiltered).transformWith { // try the FDC search first
+      case Success(Some(ingredientId)) => Future.successful(ingredientId)
+      case Success(None) => searchSelfIndex(foodLineFiltered) // no FDC hit: fall back to the name lookup
+      case Failure(e) => Future.failed(e) // a failed FDC search is propagated, not retried
+    }
+  }
+
+  /** Searches the FDC food index for `foodLine`.
+    *
+    * @param foodLine text to search for
+    * @return `None` when the search returns no foods; otherwise `Some` of a
+    *         `FoodNodeId` for the first hit whose `getByFdcId` lookup yields
+    *         a value, or of a `USDAId` for the first hit when none does
+    */
+  def searchFdcIndex(foodLine: String): Future[Option[Ingredient.IngredientId]] = {
+    import gov.usda.nal.fdc.models.DataType._
+    import gov.usda.nal.fdc.models.SearchResult
+    // Only the Foundation and SRLegacy data types are requested (Branded stays
+    // disabled), with a page size of 10.
+    usdaController.fdc.getFoodsSearch(
+      foodLine,
+      Seq(Foundation, SRLegacy),
+      pageSize = Some(10)
+    )().flatMap({
+      // Empty result list: report "nothing found" without failing the future.
+      case SearchResult(_, _, _, _, Nil) => Future.successful(None)
+      case SearchResult(_, _, _, _, hits) =>
+        val nodeLookups = hits.map((hit) => foodController.getByFdcId(hit.fdcId))
+        Future.sequence(nodeLookups).map({ (lookedUp) =>
+          // `hits` is non-empty in this branch, so `hits.head` is safe.
+          val chosen: Ingredient.IngredientId = lookedUp.flatten.headOption match {
+            case Some(foodNode) => Ingredient.FoodNodeId(foodNode._id)
+            case None => Ingredient.USDAId(hits.head.fdcId)
+          }
+          Some(chosen)
+        })
     })
   }
+
+  /** Looks `foodLine` up by name via `foodController.findByName`.
+    *
+    * @param foodLine name to look up
+    * @return a future holding the `FoodNodeId` of the first match; the future
+    *         fails with `NoSuchElementException(foodLine)` when there is no
+    *         match, and any failure of the lookup itself is passed through
+    */
+  def searchSelfIndex(foodLine: String): Future[Ingredient.IngredientId] = {
+    foodController.findByName(foodLine)
+      .transform {
+        case Success(foodNodes) =>
+          // headOption handles every Seq implementation; a `head :: tail`
+          // pattern would only match List and raise MatchError otherwise.
+          foodNodes.headOption match {
+            case Some(foodNode) => Success(Ingredient.FoodNodeId(foodNode._id))
+            case None => Failure(new NoSuchElementException(foodLine))
+          }
+        case Failure(e) => Failure(e)
+      }
+  }
 }
 
+
 case class Parser(
   titleExtractor: HtmlExtractor[Element, String],
   servingExtractor: HtmlExtractor[Element, Option[Float]],
@@ -182,14 +209,14 @@ object Parser {
     elementList("div.tasty-recipes-ingredients-body > ul > li").map(
       _.map({(listItem) => (
         ((listItem >?> elementList("span"))
-          .map(_.last)
+          .flatMap(_.lastOption)
           .fold(0.0f)((elm: Element) =>
             (elm >?> attr("data-amount"))
               .fold(0.0f)(_.toFloat)
           )
         ),
         (listItem >?> elementList("span"))
-          .map(_.last)
+          .flatMap(_.lastOption)
           .fold[MeasureUnit](Gram)((elm: Element) =>
             (elm >?> attr("data-unit"))
               .flatMap(MeasureUnit.guessUnit _)
@@ -296,6 +323,84 @@ object Parser {
     texts("div.mv-create-instructions > ol > li")
   )
 
+  /** Parser for jif.com recipe pages.
+    *
+    * Servings, prep time and cook time are all read from the
+    * `div.recipe-breakdown-step` entries; the entry is picked by the icon
+    * element it contains (`i.servings`, `i.prep`, `i.cook`) and its value is
+    * the text of `span.recipe-breakdown-detail`.
+    */
+  val jif = Parser(
+    text("h1.recipe-name"),
+    // Servings: a page without a servings entry, or with a non-numeric detail
+    // text, yields None instead of throwing from `.head`.
+    elementList("div.recipe-breakdown-step").map(
+      _.filter((listItem) => (listItem >?> text("i.servings")).isDefined)
+        .map(_ >> text("span.recipe-breakdown-detail"))
+        .headOption
+        .flatMap(_.toFloatOption)
+    ),
+    // Prep time: the extractor must produce a String, so `.head` still throws
+    // NoSuchElementException when the entry is absent; apply it with `>?>`.
+    Some(elementList("div.recipe-breakdown-step").map(
+      _.filter((listItem) => (listItem >?> text("i.prep")).isDefined)
+        .map(_ >> text("span.recipe-breakdown-detail"))
+        .head
+    )),
+    // Cook time: same shape and same caveat as prep time.
+    Some(elementList("div.recipe-breakdown-step").map(
+      _.filter((listItem) => (listItem >?> text("i.cook")).isDefined)
+        .map(_ >> text("span.recipe-breakdown-detail"))
+        .head
+    )),
+    // Ingredients: spell Unicode vulgar-fraction characters out as ASCII
+    // fractions and map U+00F1 (n with tilde) to a plain "n" before parsing.
+    texts("div.recipe-ingredients > ul > li")
+      .map(_.map(_
+        .replaceAll("\u00BD", "1/2")
+        .replaceAll("\u00BC", "1/4")
+        .replaceAll("\u00BE", "3/4")
+        .replaceAll("\u2150", "1/7")
+        .replaceAll("\u2151", "1/9")
+        .replaceAll("\u2152", "1/10")
+        .replaceAll("\u2153", "1/3")
+        .replaceAll("\u2154", "2/3")
+        .replaceAll("\u2155", "1/5")
+        .replaceAll("\u2156", "2/5")
+        .replaceAll("\u2157", "3/5")
+        .replaceAll("\u2158", "4/5")
+        .replaceAll("\u2159", "1/6")
+        .replaceAll("\u215A", "5/6")
+        .replaceAll("\u215B", "1/8")
+        .replaceAll("\u215C", "3/8")
+        .replaceAll("\u215D", "5/8")
+        .replaceAll("\u215E", "7/8")
+        .replaceAll("\u215F", "1/")
+        .replaceAll("\u00F1", "n")
+        .trim
+      ))
+      .map(_.map(_parseIngredient _)),
+    // Instructions: one entry per paragraph of the directions list.
+    texts("div.recipe-directions > ul > li > p")
+  )
+
+  /** Parser for kingarthurbaking.com recipe pages.
+    *
+    * Yield, prep time and bake time are read from the `div.stat__item--*`
+    * spans; ingredients come from the `div.ingredient-section` lists.
+    */
+  val kingArthurBaking = Parser(
+    text("h1 > span"),
+    // Yield text that is not a plain number becomes None.
+    text("div.stat__item--yield > span").map(_.toFloatOption),
+    Some(text("div.stat__item--prep > span")),
+    Some(text("div.stat__item--bake > span")),
+    // Ingredients: spell Unicode vulgar-fraction characters out as ASCII
+    // fractions and map U+00F1 (n with tilde) to a plain "n" before parsing.
+    texts("div.ingredient-section > ul > li")
+      .map(_.map(_
+        .replaceAll("\u00BD", "1/2")
+        .replaceAll("\u00BC", "1/4")
+        .replaceAll("\u00BE", "3/4")
+        .replaceAll("\u2150", "1/7")
+        .replaceAll("\u2151", "1/9")
+        .replaceAll("\u2152", "1/10")
+        .replaceAll("\u2153", "1/3")
+        .replaceAll("\u2154", "2/3")
+        .replaceAll("\u2155", "1/5")
+        .replaceAll("\u2156", "2/5")
+        .replaceAll("\u2157", "3/5")
+        .replaceAll("\u2158", "4/5")
+        .replaceAll("\u2159", "1/6")
+        .replaceAll("\u215A", "5/6")
+        .replaceAll("\u215B", "1/8")
+        .replaceAll("\u215C", "3/8")
+        .replaceAll("\u215D", "5/8")
+        .replaceAll("\u215E", "7/8")
+        .replaceAll("\u215F", "1/")
+        .replaceAll("\u00F1", "n")
+        .trim
+      ))
+      .map(_.map(_parseIngredient _)),
+    // Both class names are chained with dots: a space in a CSS selector is the
+    // descendant combinator, so "div.field field--recipe-steps" would look for
+    // a <field--recipe-steps> element and never match.
+    texts("div.field.field--recipe-steps > ol > li > p")
+  )
+
   private def _parseFraction(fractionLine: String) = {
     val fractionPattern = raw"(\d+)/(\d+)[\d-_]*".r
     val mixedFractionPattern = raw"(\d+)\w+(\d+)/(\d+)[\d-_]*".r