|
|
@@ -13,6 +13,7 @@ import net.ruippeixotog.scalascraper.dsl.DSL._
|
|
|
import net.ruippeixotog.scalascraper.dsl.DSL.Extract._
|
|
|
import net.ruippeixotog.scalascraper.model.Element
|
|
|
import net.ruippeixotog.scalascraper.scraper.HtmlExtractor
|
|
|
+import scala.util._
|
|
|
|
|
|
@Singleton
|
|
|
class ParserController @Inject()(
|
|
|
@@ -59,7 +60,8 @@ class ParserController @Inject()(
|
|
|
val hostNoWWW = if (host.startsWith("www.")) host.substring("www.".length) else host
|
|
|
Map(
|
|
|
("epicurious.com" -> Parser.epicurious),
|
|
|
- ("mccormick.com" -> Parser.mccormick)
|
|
|
+ ("mccormick.com" -> Parser.mccormick),
|
|
|
+ ("recipetineats.com" -> Parser.recipeTinEats)
|
|
|
).get(hostNoWWW)
|
|
|
}
|
|
|
|
|
|
@@ -67,6 +69,7 @@ class ParserController @Inject()(
|
|
|
val numberPattern = raw"(\d+)[\d-_]*\s(\w+)\s+(.+)".r
|
|
|
val fractionPattern = raw"(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r
|
|
|
|
|
|
+ //println(ingredientLine)
|
|
|
ingredientLine match {
|
|
|
case numberPattern(amount, unit, rest) =>
|
|
|
_guessFoodFromStr(rest).map(Ingredient(_, amount.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count)))
|
|
|
@@ -86,7 +89,10 @@ class ParserController @Inject()(
|
|
|
.map(_.flatten
|
|
|
.headOption
|
|
|
.fold[Ingredient.IngredientId](Ingredient.USDAId(fdcResult.foods.head.fdcId))((foodNode) => Ingredient.FoodNodeId(foodNode._id))
|
|
|
- )
|
|
|
+ ).transform({
|
|
|
+ case Success(x) => Success(x)
|
|
|
+ case Failure(x) => println(foodLine);Failure(x)
|
|
|
+ })
|
|
|
})
|
|
|
}
|
|
|
}
|
|
|
@@ -117,4 +123,12 @@ object Parser {
|
|
|
texts("""div[data-testid="IngredientList"] > div > div"""),
|
|
|
texts("""div[data-testid="InstructionsWrapper"] > ol > li > p""")
|
|
|
)
|
|
|
+ val recipeTinEats = Parser( // Parser registered for host "recipetineats.com"; argument roles below are inferred from the CSS selectors, Parser's parameter list is not visible here - TODO confirm
|
|
|
+ text("h2.wprm-recipe-name"), // text of the h2.wprm-recipe-name element (presumably the recipe title)
|
|
|
+ text("span.wprm-recipe-servings").map(_.toFloatOption), // servings text parsed as a Float; toFloatOption yields None instead of throwing when the text is not a valid float
|
|
|
+ Some(text("span.wprm-recipe-prep_time-minutes")), // reads only the "-minutes" span, wrapped in Some; NOTE(review): any hours component is not read here - confirm
|
|
|
+ Some(text("span.wprm-recipe-cook_time-minutes")), // same treatment as the prep-time argument, for the cook-time minutes span
|
|
|
+ texts("li.wprm-recipe-ingredient").map(_.map(_.filter(_ <= 0x7f).filterNot(_ == '/').trim)), // per ingredient line: keep only chars <= 0x7f, delete every '/', then trim; NOTE(review): this drops Unicode fraction glyphs and turns an ASCII "1/2" into "12" - confirm intended
|
|
|
+ texts("div.wprm-recipe-instruction-text") // text of every div.wprm-recipe-instruction-text element (presumably one entry per instruction step)
|
|
|
+ )
|
|
|
}
|