|
|
@@ -11,6 +11,7 @@ import com.weEat.services.OAuth2Service
|
|
|
import net.ruippeixotog.scalascraper.browser.JsoupBrowser
|
|
|
import net.ruippeixotog.scalascraper.dsl.DSL._
|
|
|
import net.ruippeixotog.scalascraper.dsl.DSL.Extract._
|
|
|
+//import net.ruippeixotog.scalascraper.dsl.DSL.Parse._
|
|
|
import net.ruippeixotog.scalascraper.model.Element
|
|
|
import net.ruippeixotog.scalascraper.scraper.HtmlExtractor
|
|
|
import scala.util._
|
|
|
@@ -33,8 +34,8 @@ class ParserController @Inject()(
|
|
|
val doc = _browser.get(url)
|
|
|
val title = doc >> parser.titleExtractor
|
|
|
val servings = doc >> parser.servingExtractor
|
|
|
- val prepTime = parser.prepTimeExtractor.map(doc >> _)
|
|
|
- val cookTime = parser.cookTimeExtractor.map(doc >> _)
|
|
|
+ val prepTime = parser.prepTimeExtractor.flatMap(doc >?> _)
|
|
|
+ val cookTime = parser.cookTimeExtractor.flatMap(doc >?> _)
|
|
|
val ingredients = doc >> parser.ingredientExtractor
|
|
|
val instructions = doc >> parser.instructionExtractor
|
|
|
|
|
|
@@ -61,7 +62,8 @@ class ParserController @Inject()(
|
|
|
Map(
|
|
|
("epicurious.com" -> Parser.epicurious),
|
|
|
("mccormick.com" -> Parser.mccormick),
|
|
|
- ("recipetineats.com" -> Parser.recipeTinEats)
|
|
|
+ ("recipetineats.com" -> Parser.recipeTinEats),
|
|
|
+ ("mamalovestocook.com" -> Parser.recipeTinEats)
|
|
|
).get(hostNoWWW)
|
|
|
}
|
|
|
|
|
|
@@ -69,12 +71,19 @@ class ParserController @Inject()(
|
|
|
val numberPattern = raw"(\d+)[\d-_]*\s(\w+)\s+(.+)".r
|
|
|
val fractionPattern = raw"(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r
|
|
|
|
|
|
- //println(ingredientLine)
|
|
|
ingredientLine match {
|
|
|
case numberPattern(amount, unit, rest) =>
|
|
|
- _guessFoodFromStr(rest).map(Ingredient(_, amount.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count)))
|
|
|
+ _guessFoodFromStr(rest).map(Ingredient(
|
|
|
+ _,
|
|
|
+ amount.toFloat,
|
|
|
+ MeasureUnit.guessUnit(unit).getOrElse(Count)
|
|
|
+ ))
|
|
|
case fractionPattern(numerator, denominator, unit, rest) =>
|
|
|
- _guessFoodFromStr(rest).map(Ingredient(_, numerator.toFloat/denominator.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count)))
|
|
|
+ _guessFoodFromStr(rest).map(Ingredient(
|
|
|
+ _,
|
|
|
+ numerator.toFloat/denominator.toFloat,
|
|
|
+ MeasureUnit.guessUnit(unit).getOrElse(Count)
|
|
|
+ ))
|
|
|
case noUnitLine => _guessFoodFromStr(noUnitLine).map(Ingredient(_, 1, Count))
|
|
|
}
|
|
|
|
|
|
@@ -82,17 +91,20 @@ class ParserController @Inject()(
|
|
|
|
|
|
 private def _guessFoodFromStr(foodLine: String): Future[Ingredient.IngredientId] = {
|
|
|
 import gov.usda.nal.fdc.models.DataType._
|
|
|
- usdaController.fdc.getFoodsSearch(foodLine, Seq(
|
|
|
+ usdaController.fdc.getFoodsSearch(foodLine.filterNot(_ == '/'), Seq( // every '/' is removed from the search text before calling getFoodsSearch
|
|
|
 Foundation, Survey, SRLegacy
|
|
|
 ), pageSize = Some(10))().flatMap({ (fdcResult) =>
|
|
|
- Future.sequence(fdcResult.foods.map((food) => foodController.getByFdcId(food.fdcId)))
|
|
|
- .map(_.flatten
|
|
|
- .headOption
|
|
|
- .fold[Ingredient.IngredientId](Ingredient.USDAId(fdcResult.foods.head.fdcId))((foodNode) => Ingredient.FoodNodeId(foodNode._id))
|
|
|
- ).transform({
|
|
|
- case Success(x) => Success(x)
|
|
|
- case Failure(x) => println(foodLine);Failure(x)
|
|
|
- })
|
|
|
+ Future.sequence( // gather one foodController lookup per search hit into a single Future
|
|
|
+ fdcResult.foods.map((food) => foodController.getByFdcId(food.fdcId))
|
|
|
+ ).map(_.flatten // presumably getByFdcId yields an Option per hit, so flatten keeps only the hits that resolved -- TODO confirm
|
|
|
+ .headOption // first resolved food node, if any
|
|
|
+ .fold[Ingredient.IngredientId]( // no resolved node => fall back to the USDA id below
|
|
|
+ Ingredient.USDAId(fdcResult.foods.head.fdcId) // NOTE(review): foods.head presumably throws when the search returned no foods -- confirm a failed Future is the intended outcome
|
|
|
+ )((foodNode) => Ingredient.FoodNodeId(foodNode._id)) // resolved node => use its _id as a FoodNodeId
|
|
|
+ ).transform({ // results pass through unchanged; this step only adds a print on failure
|
|
|
+ case Success(x) => Success(x)
|
|
|
+ case Failure(x) => println(foodLine);Failure(x) // prints the unmodified foodLine (with any '/' still present), then re-raises the same failure
|
|
|
+ })
|
|
|
 })
|
|
|
 }
|
|
|
}
|
|
|
@@ -109,6 +121,7 @@ case class Parser(
|
|
|
object Parser {
|
|
|
val mccormick = Parser(
|
|
|
text("h1"),
|
|
|
+ // TODO use extractors
|
|
|
text(".main-title .count").map(_.toFloatOption),
|
|
|
Some(text(".prep_time .first_content")),
|
|
|
cookTimeExtractor = Some(text(".ingredients .first_content")),
|
|
|
@@ -117,7 +130,8 @@ object Parser {
|
|
|
)
|
|
|
val epicurious = Parser(
|
|
|
text("h1"),
|
|
|
- text("""div[data-testid="IngredientList"] > p""").map("Yield: \\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
|
|
|
+ text("""div[data-testid="IngredientList"] > p""")
|
|
|
+ .map("Yield: \\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
|
|
|
None,
|
|
|
None,
|
|
|
texts("""div[data-testid="IngredientList"] > div > div"""),
|
|
|
@@ -128,7 +142,30 @@ object Parser {
|
|
|
text("span.wprm-recipe-servings").map(_.toFloatOption),
|
|
|
Some(text("span.wprm-recipe-prep_time-minutes")),
|
|
|
Some(text("span.wprm-recipe-cook_time-minutes")),
|
|
|
- texts("li.wprm-recipe-ingredient").map(_.map(_.filter(_ <= 0x7f).filterNot(_ == '/').trim)),
|
|
|
+ texts("li.wprm-recipe-ingredient")
|
|
|
+ .map(_.map(_
|
|
|
+ .replaceAll("\u00BD", "1/2")
|
|
|
+ .replaceAll("\u00BC", "1/4")
|
|
|
+ .replaceAll("\u00BE", "3/4")
|
|
|
+ .replaceAll("\u2150", "1/7")
|
|
|
+ .replaceAll("\u2151", "1/9")
|
|
|
+ .replaceAll("\u2152", "1/10")
|
|
|
+ .replaceAll("\u2153", "1/3")
|
|
|
+ .replaceAll("\u2154", "2/3")
|
|
|
+ .replaceAll("\u2155", "1/5")
|
|
|
+ .replaceAll("\u2156", "2/5")
|
|
|
+ .replaceAll("\u2157", "3/5")
|
|
|
+ .replaceAll("\u2158", "4/5")
|
|
|
+ .replaceAll("\u2159", "1/6")
|
|
|
+ .replaceAll("\u215A", "5/6")
|
|
|
+ .replaceAll("\u215B", "1/8")
|
|
|
+ .replaceAll("\u215C", "3/8")
|
|
|
+ .replaceAll("\u215D", "5/8")
|
|
|
+ .replaceAll("\u215E", "7/8")
|
|
|
+ .replaceAll("\u215F", "1/")
|
|
|
+ .filter(_ <= 0x7f)
|
|
|
+ .trim
|
|
|
+ )),
|
|
|
texts("div.wprm-recipe-instruction-text")
|
|
|
)
|
|
|
}
|