| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120 |
- package com.weEat.controllers
- import com.weEat.shared.models._
- import javax.inject.{Inject,Singleton}
- import play.api.libs.json._
- import play.api.mvc._
- import scala.concurrent.Future
- import com.weEat.models.Authorization
- import scalaoauth2.provider.{AuthInfoRequest,OAuth2ProviderActionBuilders}
- import com.weEat.services.OAuth2Service
- import net.ruippeixotog.scalascraper.browser.JsoupBrowser
- import net.ruippeixotog.scalascraper.dsl.DSL._
- import net.ruippeixotog.scalascraper.dsl.DSL.Extract._
- import net.ruippeixotog.scalascraper.model.Element
- import net.ruippeixotog.scalascraper.scraper.HtmlExtractor
/** Controller that scrapes a recipe page from a supported site and returns it
  * as a `RecipeNodeNoId` JSON document.
  *
  * A page is only fetched when its host has an entry in the parser table. Each
  * scraped ingredient line is resolved to a food id through the USDA search;
  * a food node returned by `foodController.getByFdcId` takes precedence over
  * the raw USDA id.
  */
@Singleton
class ParserController @Inject()(
    val controllerComponents: ControllerComponents,
    oauth: OAuth2Service,
    usdaController: USDAController,
    foodController: FoodController
) extends BaseController
    with OAuth2ProviderActionBuilders {

  import scala.concurrent.blocking
  import scala.util.Try

  implicit val ec: scala.concurrent.ExecutionContext =
    scala.concurrent.ExecutionContext.global

  private val _browser = JsoupBrowser()

  /** Supported sites, keyed by lower-case host name without a leading "www.". */
  private val _parsersByHost: Map[String, Parser] = Map(
    "epicurious.com" -> Parser.epicurious,
    "mccormick.com" -> Parser.mccormick
  )

  /** `<number> <unit> <food>`; digits, hyphens or underscores directly after
    * the leading number (e.g. the tail of a range such as `2-3`) are skipped. */
  private val _numberPattern = raw"(\d+)[\d-_]*\s(\w+)\s+(.+)".r

  /** `<numerator>/<denominator> <unit> <food>`, with the same tail handling. */
  private val _fractionPattern = raw"(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r

  /** Scrapes the recipe found at the URL sent as the plain-text request body.
    *
    * @return 404 when no parser handles the URL (including URLs that cannot be
    *         parsed at all), otherwise 200 with the scraped recipe as JSON.
    *         Failures while fetching the page or resolving ingredients surface
    *         as a failed `Future`.
    */
  def parseURL(): Action[String] = AuthorizedAction[Authorization](oauth).async(parse.text)({ implicit request: AuthInfoRequest[String, Authorization] =>
    val url = request.body
    _findParser(url).fold(Future.successful(NotFound(s"No parser available for $url."))) { parser =>
      // The page download is blocking network I/O, so it runs inside a Future
      // marked as blocking instead of on the thread that invoked the action.
      Future(blocking(_browser.get(url))).flatMap { doc =>
        val title = doc >> parser.titleExtractor
        val servings = doc >> parser.servingExtractor
        val ingredientLines = doc >> parser.ingredientExtractor
        // Prep time, cook time and instructions are not extracted: the
        // response below does not use any of them.
        Future.sequence(ingredientLines.map(line => _parseIngredient(line))).map { ingredients =>
          Ok(Json.toJson(RecipeNodeNoId(
            title,
            servings.getOrElse(1.0f),
            1.0f,
            UnitType.NUMBER,
            ingredients.toSeq,
            /* tflucke@[2023-10-26]: Do not pass along the instructions since this
             * could be a violation of the Recipe Author's copyright. */
            Nil,
            None,
            None,
            Some(url)
          )))
        }
      }
    }
  })

  /** Looks up the parser registered for the host of `url`.
    *
    * @return `None` when `url` is malformed, is not an http(s) URL, has no
    *         host, or its host (ignoring case and a leading "www.") is not in
    *         the parser table.
    */
  private def _findParser(url: String): Option[Parser] =
    for {
      parsed <- Try(new java.net.URL(url)).toOption
      if parsed.getProtocol.equalsIgnoreCase("http") || parsed.getProtocol.equalsIgnoreCase("https")
      // getHost leaves out port and user-info, which getAuthority would include.
      host <- Option(parsed.getHost).map(_.toLowerCase(java.util.Locale.ROOT))
      parser <- _parsersByHost.get(host.stripPrefix("www."))
    } yield parser

  /** Turns one scraped ingredient line into an `Ingredient`.
    *
    * Lines matching neither pattern, and fractions with a zero denominator,
    * are treated as a single `Count` of whatever food the whole line
    * resolves to.
    */
  private def _parseIngredient(ingredientLine: String): Future[Ingredient] =
    ingredientLine match {
      case _numberPattern(amount, unit, rest) =>
        _guessFoodFromStr(rest).map(Ingredient(_, amount.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count)))
      // A zero denominator would produce a non-finite amount, so such lines
      // fall through to the no-unit case.
      case _fractionPattern(numerator, denominator, unit, rest) if denominator.toFloat != 0f =>
        _guessFoodFromStr(rest).map(Ingredient(_, numerator.toFloat / denominator.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count)))
      case noUnitLine =>
        _guessFoodFromStr(noUnitLine).map(Ingredient(_, 1, Count))
    }

  /** Resolves free text to a food id using the first ten USDA search hits.
    *
    * The first hit for which `foodController.getByFdcId` yields a food node
    * wins; otherwise the first hit's USDA id is used. The returned `Future`
    * fails with a `NoSuchElementException` when the search has no hits.
    */
  private def _guessFoodFromStr(foodLine: String): Future[Ingredient.IngredientId] = {
    import gov.usda.nal.fdc.models.DataType._
    usdaController.fdc.getFoodsSearch(foodLine, Seq(
      Foundation, Survey, SRLegacy
    ), pageSize = Some(10))().flatMap[Ingredient.IngredientId] { fdcResult =>
      fdcResult.foods.headOption match {
        case None =>
          Future.failed[Ingredient.IngredientId](
            new NoSuchElementException(s"No USDA food found for '$foodLine'.")
          )
        case Some(firstHit) =>
          Future.sequence(fdcResult.foods.map(food => foodController.getByFdcId(food.fdcId)))
            .map(_.flatten
              .headOption
              .fold[Ingredient.IngredientId](Ingredient.USDAId(firstHit.fdcId))(foodNode => Ingredient.FoodNodeId(foodNode._id))
            )
      }
    }
  }
}
/** Bundle of scala-scraper extractors describing how to read one recipe site.
  *
  * @param titleExtractor       extracts the recipe title
  * @param servingExtractor     extracts the number of servings; yields `None`
  *                             when no number can be read
  * @param prepTimeExtractor    extracts the preparation time as text, if the
  *                             site has a parser for it
  * @param cookTimeExtractor    extracts the cooking time as text, if the site
  *                             has a parser for it
  * @param ingredientExtractor  extracts one string per ingredient line
  * @param instructionExtractor extracts one string per instruction step
  */
final case class Parser(
    titleExtractor: HtmlExtractor[Element, String],
    servingExtractor: HtmlExtractor[Element, Option[Float]],
    prepTimeExtractor: Option[HtmlExtractor[Element, String]],
    cookTimeExtractor: Option[HtmlExtractor[Element, String]],
    ingredientExtractor: HtmlExtractor[Element, Iterable[String]],
    instructionExtractor: HtmlExtractor[Element, Iterable[String]],
)
/** Ready-made [[Parser]] definitions, one per supported recipe site. */
object Parser {

  /** Captures the first run of digits that follows a "Yield: " label. */
  private val yieldCount = "Yield: \\D*(\\d+).*".r

  /** Extractors for mccormick.com recipe pages. */
  val mccormick: Parser = Parser(
    titleExtractor = text("h1"),
    servingExtractor = text(".main-title .count").map(count => count.toFloatOption),
    prepTimeExtractor = Some(text(".prep_time .first_content")),
    // NOTE(review): the cook time is read from under ".ingredients"; confirm
    // against the site's markup that this selector is intended.
    cookTimeExtractor = Some(text(".ingredients .first_content")),
    ingredientExtractor = texts(".recipe-about-list li"),
    instructionExtractor = texts(".instructions-main span.para")
  )

  /** Extractors for epicurious.com recipe pages; no prep or cook time is read. */
  val epicurious: Parser = Parser(
    titleExtractor = text("h1"),
    servingExtractor = text("""div[data-testid="IngredientList"] > p""").map { yieldLine =>
      yieldCount.findFirstMatchIn(yieldLine).map(found => found.group(1).toFloat)
    },
    prepTimeExtractor = None,
    cookTimeExtractor = None,
    ingredientExtractor = texts("""div[data-testid="IngredientList"] > div > div"""),
    instructionExtractor = texts("""div[data-testid="InstructionsWrapper"] > ol > li > p""")
  )
}
|