package com.weEat.controllers

import com.weEat.shared.models._
import javax.inject.{Inject,Singleton}
import play.api.libs.json._
import play.api.mvc._
import scala.concurrent.{ExecutionContextExecutor, Future, blocking}
import com.weEat.models.Authorization
import scalaoauth2.provider.{AuthInfoRequest,OAuth2ProviderActionBuilders}
import com.weEat.services.OAuth2Service
import net.ruippeixotog.scalascraper.browser.JsoupBrowser
import net.ruippeixotog.scalascraper.dsl.DSL._
import net.ruippeixotog.scalascraper.dsl.DSL.Extract._
import net.ruippeixotog.scalascraper.model.Element
import net.ruippeixotog.scalascraper.scraper.HtmlExtractor
import scala.util._

/** Scrapes a recipe page from one of the supported sites and returns it as a
  * JSON `RecipeNodeNoId`, with each ingredient line matched against USDA
  * FoodData Central search results.
  */
@Singleton
class ParserController @Inject()(
  val controllerComponents: ControllerComponents,
  oauth: OAuth2Service,
  usdaController: USDAController,
  foodController: FoodController
) extends BaseController with OAuth2ProviderActionBuilders {
  implicit val ec: ExecutionContextExecutor = scala.concurrent.ExecutionContext.global

  private val _browser = JsoupBrowser()
  private val _logger = play.api.Logger(getClass)

  /** Site parsers keyed by URL authority, lower-cased, without a leading "www.". */
  private val _parsersByHost: Map[String, Parser] = Map(
    "epicurious.com" -> Parser.epicurious,
    "mccormick.com" -> Parser.mccormick,
    "recipetineats.com" -> Parser.recipeTinEats
  )

  // Compiled once here instead of once per ingredient line.
  /** "<integer> <unit> <food...>", e.g. "2 cups flour". */
  private val _numberPattern = raw"(\d+)[\d-_]*\s(\w+)\s+(.+)".r
  /** "<numerator>/<denominator> <unit> <food...>", e.g. "1/2 cup sugar". */
  private val _fractionPattern = raw"(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r

  /** Authorized action whose text body is the URL of the recipe page to parse.
    *
    * Responds 404 when no parser is registered for the URL (including when the
    * body is not a usable http(s) URL). Otherwise downloads the page, extracts
    * title, servings and ingredient lines, and responds 200 with the JSON
    * recipe. Servings default to 1 when the page does not provide a number.
    */
  def parseURL() = AuthorizedAction[Authorization](oauth).async(parse.text)({
    implicit request: AuthInfoRequest[String, Authorization] =>
      val url = request.body
      _findParser(url).fold(Future.successful(NotFound(s"No parser available for $url."))) { (parser) =>
        // The page download is blocking network I/O, so it runs inside
        // Future/blocking instead of directly on the thread serving the action.
        // Prep time, cook time and instructions are not part of the response,
        // so they are not extracted here.
        val scraped = Future(blocking {
          val doc = _browser.get(url)
          val title = doc >> parser.titleExtractor
          val servings = doc >> parser.servingExtractor
          val ingredientLines = doc >> parser.ingredientExtractor
          (title, servings, ingredientLines)
        })
        scraped.flatMap({ case (title, servings, ingredientLines) =>
          Future.sequence(ingredientLines.map(_parseIngredient _))
            .map((ingredients) => Ok(Json.toJson(RecipeNodeNoId(
              title,
              servings.getOrElse(1.0f),
              1.0f,
              UnitType.NUMBER,
              ingredients.toSeq,
              /* tflucke@[2023-10-26]: Do not pass along the instructions since this
               * could be a violation of the Recipe Author's copyright.
               */
              Nil,
              None,
              None,
              Some(url)
            ))))
        })
      }
  })

  /** Looks up the parser registered for the site `url` points at.
    *
    * @param url the URL text supplied by the client
    * @return the matching parser, or `None` when the text is not a valid URL,
    *         is not http(s), has no authority component, or names an
    *         unsupported site
    */
  private def _findParser(url: String): Option[Parser] =
    for {
      // A malformed URL becomes None instead of an exception.
      parsed <- Try(new java.net.URL(url)).toOption
      if Set("http", "https").contains(parsed.getProtocol.toLowerCase(java.util.Locale.ROOT))
      // getAuthority returns null for URLs without an authority part.
      authority <- Option(parsed.getAuthority)
      host = authority.toLowerCase(java.util.Locale.ROOT).stripPrefix("www.")
      parser <- _parsersByHost.get(host)
    } yield parser

  /** Turns one ingredient line into an [[Ingredient]].
    *
    * Lines shaped "<integer> <unit> <food>" or "<n>/<d> <unit> <food>" get the
    * parsed amount and the unit guessed from the unit word (falling back to
    * `Count`). Any other line is treated as one `Count` of the whole line.
    *
    * @param ingredientLine the raw ingredient text from the page
    * @return the ingredient; the future fails if no food can be resolved
    */
  private def _parseIngredient(ingredientLine: String): Future[Ingredient] =
    ingredientLine match {
      case _numberPattern(amount, unit, rest) =>
        _guessFoodFromStr(rest).map((food) =>
          Ingredient(food, amount.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count)))
      // A zero denominator would yield an infinite amount, so such lines fall
      // through to the no-quantity case below.
      case _fractionPattern(numerator, denominator, unit, rest) if denominator.toFloat != 0f =>
        _guessFoodFromStr(rest).map((food) =>
          Ingredient(food, numerator.toFloat/denominator.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count)))
      case noUnitLine =>
        _guessFoodFromStr(noUnitLine).map((food) => Ingredient(food, 1, Count))
    }

  /** Resolves free-text food wording to an ingredient id.
    *
    * Searches FoodData Central for up to 10 foods, looks each hit up through
    * `foodController.getByFdcId`, and returns the first one found there as a
    * `FoodNodeId`. When none is found, the first search hit is returned as a
    * `USDAId`.
    *
    * @param foodLine the food wording taken from an ingredient line
    * @return the resolved id; the future fails with `NoSuchElementException`
    *         when the search returns no foods
    */
  private def _guessFoodFromStr(foodLine: String): Future[Ingredient.IngredientId] = {
    import gov.usda.nal.fdc.models.DataType._
    usdaController.fdc.getFoodsSearch(foodLine, Seq(
      Foundation,
      Survey,
      SRLegacy
    ), pageSize = Some(10))().flatMap({ (fdcResult) =>
      fdcResult.foods.headOption match {
        case None =>
          // Same exception type an empty `.head` would raise, but with a
          // message that says which ingredient text could not be matched.
          Future.failed[Ingredient.IngredientId](
            new NoSuchElementException(s"No USDA food found for '$foodLine'."))
        case Some(firstHit) =>
          Future.sequence(fdcResult.foods.map((food) => foodController.getByFdcId(food.fdcId)))
            .map(_.flatten
              .headOption
              .fold[Ingredient.IngredientId](Ingredient.USDAId(firstHit.fdcId))((foodNode) =>
                Ingredient.FoodNodeId(foodNode._id)))
      }
    }).andThen({
      case Failure(error) => _logger.warn(s"Could not resolve a food for '$foodLine'.", error)
    })
  }
}

/** The set of extractors needed to scrape one recipe site.
  *
  * @param titleExtractor extracts the recipe title
  * @param servingExtractor extracts the number of servings, if one can be read
  * @param prepTimeExtractor extracts the preparation time text, when the site has one
  * @param cookTimeExtractor extracts the cooking time text, when the site has one
  * @param ingredientExtractor extracts one string per ingredient line
  * @param instructionExtractor extracts one string per instruction step
  */
case class Parser(
  titleExtractor: HtmlExtractor[Element, String],
  servingExtractor: HtmlExtractor[Element, Option[Float]],
  prepTimeExtractor: Option[HtmlExtractor[Element, String]],
  cookTimeExtractor: Option[HtmlExtractor[Element, String]],
  ingredientExtractor: HtmlExtractor[Element, Iterable[String]],
  instructionExtractor: HtmlExtractor[Element, Iterable[String]]
)

/** Parsers for the supported recipe sites. */
object Parser {
  /** Captures the first run of digits after "Yield: "; compiled once. */
  private val _epicuriousYield = "Yield: \\D*(\\d+).*".r

  val mccormick = Parser(
    titleExtractor = text("h1"),
    servingExtractor = text(".main-title .count").map(_.toFloatOption),
    prepTimeExtractor = Some(text(".prep_time .first_content")),
    // NOTE(review): this cook-time selector starts at ".ingredients" -- confirm
    // it really points at the cook time on the page.
    cookTimeExtractor = Some(text(".ingredients .first_content")),
    ingredientExtractor = texts(".recipe-about-list li"),
    instructionExtractor = texts(".instructions-main span.para")
  )

  val epicurious = Parser(
    titleExtractor = text("h1"),
    servingExtractor = text("""div[data-testid="IngredientList"] > p""")
      .map((yieldLine) => _epicuriousYield.findFirstMatchIn(yieldLine).map(_.group(1).toFloat)),
    prepTimeExtractor = None,
    cookTimeExtractor = None,
    ingredientExtractor = texts("""div[data-testid="IngredientList"] > div > div"""),
    instructionExtractor = texts("""div[data-testid="InstructionsWrapper"] > ol > li > p""")
  )

  val recipeTinEats = Parser(
    titleExtractor = text("h2.wprm-recipe-name"),
    servingExtractor = text("span.wprm-recipe-servings").map(_.toFloatOption),
    prepTimeExtractor = Some(text("span.wprm-recipe-prep_time-minutes")),
    cookTimeExtractor = Some(text("span.wprm-recipe-cook_time-minutes")),
    // Drops every non-ASCII character and every '/' from each line, then trims.
    // NOTE(review): removing '/' also turns an ASCII fraction such as "1/2"
    // into "12" before the line is parsed -- confirm this is intended.
    ingredientExtractor = texts("li.wprm-recipe-ingredient")
      .map((lines) => lines.map((line) => line.filter(_ <= 0x7f).filterNot(_ == '/').trim)),
    instructionExtractor = texts("div.wprm-recipe-instruction-text")
  )
}