package com.weEat.controllers

import com.weEat.shared.models._
import javax.inject.{Inject,Singleton}
import play.api.libs.json._
import play.api.mvc._
import scala.concurrent.Future
import com.weEat.models.Authorization
import scalaoauth2.provider.{AuthInfoRequest,OAuth2ProviderActionBuilders}
import com.weEat.services.OAuth2Service
import net.ruippeixotog.scalascraper.browser.JsoupBrowser
import net.ruippeixotog.scalascraper.dsl.DSL._
import net.ruippeixotog.scalascraper.dsl.DSL.Extract._
//import net.ruippeixotog.scalascraper.dsl.DSL.Parse._
import net.ruippeixotog.scalascraper.model.Element
import net.ruippeixotog.scalascraper.scraper.HtmlExtractor
import scala.util._

/** Scrapes a recipe page from a supported site and returns it as a
  * `RecipeNodeNoId` JSON document.
  *
  * Supported sites are the hosts listed in `_parsersByHost`; each one maps to a
  * [[Parser]] holding the CSS-based extractors for that site's markup.
  */
@Singleton
class ParserController @Inject()(
  val controllerComponents: ControllerComponents,
  oauth: OAuth2Service,
  usdaController: USDAController,
  foodController: FoodController
) extends BaseController with OAuth2ProviderActionBuilders {
  implicit val ec: scala.concurrent.ExecutionContextExecutor =
    scala.concurrent.ExecutionContext.global

  private val _logger = play.api.Logger(getClass)
  private val _browser = JsoupBrowser()

  /** Host name (lower case, without a leading "www.") -> parser for that site. */
  private val _parsersByHost: Map[String, Parser] = Map(
    ("epicurious.com" -> Parser.epicurious),
    ("mccormick.com" -> Parser.mccormick),
    ("recipetineats.com" -> Parser.recipeTinEats),
    ("mamalovestocook.com" -> Parser.recipeTinEats)
  )

  // "<amount> <unit> <food>", "<num>/<den> <unit> <food>" and
  // "<whole> <num>/<den> <unit> <food>". Compiled once instead of per call.
  private val _numberPattern = raw"(\d+)[\d-_]*\s(\w+)\s+(.+)".r
  private val _fractionPattern = raw"(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r
  private val _mixedPattern = raw"(\d+)\s+(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r

  /** Authorized action whose plain-text body is the URL of a recipe page.
    *
    * Responds with 404 when no parser is registered for the URL's host (this
    * includes bodies that are not valid URLs). Otherwise the page is fetched,
    * each ingredient line is resolved to a food, and the recipe is returned as
    * JSON with status 200.
    */
  def parseURL() = AuthorizedAction[Authorization](oauth).async(parse.text)({
    implicit request: AuthInfoRequest[String, Authorization] =>
    val url = request.body
    _findParser(url).fold(
      Future.successful(NotFound(s"No parser available for $url."))
    ) { (parser) =>
      _scrape(url, parser).flatMap({ case (title, servings, ingredientLines) =>
        Future.sequence(ingredientLines.map((line) => _parseIngredient(line)))
          .map((ingredients) => Ok(Json.toJson(RecipeNodeNoId(
            title,
            servings.getOrElse(1.0f),
            1.0f,
            UnitType.NUMBER,
            ingredients.toSeq,
            /* tflucke@[2023-10-26]: Do not pass along the instructions since
             * this could be a violation of the Recipe Author's copyright.
             */
            Nil, //instructions.toSeq,
            None,
            None,
            Some(url)
          ))))
      })
    }
  })

  /** Downloads `url` and extracts the title, serving count and raw ingredient
    * lines with `parser`.
    *
    * The download is a blocking network call, so it runs inside `blocking` on
    * `ec` rather than on the thread that handles the request. The prep-time,
    * cook-time and instruction extractors of `parser` are intentionally not
    * evaluated because the response does not carry those values.
    */
  private def _scrape(
    url: String,
    parser: Parser
  ): Future[(String, Option[Float], Iterable[String])] =
    Future(scala.concurrent.blocking {
      val doc = _browser.get(url)
      val title = doc >> parser.titleExtractor
      val servings = doc >> parser.servingExtractor
      val ingredientLines = doc >> parser.ingredientExtractor
      (title, servings, ingredientLines)
    })

  /** Looks up the parser registered for the host of `url`.
    *
    * @return `None` when `url` is not a valid URL, has no host, or its host is
    *         not in `_parsersByHost`.
    */
  private def _findParser(url: String): Option[Parser] =
    Try(new java.net.URL(url)).toOption
      // getHost (unlike getAuthority) excludes any user-info and port.
      .flatMap((parsed) => Option(parsed.getHost))
      .map((host) => host.toLowerCase(java.util.Locale.ROOT).stripPrefix("www."))
      .flatMap((host) => _parsersByHost.get(host))

  /** Turns one ingredient line into an [[Ingredient]].
    *
    * Recognised shapes are "2 cups flour", "1/2 cup sugar" and
    * "1 1/2 cups milk". A line matching none of them is treated as the food
    * description itself, with an amount of 1 and the `Count` unit. An
    * unrecognised unit word also falls back to `Count`.
    */
  private def _parseIngredient(ingredientLine: String): Future[Ingredient] =
    ingredientLine match {
      case _mixedPattern(whole, numerator, denominator, unit, rest) =>
        _guessFoodFromStr(rest).map((food) => Ingredient(
          food,
          whole.toFloat + numerator.toFloat/denominator.toFloat,
          MeasureUnit.guessUnit(unit).getOrElse(Count)
        ))
      case _numberPattern(amount, unit, rest) =>
        _guessFoodFromStr(rest).map((food) => Ingredient(
          food,
          amount.toFloat,
          MeasureUnit.guessUnit(unit).getOrElse(Count)
        ))
      case _fractionPattern(numerator, denominator, unit, rest) =>
        _guessFoodFromStr(rest).map((food) => Ingredient(
          food,
          numerator.toFloat/denominator.toFloat,
          MeasureUnit.guessUnit(unit).getOrElse(Count)
        ))
      case noUnitLine =>
        _guessFoodFromStr(noUnitLine).map((food) => Ingredient(food, 1, Count))
    }

  /** Resolves a free-text food description to an ingredient id.
    *
    * The description (with '/' removed) is sent to the FDC food search, limited
    * to the Foundation, Survey and SRLegacy data types and 10 results. Every
    * hit is then passed to `foodController.getByFdcId`; the first hit for which
    * that lookup yields a node becomes a `FoodNodeId`, otherwise the first
    * search hit is returned as a `USDAId`.
    *
    * The returned future fails with `NoSuchElementException` when the search
    * has no hits. Any failure is logged together with the offending line.
    */
  private def _guessFoodFromStr(foodLine: String): Future[Ingredient.IngredientId] = {
    import gov.usda.nal.fdc.models.DataType._
    usdaController.fdc.getFoodsSearch(foodLine.filterNot(_ == '/'), Seq(
      Foundation, Survey, SRLegacy
    ), pageSize = Some(10))().flatMap({ (fdcResult) =>
      Future.sequence(
        fdcResult.foods.map((food) => foodController.getByFdcId(food.fdcId))
      ).map(_.flatten
        .headOption
        .fold[Ingredient.IngredientId](
          fdcResult.foods.headOption.fold[Ingredient.IngredientId](
            throw new NoSuchElementException(
              s"USDA search returned no foods for '$foodLine'."
            )
          )((food) => Ingredient.USDAId(food.fdcId))
        )((foodNode) => Ingredient.FoodNodeId(foodNode._id))
      )
    }).andThen({
      case Failure(error) =>
        _logger.warn(s"Could not resolve a food for '$foodLine'.", error)
    })
  }
}

/** The set of extractors needed to read one recipe site's HTML.
  *
  * @param titleExtractor       yields the recipe title
  * @param servingExtractor     yields the serving count, if one can be parsed
  * @param prepTimeExtractor    optional extractor for the preparation time text
  * @param cookTimeExtractor    optional extractor for the cooking time text
  * @param ingredientExtractor  yields one string per ingredient line
  * @param instructionExtractor yields one string per instruction step
  */
case class Parser(
  titleExtractor: HtmlExtractor[Element, String],
  servingExtractor: HtmlExtractor[Element, Option[Float]],
  prepTimeExtractor: Option[HtmlExtractor[Element, String]],
  cookTimeExtractor: Option[HtmlExtractor[Element, String]],
  ingredientExtractor: HtmlExtractor[Element, Iterable[String]],
  instructionExtractor: HtmlExtractor[Element, Iterable[String]],
)

object Parser {
  /** Unicode vulgar-fraction glyphs and their ASCII spelling. */
  private val asciiFractions: Seq[(String, String)] = Seq(
    "\u00BD" -> "1/2",
    "\u00BC" -> "1/4",
    "\u00BE" -> "3/4",
    "\u2150" -> "1/7",
    "\u2151" -> "1/9",
    "\u2152" -> "1/10",
    "\u2153" -> "1/3",
    "\u2154" -> "2/3",
    "\u2155" -> "1/5",
    "\u2156" -> "2/5",
    "\u2157" -> "3/5",
    "\u2158" -> "4/5",
    "\u2159" -> "1/6",
    "\u215A" -> "5/6",
    "\u215B" -> "1/8",
    "\u215C" -> "3/8",
    "\u215D" -> "5/8",
    "\u215E" -> "7/8",
    "\u215F" -> "1/"
  )

  /** Rewrites fraction glyphs as ASCII, drops every remaining non-ASCII
    * character and trims the result.
    *
    * A glyph that directly follows a digit is first separated from it by a
    * space, so that "1½" becomes "1 1/2" rather than "11/2".
    */
  private def toAsciiIngredient(line: String): String =
    asciiFractions
      .foldLeft(
        line.replaceAll("(?<=\\d)(?=[\u00BC-\u00BE\u2150-\u215E])", " ")
      ) { case (text, (glyph, ascii)) => text.replace(glyph, ascii) }
      .filter(_ <= 0x7f)
      .trim

  val mccormick = Parser(
    titleExtractor = text("h1"), // TODO use extractors
    servingExtractor = text(".main-title .count").map(_.toFloatOption),
    prepTimeExtractor = Some(text(".prep_time .first_content")),
    // NOTE(review): this selector targets ".ingredients", not a cook-time
    // element - confirm against the site's markup.
    cookTimeExtractor = Some(text(".ingredients .first_content")),
    ingredientExtractor = texts(".recipe-about-list li"),
    instructionExtractor = texts(".instructions-main span.para")
  )

  val epicurious = Parser(
    titleExtractor = text("h1"),
    // The serving count is the first run of digits after "Yield: ".
    servingExtractor = text("""div[data-testid="IngredientList"] > p""")
      .map("Yield: \\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
    prepTimeExtractor = None,
    cookTimeExtractor = None,
    ingredientExtractor = texts("""div[data-testid="IngredientList"] > div > div"""),
    instructionExtractor = texts("""div[data-testid="InstructionsWrapper"] > ol > li > p""")
  )

  val recipeTinEats = Parser(
    titleExtractor = text("h2.wprm-recipe-name"),
    servingExtractor = text("span.wprm-recipe-servings").map(_.toFloatOption),
    prepTimeExtractor = Some(text("span.wprm-recipe-prep_time-minutes")),
    cookTimeExtractor = Some(text("span.wprm-recipe-cook_time-minutes")),
    ingredientExtractor = texts("li.wprm-recipe-ingredient")
      .map(_.map(toAsciiIngredient)),
    instructionExtractor = texts("div.wprm-recipe-instruction-text")
  )
}