Add recipe parser

This commit is contained in:
Tyler Perkins 2023-08-13 22:15:02 -04:00
parent 5c9897958b
commit 5b4a232cb6
3 changed files with 118 additions and 3 deletions

11
Dockerfile Normal file
View File

@ -0,0 +1,11 @@
# Python 3.9 base image for the FastAPI recipe service.
FROM python:3.9
# All subsequent paths are relative to /code.
WORKDIR /code
# Copy and install requirements first so the pip layer stays cached
# until requirements.txt itself changes.
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
# Application source (expects src/main.py defining `app`).
COPY ./src /code/src
# Serve the FastAPI app on all interfaces, port 80.
CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "80"]

35
requirements.txt Normal file
View File

@ -0,0 +1,35 @@
annotated-types==0.5.0
anyio==3.7.1
beautifulsoup4==4.12.2
certifi==2023.7.22
charset-normalizer==3.2.0
click==8.1.6
envyaml==1.10.211231
exceptiongroup==1.1.2
extruct==0.16.0
fastapi==0.101.0
h11==0.14.0
html-text==0.5.2
html5lib==1.1
idna==3.4
isodate==0.6.1
jstyleson==0.0.2
lxml==4.9.3
mf2py==1.1.3
pydantic==2.1.1
pydantic_core==2.4.0
pyparsing==3.1.1
pyRdfa3==3.5.3
PyYAML==6.0.1
rdflib==7.0.0
recipe-scrapers==14.42.0
requests==2.31.0
six==1.16.0
sniffio==1.3.0
soupsieve==2.4.1
starlette==0.27.0
typing_extensions==4.7.1
urllib3==2.0.4
uvicorn==0.23.2
w3lib==2.1.2
webencodings==0.5.1

View File

@ -1,11 +1,80 @@
from fastapi import APIRouter, Depends, HTTPException
from recipe_scrapers import scrape_html, scrape_me, WebsiteNotImplementedError, NoSchemaFoundInWildMode
from typing import List, Optional
from pydantic import BaseModel
import logging
# Router for all recipe-scraping endpoints, mounted under /recipe.
# Bug fix: the original passed `tags=` twice (a leftover from the diff),
# which is a SyntaxError (duplicate keyword argument); the newer value wins.
router = APIRouter(
    prefix="/recipe",
    tags=["Recipes", "Web scraping"],
    responses={404: {"description": "Not found"}}
)
class IngredientGroup(BaseModel):
    """One group of ingredients from a scraped recipe.

    Attributes:
        ingredients: Raw ingredient lines belonging to this group.
        purpose: Optional heading for the group (e.g. "For the sauce").
    """

    ingredients: List[str]
    # Fix for pydantic v2 (pinned 2.1.1): an Optional field with no default
    # is still *required*; default to None so groups without a stated
    # purpose validate instead of raising a ValidationError.
    purpose: Optional[str] = None
class Nutrients(BaseModel):
    """Nutritional information for a scraped recipe.

    All fields are plain strings as provided by the scraper — presumably
    the site-reported text including units (TODO confirm against
    recipe_scrapers output).
    """

    calories: str
    carbohydrateContent: str
    proteinContent: str
    fatContent: str
    saturatedFatContent: str
    cholesterolContent: str
    sodiumContent: str
    fiberContent: str
    sugarContent: str
    servingSize: str
class Recipe(BaseModel):
    """Structured recipe data returned by the /recipe endpoints.

    The field set mirrors the dict produced by recipe_scrapers'
    ``to_json()`` (see ``getRecipe``), so a scraped dict can be validated
    directly into this model.
    """

    # Fix for pydantic v2 (pinned 2.1.1): Optional fields with no default
    # are still required; many sites omit the author, so default to None.
    author: Optional[str] = None
    canonical_url: str
    category: str
    cook_time: int
    cuisine: str
    description: str
    host: str
    image: str
    ingredient_groups: List[IngredientGroup]
    ingredients: List[str]
    instructions: str
    instructions_list: List[str]
    language: str
    nutrients: Nutrients
    prep_time: int
    ratings: float
    site_name: str
    title: str
    total_time: int
    yields: str
@router.get("/")
async def test():
    """Smoke-test endpoint: confirms the recipe router is mounted."""
    greeting = "Hello world!"
    return greeting
# NOTE(review): this coroutine has no route decorator — confirm whether
# `@router.get("/json")` was intended or whether it is only a helper.
async def json(url: str):
    """Scrape *url* and return the result as a validated ``Recipe`` model.

    Propagates the HTTPExceptions raised by ``getRecipe`` (400 for an
    empty url or no recipe found, 500 for any other scraping failure).
    """
    recipe_dict = await getRecipe(url)
    # Fix: ``parse_obj`` is deprecated in pydantic v2 (pinned 2.1.1);
    # ``model_validate`` is its direct replacement.
    return Recipe.model_validate(recipe_dict)
@router.get("/md")
async def markdown(url: str):
    """Placeholder for a Markdown-rendered recipe; not implemented yet."""
    # TODO: render the scraped recipe as Markdown.
    return None
async def getRecipe(url: str) -> dict:
    """Scrape *url* in wild mode and return the recipe as a dict.

    Returns the dict produced by recipe_scrapers' ``to_json()`` (the
    original ``-> str`` annotation was wrong).

    Raises:
        HTTPException: 400 if *url* is empty or no recipe schema is found;
            500 for any other scraping failure.
    """
    if not url:
        # Bug fix: the original raised the misspelled name `HttpException`,
        # which would have been a NameError at runtime, masking the 400.
        raise HTTPException(status_code=400, detail="url cannot be empty")
    # (removed unused `ret = ""` local)
    try:
        # wild_mode attempts a schema.org-based parse on unsupported hosts.
        recipe = scrape_me(url, wild_mode=True)
        recipe_dict = recipe.to_json()
    except NoSchemaFoundInWildMode:
        raise HTTPException(status_code=400, detail="Failed to find a recipe on the site. We may have failed to fetch it, or it might really not be a recipe site")
    except Exception:
        # Best-effort boundary: any other scraper failure becomes a 500.
        raise HTTPException(status_code=500, detail="Failed to obtain recipe")
    logging.info("Recipe found")
    logging.info(recipe_dict)
    return recipe_dict