Add recipe parser
This commit is contained in:
parent
5c9897958b
commit
5b4a232cb6
|
@ -0,0 +1,11 @@
|
|||
# Container image for the recipe-parser FastAPI service.
FROM python:3.9

# All application files live under /code inside the image.
WORKDIR /code

# Copy the dependency manifest first so the pip layer is cached
# independently of source-code changes.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Application source (src/main.py exposes the ASGI app as `app`).
COPY ./src /code/src

# Serve on all interfaces, port 80, via uvicorn.
CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "80"]
|
|
@ -0,0 +1,35 @@
|
|||
annotated-types==0.5.0
anyio==3.7.1
beautifulsoup4==4.12.2
certifi==2023.7.22
charset-normalizer==3.2.0
click==8.1.6
envyaml==1.10.211231
exceptiongroup==1.1.2
extruct==0.16.0
fastapi==0.101.0
h11==0.14.0
html-text==0.5.2
html5lib==1.1
idna==3.4
isodate==0.6.1
jstyleson==0.0.2
lxml==4.9.3
mf2py==1.1.3
pydantic==2.1.1
pydantic_core==2.4.0
pyparsing==3.1.1
pyRdfa3==3.5.3
PyYAML==6.0.1
rdflib==7.0.0
recipe-scrapers==14.42.0
requests==2.31.0
six==1.16.0
sniffio==1.3.0
soupsieve==2.4.1
starlette==0.27.0
typing_extensions==4.7.1
urllib3==2.0.4
uvicorn==0.23.2
w3lib==2.1.2
webencodings==0.5.1
|
|
@ -1,11 +1,80 @@
|
|||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from recipe_scrapers import scrape_html, scrape_me, WebsiteNotImplementedError, NoSchemaFoundInWildMode
|
||||
|
||||
from typing import List, Optional
|
||||
from pydantic import BaseModel
|
||||
import logging
|
||||
|
||||
# Router for all recipe-scraping endpoints, mounted under /recipe.
# Fix: the source carried both the old `tags=["NLP"],` line and its
# replacement — a duplicate keyword argument is a SyntaxError in Python,
# so only the intended tags are kept.
router = APIRouter(
    prefix="/recipe",
    tags=["Recipes", "Web scraping"],
    responses={404: {"description": "Not found"}},
)
|
||||
|
||||
class IngredientGroup(BaseModel):
    """One named group of ingredients from a scraped recipe.

    `purpose` is the group's label (e.g. "For the sauce") when the site
    provides one.
    """

    ingredients: List[str]
    # Fix: under the pinned pydantic==2.1.1 (v2), `Optional[str]` with no
    # default is a *required* field (it may only be explicitly null).
    # Defaulting to None lets scraped payloads omit the key entirely.
    purpose: Optional[str] = None
|
||||
|
||||
class Nutrients(BaseModel):
    """Nutritional information for a scraped recipe.

    All values are strings, presumably the raw text reported by the site's
    recipe schema (e.g. "240 kcal") rather than parsed numbers — confirm
    against recipe_scrapers' to_json() output.
    """

    calories: str
    carbohydrateContent: str
    proteinContent: str
    fatContent: str
    saturatedFatContent: str
    cholesterolContent: str
    sodiumContent: str
    fiberContent: str
    sugarContent: str
    servingSize: str
|
||||
|
||||
class Recipe(BaseModel):
    """Structured recipe data, mirroring the dict produced by
    recipe_scrapers' to_json() (see getRecipe), which json() validates
    via Recipe.parse_obj.
    """

    # Fix: in pydantic v2 an Optional field with no default is still
    # required; default to None so sites that omit the author still parse.
    author: Optional[str] = None
    canonical_url: str
    category: str
    cook_time: int
    cuisine: str
    description: str
    host: str
    image: str
    ingredient_groups: List[IngredientGroup]
    ingredients: List[str]
    instructions: str          # full instruction text as one string
    instructions_list: List[str]  # same instructions split into steps
    language: str
    nutrients: Nutrients
    prep_time: int
    ratings: float
    site_name: str
    title: str
    total_time: int
    yields: str
|
||||
|
||||
@router.get("/")
async def json(url: str):
    """GET /recipe/ — scrape `url` and return the recipe as validated JSON.

    Fix: the source interleaved the removed `test()` handler with this one
    (a diff artifact); two `async def`s under one decorator would leave the
    first undecorated and dead. Only the current handler is kept.

    NOTE(review): the handler name shadows the stdlib `json` module name;
    harmless here since this file never imports `json`, but worth renaming.
    """
    recipe_dict = await getRecipe(url)
    # parse_obj is deprecated in pydantic v2 in favour of model_validate,
    # but still works under the pinned pydantic==2.1.1.
    ret = Recipe.parse_obj(recipe_dict)
    return ret
|
||||
|
||||
@router.get("/md")
async def markdown(url: str):
    """GET /recipe/md — placeholder for a Markdown rendering of the recipe.

    Currently unimplemented: returns None, i.e. an HTTP 200 with a `null`
    body. `url` is accepted but ignored.
    """
    # TODO: render the scraped recipe (see getRecipe) as Markdown.
    pass
|
||||
|
||||
async def getRecipe(url: str) -> dict:
    """Scrape a recipe from `url` with recipe_scrapers (wild mode).

    Returns the recipe as a dict — the to_json() output that json()
    feeds to Recipe.parse_obj. (Fix: the original annotation said `str`.)

    Raises:
        HTTPException(400): if `url` is empty, or no recipe schema was
            found on the page.
        HTTPException(500): on any other scraping failure.
    """
    if not url:
        # Fix: the original raised the undefined name `HttpException`,
        # which would be a NameError instead of the intended 400 response.
        raise HTTPException(status_code=400, detail="url cannot be empty")

    # (Removed dead `ret = ""` — it was never read.)
    try:
        recipe = scrape_me(url, wild_mode=True)
        recipe_dict = recipe.to_json()
    except NoSchemaFoundInWildMode:
        raise HTTPException(status_code=400, detail="Failed to find a recipe on the site. We may have failed to fetch it, or it might really not be a recipe site")
    except Exception:
        # Best-effort catch-all: any other scraper/network failure becomes
        # an opaque 500 rather than leaking a traceback to the client.
        raise HTTPException(status_code=500, detail="Failed to obtain recipe")

    logging.info("Recipe found")
    logging.info(recipe_dict)

    return recipe_dict
|
||||
|
||||
|
|
Loading…
Reference in New Issue