{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2159418c-785c-41b9-a70e-93b68af860eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from dotenv import load_dotenv, find_dotenv\n",
    "# Load variables from the .env file located by find_dotenv() into the environment.\n",
    "_ = load_dotenv(find_dotenv())\n",
    "# Index os.environ directly so that a missing key raises KeyError in this first cell.\n",
    "openai_api_key = os.environ[\"OPENAI_API_KEY\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9a585baf-8c28-4ad5-8244-f97a9c1c5788",
   "metadata": {},
   "source": [
    "## Basic app with the Pydantic Output Parser\n",
    "* Previously we used the StructuredOutputParser to format the output into a JSON dictionary.\n",
    "* The StructuredOutputParser is a very simple parser that only supports strings and does not provide options for other data types such as lists or integers.\n",
    "* The PydanticOutputParser is a more advanced parser that supports many data types and additional features such as validators."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "e0511c81-d855-4499-8c24-bb49f4d84755",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.output_parsers import PydanticOutputParser"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "cd1196db-00b8-4a32-aca6-fcfc6f92def3",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.pydantic_v1 import BaseModel, Field, validator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "053dfea3-5fbf-45ed-9a55-16a53e5e40fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import List"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "08694f04-1a3f-4810-919f-b5ab09f46bd7",
   "metadata": {},
   "source": [
    "**Define the desired output data structure**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "2e8294ac-179b-44b7-bf03-eff1d26c726f",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Suggestions_Output_Structure(BaseModel):\n",
    "    # Schema for the parsed LLM answer: a list of substitute words and a list\n",
    "    # of reasons. Kept as a comment rather than a class docstring because\n",
    "    # pydantic copies a model docstring into the generated schema, which would\n",
    "    # change the format instructions sent to the LLM.\n",
    "    words: List[str] = Field(\n",
    "        description=\"list of substitute words based on the context\"\n",
    "    )\n",
    "    reasons: List[str] = Field(\n",
    "        description=\"the reasoning of why this word fits the context\"\n",
    "    )\n",
    "\n",
    "    @validator('words')\n",
    "    def not_start_with_number(cls, info):\n",
    "        \"\"\"Reject the list if any word is empty or starts with a numeric character.\n",
    "\n",
    "        Returns the list unchanged when every word passes.\n",
    "        Raises ValueError (reported by pydantic as a validation error) otherwise.\n",
    "        \"\"\"\n",
    "        for item in info:\n",
    "            # Check for the empty string first: item[0] would raise IndexError,\n",
    "            # which pydantic does not convert into a validation error.\n",
    "            if not item:\n",
    "                raise ValueError(\"ERROR: The word cannot be empty\")\n",
    "            if item[0].isnumeric():\n",
    "                raise ValueError(\"ERROR: The word cannot start with a number\")\n",
    "        return info\n",
    "\n",
    "    @validator('reasons')\n",
    "    def end_with_dot(cls, info):\n",
    "        \"\"\"Return the reasons with a trailing period appended where it is missing.\"\"\"\n",
    "        # endswith() is safe on an empty string, unlike item[-1]. A new list is\n",
    "        # built so the list passed in is not modified in place.\n",
    "        return [item if item.endswith(\".\") else item + \".\" for item in info]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "684e0d6a-7d38-40a5-8b6f-9bc7f887ce83",
   "metadata": {},
   "source": [
    "**Create the parser**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "4a45a456-4e73-429d-a1d8-2ca14174b153",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parser that turns the LLM text into a Suggestions_Output_Structure instance.\n",
    "my_parser = PydanticOutputParser(pydantic_object=Suggestions_Output_Structure)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e43294a2-690d-4cf6-847e-b732f82a7cdc",
   "metadata": {},
   "source": [
    "**Determine the input**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "5a643143-b855-47ef-b09a-1644d2a84bf7",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.prompts import PromptTemplate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "e74c587b-b5a3-45d1-a97b-a0f7cac147ff",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prompt text with three placeholders: format_instructions, target_word, context.\n",
    "my_template = \"\"\"\n",
    "Offer a list of suggestions to substitute the specified\n",
    "target_word based on the present context and the reasoning\n",
    "for each word.\n",
    "\n",
    "{format_instructions}\n",
    "\n",
    "target_word={target_word}\n",
    "context={context}\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "7cf7c200-76f4-4087-9d93-9dd2504ed983",
   "metadata": {},
   "outputs": [],
   "source": [
    "# format_instructions is bound here from the parser, so only target_word\n",
    "# and context remain to be supplied when the prompt is formatted.\n",
    "my_prompt = PromptTemplate(\n",
    "    input_variables=[\"target_word\", \"context\"],\n",
    "    partial_variables={\"format_instructions\": my_parser.get_format_instructions()},\n",
    "    template=my_template,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "c700df91-5747-482e-bf7b-5f7c9e7c676b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fill in the two remaining placeholders. The result is a prompt value object;\n",
    "# it is converted to plain text with .to_string() before being sent to the LLM.\n",
    "user_input = my_prompt.format_prompt(\n",
    "    target_word=\"loyalty\",\n",
    "    context=\"\"\"\n",
    "    The loyalty of the soldier was so great that\n",
    "    even under severe torture, he refused to betray\n",
    "    his comrades.\n",
    "    \"\"\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "bd72d57e-9dd8-4103-b51e-858ac74d1b26",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_openai import OpenAI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "38ea700b-8f06-4bf6-a211-e9bf95e987f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# LLM client created with the library defaults (no arguments are passed).\n",
    "llm = OpenAI()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "337e9608-2159-4f30-bafa-e844407f86bf",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/juliocolomer/.pyenv/versions/3.11.4/envs/venv021524/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:117: LangChainDeprecationWarning: The function `__call__` was deprecated in LangChain 0.1.7 and will be removed in 0.2.0. Use invoke instead.\n",
      "  warn_deprecated(\n"
     ]
    }
   ],
   "source": [
    "# Use invoke(): calling the LLM object directly is deprecated since\n",
    "# LangChain 0.1.7 and is scheduled for removal in 0.2.0.\n",
    "output = llm.invoke(user_input.to_string())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "435f9602-3f50-43ad-9a6b-bd951b086305",
   "metadata": {},
   "source": [
    "**Apply the parser to get the desired output structure**\n",
    "* Note: the next cell may fail if the LLM output is not correctly formatted JSON. If this happens, re-run the LLM call above and then this cell; a fresh response usually fixes the problem."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "417115f0-db98-4acc-a5a9-632c0859c143",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Suggestions_Output_Structure(words=['devotion', 'allegiance', 'fidelity', 'faithfulness'], reasons=[\"These words all convey a strong sense of commitment and dedication, which aligns with the context of a soldier's loyalty.\", 'Allegiance specifically implies a sense of loyalty to a higher cause or authority, making it a fitting substitute in this context.', \"Fidelity and faithfulness both suggest a steadfast and unwavering loyalty, which is appropriate for a soldier's loyalty in the face of torture.\", \"Devotion conveys a deep and unwavering loyalty and dedication, which is fitting for a soldier's loyalty in the face of adversity.\"])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Parse and validate the raw LLM text; the resulting object is displayed below.\n",
    "my_parser.parse(output)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b0b74831-d438-4f55-977f-5ef95d5d8084",
   "metadata": {},
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
