{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2ca5bbbf-142b-48ed-89f2-18c63eac1eb0",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from dotenv import load_dotenv, find_dotenv\n",
    "_ = load_dotenv(find_dotenv())\n",
    "openai_api_key = os.environ[\"OPENAI_API_KEY\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "42f0647a-3384-4a8d-bc5a-446b58a9ec80",
   "metadata": {},
   "source": [
    "## Basic App: Question & Answering from a Document"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "196e2414-39ff-4643-9c31-b130697e3074",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_openai import OpenAI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b191eb7d-a9df-47a6-95ac-fdec0b0b1969",
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = OpenAI()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "567ecd8a-34b9-4489-9b6d-7b257919737a",
   "metadata": {},
   "source": [
    "**Load the text file**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e9801a4d-ecd7-460a-a4db-f0a1711163f9",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.document_loaders import TextLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e01cbe49-38ce-40d2-8e01-01052483fbe5",
   "metadata": {},
   "outputs": [],
   "source": [
    "loader = TextLoader(\"data/be-good-and-how-not-to-die.txt\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "afff80c1-1cd1-408e-ac28-2a8594af2de6",
   "metadata": {},
   "outputs": [],
   "source": [
    "document = loader.load()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4f6ff8b1-c63c-48b4-acaf-b959b98e770d",
   "metadata": {},
   "source": [
    "**The document is loaded as a Python list with metadata**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "684f2ccb-9ec4-4384-b875-3f89608292f8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'list'>\n"
     ]
    }
   ],
   "source": [
    "print(type(document))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "3f712992-beac-420c-b94e-1db729bd9508",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n"
     ]
    }
   ],
   "source": [
    "print(len(document))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "6cb6e6d1-765b-4762-8213-0c56e4829427",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'source': 'data/be-good-and-how-not-to-die.txt'}\n"
     ]
    }
   ],
   "source": [
    "print(document[0].metadata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "eef34dd2-aace-45c9-a87b-c3967370f44f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "You have 1 document.\n"
     ]
    }
   ],
   "source": [
    "print(f\"You have {len(document)} document.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "dfb653b8-8035-40bf-b73c-542c90499486",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Your document has 27419 characters\n"
     ]
    }
   ],
   "source": [
    "print(f\"Your document has {len(document[0].page_content)} characters\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7d8fbef1-4447-4ef5-ba21-9211b4486904",
   "metadata": {},
   "source": [
    "**Split the document in small chunks**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "18899367-8f8e-4d47-a61c-8041bafb4cd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.text_splitter import RecursiveCharacterTextSplitter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "65f31176-187b-4e0c-819a-dd35b7e61ee3",
   "metadata": {},
   "outputs": [],
   "source": [
    "text_splitter = RecursiveCharacterTextSplitter(\n",
    "    chunk_size=3000,\n",
    "    chunk_overlap=400\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "0147e8c9-70fb-4990-8bf6-c77ea283200a",
   "metadata": {},
   "outputs": [],
   "source": [
    "document_chunks = text_splitter.split_documents(document)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "86820a0a-e307-4bb9-9b69-24284d9f0754",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Now you have 12 chunks.\n"
     ]
    }
   ],
   "source": [
    "print(f\"Now you have {len(document_chunks)} chunks.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1756c471-5af1-4ba4-8f90-cfc70e897155",
   "metadata": {},
   "source": [
    "**Convert text chunks in numeric vectors (called \"embeddings\")**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "2e40b2df-53ad-4984-8972-cf7258b762fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.embeddings.openai import OpenAIEmbeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "c1e7c4f6-2116-4ce1-8349-640fa6591970",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/juliocolomer/.pyenv/versions/3.11.4/envs/venv020124/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:117: LangChainDeprecationWarning: The class `langchain_community.embeddings.openai.OpenAIEmbeddings` was deprecated in langchain-community 0.1.0 and will be removed in 0.2.0. An updated version of the class exists in the langchain-openai package and should be used instead. To use it run `pip install -U langchain-openai` and import as `from langchain_openai import OpenAIEmbeddings`.\n",
      "  warn_deprecated(\n"
     ]
    }
   ],
   "source": [
    "embeddings = OpenAIEmbeddings()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "67881eb4-7418-4423-b4b9-b750e4a579cf",
   "metadata": {},
   "source": [
    "**Load the embeddings to a vector database**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "75acf1fc-25a4-437c-bd1b-7310a83bae9e",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.vectorstores import FAISS"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "014dc49b-498a-49b0-883c-f323769320b3",
   "metadata": {},
   "source": [
    "*Careful: the next operation is expensive in OpenAI*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "8a2e4dbc-af28-497f-877a-933347c747ad",
   "metadata": {},
   "outputs": [],
   "source": [
    "stored_embeddings = FAISS.from_documents(document_chunks, embeddings)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "40ae930c-5d99-45c5-8597-52457d22b6d1",
   "metadata": {},
   "source": [
    "**Create a Retrieval Question & Answering Chain**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "e30338bd-58ae-467d-a01e-8bef3ae435b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.chains import RetrievalQA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "d9bed1ac-d148-4fb1-9382-bc58db111d73",
   "metadata": {},
   "outputs": [],
   "source": [
    "QA_chain = RetrievalQA.from_chain_type(\n",
    "    llm=llm,\n",
    "    chain_type=\"stuff\",\n",
    "    retriever=stored_embeddings.as_retriever()\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "71a96c6d-a473-4ecc-885d-55f1618487d4",
   "metadata": {},
   "source": [
    "**Now we have a Question & Answering APP**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "c432c922-984b-49b9-b3c1-4a6c5d2ccd00",
   "metadata": {},
   "outputs": [],
   "source": [
    "question = \"\"\"\n",
    "What is this article about? \n",
    "Describe it in less than 100 words.\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "7d407274-69c0-4d7f-8def-5e2275027a41",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/juliocolomer/.pyenv/versions/3.11.4/envs/venv020124/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:117: LangChainDeprecationWarning: The function `run` was deprecated in LangChain 0.1.0 and will be removed in 0.2.0. Use invoke instead.\n",
      "  warn_deprecated(\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "' This article is about the importance of persistence and determination in the face of challenges and setbacks in the startup world. The author uses examples of successful and unsuccessful startups to illustrate the idea that avoiding failure is the key to achieving success. The article also explores the role of fear of failure in motivating founders to persevere through difficult times. Ultimately, the article encourages readers to not give up when facing obstacles and to stay committed to their goals in order to increase their chances of success.'"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "QA_chain.run(question)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "87459203-207e-4180-af54-1a48457143b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "question2 = \"\"\"\n",
    "And how does it explain how to create somethin people want?\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "86947fdc-3d59-465c-b1a9-a061421bccb6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "' The context provided does not explicitly explain how to create something people want. However, the essay does mention some key points that could be useful in understanding what people want. These include the importance of focusing on users and their needs, the value of solving a problem or filling a gap in the market, and the role of benevolence and helping others in driving motivation and success. Ultimately, creating something people want may involve a combination of these factors and can vary depending on the specific product or service being offered.'"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "QA_chain.run(question2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "75c9440f-68e9-4f69-a075-0fc8ee62377a",
   "metadata": {},
   "source": [
    "## Connect RAG App with FastAPI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "085e78ee-45c4-4ac2-b540-98c8667ec1b1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting fastapi\n",
      "  Downloading fastapi-0.109.0-py3-none-any.whl (92 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 in /Users/juliocolomer/.pyenv/versions/3.11.4/envs/venv020124/lib/python3.11/site-packages (from fastapi) (2.6.0)\n",
      "Collecting starlette<0.36.0,>=0.35.0 (from fastapi)\n",
      "  Downloading starlette-0.35.1-py3-none-any.whl (71 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.1/71.1 kB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: typing-extensions>=4.8.0 in /Users/juliocolomer/.pyenv/versions/3.11.4/envs/venv020124/lib/python3.11/site-packages (from fastapi) (4.9.0)\n",
      "Requirement already satisfied: annotated-types>=0.4.0 in /Users/juliocolomer/.pyenv/versions/3.11.4/envs/venv020124/lib/python3.11/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (0.6.0)\n",
      "Requirement already satisfied: pydantic-core==2.16.1 in /Users/juliocolomer/.pyenv/versions/3.11.4/envs/venv020124/lib/python3.11/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (2.16.1)\n",
      "Requirement already satisfied: anyio<5,>=3.4.0 in /Users/juliocolomer/.pyenv/versions/3.11.4/envs/venv020124/lib/python3.11/site-packages (from starlette<0.36.0,>=0.35.0->fastapi) (4.2.0)\n",
      "Requirement already satisfied: idna>=2.8 in /Users/juliocolomer/.pyenv/versions/3.11.4/envs/venv020124/lib/python3.11/site-packages (from anyio<5,>=3.4.0->starlette<0.36.0,>=0.35.0->fastapi) (3.6)\n",
      "Requirement already satisfied: sniffio>=1.1 in /Users/juliocolomer/.pyenv/versions/3.11.4/envs/venv020124/lib/python3.11/site-packages (from anyio<5,>=3.4.0->starlette<0.36.0,>=0.35.0->fastapi) (1.3.0)\n",
      "Installing collected packages: starlette, fastapi\n",
      "Successfully installed fastapi-0.109.0 starlette-0.35.1\n",
      "\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3.11 -m pip install --upgrade pip\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "#!pip install fastapi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "06f64c8f-c8d2-48ec-befb-1c15fea2a97f",
   "metadata": {},
   "outputs": [],
   "source": [
    "from fastapi import FastAPI, HTTPException"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "b20e8ab9-6a1a-41df-a943-4478273937cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "app = FastAPI()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "3b5cd5b2-fd3b-4d46-878c-df7ae9dc3c67",
   "metadata": {},
   "outputs": [],
   "source": [
    "@app.post(\"/conversation\")\n",
    "async def conversation(query: str):\n",
    "    try:\n",
    "        result = QA_chain.run(query=query)\n",
    "        return {\"response\": result}\n",
    "    except Exception as e:\n",
    "        raise HTTPException(detail=str(e), status_code=500)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "e0e6b517-00f3-406e-b88d-9bcd4d400a87",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting uvicorn\n",
      "  Downloading uvicorn-0.27.0.post1-py3-none-any.whl (60 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.7/60.7 kB\u001b[0m \u001b[31m980.5 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: click>=7.0 in /Users/juliocolomer/.pyenv/versions/3.11.4/envs/venv020124/lib/python3.11/site-packages (from uvicorn) (8.1.7)\n",
      "Requirement already satisfied: h11>=0.8 in /Users/juliocolomer/.pyenv/versions/3.11.4/envs/venv020124/lib/python3.11/site-packages (from uvicorn) (0.14.0)\n",
      "Installing collected packages: uvicorn\n",
      "Successfully installed uvicorn-0.27.0.post1\n",
      "\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3.11 -m pip install --upgrade pip\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "#!pip install uvicorn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "3d0e1e31-f701-47c4-9971-17eb1e312628",
   "metadata": {},
   "outputs": [],
   "source": [
    "import uvicorn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "6e591f8c-8aea-4a2c-ac7d-bce49d7f5be0",
   "metadata": {},
   "outputs": [],
   "source": [
    "config = uvicorn.Config(app, host=\"0.0.0.0\", port=5566)\n",
    "server = uvicorn.Server(config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33841913-d72f-4976-a91a-95fbd4bbba02",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:     Started server process [38268]\n",
      "INFO:     Waiting for application startup.\n",
      "INFO:     Application startup complete.\n",
      "INFO:     Uvicorn running on http://0.0.0.0:5566 (Press CTRL+C to quit)\n"
     ]
    }
   ],
   "source": [
    "await server.serve()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "337378e9-8622-4910-87d6-845b97387ce9",
   "metadata": {},
   "source": [
    "* Open http://0.0.0.0:5566/docs#\n",
    "* In Windows, open http://localhost:5566/docs#"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5861b8d4-8f61-4e2c-8c5d-ab9697303184",
   "metadata": {},
   "source": [
    "**If you are using VS Code editor, use the following instead and then execute python filename.py in your terminal:**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a48bcf3-e3c8-4dd0-8268-460c1f5a95eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# if __name__ == \"__main__\":\n",
    "#     import uvicorn\n",
    "\n",
    "#     uvicorn.run(app, host=\"0.0.0.0\", port=5566)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cd91d863-2aea-4d63-93a9-6a211d78f4f9",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
