code

2025-06-10 12:03:09 +05:30 · 2025-06-10 12:03:09 +05:30 · 5a29b6f238
commit 5a29b6f238
31 changed files with 66977 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,204 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
 *$py.class
 # C extensions
 *.so
 # Distribution / packaging
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 share/python-wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 MANIFEST
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 *.manifest
 *.spec
 # Installer logs
 pip-log.txt
 pip-delete-this-directory.txt
 # Unit test / coverage reports
 htmlcov/
 .tox/
 .nox/
 .coverage
 .coverage.*
 .cache
 nosetests.xml
 coverage.xml
 *.cover
 *.py,cover
 .hypothesis/
 .pytest_cache/
 cover/
 # Translations
 *.mo
 *.pot
 # Django stuff:
 *.log
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
 # Flask stuff:
 instance/
 .webassets-cache
 # Scrapy stuff:
 .scrapy
 # Sphinx documentation
 docs/_build/
 # PyBuilder
 .pybuilder/
 target/
 # Jupyter Notebook
 .ipynb_checkpoints
 # IPython
 profile_default/
 ipython_config.py
 # pyenv
 #   For a library or package, you might want to ignore these files since the code is
 #   intended to run in multiple environments; otherwise, check them in:
 # .python-version
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 #   install all needed dependencies.
 #Pipfile.lock
 # UV
 #   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #uv.lock
 # poetry
 #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
 #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
 #poetry.lock
 #poetry.toml
 # pdm
 #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
 #pdm.lock
 #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
 #   in version control.
 #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
 .pdm.toml
 .pdm-python
 .pdm-build/
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
 # Celery stuff
 celerybeat-schedule
 celerybeat.pid
 # SageMath parsed files
 *.sage.py
 # Environments
 .env
 .venv
 env/
 venv/
 ENV/
 env.bak/
 venv.bak/
 .conda
 # vectorstore
 products_vectorstore/
 # Spyder project settings
 .spyderproject
 .spyproject
 # Rope project settings
 .ropeproject
 # mkdocs documentation
 /site
 # mypy
 .mypy_cache/
 .dmypy.json
 dmypy.json
 # Pyre type checker
 .pyre/
 # pytype static type analyzer
 .pytype/
 # Cython debug symbols
 cython_debug/
 # PyCharm
 #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 # Abstra
 # Abstra is an AI-powered process automation framework.
 # Ignore directories containing user credentials, local state, and settings.
 # Learn more at https://abstra.io/docs
 .abstra/
 # Visual Studio Code
 #  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore 
 #  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
 #  and can be added to the global gitignore or merged into this file. However, if you prefer, 
 #  you could uncomment the following to ignore the entire vscode folder
 # .vscode/
 # Ruff stuff:
 .ruff_cache/
 # PyPI configuration file
 .pypirc
 # Cursor
 #  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
 #  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
 #  refer to https://docs.cursor.com/context/ignore-files
 .cursorignore
 .cursorindexingignore
 # Model files
 random_forest_model.pkl
 test.pkl
 train.pkl
--- a/README.md
+++ b/README.md
--- a/agents/agent.py
+++ b/agents/agent.py
@ -0,0 +1,33 @@
 import logging
 class Agent:
    """
    Common base class for every Agent.
    Provides ANSI color constants and a `log` helper that tags each message
    with the agent's name, so output from different agents can be told apart.
    """
    # ANSI escape codes for foreground colors
    RED = '\033[31m'
    GREEN = '\033[32m'
    YELLOW = '\033[33m'
    BLUE = '\033[34m'
    MAGENTA = '\033[35m'
    CYAN = '\033[36m'
    WHITE = '\033[37m'
    # ANSI escape code for the background color used by log()
    BG_BLACK = '\033[40m'
    # ANSI escape code that switches back to the terminal's default colors
    RESET = '\033[0m'
    # Subclasses override these two to identify themselves in the log
    name: str = ""
    color: str = '\033[37m'
    def log(self, message):
        """
        Write `message` to the root logger at INFO level, prefixed with
        "[<agent name>] " and wrapped in this agent's color codes
        """
        tagged = f"[{self.name}] {message}"
        logging.info(f"{self.BG_BLACK}{self.color}{tagged}{self.RESET}")
--- a/agents/deals.py
+++ b/agents/deals.py
@ -0,0 +1,120 @@
 from pydantic import BaseModel
 from typing import List, Dict
 from bs4 import BeautifulSoup
 import re
 import feedparser
 from tqdm import tqdm
 import requests
 import time
 # RSS feeds that ScrapedDeal.fetch reads, one per dealnews.com section
 _DEALNEWS_RSS = "https://www.dealnews.com/{}/?rss=1"
 feeds = [
    _DEALNEWS_RSS.format(section)
    for section in (
        "c142/Electronics",
        "c39/Computers",
        "c238/Automotive",
        "f1912/Smart-Home",
        "c196/Home-Garden",
    )
 ]
 def extract(html_snippet: str) -> str:
    """
    Pull the readable text out of an HTML snippet.
    When the snippet contains a <div class="snippet summary">, the text of
    that div is returned with markup removed; otherwise the snippet is
    returned as given. In both cases newlines are replaced by spaces.
    """
    summary_div = BeautifulSoup(html_snippet, 'html.parser').find('div', class_='snippet summary')
    if not summary_div:
        return html_snippet.replace('\n', ' ')
    text = summary_div.get_text(strip=True)
    # Parse the extracted text a second time, then drop anything tag-like that remains
    text = BeautifulSoup(text, 'html.parser').get_text()
    text = re.sub('<[^<]+?>', '', text)
    return text.strip().replace('\n', ' ')
 class ScrapedDeal:
    """
    A class to represent a Deal retrieved from an RSS feed.
    Creating an instance downloads the deal's web page (one HTTP GET per
    deal) in order to fill in `details` and `features`.
    """
    # NOTE: `category` is declared here but never assigned by this class
    category: str
    title: str
    summary: str
    url: str
    details: str
    features: str
    # Seconds to wait for the deal page. requests applies no timeout by
    # default, so without this one unresponsive server stalls the whole scrape
    REQUEST_TIMEOUT = 10
    def __init__(self, entry: Dict[str, str]):
        """
        Populate this instance based on the provided feed entry
        :param entry: a feed entry providing 'title', 'summary' and 'links'
        :raises requests.exceptions.RequestException: on a connection failure,
            or when the page does not respond within REQUEST_TIMEOUT seconds
        """
        self.title = entry['title']
        self.summary = extract(entry['summary'])
        self.url = entry['links'][0]['href']
        # Fetch page content and parse details and features
        stuff = requests.get(self.url, timeout=self.REQUEST_TIMEOUT).content
        soup = BeautifulSoup(stuff, 'html.parser')
        # Try to get deal content section text; fall back to empty text if the page has none
        content_section = soup.find('div', class_='content-section')
        content = content_section.get_text() if content_section else ''
        content = content.replace('\nmore', '').replace('\n', ' ')
        # Everything before the first "Features" is details, everything after is features
        if "Features" in content:
            self.details, self.features = content.split("Features", 1)
        else:
            self.details = content
            self.features = ""
    def __repr__(self):
        """
        Return a string to describe this deal
        """
        return f"<{self.title}>"
    def describe(self):
        """
        Return a longer string to describe this deal for use in calling a model
        """
        return (
            f"Title: {self.title}\n"
            f"Details: {self.details.strip()}\n"
            f"Features: {self.features.strip()}\n"
            f"URL: {self.url}"
        )
    @classmethod
    def fetch(cls, show_progress: bool = False) -> List['ScrapedDeal']:
        """
        Retrieve all deals from the selected RSS feeds
        :param show_progress: wrap the feed list in a tqdm progress bar
        :return: one ScrapedDeal per entry, using at most the first 10 entries of each feed
        """
        deals = []
        feed_iter = tqdm(feeds) if show_progress else feeds
        for feed_url in feed_iter:
            feed = feedparser.parse(feed_url)
            for entry in feed.entries[:10]:
                deals.append(cls(entry))
                # Pause between page downloads
                time.sleep(0.5)
        return deals
 class Deal(BaseModel):
    """
    A class to represent a Deal with a summary description
    """
    # Text describing the product on offer
    product_description: str
    # Price at which the product is offered
    price: float
    # Link to the deal
    url: str
 class DealSelection(BaseModel):
    """
    A class to represent a list of Deals
    """
    # The selected deals
    deals: List[Deal]
 class Opportunity(BaseModel):
    """
    A class to represent a possible opportunity: a Deal where we estimate
    it should cost more than it's being offered
    """
    # The deal being assessed
    deal: Deal
    # Our estimate of what the product should cost
    estimate: float
    # How far the estimate exceeds the deal's price
    # NOTE(review): presumably estimate - deal.price; confirm against the code that builds this
    discount: float
--- a/agents/ensemble_agent.py
+++ b/agents/ensemble_agent.py
@ -0,0 +1,48 @@
 import pandas as pd
 from sklearn.linear_model import LinearRegression
 import joblib
 from agents.agent import Agent
 from agents.specialist_agent import SpecialistAgent
 from agents.frontier_agent import FrontierAgent
 from agents.random_forest_agent import RandomForestAgent
 class EnsembleAgent(Agent):
    """
    An Agent that blends the Specialist, Frontier and Random Forest price
    estimates using a model loaded from 'ensemble_model.pkl'
    """
    name = "Ensemble Agent"
    color = Agent.YELLOW
    def __init__(self, collection):
        """
        Build the three underlying pricing agents and load the saved ensemble model
        :param collection: handed on to FrontierAgent
        """
        self.log("Initializing Ensemble Agent")
        self.specialist = SpecialistAgent()
        self.frontier = FrontierAgent(collection)
        self.random_forest = RandomForestAgent()
        self.model = joblib.load('ensemble_model.pkl')
        self.log("Ensemble Agent is ready")
    def price(self, description: str) -> float:
        """
        Price a product with the ensemble:
        ask each of the three agents for an estimate, then feed those estimates
        (plus their min and max) to the loaded model to obtain the combined price
        :param description: the description of a product
        :return: an estimate of its price, never below 0
        """
        self.log("Running Ensemble Agent - collaborating with specialist, frontier and random forest agents")
        # Same call order as the feature columns: specialist, frontier, random forest
        estimates = [
            self.specialist.price(description),
            self.frontier.price(description),
            self.random_forest.price(description),
        ]
        features = pd.DataFrame({
            'Specialist': [estimates[0]],
            'Frontier': [estimates[1]],
            'RandomForest': [estimates[2]],
            'Min': [min(estimates)],
            'Max': [max(estimates)],
        })
        prediction = self.model.predict(features)[0]
        y = max(0, prediction)
        self.log(f"Ensemble Agent complete - returning ${y:.2f}")
        return y
--- a/agents/frontier_agent.py
+++ b/agents/frontier_agent.py
@ -0,0 +1,104 @@
 # imports
 import os
 import re
 import math
 import json
 from typing import List, Dict
 from sentence_transformers import SentenceTransformer
 from datasets import load_dataset
 import chromadb
 from items import Item
 from testing import Tester
 from agents.agent import Agent
 from groq import Groq  # Only Groq is used
 class FrontierAgent(Agent):
    """
    An Agent that estimates prices by asking a Groq-hosted model, giving it
    similar products retrieved from a Chroma collection as context
    """
    name = "Frontier Agent"
    color = Agent.BLUE
    MODEL = "meta-llama/llama-4-scout-17b-16e-instruct"
    def __init__(self, collection):
        """
        Set up this instance by connecting to Groq,
        connect to the Chroma Datastore, and set up the vector encoding model.
        :param collection: the Chroma collection queried for similar products
        :raises ValueError: if the GROQ_API_KEY environment variable is not set
        """
        self.log("Initializing Frontier Agent")
        groq_api_key = os.getenv("GROQ_API_KEY")
        if not groq_api_key:
            raise ValueError("GROQ_API_KEY environment variable not set.")
        self.client = Groq(api_key=groq_api_key)
        self.log("Frontier Agent is set up with Groq")
        self.collection = collection
        self.model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.log("Frontier Agent is ready")
    def make_context(self, similars: List[str], prices: List[float]) -> str:
        """
        Create context that can be inserted into the prompt
        :param similars: descriptions of similar products
        :param prices: the price of each similar product, in the same order
        :return: an introduction followed by one paragraph per (product, price) pair
        """
        intro = "To provide some context, here are some other items that might be similar to the item you need to estimate.\n\n"
        related = "".join(
            f"Potentially related product:\n{similar}\nPrice is ${price:.2f}\n\n"
            for similar, price in zip(similars, prices)
        )
        return intro + related
    def messages_for(self, description: str, similars: List[str], prices: List[float]) -> List[Dict[str, str]]:
        """
        Create the message list to be included in a call to the language model
        :return: system, user and assistant messages; the assistant message is a
            "Price is $" prefix for the model to continue
        """
        system_message = "You estimate prices of items. Reply only with the price, no explanation"
        user_prompt = self.make_context(similars, prices)
        user_prompt += "And now the question for you:\n\n"
        user_prompt += "How much does this cost?\n\n" + description
        return [
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_prompt},
            {"role": "assistant", "content": "Price is $"}
        ]
    def find_similars(self, description: str):
        """
        Return a list of items similar to the given one by looking in the Chroma datastore
        :return: a tuple (documents, prices) for the 5 nearest matches
        """
        self.log("Frontier Agent is performing a RAG search of the Chroma datastore to find 5 similar products")
        vector = self.model.encode([description])
        results = self.collection.query(query_embeddings=vector.astype(float).tolist(), n_results=5)
        # A single query was sent, so the matches are in the first (only) result row
        documents = results['documents'][0][:]
        prices = [m['price'] for m in results['metadatas'][0][:]]
        self.log("Frontier Agent has found similar products")
        return documents, prices
    def get_price(self, s) -> float:
        """
        A utility that plucks a floating point number out of a string
        :param s: the text to search; may be None or empty
        :return: the first number found, or 0.0 when there is none
        """
        # The model reply can be None or empty; treat that as "no price found"
        # instead of failing on None.replace
        if not s:
            return 0.0
        s = s.replace('$', '').replace(',', '')
        match = re.search(r"[-+]?\d*\.\d+|\d+", s)
        return float(match.group()) if match else 0.0
    def price(self, description: str) -> float:
        """
        Make a call to Groq to estimate the price of the described product,
        by looking up 5 similar products and including them in the prompt to give context
        :param description: the product to be estimated
        :return: the price parsed from the model's reply, or 0.0 if none could be parsed
        """
        documents, prices = self.find_similars(description)
        self.log(f"Frontier Agent is calling {self.MODEL} via Groq with context including 5 similar products")
        response = self.client.chat.completions.create(
            model=self.MODEL,
            messages=self.messages_for(description, documents, prices),
            temperature=0.0,
            max_tokens=5,
            seed=42
        )
        reply = response.choices[0].message.content
        result = self.get_price(reply)
        self.log(f"Frontier Agent completed - predicting ${result:.2f}")
        return result
--- a/agents/messaging_agent.py
+++ b/agents/messaging_agent.py
@ -0,0 +1,79 @@
 import os
 from twilio.rest import Client
 from agents.deals import Opportunity
 import http.client
 import urllib
 from agents.agent import Agent
 from dotenv import load_dotenv
 # Runs at import time, before MessagingAgent reads its credentials with os.getenv
 load_dotenv()
 # Notification channels used by MessagingAgent
 DO_TEXT = True  # send SMS via Twilio
 DO_PUSH = False  # send push notifications via Pushover
 class MessagingAgent(Agent):
    """
    An Agent that notifies the user about an Opportunity by SMS (Twilio)
    and/or push notification (Pushover), depending on DO_TEXT and DO_PUSH
    """
    name = "Messaging Agent"
    color = Agent.WHITE
    def __init__(self):
        """
        Set up this object to either do push notifications via Pushover,
        or SMS via Twilio,
        whichever is specified in the constants.
        Credentials are read from environment variables, with placeholder defaults.
        """
        self.log("Messaging Agent is initializing")
        if DO_TEXT:
            account_sid = os.getenv('TWILIO_ACCOUNT_SID', 'your-sid-if-not-using-env')
            auth_token = os.getenv('TWILIO_AUTH_TOKEN', 'your-auth-if-not-using-env')
            self.me_from = os.getenv('TWILIO_FROM', 'your-phone-number-if-not-using-env')
            self.me_to = os.getenv('MY_PHONE_NUMBER', 'your-phone-number-if-not-using-env')
            self.client = Client(account_sid, auth_token)
            self.log("Messaging Agent has initialized Twilio")
        if DO_PUSH:
            self.pushover_user = os.getenv('PUSHOVER_USER', 'your-pushover-user-if-not-using-env')
            self.pushover_token = os.getenv('PUSHOVER_TOKEN', 'your-pushover-user-if-not-using-env')
            self.log("Messaging Agent has initialized Pushover")
    def message(self, text):
        """
        Send an SMS message using the Twilio API
        :param text: the body of the SMS
        """
        self.log("Messaging Agent is sending a text message")
        self.client.messages.create(
          from_=self.me_from,
          body=text,
          to=self.me_to
        )
    def push(self, text):
        """
        Send a Push Notification using the Pushover API
        :param text: the notification message
        """
        # Import the submodule explicitly: a bare `import urllib` does not by
        # itself guarantee that `urllib.parse` has been loaded
        from urllib.parse import urlencode
        self.log("Messaging Agent is sending a push notification")
        conn = http.client.HTTPSConnection("api.pushover.net:443")
        try:
            conn.request("POST", "/1/messages.json",
              urlencode({
                "token": self.pushover_token,
                "user": self.pushover_user,
                "message": text,
                "sound": "cashregister"
              }), { "Content-type": "application/x-www-form-urlencoded" })
            conn.getresponse()
        finally:
            # Release the socket whether or not the request succeeded
            conn.close()
    def alert(self, opportunity: Opportunity):
        """
        Make an alert about the specified Opportunity
        The text carries the price, estimate and discount, the first 10
        characters of the product description, and the deal URL.
        """
        text = f"Deal Alert! Price=${opportunity.deal.price:.2f}, "
        text += f"Estimate=${opportunity.estimate:.2f}, "
        text += f"Discount=${opportunity.discount:.2f} :"
        text += opportunity.deal.product_description[:10]+'... '
        text += opportunity.deal.url
        if DO_TEXT:
            self.message(text)
        if DO_PUSH:
            self.push(text)
        self.log("Messaging Agent has completed")
--- a/agents/planning_agent.py
+++ b/agents/planning_agent.py
@ -0,0 +1,57 @@
 from typing import Optional, List
 from agents.agent import Agent
 from agents.deals import ScrapedDeal, DealSelection, Deal, Opportunity
 from agents.scanner_agent import ScannerAgent
 from agents.ensemble_agent import EnsembleAgent
 from agents.messaging_agent import MessagingAgent
 class PlanningAgent(Agent):
    """
    An Agent that coordinates the scanner, ensemble and messaging agents:
    find deals, estimate their true price, and alert on the best one
    """
    name = "Planning Agent"
    color = Agent.GREEN
    # Minimum discount (estimate minus price) for a deal to be alerted on and returned
    DEAL_THRESHOLD = 50
    def __init__(self, collection):
        """
        Create instances of the 3 Agents that this planner coordinates across
        :param collection: handed on to EnsembleAgent
        """
        self.log("Planning Agent is initializing")
        self.scanner = ScannerAgent()
        self.ensemble = EnsembleAgent(collection)
        self.messenger = MessagingAgent()
        self.log("Planning Agent is ready")
    def run(self, deal: Deal) -> Opportunity:
        """
        Run the workflow for a particular deal
        :param deal: the deal, summarized from an RSS scrape
        :returns: an opportunity including the discount
        """
        self.log("Planning Agent is pricing up a potential deal")
        estimate = self.ensemble.price(deal.product_description)
        discount = estimate - deal.price
        self.log(f"Planning Agent has processed a deal with discount ${discount:.2f}")
        return Opportunity(deal=deal, estimate=estimate, discount=discount)
    def plan(self, memory: Optional[List[Opportunity]] = None) -> Optional[Opportunity]:
        """
        Run the full workflow:
        1. Use the ScannerAgent to find deals from RSS feeds
        2. Use the EnsembleAgent to estimate them
        3. Use the MessagingAgent to send a notification of deals
        :param memory: opportunities surfaced in the past; it is handed to
            ScannerAgent.scan, which reads `.deal.url` from each item to skip
            deals already seen. Defaults to no history.
        :return: an Opportunity if one was surfaced, otherwise None
        """
        self.log("Planning Agent is kicking off a run")
        # None instead of a shared mutable [] default; the scanner still receives a list
        selection = self.scanner.scan(memory=[] if memory is None else memory)
        # A DealSelection object is truthy even with zero deals, so test the list
        # itself; otherwise picking the best of an empty list raises
        if not selection or not selection.deals:
            return None
        opportunities = [self.run(deal) for deal in selection.deals[:5]]
        # First opportunity with the largest discount
        best = max(opportunities, key=lambda opp: opp.discount)
        self.log(f"Planning Agent has identified the best deal has discount ${best.discount:.2f}")
        worthwhile = best.discount > self.DEAL_THRESHOLD
        if worthwhile:
            self.messenger.alert(best)
        self.log("Planning Agent has completed a run")
        return best if worthwhile else None
--- a/agents/random_forest_agent.py
+++ b/agents/random_forest_agent.py
@ -0,0 +1,37 @@
 # imports
 import os
 import re
 from typing import List
 from sentence_transformers import SentenceTransformer
 import joblib
 from agents.agent import Agent
 class RandomForestAgent(Agent):
    """
    An Agent that prices a product by embedding its description and passing
    the embedding to a model loaded from 'random_forest_model.pkl'
    """
    name = "Random Forest Agent"
    color = Agent.MAGENTA
    def __init__(self):
        """
        Load the SentenceTransformer used to encode descriptions
        and the saved model from 'random_forest_model.pkl'
        """
        self.log("Random Forest Agent is initializing")
        self.vectorizer = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.model = joblib.load('random_forest_model.pkl')
        self.log("Random Forest Agent is ready")
    def price(self, description: str) -> float:
        """
        Estimate the price of the described item with the loaded model
        :param description: the product to be estimated
        :return: the predicted price, never below 0
        """
        self.log("Random Forest Agent is starting a prediction")
        embedding = self.vectorizer.encode([description])
        # One description in, so take the first (only) prediction
        prediction = self.model.predict(embedding)[0]
        result = max(0, prediction)
        self.log(f"Random Forest Agent completed - predicting ${result:.2f}")
        return result
--- a/agents/scanner_agent.py
+++ b/agents/scanner_agent.py
@ -0,0 +1,95 @@
 import os
 import json
 from typing import Optional, List
 from groq import Groq  # Requires `groq` Python package
 from agents.deals import ScrapedDeal, DealSelection
 from agents.agent import Agent
 class ScannerAgent(Agent):
    """
    An Agent that scrapes deals from RSS feeds and asks a Groq-hosted model
    to pick and summarize the most promising ones as JSON
    """
    MODEL = "meta-llama/llama-4-scout-17b-16e-instruct"
    SYSTEM_PROMPT = """You identify and summarize the 5 most detailed deals from a list, by selecting deals that have the most detailed, high quality description and the most clear price.
    Respond strictly in JSON with no explanation, using this format. You should provide the price as a number derived from the description. If the price of a deal isn't clear, do not include that deal in your response.
    Most important is that you respond with the 5 deals that have the most detailed product description with price. It's not important to mention the terms of the deal; most important is a thorough description of the product.
    Be careful with products that are described as "$XXX off" or "reduced by $XXX" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price. 
    {"deals": [
        {
            "product_description": "Your clearly expressed summary of the product in 4-5 sentences. Details of the item are much more important than why it's a good deal. Avoid mentioning discounts and coupons; focus on the item itself. There should be a paragpraph of text for each item you choose.",
            "price": 99.99,
            "url": "the url as provided"
        },
        ...
    ]}"""
    USER_PROMPT_PREFIX = """Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price that is greater than 0.
    Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.
    Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.
    Be careful with products that are described as "$XXX off" or "reduced by $XXX" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price. 
    Deals:
    """
    USER_PROMPT_SUFFIX = "\n\nStrictly respond in JSON and include exactly 5 deals, no more."
    name = "Scanner Agent"
    color = Agent.CYAN
    def __init__(self):
        """
        Set up this instance by initializing Groq
        The API key is read from the GROQ_API_KEY environment variable.
        """
        self.log("Scanner Agent is initializing")
        self.client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
        self.log("Scanner Agent is ready")
    def fetch_deals(self, memory) -> List[ScrapedDeal]:
        """
        Scrape the RSS feeds and drop deals that were surfaced before
        :param memory: past items; each must expose `.deal.url`
        :return: the scraped deals whose URL is not in memory
        """
        self.log("Scanner Agent is about to fetch deals from RSS feed")
        # A set gives constant-time membership tests in the filter below
        urls = {opp.deal.url for opp in memory}
        scraped = ScrapedDeal.fetch()
        result = [scrape for scrape in scraped if scrape.url not in urls]
        self.log(f"Scanner Agent received {len(result)} deals not already scraped")
        return result
    def make_user_prompt(self, scraped) -> str:
        """
        Build the user prompt: prefix, the description of every scraped deal, suffix
        """
        user_prompt = self.USER_PROMPT_PREFIX
        user_prompt += '\n\n'.join([scrape.describe() for scrape in scraped])
        user_prompt += self.USER_PROMPT_SUFFIX
        return user_prompt
    @staticmethod
    def _strip_code_fence(content: str) -> str:
        """
        Remove a surrounding Markdown code fence (``` or ```json) from a model reply
        """
        if not content.startswith("```"):
            return content
        content = content.strip("`").strip()
        # Drop the language tag as a prefix; str.strip("json") would instead
        # remove any of the characters j/s/o/n from both ends
        if content.startswith("json"):
            content = content[len("json"):].strip()
        return content
    def scan(self, memory: Optional[List] = None) -> Optional[DealSelection]:
        """
        Scrape new deals and ask the model to select and summarize the best ones
        :param memory: past items, each exposing `.deal.url`, whose deals should
            be skipped. Defaults to no history.
        :return: the selected deals with price > 0, or None when nothing new was
            scraped or the model's reply could not be turned into a DealSelection
        """
        # None instead of a shared mutable [] default
        scraped = self.fetch_deals([] if memory is None else memory)
        if not scraped:
            return None
        user_prompt = self.make_user_prompt(scraped)
        self.log("Scanner Agent is calling Groq for completions")
        response = self.client.chat.completions.create(
            model=self.MODEL,
            messages=[
                {"role": "system", "content": self.SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.0,
            max_tokens=2048
        )
        # The reply content may be None; treat that like an empty (unparseable) reply
        content = (response.choices[0].message.content or "").strip()
        # Handle potential Markdown block
        content = self._strip_code_fence(content)
        try:
            parsed = json.loads(content)
        except json.JSONDecodeError as e:
            self.log(f"⚠️ JSON parsing error: {e}")
            self.log(f"🔎 Raw model output: {content}")
            return None
        try:
            result = DealSelection(**parsed)
        except (TypeError, ValueError) as e:
            # TypeError: the JSON was valid but not an object, so ** unpacking fails.
            # ValueError: pydantic's validation error (a ValueError subclass)
            # when the fields are missing or of the wrong shape.
            self.log(f"⚠️ Unexpected JSON structure: {e}")
            self.log(f"🔎 Raw model output: {content}")
            return None
        result.deals = [deal for deal in result.deals if deal.price > 0]
        self.log(f"Scanner Agent received {len(result.deals)} selected deals with price > 0")
        return result
--- a/agents/specialist_agent.py
+++ b/agents/specialist_agent.py
@ -0,0 +1,29 @@
 import modal
 from agents.agent import Agent
 class SpecialistAgent(Agent):
    """
    An Agent that prices products by calling the "Pricer" class of the
    "pricer-service" app on Modal, where our fine-tuned LLM runs remotely
    """
    name = "Specialist Agent"
    color = Agent.RED
    def __init__(self):
        """
        Look up the remote Pricer class on Modal and keep an instance of it
        """
        self.log("Specialist Agent is initializing - connecting to modal")
        pricer_cls = modal.Cls.from_name("pricer-service", "Pricer")
        self.pricer = pricer_cls()
        self.log("Specialist Agent is ready")
    def price(self, description: str) -> float:
        """
        Ask the remote model for its estimate of this item's price
        :param description: the product to be estimated
        :return: the value returned by the remote `price` call
        """
        self.log("Specialist Agent is calling remote fine-tuned model")
        estimate = self.pricer.price.remote(description)
        self.log(f"Specialist Agent completed - predicting ${estimate:.2f}")
        return estimate
--- a/app.ipynb
+++ b/app.ipynb
@ -0,0 +1,304 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "42ec1713",
   "metadata": {},
   "outputs": [],
   "source": [
    "import gradio as gr\n",
    "from deal_agent_framework import DealAgentFramework\n",
    "from agents.deals import Opportunity, Deal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "29dfdb7b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running on local URL:  http://127.0.0.1:7860\n",
      "\n",
      "To create a public link, set `share=True` in `launch()`.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "with gr.Blocks(title=\"The Price is Right\", fill_width=True) as ui:\n",
    "\n",
    "    with gr.Row():\n",
    "        gr.Markdown('<div style=\"text-align: center;font-size:24px\">The Price is Right - Deal Hunting Agentic AI</div>')\n",
    "    with gr.Row():\n",
    "        gr.Markdown('<div style=\"text-align: center;font-size:14px\">Autonomous agent framework that finds online deals, collaborating with a proprietary fine-tuned LLM deployed on Modal, and a RAG pipeline with a frontier model and Chroma.</div>')\n",
    "        \n",
    "\n",
    "ui.launch(inbrowser=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a131dd88",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running on local URL:  http://127.0.0.1:7861\n",
      "\n",
      "To create a public link, set `share=True` in `launch()`.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"http://127.0.0.1:7861/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "with gr.Blocks(title=\"The Price is Right\", fill_width=True) as ui:\n",
    "\n",
    "    # Sample deal to populate initially\n",
    "    initial_deal = Deal(\n",
    "        product_description=\"Example description\",\n",
    "        price=100.0,\n",
    "        url=\"https://cnn.com\"\n",
    "    )\n",
    "    initial_opportunity = Opportunity(\n",
    "        deal=initial_deal,\n",
    "        estimate=200.0,\n",
    "        discount=100.0\n",
    "    )\n",
    "    opportunities = gr.State([initial_opportunity])\n",
    "\n",
    "    def get_table(opps):\n",
    "        return [\n",
    "            [opp.deal.product_description, opp.deal.price, opp.estimate, opp.discount, opp.deal.url]\n",
    "            for opp in opps\n",
    "        ]\n",
    "\n",
    "    with gr.Row():\n",
    "        gr.Markdown('<div style=\"text-align: center;font-size:24px\">\"The Price is Right\" - Deal Hunting Agentic AI</div>')\n",
    "    with gr.Row():\n",
    "        gr.Markdown('<div style=\"text-align: center;font-size:14px\">Deals surfaced so far:</div>')\n",
    "\n",
    "    # Scrollable table container using HTML\n",
    "    with gr.Row():\n",
    "        gr.HTML(\"<div style='max-height: 400px; overflow-y: auto;'>\")\n",
    "        opportunities_dataframe = gr.Dataframe(\n",
    "            headers=[\"Description\", \"Price\", \"Estimate\", \"Discount\", \"URL\"],\n",
    "            wrap=True,\n",
    "            column_widths=[4, 1, 1, 1, 2],\n",
    "            row_count=10,\n",
    "            col_count=5\n",
    "        )\n",
    "        gr.HTML(\"</div>\")\n",
    "\n",
    "    ui.load(get_table, inputs=[opportunities], outputs=[opportunities_dataframe])\n",
    "\n",
    "ui.launch(inbrowser=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "250e4890",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[44m\u001b[37m[Agent Framework] Initializing Agent Framework\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[44m\u001b[37m[Agent Framework] Initializing Agent Framework\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[40m\u001b[32m[Planning Agent] Planning Agent is initializing\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[40m\u001b[32m[Planning Agent] Planning Agent is initializing\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[40m\u001b[36m[Scanner Agent] Scanner Agent is initializing\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[40m\u001b[36m[Scanner Agent] Scanner Agent is initializing\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[40m\u001b[36m[Scanner Agent] Scanner Agent is ready\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[40m\u001b[36m[Scanner Agent] Scanner Agent is ready\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[40m\u001b[33m[Ensemble Agent] Initializing Ensemble Agent\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[40m\u001b[33m[Ensemble Agent] Initializing Ensemble Agent\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[40m\u001b[31m[Specialist Agent] Specialist Agent is initializing - connecting to modal\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[40m\u001b[31m[Specialist Agent] Specialist Agent is initializing - connecting to modal\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[40m\u001b[31m[Specialist Agent] Specialist Agent is ready\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[40m\u001b[31m[Specialist Agent] Specialist Agent is ready\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[40m\u001b[34m[Frontier Agent] Initializing Frontier Agent\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[40m\u001b[34m[Frontier Agent] Initializing Frontier Agent\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[40m\u001b[34m[Frontier Agent] Frontier Agent is set up with Groq\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] \u001b[40m\u001b[34m[Frontier Agent] Frontier Agent is set up with Groq\u001b[0m\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] Use pytorch device_name: cpu\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] Use pytorch device_name: cpu\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2\n",
      "[2025-05-28 21:22:00 +0530] [Agents] [INFO] Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2\n",
      "[2025-05-28 21:22:05 +0530] [Agents] [INFO] \u001b[40m\u001b[34m[Frontier Agent] Frontier Agent is ready\u001b[0m\n",
      "[2025-05-28 21:22:05 +0530] [Agents] [INFO] \u001b[40m\u001b[34m[Frontier Agent] Frontier Agent is ready\u001b[0m\n",
      "[2025-05-28 21:22:05 +0530] [Agents] [INFO] \u001b[40m\u001b[35m[Random Forest Agent] Random Forest Agent is initializing\u001b[0m\n",
      "[2025-05-28 21:22:05 +0530] [Agents] [INFO] \u001b[40m\u001b[35m[Random Forest Agent] Random Forest Agent is initializing\u001b[0m\n",
      "[2025-05-28 21:22:05 +0530] [Agents] [INFO] Use pytorch device_name: cpu\n",
      "[2025-05-28 21:22:05 +0530] [Agents] [INFO] Use pytorch device_name: cpu\n",
      "[2025-05-28 21:22:05 +0530] [Agents] [INFO] Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2\n",
      "[2025-05-28 21:22:05 +0530] [Agents] [INFO] Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2\n",
      "[2025-05-28 21:22:11 +0530] [Agents] [INFO] \u001b[40m\u001b[35m[Random Forest Agent] Random Forest Agent is ready\u001b[0m\n",
      "[2025-05-28 21:22:11 +0530] [Agents] [INFO] \u001b[40m\u001b[35m[Random Forest Agent] Random Forest Agent is ready\u001b[0m\n",
      "[2025-05-28 21:22:11 +0530] [Agents] [INFO] \u001b[40m\u001b[33m[Ensemble Agent] Ensemble Agent is ready\u001b[0m\n",
      "[2025-05-28 21:22:11 +0530] [Agents] [INFO] \u001b[40m\u001b[33m[Ensemble Agent] Ensemble Agent is ready\u001b[0m\n",
      "[2025-05-28 21:22:11 +0530] [Agents] [INFO] \u001b[40m\u001b[37m[Messaging Agent] Messaging Agent is initializing\u001b[0m\n",
      "[2025-05-28 21:22:11 +0530] [Agents] [INFO] \u001b[40m\u001b[37m[Messaging Agent] Messaging Agent is initializing\u001b[0m\n",
      "[2025-05-28 21:22:11 +0530] [Agents] [INFO] \u001b[40m\u001b[37m[Messaging Agent] Messaging Agent has initialized Pushover\u001b[0m\n",
      "[2025-05-28 21:22:11 +0530] [Agents] [INFO] \u001b[40m\u001b[37m[Messaging Agent] Messaging Agent has initialized Pushover\u001b[0m\n",
      "[2025-05-28 21:22:11 +0530] [Agents] [INFO] \u001b[40m\u001b[32m[Planning Agent] Planning Agent is ready\u001b[0m\n",
      "[2025-05-28 21:22:11 +0530] [Agents] [INFO] \u001b[40m\u001b[32m[Planning Agent] Planning Agent is ready\u001b[0m\n",
      "[2025-05-28 21:22:11 +0530] [Agents] [INFO] \u001b[44m\u001b[37m[Agent Framework] Agent Framework is ready\u001b[0m\n",
      "[2025-05-28 21:22:11 +0530] [Agents] [INFO] \u001b[44m\u001b[37m[Agent Framework] Agent Framework is ready\u001b[0m\n",
      "Running on local URL:  http://127.0.0.1:7862\n",
      "[2025-05-28 21:22:11 +0530] [Agents] [INFO] HTTP Request: GET http://127.0.0.1:7862/startup-events \"HTTP/1.1 200 OK\"\n",
      "[2025-05-28 21:22:11 +0530] [Agents] [INFO] HTTP Request: GET http://127.0.0.1:7862/startup-events \"HTTP/1.1 200 OK\"\n",
      "[2025-05-28 21:22:11 +0530] [Agents] [INFO] HTTP Request: HEAD http://127.0.0.1:7862/ \"HTTP/1.1 200 OK\"\n",
      "[2025-05-28 21:22:11 +0530] [Agents] [INFO] HTTP Request: HEAD http://127.0.0.1:7862/ \"HTTP/1.1 200 OK\"\n",
      "\n",
      "To create a public link, set `share=True` in `launch()`.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"http://127.0.0.1:7862/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2025-05-28 21:22:12 +0530] [Agents] [INFO] HTTP Request: GET https://api.gradio.app/pkg-version \"HTTP/1.1 200 OK\"\n",
      "[2025-05-28 21:22:12 +0530] [Agents] [INFO] HTTP Request: GET https://api.gradio.app/pkg-version \"HTTP/1.1 200 OK\"\n"
     ]
    }
   ],
   "source": [
    "agent_framework = DealAgentFramework()\n",
    "agent_framework.init_agents_as_needed()\n",
    "\n",
    "with gr.Blocks(title=\"The Price is Right\", fill_width=True) as ui:\n",
    "\n",
    "    initial_deal = Deal(product_description=\"Example description\", price=100.0, url=\"https://cnn.com\")\n",
    "    initial_opportunity = Opportunity(deal=initial_deal, estimate=200.0, discount=100.0)\n",
    "    opportunities = gr.State([initial_opportunity])\n",
    "\n",
    "    def get_table(opps):\n",
    "        return [[opp.deal.product_description, opp.deal.price, opp.estimate, opp.discount, opp.deal.url] for opp in opps]\n",
    "\n",
    "    def do_select(opportunities, selected_index: gr.SelectData):\n",
    "        row = selected_index.index[0]\n",
    "        opportunity = opportunities[row]\n",
    "        agent_framework.planner.messenger.alert(opportunity)\n",
    "\n",
    "    with gr.Row():\n",
    "        gr.Markdown('<div style=\"text-align: center;font-size:24px\">\"The Price is Right\" - Deal Hunting Agentic AI</div>')\n",
    "\n",
    "    with gr.Row():\n",
    "        gr.Markdown('<div style=\"text-align: center;font-size:14px\">Deals surfaced so far:</div>')\n",
    "\n",
    "    with gr.Row():\n",
    "        gr.HTML(\"<div style='max-height: 400px; overflow-y: auto;'>\")\n",
    "        opportunities_dataframe = gr.Dataframe(\n",
    "            headers=[\"Description\", \"Price\", \"Estimate\", \"Discount\", \"URL\"],\n",
    "            wrap=True,\n",
    "            column_widths=[4, 1, 1, 1, 2],\n",
    "            row_count=10,\n",
    "            col_count=5\n",
    "        )\n",
    "        gr.HTML(\"</div>\")\n",
    "\n",
    "    ui.load(get_table, inputs=[opportunities], outputs=[opportunities_dataframe])\n",
    "    opportunities_dataframe.select(do_select, inputs=[opportunities], outputs=[])\n",
    "\n",
    "ui.launch(inbrowser=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1cb83e9b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.22"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/deal_agent_framework.py
+++ b/deal_agent_framework.py
@ -0,0 +1,99 @@
 import os
 import sys
 import logging
 import json
 from typing import List, Optional
 from twilio.rest import Client
 from dotenv import load_dotenv
 import chromadb
 from agents.planning_agent import PlanningAgent
 from agents.deals import Opportunity
 from sklearn.manifold import TSNE
 import numpy as np
 # ANSI escape sequences used by DealAgentFramework.log: blue background and
 # white text for the message, then a reset back to the default color
 BG_BLUE = '\033[44m'
 WHITE = '\033[37m'
 RESET = '\033[0m'
 # Product categories and the plot color for each one; the two lists are paired
 # by position (get_plot_data picks COLORS[CATEGORIES.index(category)])
 CATEGORIES = ['Appliances', 'Automotive', 'Cell_Phones_and_Accessories', 'Electronics','Musical_Instruments', 'Office_Products', 'Tools_and_Home_Improvement', 'Toys_and_Games']
 COLORS = ['red', 'blue', 'brown', 'orange', 'yellow', 'green' , 'purple', 'cyan']
 def init_logging():
    """
    Configure the root logger to write INFO-level records to stdout.

    The call is idempotent: the handler is registered under a fixed name and is
    only attached if no handler with that name is already on the root logger.
    """
    root = logging.getLogger()
    root.setLevel(logging.INFO)
    # This function runs every time a DealAgentFramework is constructed. Without
    # the guard each call would attach one more handler, and every log record
    # would then be printed once per handler.
    handler_name = "agents-stdout"
    if any(existing.get_name() == handler_name for existing in root.handlers):
        return
    handler = logging.StreamHandler(sys.stdout)
    handler.set_name(handler_name)
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter(
        "[%(asctime)s] [Agents] [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S %z",
    )
    handler.setFormatter(formatter)
    root.addHandler(handler)
 class DealAgentFramework:
    """
    Coordinates a deal-hunting run: owns the Chroma collection, the persisted
    list of opportunities ("memory") and the lazily created Planning Agent
    """
    DB = "products_vectorstore"
    MEMORY_FILENAME = "memory.json"

    def __init__(self):
        """
        Set up logging and environment variables, load the stored opportunities
        and open the 'products' collection; the planner is created on demand
        """
        init_logging()
        load_dotenv()
        chroma = chromadb.PersistentClient(path=self.DB)
        self.memory = self.read_memory()
        self.collection = chroma.get_or_create_collection('products')
        self.planner = None

    def init_agents_as_needed(self):
        """
        Create the Planning Agent the first time it is needed; do nothing afterwards
        """
        if self.planner:
            return
        self.log("Initializing Agent Framework")
        self.planner = PlanningAgent(self.collection)
        self.log("Agent Framework is ready")

    def read_memory(self) -> List[Opportunity]:
        """
        Load the opportunities stored in MEMORY_FILENAME, or an empty list if the file is absent
        """
        if not os.path.exists(self.MEMORY_FILENAME):
            return []
        with open(self.MEMORY_FILENAME, "r") as file:
            records = json.load(file)
        return [Opportunity(**record) for record in records]

    def write_memory(self) -> None:
        """
        Serialize every opportunity in memory and write the list to MEMORY_FILENAME as JSON
        """
        # Serialize before opening the file so a failure here leaves the old file intact
        serialized = [opportunity.dict() for opportunity in self.memory]
        with open(self.MEMORY_FILENAME, "w") as file:
            json.dump(serialized, file, indent=2)

    def log(self, message: str):
        """
        Log a message at INFO level, tagged and colored as coming from the Agent Framework
        """
        logging.info("".join((BG_BLUE, WHITE, "[Agent Framework] ", message, RESET)))

    def run(self) -> List[Opportunity]:
        """
        Run one planning pass; if it yields a result, remember and persist it.
        Returns the full memory list either way
        """
        self.init_agents_as_needed()
        logging.info("Kicking off Planning Agent")
        result = self.planner.plan(memory=self.memory)
        logging.info(f"Planning Agent has completed and returned: {result}")
        if not result:
            return self.memory
        self.memory.append(result)
        self.write_memory()
        return self.memory

    @classmethod
    def get_plot_data(cls, max_datapoints=10000):
        """
        Fetch up to max_datapoints entries from the 'products' collection and
        return (documents, 3-component t-SNE reduction of the embeddings, colors by category)
        """
        collection = chromadb.PersistentClient(path=cls.DB).get_or_create_collection('products')
        fetched = collection.get(include=['embeddings', 'documents', 'metadatas'], limit=max_datapoints)
        vectors = np.array(fetched['embeddings'])
        documents = fetched['documents']
        colors = [
            COLORS[CATEGORIES.index(metadata['category'])]
            for metadata in fetched['metadatas']
        ]
        reducer = TSNE(n_components=3, random_state=42, n_jobs=-1)
        return documents, reducer.fit_transform(vectors), colors
 # When executed as a script, build the framework and perform a single run
 if __name__=="__main__":
    DealAgentFramework().run()
--- a/ensemble_model.pkl
+++ b/ensemble_model.pkl
--- a/ensemble_pricer.ipynb
+++ b/ensemble_pricer.ipynb
--- a/hello.py
+++ b/hello.py
@ -0,0 +1,29 @@
 import modal
 from modal import App, Image
 # Setup: the Modal app named "hello", and a Debian-slim image with the
 # `requests` package pip-installed for the functions registered on the app
 app = modal.App("hello")
 image = Image.debian_slim().pip_install("requests")
 # Hello!
@app.function(image=image)
def hello() -> str:
    """
    Query ipinfo.io from wherever this function runs and return a greeting
    that names the reported city, region and country
    """
    import requests
    data = requests.get('https://ipinfo.io/json').json()
    city = data['city']
    region = data['region']
    country = data['country']
    return f"Hello from {city}, {region}, {country}!!"
 # New - added thanks to student Tue H.!
@app.function(image=image, region="eu")
def hello_europe() -> str:
    """
    Same greeting as hello(), but registered with region="eu": query ipinfo.io
    and return a greeting that names the reported city, region and country
    """
    import requests
    data = requests.get('https://ipinfo.io/json').json()
    city = data['city']
    region = data['region']
    country = data['country']
    return f"Hello from {city}, {region}, {country}!!"
--- a/items.py
+++ b/items.py
@ -0,0 +1,101 @@
 from typing import Optional
 from transformers import AutoTokenizer
 import re
 # Model whose tokenizer is used by Item to count and truncate tokens
 BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
 # An Item is only included if its text encodes to more than MIN_TOKENS tokens
 MIN_TOKENS = 150
 # Included text is truncated to at most MAX_TOKENS tokens
 MAX_TOKENS = 160
 # Raw contents must be longer than MIN_CHARS characters to be considered at all
 MIN_CHARS = 300
 # Raw contents are cut to this many characters before scrubbing and tokenizing
 CEILING_CHARS = MAX_TOKENS * 7
 class Item:
    """
    A cleaned and curated datapoint: one Product together with its Price
    """
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
    PREFIX = "Price is $"
    QUESTION = "How much does this cost to the nearest dollar?"
    # Phrases removed from the details text, applied in this order
    REMOVALS = [
        '"Batteries Included?": "No"',
        '"Batteries Included?": "Yes"',
        '"Batteries Required?": "No"',
        '"Batteries Required?": "Yes"',
        "By Manufacturer",
        "Item",
        "Date First",
        "Package",
        ":",
        "Number of",
        "Best Sellers",
        "Number",
        "Product ",
    ]

    title: str
    price: float
    category: str
    token_count: int = 0
    details: Optional[str]
    prompt: Optional[str] = None
    include = False

    def __init__(self, data, price):
        """
        Record the title and price, then parse the rest of the datapoint
        """
        self.title = data['title']
        self.price = price
        self.parse(data)

    def scrub_details(self):
        """
        Return the details string with every phrase listed in REMOVALS stripped out
        """
        cleaned = self.details
        for phrase in self.REMOVALS:
            cleaned = cleaned.replace(phrase, "")
        return cleaned

    def scrub(self, stuff):
        """
        Tidy the given text: collapse bracket/quote/colon/whitespace runs into single
        spaces and squeeze repeated commas. Words of 7+ chars that contain a digit
        are dropped too, as these are likely irrelevant product numbers
        """
        tidy = re.sub(r'[:\[\]"{}【】\s]+', ' ', stuff).strip()
        for clutter in (" ,", ",,,", ",,"):
            tidy = tidy.replace(clutter, ",")
        kept = []
        for word in tidy.split(' '):
            is_product_number = len(word) >= 7 and any(char.isdigit() for char in word)
            if not is_product_number:
                kept.append(word)
        return " ".join(kept)

    def parse(self, data):
        """
        Assemble description, features and details into one text; if it is long
        enough in characters and in tokens, build the prompt and set include to True
        """
        sections = []
        description = '\n'.join(data['description'])
        if description:
            sections.append(description)
        features = '\n'.join(data['features'])
        if features:
            sections.append(features)
        self.details = data['details']
        if self.details:
            sections.append(self.scrub_details())
        # Every section that is present is terminated by a newline
        contents = ''.join(section + '\n' for section in sections)
        if len(contents) <= MIN_CHARS:
            return
        contents = contents[:CEILING_CHARS]
        text = f"{self.scrub(self.title)}\n{self.scrub(contents)}"
        tokens = self.tokenizer.encode(text, add_special_tokens=False)
        if len(tokens) <= MIN_TOKENS:
            return
        text = self.tokenizer.decode(tokens[:MAX_TOKENS])
        self.make_prompt(text)
        self.include = True

    def make_prompt(self, text):
        """
        Build the training prompt (question, text, then the price rounded to whole
        dollars) and record how many tokens it encodes to
        """
        self.prompt = f"{self.QUESTION}\n\n{text}\n\n"
        self.prompt += f"{self.PREFIX}{str(round(self.price))}.00"
        encoded = self.tokenizer.encode(self.prompt, add_special_tokens=False)
        self.token_count = len(encoded)

    def test_prompt(self):
        """
        Return the prompt cut off right after the first PREFIX, i.e. without the price
        """
        head, _, _ = self.prompt.partition(self.PREFIX)
        return head + self.PREFIX

    def __repr__(self):
        """
        Return a short String form of this Item showing its title and price
        """
        return f"<{self.title} = ${self.price}>"
--- a/llama.py
+++ b/llama.py
@ -0,0 +1,44 @@
 import modal
 from modal import App, Volume, Image
 # Setup: the Modal app named "llama", the container image with the packages
 # that generate() imports, the Modal secret named "hf-secret", and the GPU type
 # and model name that generate() is configured with
 app = modal.App("llama")
 image = Image.debian_slim().pip_install("torch", "transformers", "bitsandbytes", "accelerate","hf-xet")
 secrets = [modal.Secret.from_name("hf-secret")]
 GPU = "T4"
 MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B"
@app.function(image=image, secrets=secrets, gpu=GPU, timeout=1800)
 def generate(prompt: str) -> str:
    """
    Modal function: load MODEL_NAME with 4-bit quantization and generate a short
    continuation of `prompt`.

    The tokenizer and model are loaded inside the function body, i.e. on each
    invocation. Generation is limited to 5 new tokens and a single sequence.
    Returns the decoded text of the first output sequence.
    """
    # Imports live inside the function; the packages are pip-installed into the
    # Modal image defined at module level
    import os
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, set_seed
    # Quant Config: 4-bit NF4 weights with double quantization, bfloat16 compute
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4"
    )
    # Load model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Reuse the end-of-sequence token for padding, and pad on the right
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, 
        quantization_config=quant_config,
        device_map="auto"
    )
    # Fixed seed, set before generation
    set_seed(42)
    inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
    # Single unpadded prompt, so every input position is attended to
    attention_mask = torch.ones(inputs.shape, device="cuda")
    outputs = model.generate(inputs, attention_mask=attention_mask, max_new_tokens=5, num_return_sequences=1)
    # NOTE(review): decode() is called without skip_special_tokens, so special
    # tokens may appear in the returned text - confirm callers expect that
    return tokenizer.decode(outputs[0])
--- a/log_utils.py
+++ b/log_utils.py
@ -0,0 +1,35 @@
 # Foreground colors (ANSI escape sequences)
 RED = '\033[31m'
 GREEN = '\033[32m'
 YELLOW = '\033[33m'
 BLUE = '\033[34m'
 MAGENTA = '\033[35m'
 CYAN = '\033[36m'
 WHITE = '\033[37m'
 # Background colors (ANSI escape sequences)
 BG_BLACK = '\033[40m'
 BG_BLUE = '\033[44m'
 # Reset code to return to default color
 RESET = '\033[0m'
 # Maps each background+foreground escape prefix to the HTML hex color that
 # reformat() puts into the <span> replacing that prefix
 mapper = {
    BG_BLACK+RED: "#dd0000",
    BG_BLACK+GREEN: "#00dd00",
    BG_BLACK+YELLOW: "#dddd00",
    BG_BLACK+BLUE: "#0000ee",
    BG_BLACK+MAGENTA: "#aa00dd",
    BG_BLACK+CYAN: "#00dddd",
    BG_BLACK+WHITE: "#87CEEB",
    BG_BLUE+WHITE: "#ff7800"
 }
 def reformat(message):
    """
    Convert the ANSI color codes in a log message to HTML: each known
    background+foreground prefix becomes an opening <span> with the mapped
    color, and each reset code becomes the closing </span>
    """
    html = message
    for key in mapper:
        value = mapper[key]
        html = html.replace(key, f'<span style="color: {value}">')
    return html.replace(RESET, '</span>')
--- a/memory.json
+++ b/memory.json
@ -0,0 +1,29 @@
 [
  {
    "deal": {
      "product_description": "The Refurb Dell Latitude 5410 Laptops feature 10th-generation i5 and up to 16GB RAM. They come with a 100-day Dell warranty. Various configurations are available.",
      "price": 221.0,
      "url": "https://www.dealnews.com/Refurb-Dell-Latitude-5410-Laptops-From-221-free-shipping/21740363.html?iref=rss-c39"
    },
    "estimate": 461.8637435104997,
    "discount": 240.86374351049972
  },
  {
    "deal": {
      "product_description": "The Unlocked Google Pixel Fold256GB Android Smartphone features a Google Tensor G2 chipset, supports split-screen multitasking, and is compatible with most major carriers. It has an IPX8 water-resistant design, 12GB RAM, and 256GB storage, running on Android 13.0.",
      "price": 600.0,
      "url": "https://www.dealnews.com/products/Google/Unlocked-Google-Pixel-Fold-256-GB-Android-Smartphone/467718.html?iref=rss-c142"
    },
    "estimate": 1049.176593507867,
    "discount": 449.176593507867
  },
  {
    "deal": {
      "product_description": "The Dell Inspiron 15 13th-Gen. i7 15.6\" Laptop features a 13th-generation Intel Core i7-1355U 1.7GHz / 1.2GHz 10-Core CPU, 15.6\" 1920x1080 (1080p) 120Hz display, 16GB RAM, and a 1TB SSD. It comes with Windows 11 Home.",
      "price": 580.0,
      "url": "https://www.dealnews.com/Dell-Inspiron-15-13-th-Gen-i7-15-6-Laptop-for-580-free-shipping/21741290.html?iref=rss-c39"
    },
    "estimate": 958.1831970370052,
    "discount": 378.18319703700524
  }
 ]
--- a/messaging_planning.ipynb
+++ b/messaging_planning.ipynb
@ -0,0 +1,101 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "267ad308",
   "metadata": {},
   "outputs": [],
   "source": [
    "from dotenv import load_dotenv\n",
    "from agents.messaging_agent import MessagingAgent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "be624713",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent = MessagingAgent()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ab42de85",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent.push(\"MASSIVE NEWS!!!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "854b721c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import chromadb\n",
    "DB = \"products_vectorstore\"\n",
    "client = chromadb.PersistentClient(path=DB)\n",
    "collection = client.get_or_create_collection('products')\n",
    "from agents.planning_agent import PlanningAgent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "32ffc905",
   "metadata": {},
   "outputs": [],
   "source": [
    "planner = PlanningAgent(collection)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "71b1c9b4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Opportunity(deal=Deal(product_description='The Polti Vaporetto Corded Smart Mop Electric Steam Cleaner features 12 multi-purpose attachments, 5-channel swivel mop, up to 50-PSI adjustable steam control, and a 156\" power cord. The model number is PTNA0018.', price=80.0, url='https://www.dealnews.com/products/Polti-Vaporetto-Corded-Smart-Mop-Electric-Steam-Cleaner/489869.html?iref=rss-f1912'), estimate=306.4524282644685, discount=226.4524282644685)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "planner.plan()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.22"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/price_is_right.py
+++ b/price_is_right.py
@ -0,0 +1,61 @@
 import gradio as gr
 from deal_agent_framework import DealAgentFramework
 from agents.deals import Opportunity, Deal
 class App:
    """
    Gradio user interface for the deal-hunting agent framework
    """
    def __init__(self):    
        # Created in the start() callback when the UI loads, not here
        self.agent_framework = None
    def run(self):
        """
        Build the Gradio Blocks UI, wire up its callbacks and launch it
        """
        with gr.Blocks(title="The Price is Right", fill_width=True) as ui:
            def table_for(opps):
                # One table row per opportunity; money values are formatted as $x.xx strings
                return [[opp.deal.product_description, f"${opp.deal.price:.2f}", f"${opp.estimate:.2f}", f"${opp.discount:.2f}", opp.deal.url] for opp in opps]
            def start():
                # Load callback: create the framework and its agents, then show what is in memory
                self.agent_framework = DealAgentFramework()
                self.agent_framework.init_agents_as_needed()
                opportunities = self.agent_framework.memory
                table = table_for(opportunities)
                return table
            def go():
                # Timer callback: run the framework once and show the updated memory
                # NOTE(review): relies on start() having already set self.agent_framework - confirm
                self.agent_framework.run()
                new_opportunities = self.agent_framework.memory
                table = table_for(new_opportunities)
                return table
            def do_select(selected_index: gr.SelectData):
                # Select callback: send an alert for the opportunity in the selected row
                opportunities = self.agent_framework.memory
                row = selected_index.index[0]
                opportunity = opportunities[row]
                self.agent_framework.planner.messenger.alert(opportunity)
            # Page layout: components are created in display order
            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:24px">"The Price is Right" - Deal Hunting Agentic AI</div>')
            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:14px">Autonomous agent framework that finds online deals, collaborating with a proprietary fine-tuned LLM deployed on Modal, and a RAG pipeline with a frontier model and Chroma.</div>')
            with gr.Row():
                gr.Markdown('<div style="text-align: center;font-size:14px">Deals surfaced so far:</div>')
            with gr.Row():
                opportunities_dataframe = gr.Dataframe(
                    headers=["Description", "Price", "Estimate", "Discount", "URL"],
                    wrap=True,
                    column_widths=[4, 1, 1, 1, 2],
                    row_count=10,
                    col_count=5
                )
            # Event wiring: fill the table on load, refresh it on every timer tick,
            # and alert when a cell is selected
            ui.load(start, inputs=[], outputs=[opportunities_dataframe])
            # NOTE(review): value=60 is presumably an interval in seconds - confirm against Gradio docs
            timer = gr.Timer(value=60)
            timer.tick(go, inputs=[], outputs=[opportunities_dataframe])
            opportunities_dataframe.select(do_select)
        ui.launch(share=False, inbrowser=True)
 # When executed as a script, build and launch the UI
 if __name__=="__main__":
    App().run()
--- a/pricer_agent.ipynb
+++ b/pricer_agent.ipynb
@ -0,0 +1,934 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5c9cf754",
   "metadata": {},
   "outputs": [],
   "source": [
    "import modal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a73b738f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import modal\n",
    "\n",
    "app = modal.App(\"example-get-started\")\n",
    "\n",
    "\n",
    "@app.function()\n",
    "def square(x):\n",
    "    print(\"This code is running on a remote worker!\")\n",
    "    return x**2\n",
    "\n",
    "\n",
    "@app.local_entrypoint()\n",
    "def main():\n",
    "    print(\"the square is\", square.remote(42))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f8653ac7",
   "metadata": {},
   "outputs": [],
   "source": [
    "from hello import app, hello"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae58c3a8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Hello from Delhi, Delhi, IN!!'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from hello import app, hello\n",
    "reply=hello.local()\n",
    "reply"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9f178389",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama import app, generate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "03715973",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "925d0142b2374a918fd8fd170c415ac1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #008000; text-decoration-color: #008000\">✓</span> Initialized. <span style=\"color: #b2b2b2; text-decoration-color: #b2b2b2\">View run at </span><span style=\"color: #b2b2b2; text-decoration-color: #b2b2b2; text-decoration: underline\">https://modal.com/apps/ankushmehta243/main/ap-eokiQPPBn1jbB7S6coAKUc</span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[32m✓\u001b[0m Initialized. \u001b[38;5;249mView run at \u001b[0m\u001b[4;38;5;249mhttps://modal.com/apps/ankushmehta243/main/ap-eokiQPPBn1jbB7S6coAKUc\u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e292ee898b494254bd851d0d770ec240",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #008000; text-decoration-color: #008000\">✓</span> Created objects.\n",
       "<span style=\"color: #808080; text-decoration-color: #808080\">├── </span>🔨 Created mount c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\llama.py\n",
       "<span style=\"color: #808080; text-decoration-color: #808080\">└── </span>🔨 Created function generate.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[32m✓\u001b[0m Created objects.\n",
       "\u001b[38;5;244m├── \u001b[0m🔨 Created mount c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\llama.py\n",
       "\u001b[38;5;244m└── \u001b[0m🔨 Created function generate.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b68bee6063054b938ec071da309fba60",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1AFetching 4 files:   0%|          | 0/4 [00:00&lt;?, ?it/s]</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1AFetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1AFetching 4 files:  25%|██▌       | 1/4 [01:04&lt;03:14, 64.67s/it]\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1AFetching 4 files:  25%|██▌       | 1/4 [01:04<03:14, 64.67s/it]\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1AFetching 4 files:  75%|███████▌  | 3/4 [01:04&lt;00:16, 16.89s/it]Fetching 4 files: 100%|██████████| 4/4 [01:04&lt;00:00, 16.25s/it]\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1AFetching 4 files:  75%|███████▌  | 3/4 [01:04<00:16, 16.89s/it]Fetching 4 files: 100%|██████████| 4/4 [01:04<00:00, 16.25s/it]\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1ALoading checkpoint shards:   0%|          | 0/4 [00:00&lt;?, ?it/s]\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1ALoading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1ALoading checkpoint shards:  25%|██▌       | 1/4 [00:04&lt;00:14,  4.72s/it]\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1ALoading checkpoint shards:  25%|██▌       | 1/4 [00:04<00:14,  4.72s/it]\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1ALoading checkpoint shards:  50%|█████     | 2/4 [00:09&lt;00:09,  4.95s/it]\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1ALoading checkpoint shards:  50%|█████     | 2/4 [00:09<00:09,  4.95s/it]\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1ALoading checkpoint shards:  75%|███████▌  | 3/4 [00:15&lt;00:05,  5.09s/it]\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1ALoading checkpoint shards:  75%|███████▌  | 3/4 [00:15<00:05,  5.09s/it]\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1ALoading checkpoint shards: 100%|██████████| 4/4 [00:15&lt;00:00,  3.25s/it]Loading checkpoint shards: 100%|██████████| 4/4 [00:15&lt;00:00,  3.88s/it]\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1ALoading checkpoint shards: 100%|██████████| 4/4 [00:15<00:00,  3.25s/it]Loading checkpoint shards: 100%|██████████| 4/4 [00:15<00:00,  3.88s/it]\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31mSetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #808000; text-decoration-color: #808000\">Stopping app - local entrypoint completed.\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[33mStopping app - local entrypoint completed.\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #008000; text-decoration-color: #008000\">✓</span> App completed. <span style=\"color: #b2b2b2; text-decoration-color: #b2b2b2\">View run at </span><span style=\"color: #b2b2b2; text-decoration-color: #b2b2b2; text-decoration: underline\">https://modal.com/apps/ankushmehta243/main/ap-eokiQPPBn1jbB7S6coAKUc</span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[32m✓\u001b[0m App completed. \u001b[38;5;249mView run at \u001b[0m\u001b[4;38;5;249mhttps://modal.com/apps/ankushmehta243/main/ap-eokiQPPBn1jbB7S6coAKUc\u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "'<|begin_of_text|>Life is a mystery, everyone must stand alone, I hear you call my name,'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "with modal.enable_output():\n",
    "    with app.run():\n",
    "        result=generate.remote(\"Life is a mystery, everyone must stand alone, I hear\")\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "eaf47ba6",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pricer_ephemeral import app, price"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "3e3cbdc8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "49883cadfa514f4ba03efc3da05fe68f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #008000; text-decoration-color: #008000\">✓</span> Initialized. <span style=\"color: #b2b2b2; text-decoration-color: #b2b2b2\">View run at </span><span style=\"color: #b2b2b2; text-decoration-color: #b2b2b2; text-decoration: underline\">https://modal.com/apps/ankushmehta243/main/ap-zYROHiEK5Vkuc6Nxdjt8Wj</span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[32m✓\u001b[0m Initialized. \u001b[38;5;249mView run at \u001b[0m\u001b[4;38;5;249mhttps://modal.com/apps/ankushmehta243/main/ap-zYROHiEK5Vkuc6Nxdjt8Wj\u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "bd86728bceb1457981c92613dfdf95c2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #008000; text-decoration-color: #008000\">✓</span> Created objects.\n",
       "<span style=\"color: #808080; text-decoration-color: #808080\">├── </span>🔨 Created mount c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_ephemeral.py\n",
       "<span style=\"color: #808080; text-decoration-color: #808080\">└── </span>🔨 Created function price.\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[32m✓\u001b[0m Created objects.\n",
       "\u001b[38;5;244m├── \u001b[0m🔨 Created mount c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_ephemeral.py\n",
       "\u001b[38;5;244m└── \u001b[0m🔨 Created function price.\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "61caea8dea2847259e00ba021d6fff52",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1AFetching 4 files:   0%|          | 0/4 [00:00&lt;?, ?it/s]</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1AFetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1AFetching 4 files:  25%|██▌       | 1/4 [00:58&lt;02:56, 58.99s/it]\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1AFetching 4 files:  25%|██▌       | 1/4 [00:58<02:56, 58.99s/it]\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1AFetching 4 files:  50%|█████     | 2/4 [01:01&lt;00:51, 25.66s/it]\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1AFetching 4 files:  50%|█████     | 2/4 [01:01<00:51, 25.66s/it]\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1AFetching 4 files: 100%|██████████| 4/4 [01:01&lt;00:00, 15.34s/it]\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1AFetching 4 files: 100%|██████████| 4/4 [01:01<00:00, 15.34s/it]\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1ALoading checkpoint shards:   0%|          | 0/4 [00:00&lt;?, ?it/s]\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1ALoading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1ALoading checkpoint shards:  25%|██▌       | 1/4 [00:04&lt;00:12,  4.26s/it]\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1ALoading checkpoint shards:  25%|██▌       | 1/4 [00:04<00:12,  4.26s/it]\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1ALoading checkpoint shards:  50%|█████     | 2/4 [00:09&lt;00:09,  4.58s/it]\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1ALoading checkpoint shards:  50%|█████     | 2/4 [00:09<00:09,  4.58s/it]\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1ALoading checkpoint shards:  75%|███████▌  | 3/4 [00:13&lt;00:04,  4.68s/it]\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1ALoading checkpoint shards:  75%|███████▌  | 3/4 [00:13<00:04,  4.68s/it]\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">\u001b[1ALoading checkpoint shards: 100%|██████████| 4/4 [00:14&lt;00:00,  2.99s/it]Loading checkpoint shards: 100%|██████████| 4/4 [00:14&lt;00:00,  3.57s/it]\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31m\u001b[1ALoading checkpoint shards: 100%|██████████| 4/4 [00:14<00:00,  2.99s/it]Loading checkpoint shards: 100%|██████████| 4/4 [00:14<00:00,  3.57s/it]\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800000; text-decoration-color: #800000\">Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[31mSetting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #808000; text-decoration-color: #808000\">Stopping app - local entrypoint completed.\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[33mStopping app - local entrypoint completed.\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #808000; text-decoration-color: #808000\">Runner terminated.\n",
       "</span></pre>\n"
      ],
      "text/plain": [
       "\u001b[33mRunner terminated.\n",
       "\u001b[0m"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #008000; text-decoration-color: #008000\">✓</span> App completed. <span style=\"color: #b2b2b2; text-decoration-color: #b2b2b2\">View run at </span><span style=\"color: #b2b2b2; text-decoration-color: #b2b2b2; text-decoration: underline\">https://modal.com/apps/ankushmehta243/main/ap-zYROHiEK5Vkuc6Nxdjt8Wj</span>\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[32m✓\u001b[0m App completed. \u001b[38;5;249mView run at \u001b[0m\u001b[4;38;5;249mhttps://modal.com/apps/ankushmehta243/main/ap-zYROHiEK5Vkuc6Nxdjt8Wj\u001b[0m\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "133.0"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "with modal.enable_output():\n",
    "    with app.run():\n",
    "        result=price.remote(\"Quadcast HyperX condenser mic, connects via usb-c to your computer for crystal clear audio\")\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "66706bed",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "- Creating objects...\n",
      "\\ Creating objects...\n",
      "/ Creating objects...\n",
      "└── - Creating mount \n",
      "    c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py: Uploaded \n",
      "    0/1 files\n",
      "\\ Creating objects...\n",
      "└── \\ Creating mount \n",
      "    c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py: Uploaded \n",
      "    0/1 files\n",
      "/ Creating objects...\n",
      "└── / Creating mount \n",
      "    c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py: Uploaded \n",
      "    0/1 files\n",
      "\\ Creating objects...\n",
      "└── \\ Creating mount \n",
      "    c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py: Uploaded \n",
      "    0/1 files\n",
      "/ Creating objects...\n",
      "└── / Creating mount \n",
      "    c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py: Uploaded \n",
      "    0/1 files\n",
      "\\ Creating objects...\n",
      "└── \\ Creating mount \n",
      "    c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py: Uploaded \n",
      "    0/1 files\n",
      "/ Creating objects...\n",
      "└── / Creating mount \n",
      "    c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py: Uploaded \n",
      "    0/1 files\n",
      "\\ Creating objects...\n",
      "└── \\ Creating mount \n",
      "    c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py: Uploaded \n",
      "    0/1 files\n",
      "/ Creating objects...\n",
      "└── / Creating mount \n",
      "    c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py: Uploaded \n",
      "    0/1 files\n",
      "\\ Creating objects...\n",
      "└── \\ Creating mount \n",
      "    c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py: Finalizing\n",
      "    index of 1 files\n",
      "/ Creating objects...\n",
      "└── / Creating mount \n",
      "    c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py: Finalizing\n",
      "    index of 1 files\n",
      "\\ Creating objects...\n",
      "└── \\ Creating mount \n",
      "    c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py: Finalizing\n",
      "    index of 1 files\n",
      "/ Creating objects...\n",
      "├── 🔨 Created mount \n",
      "│   c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py\n",
      "└── - Creating function Pricer.*...\n",
      "\\ Creating objects...\n",
      "├── 🔨 Created mount \n",
      "│   c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py\n",
      "└── 🔨 Created function Pricer.*.\n",
      "/ Creating objects...\n",
      "├── 🔨 Created mount \n",
      "│   c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py\n",
      "└── 🔨 Created function Pricer.*.\n",
      "\\ Creating objects...\n",
      "├── 🔨 Created mount \n",
      "│   c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py\n",
      "└── 🔨 Created function Pricer.*.\n",
      "| Creating objects...\n",
      "├── 🔨 Created mount \n",
      "│   c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py\n",
      "└── 🔨 Created function Pricer.*.\n",
      "\n",
      "✓ Created objects.\n",
      "├── 🔨 Created mount \n",
      "│   c:\\Users\\ANKUSH\\Desktop\\price-predictor-agent\\pricer_service.py\n",
      "└── 🔨 Created function Pricer.*.\n",
      "✓ App deployed in 7.903s! 🎉\n",
      "\n",
      "View Deployment: \n",
      "https://modal.com/apps/ankushmehta243/main/deployed/pricer-service\n"
     ]
    }
   ],
   "source": [
    "!modal deploy -m pricer_service"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "170d9ff0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "133.0\n"
     ]
    }
   ],
   "source": [
    "Pricer = modal.Cls.from_name(\"pricer-service\", \"Pricer\")\n",
    "pricer = Pricer()\n",
    "reply = pricer.price.remote(\"Quadcast HyperX condenser mic, connects via usb-c to your computer for crystal clear audio\")\n",
    "print(reply)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "e6101ef5",
   "metadata": {},
   "outputs": [],
   "source": [
    "from agents.specialist_agent import SpecialistAgent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "aabd1bcc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "299.0"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "agent = SpecialistAgent()\n",
    "agent.price(\"iPad Pro 2nd generation\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4cb51854",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/pricer_ephemeral.py
+++ b/pricer_ephemeral.py
@ -0,0 +1,66 @@
 import modal
 from modal import App, Image
 # Setup: the Modal app, the container image and the secret handed to the remote function.
 # `app` is the object whose @app.function decorator registers `price` below.
 app = modal.App("pricer")
 # Debian-slim base image with the listed packages pip-installed into it.
 image = Image.debian_slim().pip_install("torch", "transformers", "bitsandbytes", "accelerate", "peft","hf-xet")
 # Modal secret looked up by name. Presumably it carries the Hugging Face token
 # needed to download the models - confirm against your Modal workspace.
 secrets = [modal.Secret.from_name("hf-secret")]
 # Constants
 # GPU type requested for the function (passed as gpu= to the decorator below).
 GPU = "T4"
 # Model id from which both the tokenizer and the quantized base model are loaded.
 BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
 PROJECT_NAME = "pricer"
 HF_USER = "ed-donner" # your HF name here! Or use mine if you just want to reproduce my results.
 RUN_NAME = "2024-09-13_13.04.39"
 # "<project>-<run>": the repository-name part of FINETUNED_MODEL.
 PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
 # Passed as revision= to PeftModel.from_pretrained, pinning which version of the
 # fine-tuned weights is loaded (looks like a commit hash - confirm).
 REVISION = "e8d637df551603dc86cd7a1598a8f44af4d7ae36"
 # "<user>/<project>-<run>": id of the fine-tuned weights applied on top of BASE_MODEL.
 FINETUNED_MODEL = f"{HF_USER}/{PROJECT_RUN_NAME}"
@app.function(image=image, secrets=secrets, gpu=GPU, timeout=1800)
def price(description: str) -> float:
    """Estimate the price of a product from its text description.

    Loads the 4-bit quantized base model and the fine-tuned weights on every
    call, asks the model to complete a "Price is $" prompt and parses the
    first number out of the completion.

    Args:
        description: Free-text description of the product.

    Returns:
        The predicted price as a float, or 0.0 when the completion contains
        no number.
    """
    # Imported inside the function: these packages are pip-installed into
    # `image`, so the module itself can be loaded without them.
    import re

    import torch
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, set_seed

    QUESTION = "How much does this cost to the nearest dollar?"
    PREFIX = "Price is $"
    # Blank lines between question, description and prefix: the same layout
    # the Pricer service in pricer_service.py builds for this fine-tuned model.
    prompt = f"{QUESTION}\n\n{description}\n\n{PREFIX}"

    # Quant Config
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
    )

    # Load model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        quantization_config=quant_config,
        device_map="auto",
    )
    fine_tuned_model = PeftModel.from_pretrained(base_model, FINETUNED_MODEL, revision=REVISION)

    # Fixed seed so repeated calls with the same description generate the same tokens.
    set_seed(42)
    inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda")
    attention_mask = torch.ones(inputs.shape, device="cuda")
    outputs = fine_tuned_model.generate(inputs, attention_mask=attention_mask, max_new_tokens=5, num_return_sequences=1)

    # generate() returns the prompt tokens followed by the new tokens. Decode
    # only the new ones, so a description that itself contains "Price is $"
    # cannot be mistaken for the model's answer.
    completion = tokenizer.decode(outputs[0][inputs.shape[1]:])
    completion = completion.replace(",", "")  # "1,299" -> "1299"
    match = re.search(r"[-+]?\d*\.\d+|\d+", completion)
    return float(match.group()) if match else 0.0
--- a/pricer_service.py
+++ b/pricer_service.py
@ -0,0 +1,84 @@
 import modal
 from modal import App, Volume, Image
 # Setup - define our infrastructure with code!
 # `app` is the object whose @app.cls decorator registers the Pricer class below.
 app = modal.App("pricer-service")
 # Debian-slim base image with the listed packages pip-installed into it.
 image = Image.debian_slim().pip_install("huggingface", "torch", "transformers", "bitsandbytes", "accelerate", "peft", "hf-xet")
 # This collects the secret from Modal.
 # Depending on your Modal configuration, you may need to replace "hf-secret" with "huggingface-secret"
 secrets = [modal.Secret.from_name("hf-secret")]
 # Constants
 # GPU type requested for the class (passed as gpu= to the decorator below).
 GPU = "T4"
 # Model id from which both the tokenizer and the quantized base model are loaded.
 BASE_MODEL = "meta-llama/Meta-Llama-3.1-8B"
 PROJECT_NAME = "pricer"
 HF_USER = "ed-donner" # your HF name here! Or use mine if you just want to reproduce my results.
 RUN_NAME = "2024-09-13_13.04.39"
 # "<project>-<run>": the repository-name part of FINETUNED_MODEL.
 PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}"
 # Passed as revision= to PeftModel.from_pretrained, pinning which version of the
 # fine-tuned weights is loaded (looks like a commit hash - confirm).
 REVISION = "e8d637df551603dc86cd7a1598a8f44af4d7ae36"
 # "<user>/<project>-<run>": id of the fine-tuned weights applied on top of BASE_MODEL.
 FINETUNED_MODEL = f"{HF_USER}/{PROJECT_RUN_NAME}"
 # Used twice by the decorator below: as the mount point of hf_cache_volume and
 # as the value of the HF_HUB_CACHE environment variable inside the image.
 CACHE_DIR = "/cache"
 # Change this to 1 if you want Modal to be always running, otherwise it will go cold after 2 mins
 MIN_CONTAINERS = 0
 # Fixed parts of the prompt built in Pricer.price: the question goes before the
 # product description, the prefix after it for the model to complete.
 QUESTION = "How much does this cost to the nearest dollar?"
 PREFIX = "Price is $"
 # Named Modal volume (created on first use) mounted at CACHE_DIR.
 hf_cache_volume = Volume.from_name("hf-hub-cache", create_if_missing=True)
@app.cls(
    image=image.env({"HF_HUB_CACHE": CACHE_DIR}),
    secrets=secrets,
    gpu=GPU,
    timeout=1800,
    min_containers=MIN_CONTAINERS,
    volumes={CACHE_DIR: hf_cache_volume},
)
class Pricer:
    """Modal class that predicts product prices with the fine-tuned model.

    `setup` runs under `@modal.enter()` and loads the tokenizer and models
    onto the instance; `price` is the remotely callable method that reuses
    them for each request.
    """

    @modal.enter()
    def setup(self):
        """Load the tokenizer, the 4-bit quantized base model and the fine-tuned weights."""
        # Imported inside the method: these packages are pip-installed into
        # the image, so the module itself can be loaded without them.
        import torch
        from peft import PeftModel
        from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

        # Quant Config
        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_quant_type="nf4",
        )

        # Load model and tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.tokenizer.padding_side = "right"
        self.base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL,
            quantization_config=quant_config,
            device_map="auto",
        )
        self.fine_tuned_model = PeftModel.from_pretrained(self.base_model, FINETUNED_MODEL, revision=REVISION)

    @modal.method()
    def price(self, description: str) -> float:
        """Estimate the price of a product from its text description.

        Args:
            description: Free-text description of the product.

        Returns:
            The predicted price as a float, or 0.0 when the completion
            contains no number.
        """
        import re

        import torch
        from transformers import set_seed

        # Fixed seed so repeated calls with the same description generate the same tokens.
        set_seed(42)
        prompt = f"{QUESTION}\n\n{description}\n\n{PREFIX}"
        inputs = self.tokenizer.encode(prompt, return_tensors="pt").to("cuda")
        attention_mask = torch.ones(inputs.shape, device="cuda")
        outputs = self.fine_tuned_model.generate(inputs, attention_mask=attention_mask, max_new_tokens=5, num_return_sequences=1)

        # generate() returns the prompt tokens followed by the new tokens. Decode
        # only the new ones, so a description that itself contains PREFIX
        # cannot be mistaken for the model's answer.
        completion = self.tokenizer.decode(outputs[0][inputs.shape[1]:])
        completion = completion.replace(",", "")  # "1,299" -> "1299"
        match = re.search(r"[-+]?\d*\.\d+|\d+", completion)
        return float(match.group()) if match else 0.0
--- a/rag_pipeline.ipynb
+++ b/rag_pipeline.ipynb
--- a/requirements.txt
+++ b/requirements.txt
--- a/scanning_agent.ipynb
+++ b/scanning_agent.ipynb
@ -0,0 +1,336 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6478cf67",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "from dotenv import load_dotenv\n",
    "from agents.deals import ScrapedDeal, DealSelection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b1926a62",
   "metadata": {},
   "outputs": [],
   "source": [
    "load_dotenv(override=True)\n",
    "api_key = os.getenv(\"GROQ_API_KEY\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "12a606ac",
   "metadata": {},
   "outputs": [],
   "source": [
    "from groq import Groq\n",
    "\n",
    "groq_client = Groq(api_key=api_key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "c9f53242",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 0/5 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 5/5 [03:44<00:00, 44.86s/it]\n"
     ]
    }
   ],
   "source": [
    "deals = ScrapedDeal.fetch(show_progress=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "6a875195",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "50"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(deals)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a5c72fe7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"Title: Craftsman Blow Gun Kit for $17 + free shipping w/ $35\\nDetails: Bag this nice low as part of today's Daily Deals. It's currently listed at $25 on Amazon. Buy Now at Lowe's\\nFeatures: \\nURL: https://www.dealnews.com/Craftsman-Blow-Gun-Kit-for-17-free-shipping-w-35/21740415.html?iref=rss-c196\""
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "deals[44].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "878d0428",
   "metadata": {},
   "outputs": [],
   "source": [
    "system_prompt = \"\"\"You identify and summarize the 5 most detailed deals from a list, by selecting deals that have the most detailed, high quality description and the most clear price.\n",
    "Respond strictly in JSON with no explanation, using this format. You should provide the price as a number derived from the description. If the price of a deal isn't clear, do not include that deal in your response.\n",
    "Most important is that you respond with the 5 deals that have the most detailed product description with price. It's not important to mention the terms of the deal; most important is a thorough description of the product.\n",
    "Be careful with products that are described as \"$XXX off\" or \"reduced by $XXX\" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price. \n",
    "\n",
    "{\"deals\": [\n",
    "    {\n",
    "        \"product_description\": \"Your clearly expressed summary of the product in 4-5 sentences. Details of the item are much more important than why it's a good deal. Avoid mentioning discounts and coupons; focus on the item itself. There should be a paragpraph of text for each item you choose.\",\n",
    "        \"price\": 99.99,\n",
    "        \"url\": \"the url as provided\"\n",
    "    },\n",
    "    ...\n",
    "]}\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "1b414e02",
   "metadata": {},
   "outputs": [],
   "source": [
    "user_prompt = \"\"\"Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price.\n",
    "Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.\n",
    "Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.\n",
    "Be careful with products that are described as \"$XXX off\" or \"reduced by $XXX\" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price. \n",
    "\n",
    "Deals:\n",
    "\n",
    "\"\"\"\n",
    "user_prompt += '\\n\\n'.join([deal.describe() for deal in deals])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "aa33b1b6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price.\n",
      "Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.\n",
      "Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.\n",
      "Be careful with products that are described as \"$XXX off\" or \"reduced by $XXX\" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price. \n",
      "\n",
      "Deals:\n",
      "\n",
      "Title: Samsung S90F 77\" 4K OLED Vision AI Smart TV: Up to $400 off + free shipping\n",
      "Details: Save at least $100 and as much as $400 on these three sizes. The deals:48\" for $1,399.99 ($100 off)77\" for $3,299.99 ($200 off)83\" for $4,999.99 ($400 off) Shop Now at Samsung\n",
      "Features: \n",
      "URL: https://www.dealnews.com/Samsung-S90-F-77-4-K-OLED-Vision-AI-Smart-TV-Up-to-400-off-free-shipping/21740356.html?iref=rss-c142\n",
      "\n",
      "Title: Samsung The Frame LS03D 4K HDR QLED TVs: Up to $1,800 off + free shipping\n",
      "Details: Save at least $50 and as much as $1,800 on these seven sizes. The deals:32\" for $549.99 ($50 off)43\" for $749.99 ($250 off)50\" for $899.99 ($400 off)55\" for $999.99 ($500 off)65\" for $1,299.99 ($700 off)75\" for $2,199.99 ($800 off)85\" for $2,499.99 ($1,800 off) Shop Now at Samsung\n",
      "Features: \n",
      "URL: https://www.dealnews.com/Samsung-The-Frame-LS03-D-4-K-HDR-QLED-TVs-Up-to-1-800-off-free-shipping/21740352.html?iref=rss-c142\n",
      "\n",
      "Title: Unlocked Samsung Galaxy S25+ Android Smartphones: Up to $530 off w/ trade + free shipping\n",
      "Details: Samsung's offers up to a $530 credit on the Galaxy S25+ when you trade in your current device. That puts starting prices as low as $470 after the trade-in. Shop Now at Samsung\n",
      "Features: \n",
      "URL: https://www.dealnews.com/Unlocked-Samsung-Galaxy-S25-Android-Smartphones-Up-to-530-off-w-trade-free-shipping/21740347.html?iref=rss-c142\n",
      "\n",
      "Title: Samsung\n"
     ]
    }
   ],
   "source": [
    "print(user_prompt[:2000])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "84ab67e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_recommendations():\n",
    "    completion = groq_client.chat.completions.create(\n",
    "        model=\"meta-llama/llama-4-scout-17b-16e-instruct\",\n",
    "        messages=[\n",
    "            {\"role\": \"system\", \"content\": system_prompt},\n",
    "            {\"role\": \"user\", \"content\": user_prompt}\n",
    "        ],\n",
    "        temperature=0.0,\n",
    "        max_tokens=2048\n",
    "    )\n",
    "\n",
    "    content = completion.choices[0].message.content\n",
    "\n",
    "    # Strip Markdown code block markers\n",
    "    if content.startswith(\"```\"):\n",
    "        content = content.strip(\"```\").strip(\"json\").strip()\n",
    "\n",
    "    # Now parse the cleaned content\n",
    "    try:\n",
    "        result = json.loads(content)\n",
    "        return DealSelection(**result)\n",
    "    except json.JSONDecodeError as e:\n",
    "        print(\"⚠️ Failed to parse JSON:\", e)\n",
    "        print(\"🔎 Raw content:\", content)\n",
    "        return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "74d744c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "result = get_recommendations()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "7cbbda37",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(result.deals)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "78ec26f5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Deal(product_description='The Open-box Apple MacBook Air M3 13\" Laptop (2024) features a 13.6\" Liquid Retina Display, Apple M3 8-Core CPU w/ 10-Core GPU, 16GB Unified Memory, and 256GB SSD. This model is identified as MC8K4LL/A and is a great option for those looking for a high-performance laptop.', price=722.0, url='https://www.dealnews.com/products/Apple/Apple-Mac-Book-Air-M3-13-Laptop-2024-w-16-GB-RAM-256-GB-SSD/484646.html?iref=rss-c39')"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result.deals[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "909b23bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "from agents.scanner_agent import ScannerAgent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "cbc2b0a4",
   "metadata": {},
   "outputs": [],
   "source": [
    "agent = ScannerAgent()\n",
    "result = agent.scan()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "bae84d27",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DealSelection(deals=[Deal(product_description='The Dell Inspiron 16 Plus 7640 is a laptop featuring a 16-inch 1920x1200 IPS touchscreen, Intel Core Ultra 9 185H 2.3GHz Meteor Lake H 16-core CPU, 16GB RAM, and 512GB NVMe M.2 SSD. It runs on Windows 11 Pro.', price=900.0, url='https://www.dealnews.com/Dell-Inspiron-16-Plus-7640-Core-Ultra-9-185-H-16-Touch-Laptop-for-900-free-shipping/21740394.html?iref=rss-c142'), Deal(product_description='The Dell Tower ECT1250 Core Ultra 7 265 Desktop PC features an Intel Core Ultra 7 265 20-Core 1.8GHz to 5.3GHz CPU, 16GB RAM, and 512GB NVMe M.2 SSD. It comes with a wired keyboard and mouse and runs on Windows 11 Pro.', price=700.0, url='https://www.dealnews.com/Dell-Tower-ECT1250-Core-Ultra-7-265-Desktop-PC-for-700-free-shipping/21740397.html?iref=rss-c142'), Deal(product_description='The Open-box Apple MacBook Air M3 13\" Laptop (2024) features a 13.6\" Liquid Retina Display, Apple M3 8-Core CPU w/ 10-Core GPU, 16GB Unified Memory, and 256GB SSD. The model number is MC8K4LL/A.', price=722.0, url='https://www.dealnews.com/products/Apple/Apple-Mac-Book-Air-M3-13-Laptop-2024-w-16-GB-RAM-256-GB-SSD/484646.html?iref=rss-c39'), Deal(product_description='The Polti Vaporetto Corded Smart Mop Electric Steam Cleaner features 12 multi-purpose attachments, 5-channel swivel mop, up to 50-PSI adjustable steam control, and a 156\" power cord. The model number is PTNA0018.', price=80.0, url='https://www.dealnews.com/products/Polti-Vaporetto-Corded-Smart-Mop-Electric-Steam-Cleaner/489869.html?iref=rss-f1912'), Deal(product_description='The Jackery E5000 Plus Whole-Home Backup Kit includes two Jackery 5000 Plus Portable Power Stations, two Jackery Battery Pack 5000 Plus Units, and a Jackery Smart Transfer Switch.', price=9024.0, url='https://www.dealnews.com/Jackery-E5000-Plus-Whole-Home-Backup-Kit-for-9-024-free-shipping/21738592.html?iref=rss-f1912')])"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.22"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/testing.py
+++ b/testing.py
@ -0,0 +1,75 @@
 import math
 import matplotlib.pyplot as plt
 # ANSI escape sequences used to colour the per-item lines printed by Tester.run_datapoint.
 GREEN = "\033[92m"
 YELLOW = "\033[93m"
 RED = "\033[91m"
 # Restores the terminal's default colour at the end of each printed line.
 RESET = "\033[0m"
 # Maps the labels returned by Tester.color_for to an escape sequence; the
 # "orange" label is printed with the yellow code.
 COLOR_MAP = {"red":RED, "orange": YELLOW, "green": GREEN}
 class Tester:
    """Score a price predictor against ground truth and chart the result.

    The predictor is called once per datapoint for the first `size` entries
    of `data`. Each datapoint must expose `.price` (the true price) and
    `.title`. Every result is printed as a colour-coded line, and a scatter
    plot of truth versus estimate is shown at the end.
    """

    def __init__(self, predictor, data, title=None, size=250):
        """Store the predictor and data and reset all result lists.

        Args:
            predictor: Callable taking one datapoint and returning a price.
            data: Indexable collection of datapoints.
            title: Chart title; when falsy it is derived from
                `predictor.__name__` ("my_model" -> "My Model").
            size: Number of datapoints, from index 0, that `run` evaluates.
        """
        self.predictor = predictor
        self.data = data
        self.title = title or predictor.__name__.replace("_", " ").title()
        self.size = size
        self.guesses = []
        self.truths = []
        self.errors = []
        self.sles = []
        self.colors = []

    def color_for(self, error, truth):
        """Classify an absolute error as "green", "orange" or "red".

        Green means the error is under $40 or under 20% of the truth; orange
        means under $80 or under 40%; anything else is red.
        """
        # A zero truth has no finite relative error. Treating it as infinite
        # leaves only the absolute thresholds in play, where the plain
        # division would raise ZeroDivisionError.
        relative_error = error / truth if truth else math.inf
        if error < 40 or relative_error < 0.2:
            return "green"
        if error < 80 or relative_error < 0.4:
            return "orange"
        return "red"

    def run_datapoint(self, i):
        """Predict datapoint `i`, record its metrics and print one summary line."""
        datapoint = self.data[i]
        guess = self.predictor(datapoint)
        truth = datapoint.price
        error = abs(guess - truth)
        # Squared log error; the +1 keeps log() defined for a price of 0.
        log_error = math.log(truth + 1) - math.log(guess + 1)
        sle = log_error ** 2
        color = self.color_for(error, truth)
        # Truncate long titles so each result stays on one line.
        title = datapoint.title if len(datapoint.title) <= 40 else datapoint.title[:40] + "..."
        self.guesses.append(guess)
        self.truths.append(truth)
        self.errors.append(error)
        self.sles.append(sle)
        self.colors.append(color)
        print(f"{COLOR_MAP[color]}{i+1}: Guess: ${guess:,.2f} Truth: ${truth:,.2f} Error: ${error:,.2f} SLE: {sle:,.2f} Item: {title}{RESET}")

    def chart(self, title):
        """Show a scatter plot of ground truth (x) against model estimate (y)."""
        plt.figure(figsize=(12, 8))
        max_val = max(max(self.truths), max(self.guesses))
        # Diagonal reference line: points on it are perfect predictions.
        plt.plot([0, max_val], [0, max_val], color='deepskyblue', lw=2, alpha=0.6)
        plt.scatter(self.truths, self.guesses, s=3, c=self.colors)
        plt.xlabel('Ground Truth')
        plt.ylabel('Model Estimate')
        plt.xlim(0, max_val)
        plt.ylim(0, max_val)
        plt.title(title)
        plt.show()

    def report(self):
        """Compute the summary metrics and chart them.

        Averages are taken over `self.size`, so this assumes `run` has
        recorded exactly that many datapoints.
        """
        average_error = sum(self.errors) / self.size
        rmsle = math.sqrt(sum(self.sles) / self.size)
        hits = sum(1 for color in self.colors if color == "green")
        title = f"{self.title} Error=${average_error:,.2f} RMSLE={rmsle:,.2f} Hits={hits/self.size*100:.1f}%"
        self.chart(title)

    def run(self):
        """Evaluate the first `size` datapoints, then report."""
        # Never updated anywhere in this class; kept in case external code reads it.
        self.error = 0
        for i in range(self.size):
            self.run_datapoint(i)
        self.report()

    @classmethod
    def test(cls, function, data):
        """Build a Tester with the default title and size, and run it."""
        cls(function, data).run()
--- a/vector_db.ipynb
+++ b/vector_db.ipynb
@ -0,0 +1,348 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "4263f6ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import re\n",
    "import math\n",
    "import json\n",
    "from tqdm import tqdm\n",
    "import random\n",
    "from dotenv import load_dotenv\n",
    "from huggingface_hub import login\n",
    "import numpy as np\n",
    "import pickle\n",
    "from sentence_transformers import SentenceTransformer\n",
    "from datasets import load_dataset\n",
    "import chromadb\n",
    "from items import Item\n",
    "from sklearn.manifold import TSNE\n",
    "import plotly.graph_objects as go"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "b91d1bf7",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('train.pkl', 'rb') as file:\n",
    "    train = pickle.load(file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "2b7a04b6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'How much does this cost to the nearest dollar?\\n\\nDelphi FG0166 Fuel Pump Module\\nDelphi brings 80 years of OE Heritage into each Delphi pump, ensuring quality and fitment for each Delphi part. Part is validated, tested and matched to the right vehicle application Delphi brings 80 years of OE Heritage into each Delphi assembly, ensuring quality and fitment for each Delphi part Always be sure to check and clean fuel tank to avoid unnecessary returns Rigorous OE-testing ensures the pump can withstand extreme temperatures Brand Delphi, Fit Type Vehicle Specific Fit, Dimensions LxWxH 19.7 x 7.7 x 5.1 inches, Weight 2.2 Pounds, Auto Part Position Unknown, Operation Mode Mechanical, Manufacturer Delphi, Model FUEL PUMP, Dimensions 19.7\\n\\nPrice is $227.00'"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train[0].prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "e9d7cb72",
   "metadata": {},
   "outputs": [],
   "source": [
    "DB = \"products_vectorstore\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "40bc2796",
   "metadata": {},
   "outputs": [],
   "source": [
    "client = chromadb.PersistentClient(path=DB)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e9303c73",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check if the collection exists and delete it if it does\n",
    "collection_name = \"products\"\n",
    "\n",
    "# For old versions of Chroma, use this line instead of the subsequent one\n",
    "# existing_collection_names = [collection.name for collection in client.list_collections()]\n",
    "existing_collection_names = client.list_collections()\n",
    "\n",
    "if collection_name in existing_collection_names:\n",
    "    client.delete_collection(collection_name)\n",
    "    print(f\"Deleted existing collection: {collection_name}\")\n",
    "\n",
    "collection = client.create_collection(collection_name)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "586dd619",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "337adc93",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-9.46715921e-02,  4.27619852e-02,  5.51620908e-02, -5.10982354e-04,\n",
       "        1.16203353e-02, -6.80130422e-02,  2.76406445e-02,  6.06974810e-02,\n",
       "        2.88530681e-02, -1.74128301e-02, -4.94346023e-02,  2.30993200e-02,\n",
       "       -1.28614539e-02, -4.31402288e-02,  2.17510201e-02,  4.26548794e-02,\n",
       "        5.10500222e-02, -7.79727176e-02, -1.23247258e-01,  3.67455557e-02,\n",
       "        4.54121176e-03,  9.47937518e-02, -5.53098992e-02,  1.70641821e-02,\n",
       "       -2.92872526e-02, -4.47125323e-02,  2.06785351e-02,  6.39319792e-02,\n",
       "        2.27427743e-02,  4.87789363e-02, -2.33508484e-03,  4.72859591e-02,\n",
       "       -2.86258645e-02,  2.30625179e-02,  2.45130546e-02,  3.95680927e-02,\n",
       "       -4.33176570e-02, -1.02316678e-01,  2.79876147e-03,  2.39304081e-02,\n",
       "        1.61556378e-02, -8.99088103e-03,  2.07255036e-02,  6.40122816e-02,\n",
       "        6.89178854e-02, -6.98361173e-02,  2.89762835e-03, -8.10989514e-02,\n",
       "        1.71122309e-02,  2.50655250e-03, -1.06529057e-01, -4.87733409e-02,\n",
       "       -1.67761575e-02, -2.28662733e-02,  1.14816569e-01,  4.87412885e-02,\n",
       "       -1.64961927e-02, -6.90832511e-02,  1.13612078e-01, -7.18486458e-02,\n",
       "       -9.01571363e-02,  3.93730397e-06, -8.66768882e-02, -4.05915864e-02,\n",
       "        3.71316932e-02, -1.77618898e-02, -5.57463802e-02, -4.57097068e-02,\n",
       "       -5.43141440e-02, -4.00611497e-02, -4.60227989e-02,  2.82194242e-02,\n",
       "       -2.33606212e-02,  1.44406464e-02, -1.52777331e-02, -4.34328392e-02,\n",
       "        6.81274012e-02,  8.21894854e-02,  7.83894584e-03, -2.85973307e-02,\n",
       "        6.14309609e-02, -4.92684729e-02,  3.27059231e-03, -2.72683222e-02,\n",
       "       -4.59346585e-02, -2.60167439e-02,  5.70650063e-02, -5.86422451e-04,\n",
       "       -4.37076800e-02, -2.47870252e-04, -8.73550028e-02,  3.48776542e-02,\n",
       "        1.39328036e-02, -1.60043947e-02,  2.86958888e-02, -9.48595777e-02,\n",
       "        9.40611809e-02,  5.92685640e-02, -8.65013823e-02,  1.45011336e-01,\n",
       "        2.35388372e-02,  3.43324915e-02,  2.51512363e-04,  7.15541244e-02,\n",
       "       -3.12182605e-02,  3.86666618e-02, -2.47745272e-02,  6.52673766e-02,\n",
       "       -8.28818083e-02, -2.80246828e-02,  9.34542995e-03, -7.85537530e-03,\n",
       "        5.30727953e-02,  2.96895131e-02,  3.27329263e-02,  4.94736545e-02,\n",
       "        2.52207797e-02,  4.67067957e-02,  5.03403731e-02, -7.23745152e-02,\n",
       "        2.54435204e-02, -3.67217027e-02,  1.27570834e-02,  1.46063920e-02,\n",
       "        2.11491529e-02, -5.56909665e-02, -9.18515865e-03, -5.63395203e-34,\n",
       "        9.58770216e-02,  5.94577529e-02,  5.11444733e-02,  3.36359628e-02,\n",
       "       -1.34414285e-02, -2.77203768e-02, -3.48436199e-02,  1.80352516e-02,\n",
       "       -2.53210664e-02,  6.73579378e-03,  5.47839850e-02, -3.60573716e-02,\n",
       "       -5.20869531e-02, -2.90346015e-02,  4.38963762e-03,  6.50022700e-02,\n",
       "        3.07485145e-02,  2.00219769e-02,  1.73045334e-03,  2.96729826e-03,\n",
       "        3.40948463e-03, -6.78144172e-02,  3.41305025e-02,  8.37866776e-03,\n",
       "        5.39903976e-02,  2.70389207e-02,  7.84119666e-02, -1.30136281e-01,\n",
       "        4.84650210e-02,  5.14179468e-02, -7.94673618e-03,  5.57883270e-03,\n",
       "       -5.31024933e-02,  3.81298959e-02, -3.05512622e-02, -7.69778490e-02,\n",
       "        1.20531330e-02, -4.08992879e-02, -8.69358629e-02,  6.38055578e-02,\n",
       "        1.68675203e-02,  1.68741110e-03,  6.28894791e-02, -1.67711601e-02,\n",
       "        2.15586051e-02,  7.10061751e-04,  2.81031057e-03, -8.89795925e-03,\n",
       "       -1.80887170e-02, -2.16217209e-02, -5.59149943e-02,  1.78774614e-02,\n",
       "       -9.27094072e-02,  7.27912458e-03, -1.27753600e-01, -4.86938767e-02,\n",
       "        1.45872040e-02, -1.62750706e-02,  6.75623193e-02,  3.87702435e-02,\n",
       "        7.23295733e-02,  9.14992169e-02, -9.65292305e-02,  4.84791510e-02,\n",
       "       -1.06274128e-01, -1.05042122e-02,  8.90350118e-02, -8.07525739e-02,\n",
       "        7.87081793e-02, -2.04917695e-02, -5.55080362e-02, -3.31532545e-02,\n",
       "       -2.14428790e-02,  4.94698957e-02, -7.05119073e-02,  6.63998201e-02,\n",
       "        7.39670321e-02, -2.70114886e-03,  1.62262768e-02, -3.98229063e-02,\n",
       "        5.94091974e-02, -7.14365626e-03, -3.33479904e-02,  2.30419226e-02,\n",
       "        1.87185612e-02, -6.15725555e-02, -8.55416001e-04, -1.05786592e-01,\n",
       "       -8.43618810e-02, -3.92993018e-02, -3.16446945e-02,  6.60644248e-02,\n",
       "        9.41816568e-02, -8.35982710e-02,  9.50875413e-03,  1.25501758e-34,\n",
       "        6.38197511e-02,  2.11371575e-02, -1.65899675e-02,  1.88641343e-02,\n",
       "       -5.57019338e-02,  1.82812882e-03, -1.37587301e-02,  8.16278681e-02,\n",
       "       -9.13296789e-02,  7.06855804e-02,  6.79991096e-02, -5.44536524e-02,\n",
       "        3.80394794e-02,  3.80513002e-03,  1.03689805e-01,  7.32862041e-04,\n",
       "        2.95660980e-02,  4.19424325e-02, -1.20444320e-01,  1.24932900e-02,\n",
       "       -5.53505979e-02,  1.75228491e-02, -2.28164997e-02, -5.79300001e-02,\n",
       "        9.42930207e-03, -5.42277237e-03, -3.94948432e-03,  2.82348841e-02,\n",
       "       -1.28066897e-01, -1.31304078e-02,  7.42957667e-02, -1.74529944e-02,\n",
       "       -9.72759053e-02,  8.25622305e-03,  2.06900500e-02, -5.29765012e-03,\n",
       "       -1.37695735e-02, -3.50973941e-02,  1.74978618e-02, -1.76233463e-02,\n",
       "       -6.50825202e-02, -3.84675600e-02, -8.76396373e-02,  3.21291536e-02,\n",
       "        2.55025318e-03, -2.09378786e-02,  5.55309318e-02,  2.57095862e-02,\n",
       "       -2.94735916e-02,  1.25047630e-02, -6.83466420e-02, -8.00623894e-02,\n",
       "       -1.46906180e-02,  1.03744986e-02, -8.51863623e-02, -1.10538909e-02,\n",
       "        2.14596596e-02,  4.08609137e-02,  3.31647359e-02, -2.76757330e-02,\n",
       "       -2.01877356e-02,  8.98887776e-03,  3.92048731e-02,  1.15103371e-01,\n",
       "        5.50441183e-02,  2.72755288e-02, -1.09526664e-01, -1.72623191e-02,\n",
       "        1.33438576e-02, -1.73701961e-02, -5.04375855e-03, -2.00292692e-02,\n",
       "        1.16672337e-01, -1.84322931e-02,  3.70627306e-02,  1.60885453e-02,\n",
       "        3.48830558e-02,  5.50573654e-02, -6.60796463e-03,  7.06828311e-02,\n",
       "        4.07849178e-02, -1.43314470e-02, -2.85443966e-03,  2.74251904e-02,\n",
       "       -4.26768474e-02,  1.26583306e-02,  3.34343836e-02,  1.62644740e-02,\n",
       "        1.19262813e-02, -2.92118434e-02,  2.73977909e-02,  3.44305374e-02,\n",
       "        2.52833236e-02,  3.07514146e-02,  3.22558023e-02, -1.74628472e-08,\n",
       "       -1.52690308e-02,  5.37678273e-03,  1.41246557e-01,  5.08366488e-02,\n",
       "        5.32255769e-02,  9.67938229e-02,  4.33674268e-02, -6.48313668e-03,\n",
       "        1.58604365e-02,  4.05631550e-02,  6.94984868e-02,  6.04905970e-02,\n",
       "       -6.26188368e-02, -3.96144390e-02,  1.10648818e-01,  1.67735666e-02,\n",
       "       -7.68705085e-03,  2.59616226e-02, -5.28793186e-02, -2.22318973e-02,\n",
       "        1.74595993e-02,  4.75340039e-02,  3.27674821e-02, -4.59684506e-02,\n",
       "        2.01770663e-02, -1.60875786e-02, -1.58614144e-02, -1.66657437e-02,\n",
       "       -3.05246655e-02, -3.87907699e-02, -1.27654579e-02,  6.57610893e-02,\n",
       "       -2.22502407e-02, -9.44991410e-03,  2.32080836e-02,  2.66038626e-02,\n",
       "        2.14203075e-02, -7.54577760e-03,  8.84752721e-02, -9.43514556e-02,\n",
       "       -5.74871078e-02, -7.77098387e-02,  1.95802352e-03, -1.50347678e-02,\n",
       "       -8.08493048e-03,  1.88217331e-02,  8.42517056e-03, -3.78592350e-02,\n",
       "        1.24534788e-02, -7.94995204e-02, -2.15789191e-02,  1.20276539e-02,\n",
       "        1.74870472e-02,  8.74479711e-02,  6.64091185e-02,  3.13737318e-02,\n",
       "       -1.00628426e-02,  2.07700673e-02, -5.20163700e-02, -8.91336147e-03,\n",
       "        1.48542315e-01, -2.51266640e-03,  9.93156359e-02,  2.34929807e-02],\n",
       "      dtype=float32)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vector = model.encode([\"Well hi there\"])[0]\n",
    "vector"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "626add8d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "def cosine_similarity(a, b):\n",
    "    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))\n",
    "\n",
    "def how_similar(text1, text2):\n",
    "    vector1, vector2 = model.encode([text1, text2])\n",
    "    similarity = cosine_similarity(vector1, vector2)\n",
    "    print(f\"Similarity between {text1} and {text2} is {similarity*100:.1f}%\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "7f1e7f04",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Similarity between Java and C++ is 50.7%\n",
      "Similarity between Java and mug is 25.8%\n",
      "Similarity between Cup of Java and mug is 49.3%\n"
     ]
    }
   ],
   "source": [
    "how_similar(\"Java\", \"C++\")\n",
    "how_similar(\"Java\", \"mug\")\n",
    "how_similar(\"Cup of Java\", \"mug\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "d60e8a2f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def description(item):\n",
    "    text = item.prompt.replace(\"How much does this cost to the nearest dollar?\\n\\n\", \"\")\n",
    "    return text.split(\"\\n\\nPrice is $\")[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "37665268",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Delphi FG0166 Fuel Pump Module\\nDelphi brings 80 years of OE Heritage into each Delphi pump, ensuring quality and fitment for each Delphi part. Part is validated, tested and matched to the right vehicle application Delphi brings 80 years of OE Heritage into each Delphi assembly, ensuring quality and fitment for each Delphi part Always be sure to check and clean fuel tank to avoid unnecessary returns Rigorous OE-testing ensures the pump can withstand extreme temperatures Brand Delphi, Fit Type Vehicle Specific Fit, Dimensions LxWxH 19.7 x 7.7 x 5.1 inches, Weight 2.2 Pounds, Auto Part Position Unknown, Operation Mode Mechanical, Manufacturer Delphi, Model FUEL PUMP, Dimensions 19.7'"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "description(train[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a8592f7f",
   "metadata": {},
   "outputs": [],
   "source": [
    "NUMBER_OF_DOCUMENTS = len(train)\n",
    "\n",
    "# Uncomment the next line to embed only the first 20,000 documents instead of waiting for the full 400,000\n",
    "# NUMBER_OF_DOCUMENTS = 20000\n",
    "\n",
    "for i in tqdm(range(0, NUMBER_OF_DOCUMENTS, 1000)):\n",
    "    documents = [description(item) for item in train[i: i+1000]]\n",
    "    vectors = model.encode(documents).astype(float).tolist()\n",
    "    metadatas = [{\"category\": item.category, \"price\": item.price} for item in train[i: i+1000]]\n",
    "    ids = [f\"doc_{j}\" for j in range(i, i+len(documents))]\n",
    "    collection.add(\n",
    "        ids=ids,\n",
    "        documents=documents,\n",
    "        embeddings=vectors,\n",
    "        metadatas=metadatas\n",
    "    )"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.22"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/viz.ipynb
+++ b/viz.ipynb