LeadGen/backend/temp.py
2025-06-06 21:22:18 +05:30

421 lines
17 KiB
Python

# import os
# import pandas as pd
# from typing import List, Dict, Any
# import argparse
# from crewai import Agent, Task, Crew, LLM
# from tools import (
# GetUserLocationTool,
# SearchNearPointTool,
# PlaceSnapTool,
# GetPlaceDetailsTool,
# SearchNearRegionTool
# )
# # Initialize the LLM
# llm = LLM(
# model="gemini/gemini-2.0-flash",
# )
# # Create the agents
# location_finder = Agent(
# role="Location Specialist",
# goal="Determine the user's location or understand the location mentioned in the query",
# backstory="You are an expert in location services and geodata. You can accurately determine where users are located or understand locations they're interested in.",
# tools=[GetUserLocationTool()],
# llm=llm,
# verbose=True
# )
# place_researcher = Agent(
# role="Place Information Researcher",
# goal="Research and provide detailed information about surrounding places",
# backstory="You are an expert at finding and analyzing information about places. You can discover what's around a location and gather complete details about each place. Do not use the word 'place' in query",
# tools=[SearchNearRegionTool(), SearchNearPointTool(), PlaceSnapTool(), GetPlaceDetailsTool()],
# llm=llm,
# verbose=True
# )
# data_processor = Agent(
# role="Data Processing Specialist",
# goal="Process place data and organize it for Excel export",
# backstory="You are specialized in processing location data and organizing it into structured formats suitable for export. You ensure all required fields are captured and formatted properly.",
# llm=llm,
# verbose=True
# )
# def extract_place_details(place_data: Dict[Any, Any]) -> Dict[str, str]:
# """Extract relevant details from place data for Excel export"""
# # Initialize with default values
# details = {
# "name": "",
# "phone": "",
# "email": "",
# "address": "",
# "distance": "",
# "rating": "",
# "category": "",
# "website": ""
# }
# try:
# # Extract basic info
# if "name" in place_data:
# details["name"] = place_data["name"]
# # Extract contact info
# if "tel" in place_data:
# details["phone"] = place_data["tel"]
# # Extract location/address
# if "location" in place_data:
# loc = place_data["location"]
# address_parts = []
# if "address" in loc:
# address_parts.append(loc["address"])
# if "locality" in loc:
# address_parts.append(loc["locality"])
# if "region" in loc:
# address_parts.append(loc["region"])
# if "postcode" in loc:
# address_parts.append(loc["postcode"])
# if "country" in loc:
# address_parts.append(loc["country"])
# details["address"] = ", ".join(filter(None, address_parts))
# # Extract distance info (if available)
# if "distance" in place_data:
# details["distance"] = f"{place_data['distance']} meters"
# # Extract rating (if available)
# if "rating" in place_data:
# details["rating"] = f"{place_data['rating']}/10"
# # Extract category
# if "categories" in place_data and len(place_data["categories"]) > 0:
# details["category"] = place_data["categories"][0].get("name", "")
# # Extract website
# if "website" in place_data:
# details["website"] = place_data["website"]
# # Email may not be directly available in the Foursquare API
# # It might be included in the description or other fields depending on the data
# except Exception as e:
# print(f"Error extracting place details: {e}")
# return details
# def search_surrounding_places(radius: int = 1000, limit: int = 20, query: str = "") -> List[Dict[str, str]]:
# """Search for places surrounding the user's location and collect their details"""
# # Task to get user's location
# get_location_task = Task(
# description="Determine the user's current location by getting their coordinates.",
# agent=location_finder,
# expected_output="Latitude and longitude coordinates of the user's current location."
# )
# # Task to search for surrounding places
# search_places_task = Task(
# description=f"Using the coordinates, search for {query if query else 'places'} within {radius} meters of the user's location.",
# agent=place_researcher,
# expected_output="JSON data of surrounding places including fsq_id, name, category, and other available information.",
# context=[get_location_task]
# )
# # Task to get detailed information about each place
# get_details_task = Task(
# description="For each place found, gather detailed information including name, phone, email, address, and distance.",
# agent=place_researcher,
# expected_output="Complete JSON data with detailed information about each place.",
# context=[search_places_task]
# )
# # Task to process the data for Excel export
# process_data_task = Task(
# description="Process the gathered data and prepare it for Excel export.",
# agent=data_processor,
# expected_output="A list of dictionaries containing cleaned and formatted place details ready for Excel export.",
# context=[get_details_task]
# )
# # Create and execute the crew
# crew = Crew(
# agents=[location_finder, place_researcher, data_processor],
# tasks=[get_location_task, search_places_task, get_details_task, process_data_task],
# )
# result = crew.kickoff()
# place_data = []
# try:
# # Extract the location coordinates
# coordinates = get_location_task.output
# if coordinates and "," in coordinates:
# lat, lng = coordinates.split(",")
# # Use SearchNearPointTool directly to get places
# search_tool = SearchNearPointTool()
# places_result = search_tool._run(
# query=query if query else "business OR restaurant OR shop OR cafe OR store",
# ll=coordinates,
# radius=radius,
# limit=limit
# )
# if places_result and "results" in places_result:
# for place in places_result["results"]:
# # Get details for each place
# if "fsq_id" in place:
# details_tool = GetPlaceDetailsTool()
# place_details = details_tool._run(id=place["fsq_id"])
# if place_details:
# # Combine the search result and details
# combined_data = {**place, **place_details}
# # Extract required fields
# extracted_details = extract_place_details(combined_data)
# place_data.append(extracted_details)
# except Exception as e:
# print(f"Error processing place data: {e}")
# return place_data
# def export_to_excel(place_data: List[Dict[str, str]], filename: str = "surrounding_places.xlsx"):
# """Export the place data to an Excel file"""
# if not place_data:
# print("No place data to export")
# return False
# try:
# df = pd.DataFrame(place_data)
# df.to_excel(filename, index=False)
# print(f"Successfully exported {len(place_data)} places to {filename}")
# return True
# except Exception as e:
# print(f"Error exporting to Excel: {e}")
# return False
# def main():
# parser = argparse.ArgumentParser(description="Search for surrounding places and export to Excel")
# parser.add_argument("--radius", type=int, default=1000, help="Search radius in meters (default: 1000)")
# parser.add_argument("--limit", type=int, default=20, help="Maximum number of places to search (default: 20)")
# parser.add_argument("--query", type=str, default="", help="Optional search query (e.g., 'restaurant', 'cafe')")
# parser.add_argument("--output", type=str, default="surrounding_places.xlsx", help="Output Excel file name")
# args = parser.parse_args()
# print(f"Searching for {args.query if args.query else 'places'} within {args.radius}m...")
# place_data = search_surrounding_places(radius=args.radius, limit=args.limit, query=args.query)
# if place_data:
# export_to_excel(place_data, args.output)
# print(f"Found {len(place_data)} places. Data exported to {args.output}")
# else:
# print("No places found or error occurred during search.")
# if __name__ == "__main__":
# main()
import pandas as pd
import json
import re
from typing import List, Dict, Any
import argparse
from crewai import Agent, Task, Crew, LLM
from tools import (
GetUserLocationTool,
SearchNearPointTool,
PlaceSnapTool,
GetPlaceDetailsTool,
SearchNearRegionTool
)
# Language model handed to every agent defined below.
llm = LLM(
    model="gemini/gemini-2.0-flash",
)

# Agent that interprets the request and decides which kind of lookup is wanted.
task_picker = Agent(
    role="Expert Decision Maker",
    goal="Decide what information the user wants based on the query and choose the appropriate task to be performed",
    backstory="As an expert decision maker, you are able to take accurate decisions on what task the user is asking you to perform (whether the user has asked to get information on a location based on their location, or if they have given the name of the location and want information based on that place)",
    llm=llm,
    verbose=True,
)

# Agent that resolves the location; it is the only one given GetUserLocationTool.
location_finder = Agent(
    role="Location Specialist",
    goal="Determine the user's location or understand the location mentioned in the query",
    backstory="You are an expert in location services and geodata. You can accurately determine where users are located or understand locations they're interested in.",
    tools=[GetUserLocationTool()],
    llm=llm,
    verbose=True,
)

# Agent equipped with the search / snap / details tools for looking up places.
place_researcher = Agent(
    role="Place Information Researcher",
    goal="Research and provide detailed information about surrounding places",
    backstory="You are an expert at finding and analyzing information about places. You can discover what's around a location and gather complete details about each place.",
    tools=[
        SearchNearRegionTool(),
        SearchNearPointTool(),
        PlaceSnapTool(),
        GetPlaceDetailsTool(),
    ],
    llm=llm,
    verbose=True,
)

# Tool-less agent instructed to turn the gathered details into a JSON array.
data_processor = Agent(
    role="Data Processing Specialist",
    goal="Process place data and organize it for Excel export",
    backstory="You are specialized in processing location data and organizing it into structured formats suitable for export. You ensure all required fields are captured and formatted properly. You will return a properly formatted JSON array of places with all the required details.",
    llm=llm,
    verbose=True,
)
class JsonExtractor:
    """Helper class to extract JSON from agent output text."""

    # Markdown fenced block, optionally tagged "json"; group 1 is its content.
    _FENCED_BLOCK = re.compile(r'```(?:json)?\s*([\s\S]*?)\s*```')
    # Greedy span that starts at a "[" or "{" and runs to the last matching
    # closing bracket of the same kind.
    _BARE_JSON = re.compile(r'(\[[\s\S]*\]|\{[\s\S]*\})')
    # A comma directly before a closing bracket/brace (invalid in strict JSON).
    _TRAILING_COMMA = re.compile(r',\s*([}\]])')

    @staticmethod
    def _loads_lenient(candidate):
        """Parse *candidate* as JSON, retrying once with trailing commas removed.

        Raises:
            json.JSONDecodeError: If neither the raw nor the cleaned text parses.
        """
        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            cleaned = JsonExtractor._TRAILING_COMMA.sub(r'\1', candidate)
            return json.loads(cleaned)

    @staticmethod
    def extract_json(text):
        """Extract JSON data from text that might contain markdown code blocks.

        Candidates are tried in this order: the content of each fenced code
        block (first to last), then the first bracket-delimited span found in
        the whole text. The first candidate that parses is returned.

        Args:
            text: Agent output. Anything that is not a non-empty string yields
                ``None`` instead of raising.

        Returns:
            The decoded JSON value, or ``None`` when no candidate parses.
        """
        if not isinstance(text, str) or not text:
            return None

        candidates = [
            match.group(1)
            for match in JsonExtractor._FENCED_BLOCK.finditer(text)
        ]
        bare_match = JsonExtractor._BARE_JSON.search(text)
        if bare_match:
            candidates.append(bare_match.group(1))

        for candidate in candidates:
            try:
                return JsonExtractor._loads_lenient(candidate)
            except json.JSONDecodeError:
                # Not valid JSON even after cleanup; try the next candidate
                # rather than giving up on the first fenced block.
                continue
        return None
def _standardize_places(raw_places: Any) -> List[Dict[str, Any]]:
    """Reduce parsed agent output to records holding only the export fields.

    Anything other than a list yields an empty list; list items that are not
    dicts are skipped; missing fields default to an empty string.
    """
    if not isinstance(raw_places, list):
        return []
    export_fields = ("name", "fsq_id", "distance", "address", "phone", "email", "website")
    return [
        {field: place.get(field, "") for field in export_fields}
        for place in raw_places
        if isinstance(place, dict)
    ]


def search_surrounding_places(prompt) -> List[Dict[str, str]]:
    """Run the agent crew for a user query and return normalized place records.

    Five tasks are built (query analysis, location resolution, place search,
    detail lookup, data processing) and handed to a ``Crew``. The raw text of
    the crew result is then parsed with ``JsonExtractor.extract_json``.

    Args:
        prompt: The user's request. It is passed to ``crew.kickoff`` as the
            ``user_query`` input referenced by the task descriptions.

    Returns:
        A list of dicts with the keys ``name``, ``fsq_id``, ``distance``,
        ``address``, ``phone``, ``email`` and ``website``. An empty list is
        returned when the crew output does not contain a JSON array of
        objects.
    """
    task_analysis = Task(
        description="Analyze the user query: {user_query} and determine what type of location information they are seeking. And also check if they have defined any radius or limits or queries",
        agent=task_picker,
        expected_output="A clear determination of what the user is asking for: their current location info, recommendations near them, or info about a specific named location.",
    )
    task_locate = Task(
        description="Based on the user query: {user_query}, determine the location to focus on. Either get the user's current location or identify the location mentioned in the query.",
        agent=location_finder,
        expected_output="Coordinates (latitude,longitude) or a location name that will be used for subsequent tasks.",
    )
    # Task to search for surrounding places
    search_places_task = Task(
        description="Using the location from the previous task, gather relevant information about places based on the user query: {user_query}",
        agent=place_researcher,
        expected_output="JSON data of surrounding places including fsq_id, name, category, and other available information.",
        context=[task_locate],
    )
    # Task to get detailed information about each place
    get_details_task = Task(
        description="For each place found, gather detailed information including name, phone, email, address, and distance.",
        agent=place_researcher,
        expected_output="Complete JSON data with detailed information about each place.",
        context=[search_places_task],
    )
    # Task to process the data for Excel export
    process_data_task = Task(
        description=(
            "Process the gathered data and prepare it for Excel export.\n"
            "You MUST return a valid JSON array containing objects with these fields:\n"
            "- name: Name of the business/place\n"
            "- fsq_id: Foursquare ID\n"
            "- distance: Distance from user's location in meters (numeric value only)\n"
            "- address: Complete address\n"
            "- phone: Phone number if available\n"
            "- email: Email if available (can be empty)\n"
            "- website: Website URL if available\n"
            "Format your response as a valid JSON array inside a code block.\n"
        ),
        agent=data_processor,
        expected_output="A JSON array containing normalized place details ready for Excel export.",
        context=[get_details_task],
    )
    # Create and execute the crew. task_picker owns task_analysis, so it is
    # listed alongside the other agents instead of being left out of the crew.
    crew = Crew(
        agents=[task_picker, location_finder, place_researcher, data_processor],
        tasks=[task_analysis, task_locate, search_places_task, get_details_task, process_data_task],
    )
    result = crew.kickoff(inputs={"user_query": prompt})
    # Extract the JSON data from the result and keep only the export fields.
    # Always hand back a list so the declared return type holds on failure too.
    return _standardize_places(JsonExtractor.extract_json(result.raw))
def export_to_excel(place_data: List[Dict[str, str]], filename: str = "surrounding_places.xlsx"):
    """Write place records to an Excel workbook.

    Known columns are written first in a fixed, reader-friendly order; any
    other columns present in the data follow in their existing order.

    Args:
        place_data: Records to export, one dict per row.
        filename: Path of the workbook to write.

    Returns:
        True when the workbook was written, False when there was nothing to
        export or any exception occurred while building or writing it.
    """
    if not place_data:
        print("No place data to export")
        return False

    preferred = ("name", "address", "distance", "phone", "email", "website", "fsq_id")
    try:
        frame = pd.DataFrame(place_data)
        # Preferred columns that actually exist, then whatever else is there.
        leading = [label for label in preferred if label in frame.columns]
        trailing = [label for label in frame.columns if label not in preferred]
        frame = frame[leading + trailing]
        frame.to_excel(filename, index=False)
    except Exception as exc:
        print(f"Error exporting to Excel: {exc}")
        return False

    print(f"Successfully exported {len(place_data)} places to {filename}")
    return True
def main(prompt, output="surrounding_places.xlsx"):
    """Search for places matching *prompt* and export them to an Excel file.

    Args:
        prompt: The user's request, forwarded to ``search_surrounding_places``.
        output: Path of the Excel file to write. Defaults to the file name
            this function has always written to.
    """
    place_data = search_surrounding_places(prompt=prompt)
    if not place_data:
        print("No places found or error occurred during search.")
        return

    # export_to_excel reports failure through its return value; only announce
    # success when the file was actually written.
    if export_to_excel(place_data, output):
        print(f"Found {len(place_data)} places. Data exported to {output}")
    else:
        print(f"Found {len(place_data)} places, but exporting to {output} failed.")