Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
name: Tests

on:
push:
branches: [dev, main]
pull_request:
branches: [dev, main]

jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.11", "3.12", "3.13", "3.14"]

steps:
- uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v5

- name: Set up Python ${{ matrix.python-version }}
run: uv python install ${{ matrix.python-version }}

- name: Install dependencies
run: uv sync --all-groups --python ${{ matrix.python-version }}

- name: Run tests with coverage
run: uv run pytest -v --cov=suggests --cov-report=term-missing
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
*__pycache__
.claude
.coverage
.venv
archive
build
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ Reduce to new information obtained in suggestions. E.g. `abortion -> abortion la

```py
In [5]: edges = suggests.add_parent_nodes(edges)
In [6]: edges = edges.apply(suggests.add_metanodes, axis=1)
In [6]: edges = suggests.add_metanodes(edges)
In [7]: show_cols = ['source','target','grandparent','parent','source_add','target_add']
In [8]: edges[show_cols].head()
Out[9]:
Expand Down
17 changes: 12 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
[project]
name = "suggests"
version = "0.3.0"
version = "0.3.1a1"
description = "Algorithm auditing tools for search engine autocomplete"
license = "MIT"
readme = "README.md"
authors = [{ name = "Ronald E. Robertson", email = "rer@acm.org" }]
keywords = ["suggestions", "autocomplete", "google", "bing"]
requires-python = ">=3.10"
keywords = ["suggestions", "autocomplete", "google", "bing", "search engine", "search queries"]
requires-python = ">=3.11"
dependencies = [
"requests>=2.28",
"pandas>=2.0",
"numpy>=2.0",
"polars>=1.0",
"beautifulsoup4>=4.11",
]

Expand All @@ -20,6 +19,14 @@ homepage = "http://github.com/gitronald/suggests"
[project.scripts]
demo = 'scripts.demo:main'

[dependency-groups]
dev = [
"networkx>=3.0",
"pytest>=6.2",
"pytest-cov>=4.0",
"ruff>=0.15.4",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
32 changes: 19 additions & 13 deletions scripts/demo.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,35 @@
import json
"""Demo script for suggests package."""

import datetime
import json

import polars as pl

import suggests
import pandas as pd

def main():

def main() -> None:
crawl_id = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
get_suggests_tree_args = {
'root': 'dog',
'source': 'bing',
'max_depth': 1,
'crawl_id': crawl_id,
'save_to': f'./data/tests/suggests-{crawl_id}.json',
'sesh_headers': {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0'
}
"root": "dog",
"source": "bing",
"max_depth": 1,
"crawl_id": crawl_id,
"save_to": f"./data/tests/suggests-{crawl_id}.json",
"sesh_headers": {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0"
},
}
print(json.dumps(get_suggests_tree_args, indent=2))
tree = suggests.get_suggests_tree(**get_suggests_tree_args)
tree_df = pd.DataFrame(tree)
tree_df = pl.DataFrame(tree)
print(f"\nSuggestion Tree: ({tree_df.shape[0]:,}, {tree_df.shape[1]})")
print(tree_df.head())

edges = suggests.to_edgelist(tree)
print(f"Suggestion Network Edges: ({edges.shape[0]:,}, {edges.shape[1]})")
print(edges.head())


if __name__ == "__main__":
main()
main()
27 changes: 19 additions & 8 deletions suggests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
__version__ = "0.3.0"
"""Algorithm auditing tools for search engine autocomplete."""

from .suggests import get_suggests
from .suggests import get_suggests_tree
__version__ = "0.3.1a1"

from .parsing import parse_bing
from .parsing import parse_google
from .parsing import to_edgelist
from .parsing import (
add_metanodes,
add_parent_nodes,
parse_bing,
parse_google,
to_edgelist,
)
from .suggests import get_suggests, get_suggests_tree

from .parsing import add_parent_nodes
from .parsing import add_metanodes
__all__ = [
"add_metanodes",
"add_parent_nodes",
"get_suggests",
"get_suggests_tree",
"parse_bing",
"parse_google",
"to_edgelist",
]
123 changes: 69 additions & 54 deletions suggests/logger.py
Original file line number Diff line number Diff line change
@@ -1,79 +1,94 @@
""" Configure a logger using a dictionary
"""
"""Configure a logger using a dictionary."""

import logging
import logging.config

# Formatters: change what gets logged
minimal = '%(message)s'
detailed = '%(asctime)s | %(process)d | %(levelname)s | %(name)s | %(message)s '
formatters = {
'minimal': {'format': minimal},
'detailed': {'format': detailed}
}
minimal = "%(message)s"
detailed = "%(asctime)s | %(process)d | %(levelname)s | %(name)s | %(message)s "
formatters = {"minimal": {"format": minimal}, "detailed": {"format": detailed}}

class Logger(object):
""" Get logger and set console and file outputs

Ex:
```
from logger import Summary
log = Logger('summary.log').get_logger('mylogger')

```
class Logger:
"""Get logger and set console and file outputs.

Args:
file_name: Path for file logging output
file_format: Format type for file output ('minimal' or 'detailed')
file_mode: File open mode
console: Whether to enable console logging
console_format: Format type for console output ('minimal' or 'detailed')
console_level: Logging level for console output
"""
def __init__(self,
file_name='', file_format='detailed', file_mode='w',
console=True, console_format='detailed', console_level='DEBUG'):


def __init__(
self,
file_name: str = "",
file_format: str = "detailed",
file_mode: str = "w",
console: bool = True,
console_format: str = "detailed",
console_level: str = "DEBUG",
) -> None:
# Handlers: change file and console logging details
handlers = {}
handlers: dict[str, dict] = {}
if console:
assert console_format in formatters.keys(), \
f'Must select formatting type from {list(formatters.keys())}'
assert console_format in formatters, (
f"Must select formatting type from {list(formatters.keys())}"
)

handlers['console_handle'] = {
'class': 'logging.StreamHandler',
'level': 'DEBUG',
'formatter': console_format,
handlers["console_handle"] = {
"class": "logging.StreamHandler",
"level": "DEBUG",
"formatter": console_format,
}

if file_name:
assert type(file_name) is str, 'Must provide name for file logging'
assert file_format in formatters.keys(), \
f'Must select formatting type from {list(formatters.keys())}'
assert isinstance(file_name, str), "Must provide name for file logging"
assert file_format in formatters, (
f"Must select formatting type from {list(formatters.keys())}"
)

handlers['file_handle'] = {
'class': 'logging.FileHandler',
'level': 'INFO',
'formatter': file_format,
'filename': file_name,
'mode': file_mode
handlers["file_handle"] = {
"class": "logging.FileHandler",
"level": "INFO",
"formatter": file_format,
"filename": file_name,
"mode": file_mode,
}

# Loggers: change logging options for root and other packages
loggers = {
# Package logger (not root)
'suggests': {
'handlers': list(handlers.keys()),
'level': 'DEBUG',
'propagate': False
"suggests": {
"handlers": list(handlers.keys()),
"level": "DEBUG",
"propagate": False,
},
# External loggers
'requests': {'level': 'WARNING'},
'urllib3': {'level': 'WARNING'},
'matplotlib': {'level': 'WARNING'},
'chardet.charsetprober': {'level': 'INFO'},
'parso': {'level': 'INFO'} # Fix for ipython autocomplete bug
"requests": {"level": "WARNING"},
"urllib3": {"level": "WARNING"},
"matplotlib": {"level": "WARNING"},
"chardet.charsetprober": {"level": "INFO"},
"parso": {"level": "INFO"}, # Fix for ipython autocomplete bug
}

self.log_config = {
'version': 1,
'disable_existing_loggers': False,
'formatters': formatters,
'handlers': handlers,
'loggers': loggers
self.log_config = {
"version": 1,
"disable_existing_loggers": False,
"formatters": formatters,
"handlers": handlers,
"loggers": loggers,
}

def start(self, name="suggests"):

def start(self, name: str = "suggests") -> logging.Logger:
"""Initialize and return a named logger.

Args:
name: Logger name

Returns:
Configured logger instance
"""
logging.config.dictConfig(self.log_config)
return logging.getLogger(name)
Loading
Loading