This repository was archived by the owner on Dec 8, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetup.py
More file actions
132 lines (98 loc) · 2.77 KB
/
setup.py
File metadata and controls
132 lines (98 loc) · 2.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
from setuptools import setup, find_packages
from codecs import open
from os import path
__version__ = "0.1.24"
here = path.abspath(path.dirname(__file__))
long_description_for_pypi = """
# Spydy
spydy is a light-weight high-level web-crawling framework for fast-devlopment and high performance, which is inspired by unix pipeline.
---
[Code](https://github.com/superjcd/spydy)
[Document](https://superjcd.github.io/spydy/)
---
## Install
```
pip install spydy
```
## How to use
There are two ways of running spydy:
- one way is to prepare a configuration file, and run spydy from cmd:
```
spydy myconfig.cfg
```
`myconfig.cfg` may looks like below:
```
[Globals]
run_mode = async_forever
nworkers = 4
[PipeLine]
url = DummyUrls
request = AsyncHttpRequest
parser = DmozParser
log = MessageLog
store = CsvStore
[url]
url = https://dmoz-odp.org
repeat = 10
[store]
file_name = result.csv
```
- or run it from a python file(e.g. ` spider.py`):
```
from spydy.engine import Engine
from spydy.utils import check_configs
from spydy import urls, request, parsers, logs, store
myconfig = {
"Globals":{
"run_mode": "async_forever",
"nworkers": "4"
},
"PipeLine":[urls.DummyUrls(url="https://dmoz-odp.org", repeat=10),
request.AsyncHttpRequest(), parsers.DmozParser(), logs.MessageLog(), store.CsvStore(file_name=FILE_NAME)]
}
check_configs(myconfig)
spider = Engine.from_dict(myconfig)
spider.run()
```
then run it :
```
$ python spider.py
```
"""
# Read the repository README. NOTE(review): this value is currently
# unused — the setup() call below passes long_description_for_pypi
# instead; kept so the README stays available for future reuse.
with open(path.join(here, "README.md"), encoding="utf-8") as f:
    long_description = f.read()

# Parse requirements.txt: plain requirement lines go to install_requires,
# while "git+" VCS lines go to dependency_links (with the "git+" prefix
# stripped, as setuptools expects a bare URL there).
with open(path.join(here, "requirements.txt"), encoding="utf-8") as f:
    all_reqs = f.read().split("\n")

# Filter out blank/whitespace-only lines: requirements.txt conventionally
# ends with a newline, and the original code let the resulting "" entries
# leak into install_requires, which setuptools rejects as invalid
# requirement specifiers.
install_requires = [x.strip() for x in all_reqs if x.strip() and "git+" not in x]
dependency_links = [
    x.strip().replace("git+", "") for x in all_reqs if x.startswith("git+")
]
# Hand the package metadata to setuptools. Keyword arguments are grouped
# by theme purely for readability; setup() is insensitive to their order.
setup(
    # --- identity ---
    name="spydy",
    version=__version__,
    author="Jiang Chaodi",
    author_email="929760274@qq.com",
    license="BSD",
    # --- PyPI presentation ---
    description="light-weight high-level web-crawling framework",
    long_description=long_description_for_pypi,
    long_description_content_type="text/markdown",
    keywords="",
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Programming Language :: Python :: 3",
    ],
    # --- project links ---
    url="https://github.com/superjcd/spydy",
    download_url="https://github.com/superjcd/spydy/tarball/" + __version__,
    # --- package contents and CLI entry point ---
    packages=find_packages(exclude=["docs", "tests*"]),
    include_package_data=True,
    entry_points={
        "console_scripts": [
            "spydy=spydy.main:fire",
        ],
    },
    # --- dependencies (parsed above from requirements.txt) ---
    install_requires=install_requires,
    dependency_links=dependency_links,
)