Python_Repo/FinancialExtractTransformLoad.py at master · EveryTimeIWill18/Python_Repo · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import csv
import os
import pprint
from functools import wraps
from itertools import chain


"""
William Murphy
4/18/2018
"""

# Sequential (stacked) decorators are used here to mimic a data pipeline,
# i.e. an extract -> transform -> load (ETL) process.
# So far, this only demonstrates the extract portion.
# pandas is avoided on purpose, to demonstrate what can be done with pure Python.

def extract(f):
    """Decorate ``f`` so that calling it returns the requested files that exist.

    The returned wrapper has the signature ``wrapper(path, *args, **kwargs)``:

    * ``f(path, *args, **kwargs)`` is still called, but its return value is
      ignored.
    * ``os.path.dirname(path)`` is taken as the directory of interest. Note
      that for a path without a trailing separator this is the *parent*
      directory.
    * The process working directory is changed to that directory (and the new
      working directory is printed). A failure to change directory is printed
      and otherwise ignored.
    * The directory tree is walked recursively, and every positional argument
      in ``args`` that equals the name of a file found anywhere in the tree
      is kept.

    Returns:
        list: the matching entries of ``args``, in the order they were given.
        These are bare file names, not full paths, even when the match was
        found in a sub-directory.
    """
    @wraps(f)
    def wrapper(path, *args, **kwargs):
        # The wrapped function is invoked for its side effects only.
        f(path, *args, **kwargs)

        dirname = os.path.dirname(path)
        # Compare by value: an identity test (`is not`) between two separately
        # built strings is effectively always true.
        if os.getcwd() != dirname:
            try:
                os.chdir(dirname)
                print(os.getcwd())
            except OSError as e:
                # Best effort: report the problem and carry on with the walk.
                print(str(e))

        # --- collect every file name below dirname as one flat list
        found_names = list(
            chain.from_iterable(names for _, _, names in os.walk(dirname))
        )
        # --- keep only the requested names that actually exist
        return [a for a in args if a in found_names]
    return wrapper

def extract_to_csv(f):
    """Decorate ``f`` so that calling it returns the rows of the CSV files it names.

    ``f(*args, **kwargs)`` must return an iterable of file names (each one is
    converted with ``str``). Every file is opened in turn and parsed with
    ``csv.reader``; the rows of all files are concatenated, in order, into a
    single list. Header rows are not treated specially.

    Returns:
        list: one list of strings per CSV row, across all files.

    Raises:
        OSError: if one of the named files cannot be opened.
    """
    @wraps(f)
    def wrapper(*args, **kwargs):
        file_names = [str(name) for name in f(*args, **kwargs)]
        rows = []
        for file_name in file_names:
            # newline='' lets the csv module handle line endings itself, so
            # newlines embedded in quoted fields are preserved as written.
            with open(file_name, 'r', newline='') as handle:
                rows.extend(csv.reader(handle))
        return rows
    return wrapper


@extract_to_csv
@extract
def set_files(path, *args):
    """Pair ``path`` with the string form of every extra positional argument.

    Returns a 2-tuple ``(path, names)`` where ``names`` is a list holding
    ``str(a)`` for each ``a`` in ``args``.

    In this file the function is stacked under the ``extract`` and
    ``extract_to_csv`` decorators, so callers of the decorated name receive
    what those wrappers return rather than this tuple.
    """
    names = list(map(str, args))
    return path, names

# Example run (executes on import): ask the decorated pipeline for three CSV
# files under the directory derived from "E:\\" and pretty-print the result.
pprint.pprint(
    set_files(
        "E:\\",
        "^OVX.csv",
        "^VIX.csv",
        "^SP500TR.csv",
    )
)