diff --git a/.gitignore b/.gitignore index 7be4849..bef7aec 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,9 @@ +venv __pycache__/ .DS_Store edgefl/file_write/ +edgefl/tmp_dir/ +edgefl/env_files/mnist +EdgeLake .idea/ edgefl/logs/ diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 13566b8..0000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml -# Editor-based HTTP Client requests -/httpRequests/ -# Datasource local storage ignored files -/dataSources/ -/dataSources.local.xml diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml deleted file mode 100644 index 381c8f6..0000000 --- a/.idea/inspectionProfiles/Project_Default.xml +++ /dev/null @@ -1,38 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 105ce2d..0000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index a5250d8..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 35eb1dd..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/EdgeLake/docker_makefile/docker-compose-template.yaml b/EdgeLake/docker_makefile/docker-compose-template.yaml index ab0e9f5..9c79482 100644 --- a/EdgeLake/docker_makefile/docker-compose-template.yaml +++ b/EdgeLake/docker_makefile/docker-compose-template.yaml @@ -8,10 +8,10 @@ services: container_name: ${NODE_NAME} stdin_open: true tty: true - network_mode: host -# ports: -# - ${EDGELAKE_SERVER_PORT}:${EDGELAKE_SERVER_PORT} -# - ${EDGELAKE_REST_PORT}:${EDGELAKE_REST_PORT} + 
network_mode: bridge + ports: + - ${EDGELAKE_SERVER_PORT}:${EDGELAKE_SERVER_PORT} + - ${EDGELAKE_REST_PORT}:${EDGELAKE_REST_PORT} volumes: - ${NODE_NAME}-anylog:/app/EdgeLake/anylog - ${NODE_NAME}-blockchain:/app/EdgeLake/blockchain @@ -21,4 +21,4 @@ volumes: ${NODE_NAME}-anylog: ${NODE_NAME}-blockchain: ${NODE_NAME}-data: - ${NODE_NAME}-local-scripts: + ${NODE_NAME}-local-scripts: \ No newline at end of file diff --git a/EdgeLake/docker_makefile/edgelake_operator1.env b/EdgeLake/docker_makefile/edgelake_operator1.env index 6749bcc..e12e6ad 100644 --- a/EdgeLake/docker_makefile/edgelake_operator1.env +++ b/EdgeLake/docker_makefile/edgelake_operator1.env @@ -58,7 +58,7 @@ MEMORY=false # TCP connection information for Master Node ## Use your Master node's docker IP ## ## Command docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' edgelake-master ## -LEDGER_CONN=172.18.0.2:32048 +LEDGER_CONN=172.17.0.2:32048 # How often to sync from blockchain BLOCKCHAIN_SYNC=10 seconds @@ -117,4 +117,4 @@ SYSLOG_MONITORING=false #--- Advanced Settings --- # Whether to automatically run a local (or personalized) script at the end of the process -DEPLOY_LOCAL_SCRIPT=false +DEPLOY_LOCAL_SCRIPT=false \ No newline at end of file diff --git a/EdgeLake/docker_makefile/edgelake_operator2.env b/EdgeLake/docker_makefile/edgelake_operator2.env index 4e55b11..3079e9c 100644 --- a/EdgeLake/docker_makefile/edgelake_operator2.env +++ b/EdgeLake/docker_makefile/edgelake_operator2.env @@ -58,7 +58,7 @@ MEMORY=false # TCP connection information for Master Node ## Use your Master node's docker IP ## ## Command docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' edgelake-master ## -LEDGER_CONN=172.18.0.2:32048 +LEDGER_CONN=172.17.0.2:32048 # How often to sync from blockchain BLOCKCHAIN_SYNC=10 seconds @@ -117,4 +117,4 @@ SYSLOG_MONITORING=false #--- Advanced Settings --- # Whether to automatically run a local (or personalized) script at the end of the 
process -DEPLOY_LOCAL_SCRIPT=false +DEPLOY_LOCAL_SCRIPT=false \ No newline at end of file diff --git a/EdgeLake/docker_makefile/edgelake_operator3.env b/EdgeLake/docker_makefile/edgelake_operator3.env index d8a899d..858bad4 100644 --- a/EdgeLake/docker_makefile/edgelake_operator3.env +++ b/EdgeLake/docker_makefile/edgelake_operator3.env @@ -58,7 +58,7 @@ MEMORY=false # TCP connection information for Master Node ## Use your Master node's docker IP ## ## Command docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' edgelake-master ## -LEDGER_CONN=172.18.0.2:32048 +LEDGER_CONN=172.17.0.2:32048 # How often to sync from blockchain BLOCKCHAIN_SYNC=10 seconds @@ -117,4 +117,4 @@ SYSLOG_MONITORING=false #--- Advanced Settings --- # Whether to automatically run a local (or personalized) script at the end of the process -DEPLOY_LOCAL_SCRIPT=false +DEPLOY_LOCAL_SCRIPT=false \ No newline at end of file diff --git a/EdgeLake/docker_makefile/edgelake_operator4.env b/EdgeLake/docker_makefile/edgelake_operator4.env index 8910873..c2ac2c9 100644 --- a/EdgeLake/docker_makefile/edgelake_operator4.env +++ b/EdgeLake/docker_makefile/edgelake_operator4.env @@ -28,7 +28,7 @@ DB_USER="demo" # Password correlated to database user DB_PASSWD="passwd" # Database IP address -DB_IP=192.168.1.148 +DB_IP=192.168.56.1 # Database port number DB_PORT=5432 # Whether to set autocommit data @@ -58,7 +58,7 @@ MEMORY=false # TCP connection information for Master Node ## Use your Master node's docker IP ## ## Command docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' edgelake-master ## -LEDGER_CONN=74.207.235.89:32048 +LEDGER_CONN=172.19.0.2:32048 # How often to sync from blockchain BLOCKCHAIN_SYNC=10 seconds diff --git a/EdgeLake/docker_makefile/master.env b/EdgeLake/docker_makefile/master.env index 10609f1..5c8a23c 100644 --- a/EdgeLake/docker_makefile/master.env +++ b/EdgeLake/docker_makefile/master.env @@ -1,5 +1,5 @@ EDGELAKE_TYPE=master 
-TAG=1.3.2412.8-roy-arm64 +TAG=1.3.2501-arm64 EDGELAKE_SERVER_PORT=32048 EDGELAKE_REST_PORT=32049 NODE_NAME=master diff --git a/EdgeLake/docker_makefile/operator1.env b/EdgeLake/docker_makefile/operator1.env index 07465e5..35bf2e4 100644 --- a/EdgeLake/docker_makefile/operator1.env +++ b/EdgeLake/docker_makefile/operator1.env @@ -1,5 +1,5 @@ EDGELAKE_TYPE=operator -TAG=1.3.2412.8-roy-arm64 +TAG=1.3.2501-arm64 EDGELAKE_SERVER_PORT=32148 EDGELAKE_REST_PORT=32149 NODE_NAME=operator1 diff --git a/EdgeLake/docker_makefile/operator2.env b/EdgeLake/docker_makefile/operator2.env index fc8563b..36b04b6 100644 --- a/EdgeLake/docker_makefile/operator2.env +++ b/EdgeLake/docker_makefile/operator2.env @@ -1,5 +1,5 @@ EDGELAKE_TYPE=operator -TAG=1.3.2412.8-roy-arm64 +TAG=1.3.2501-arm64 EDGELAKE_SERVER_PORT=32248 EDGELAKE_REST_PORT=32249 NODE_NAME=operator2 diff --git a/EdgeLake/docker_makefile/operator3.env b/EdgeLake/docker_makefile/operator3.env index 9d9a9b5..2c172ce 100644 --- a/EdgeLake/docker_makefile/operator3.env +++ b/EdgeLake/docker_makefile/operator3.env @@ -1,5 +1,5 @@ EDGELAKE_TYPE=operator -TAG=1.3.2412.8-roy-arm64 +TAG=1.3.2501-arm64 EDGELAKE_SERVER_PORT=32348 EDGELAKE_REST_PORT=32349 NODE_NAME=operator3 diff --git a/README.md b/README.md index 78b1ec1..b9919e5 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ consisting of three training nodes and one aggregator node. Each node will utili own EdgeLake node, such that we will deploy four EdgeLake nodes, three of which have operator roles and one with the master role. The master role is a normal EdgeLake operator node but also emulates the same blockchain-like functionality of the blockchain-back shared -metadata layer. For more infomation about EdgeLake and how it operates, check the [EdgeLake website](https://edgelake.github.io/). +metadata layer. For more information about EdgeLake and how it operates, check the [EdgeLake website](https://edgelake.github.io/). 
The simulation includes the MNIST dataset, where three nodes collaboratively train a global model with MNIST data local to each node (i.e., there is no data movement.). Since the simulation instructions @@ -16,7 +16,7 @@ so emulate physically distributed data. Nevertheless, each node will utilize its operator node (running in a Docker container) to truly simulate a distributed environment. In addition, there is another example custom data handle from our Winniio partners. This dataset -is comprised of room temperature data used to predict the temperature of a classroom in two hours. +consists of room temperature data used to predict the temperature of a classroom in two hours. Before you get started, please follow the configuration steps precisely. @@ -28,45 +28,45 @@ Assumptions: Install all necessary Python packages. Tested on Python3.12. ```bash cd Anylog-Edgelake-Federated-Learning-Platform -pip install -r edgefl/requirements.txt +pip install -r requirements.txt ``` ## Deploy Postgres container Postgres will become available on your inet IP address. You can determine this through the `ifconfig` command. ```bash * Start Docker * -cd edgefl/EdgeLake/postgres +cd EdgeLake/postgres docker compose up -d ``` ## Deploy EdgeLake Master node ```bash -cd edgefl/EdgeLake -make up EDGELAKE_TYPE=master TAG=1.3.2412.8-roy-arm64 EDGELAKE_SERVER_PORT=32048 EDGELAKE_REST_PORT=32049 NODE_NAME=master +cd EdgeLake/ +make up EDGELAKE_TYPE=master TAG=1.3.2501 EDGELAKE_SERVER_PORT=32048 EDGELAKE_REST_PORT=32049 NODE_NAME=master ``` Now we need to determine the Master node's Docker IP address. Issue the following command ```bash -cd edgefl/EdgeLake +cd EdgeLake/ docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' master ``` With this IP, we can now deploy our three EdgeLake operator nodes. For example, let's assume it's `192.1.1.1`. 
## Deploy EdgeLake Operator node -Update line 61 (LEDGER_CONN) value in the file `edgefl/EdgeLake/docker_makefile/edgelake_operator1.env` +Update line 61 (LEDGER_CONN) value in the file `EdgeLake/docker_makefile/edgelake_operator1.env` to be `LEDGER_CONN=192.1.1.1:32048` (note that you do not need to change the port). -In addition, update the `DB_IP` in line 31 with the `192.1.1.1` IP. +In addition, update the `DB_IP` in line 31 with the Docker network IP of the Postgres container. Do the same for the following files: -- `edgefl/EdgeLake/docker_makefile/edgelake_operator2.env` -- `edgefl/EdgeLake/docker_makefile/edgelake_operator3.env` +- `EdgeLake/docker_makefile/edgelake_operator2.env` +- `EdgeLake/docker_makefile/edgelake_operator3.env` Now we can start the operator nodes. ```bash -cd edgefl/EdgeLake -make up EDGELAKE_TYPE=operator TAG=1.3.2412.8-roy-arm64 EDGELAKE_SERVER_PORT=32148 EDGELAKE_REST_PORT=32149 NODE_NAME=operator1 -make up EDGELAKE_TYPE=operator TAG=1.3.2412.8-roy-arm64 EDGELAKE_SERVER_PORT=32248 EDGELAKE_REST_PORT=32249 NODE_NAME=operator2 -make up EDGELAKE_TYPE=operator TAG=1.3.2412.8-roy-arm64 EDGELAKE_SERVER_PORT=32348 EDGELAKE_REST_PORT=32349 NODE_NAME=operator3 +cd EdgeLake/ +make up EDGELAKE_TYPE=operator TAG=1.3.2501 EDGELAKE_SERVER_PORT=32148 EDGELAKE_REST_PORT=32149 NODE_NAME=operator1 +make up EDGELAKE_TYPE=operator TAG=1.3.2501 EDGELAKE_SERVER_PORT=32248 EDGELAKE_REST_PORT=32249 NODE_NAME=operator2 +make up EDGELAKE_TYPE=operator TAG=1.3.2501 EDGELAKE_SERVER_PORT=32348 EDGELAKE_REST_PORT=32349 NODE_NAME=operator3 ``` ## Validating your EdgeLake network is properly setup @@ -97,7 +97,7 @@ please contact the EdgeLake maintainers through [EdgeLake's Slack Channel](https (the join link is at the bottom of the page). ## Setting up training node and aggregator configurations -We now need to update the env files in the `edgefl/env_files/` directory. 
To do this, we need to update +We now need to update the env files in the `edgefl/env_files/mnist/` directory. To do this, we need to update the listed variables in the following files `mnist1.env`, `mnist2.env`, `mnist3.env` with the inet IP (e.g., `192.1.1.1`): - `EXTERNAL_IP` - `EXTERNAL_TCP_IP_PORT` @@ -117,7 +117,7 @@ Note that this only needs to be done once. This will create 3 tables `node_node1 in the `mnist_fl` database. ```bash cd edgefl -dotenv -f env_files/mnist1.env run -- python -m data.mnist.mnist_db_script +dotenv -f env_files/mnist/mnist1.env run -- python -m data.mnist.mnist_db_script ``` If you want to train on more (or less) data edit lines `153` and `154` in `edgefl/data/mnist/mnist_db_script.py`. @@ -130,14 +130,14 @@ Now that data is loaded into the database continue to the next step. Note to execute the below commands in a new terminal. ```bash cd edgefl -dotenv -f env_files/mnist-agg.env run -- python -m platform_components.aggregator.aggregator_server -dotenv -f env_files/mnist1.env run -- python -m platform_components.node.node_server --p 8081 -dotenv -f env_files/mnist2.env run -- python -m platform_components.node.node_server --p 8082 -dotenv -f env_files/mnist3.env run -- python -m platform_components.node.node_server --p 8083 +dotenv -f env_files/mnist/mnist-agg.env run -- uvicorn platform_components.aggregator.aggregator_server:app --host 0.0.0.0 --port 8080 +dotenv -f env_files/mnist/mnist1.env run -- uvicorn platform_components.node.node_server:app --host 0.0.0.0 --port 8081 +dotenv -f env_files/mnist/mnist2.env run -- uvicorn platform_components.node.node_server:app --host 0.0.0.0 --port 8082 +dotenv -f env_files/mnist/mnist3.env run -- uvicorn platform_components.node.node_server:app --host 0.0.0.0 --port 8083 ``` Once all the nodes are running. We can start the training process. Note that you can view the -predefined training application file here: `edgefl/platform_components/data_handlers/custom_data_handler.py`. 
+predefined training application file here: `custom_data_handler.py`. ## Initialize model parameters and training application, start training, executing inference Execute the following `curl` command to initialize training. As a result of this command, @@ -154,7 +154,11 @@ curl -X POST http://localhost:8080/init \ "http://localhost:8081", "http://localhost:8082", "http://localhost:8083" - ] + ], + "index": "test-index", + "module": "MnistDataHandler", + "module_file": "custom_data_handler.py", + "db_name": "mnist_fl" }' ``` After, start the training process: @@ -163,28 +167,116 @@ curl -X POST http://localhost:8080/start-training \ -H "Content-Type: application/json" \ -d '{ "totalRounds": 10, - "minParams": 3 + "minParams": 3, + "index": "test-index" }' ``` `totalRounds` defines how many continuous rounds to train for. `minParams` defines how many parameters -the aggregator should wait for before starting the next round. +the aggregator should wait for before starting the next round. + +At any point during the training process, you can add additional nodes to the process by calling initialization again on the new nodes +(must use the same `index`) and `minParams` will be dynamically adjusted as necessary. +```bash +curl -X POST http://localhost:8080/init \ +-H "Content-Type: application/json" \ +-d '{ + "nodeUrls": [ + "http://localhost:8084" + ], + "index": "test-index", + "module": "MnistDataHandler", + "module_file": "custom_data_handler.py", + "db_name": "mnist_fl" +}' +``` + +You can also update `minParams` as well during the training process (or anytime after node initialization). The specified `index` +must exist in order to update `minParams`. +```bash +curl -X POST http://localhost:8080/update-minParams \ +-H "Content-Type: application/json" \ +-d '{ + "updatedMinParams": 3, + "index": "test-index" +}' +``` + +Once the training process is complete, you may choose to do additional rounds of training on the same model. 
+```bash +curl -X POST http://localhost:8080/continue-training \ + -H "Content-Type: application/json" \ + -d '{ + "additionalRounds": 3, + "minParams": 4, + "index": "test-index" + }' + ``` At any point, you can execute edge inference directly on the node. This can be done on each training node. The output will be the accuracy based on the local test data held out from training. ```bash -curl -X POST http://localhost:8081/inference -curl -X POST http://localhost:8082/inference -curl -X POST http://localhost:8083/inference +curl -X POST http://localhost:8081/inference/test-index +curl -X POST http://localhost:8082/inference/test-index +curl -X POST http://localhost:8083/inference/test-index +curl -X POST http://localhost:8084/inference/test-index ``` An example output looks like this: ```bash -curl -X POST http://localhost:8081/inference ; curl -X POST http://localhost:8082/inference ; curl -X POST http://localhost:8083/inference -{"message":"Inference completed successfully","model_accuracy":"92.0","status":"success"} -{"message":"Inference completed successfully","model_accuracy":"88.0","status":"success"} -{"message":"Inference completed successfully","model_accuracy":"86.0","status":"success"} +curl -X POST http://localhost:8081/inference/test-index ; curl -X POST http://localhost:8082/inference/test-index ; curl -X POST http://localhost:8083/inference/test-index ; curl -X POST http://localhost:8084/inference/test-index +{"index":"test-index","message":"Inference completed successfully","model_accuracy":"92.0","status":"success"} +{"index":"test-index","message":"Inference completed successfully","model_accuracy":"88.0","status":"success"} +{"index":"test-index","message":"Inference completed successfully","model_accuracy":"86.0","status":"success"} +{"index":"test-index","message":"Inference completed successfully","model_accuracy":"84.0","status":"success"} +``` + +You can also do a direct inference on the aggregator which requires inputting test data and its 
test +labels (i.e. expected predictions). The label must correspond to the given input and will be used to +compare against the actual predictions of the model's testing output. The data type of the test data +can be anything as long as it fits the data type of the dataset. Below is an example for MNIST: +```bash +curl -X POST http://localhost:8080/direct-inference/test-index \ +-H "Content-Type: application/json" \ +-d '{ + "input": [ + [0,244,281,...(780 more numbers),0] + ], + "labels": [ + 3 + ] +}' +``` + +Here is one for the WINNIIO dataset: +```bash +curl -X POST http://localhost:8080/direct-inference/test-index \ +-H "Content-Type: application/json" \ +-d '{ + "input": [ + { + "actuatorState": "9770", + "co2Value": "418", + "eventCount": "0", + "humidity": "33.8", + "switchStatus": "0.021216271052304", + "temperature": "22.02" + }, + { + "actuatorState": "0", + "co2Value": "425.8333333333333", + "eventCount": "0", + "humidity": "48.22", + "switchStatus": "0.0137001034912", + "temperature": "21.67" + } + ], + "labels": [ + "22.02", + "21.59" + ] +}' ``` ## Resolving common issues @@ -228,26 +320,154 @@ If not, then your IP may be wrong. Note, to detach from EdgeLake, press ctrl+p+q simultaneously. +### Chest-Xray Bounding Box Model/Data Handler + +- Ensure that the kaggle package is installed via requirements.txt +- Go to [Kaggle](https://www.kaggle.com) and create/sign-in to your Kaggle account +- Go to your account settings +- Scroll down to API and "create new token" and download the JSON +- Add the json to /home/{user}/.config/kaggle/kaggle.json + ## Redoing simulation / Clean up To redo the simulation, you need to delete the `edgefl/file_write` directory. In addition, you need to kill and restart the EdgeLake operators and master node. 
To do so, follow the following instructions: ```bash -cd edgefl/EdgeLake -make clean EDGELAKE_TYPE=master TAG=1.3.2412.8-roy-arm64 EDGELAKE_SERVER_PORT=32048 EDGELAKE_REST_PORT=32049 NODE_NAME=master -make clean EDGELAKE_TYPE=operator TAG=1.3.2412.8-roy-arm64 EDGELAKE_SERVER_PORT=32148 EDGELAKE_REST_PORT=32149 NODE_NAME=operator1 -make clean EDGELAKE_TYPE=operator TAG=1.3.2412.8-roy-arm64 EDGELAKE_SERVER_PORT=32248 EDGELAKE_REST_PORT=32249 NODE_NAME=operator2 -make clean EDGELAKE_TYPE=operator TAG=1.3.2412.8-roy-arm64 EDGELAKE_SERVER_PORT=32348 EDGELAKE_REST_PORT=32349 NODE_NAME=operator3 +cd EdgeLake/ +make clean EDGELAKE_TYPE=master TAG=1.3.2501 EDGELAKE_SERVER_PORT=32048 EDGELAKE_REST_PORT=32049 NODE_NAME=master +make clean EDGELAKE_TYPE=operator TAG=1.3.2501 EDGELAKE_SERVER_PORT=32148 EDGELAKE_REST_PORT=32149 NODE_NAME=operator1 +make clean EDGELAKE_TYPE=operator TAG=1.3.2501 EDGELAKE_SERVER_PORT=32248 EDGELAKE_REST_PORT=32249 NODE_NAME=operator2 +make clean EDGELAKE_TYPE=operator TAG=1.3.2501 EDGELAKE_SERVER_PORT=32348 EDGELAKE_REST_PORT=32349 NODE_NAME=operator3 ``` Note that you do not need to restart Postgres. After this step, if you want to restart the simulation follow the Deploy EdgeLake Operator/Master Node from above. To stop Postgres: ```bash -cd edgefl/EdgeLake/postgres +cd EdgeLake/postgres docker compose down ``` +## Docker Containerization of APIs + +The APIs are containerized using Docker. 
Before starting the APIs, ensure that +```bash +edgefl/env_files/mnist-docker/mnist1.env +edgefl/env_files/mnist-docker/mnist2.env +edgefl/env_files/mnist-docker/mnist3.env +``` +are configured like this: +```bash +GITHUB_DIR=/app/edgefl + +TRAINING_APPLICATION_DIR=platform_components/data_handlers +MODULE_NAME=MnistDataHandler + +PORT= #(aggregator port num + operator num = operator port num) +SERVER_TYPE=node +TMP_DIR=tmp_dir/node/ +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP=":32149" +EXTERNAL_TCP_IP_PORT=":32148" + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="mnist_fl" +PSQL_DB_USER="demo" +PSQL_DB_PASSWORD="passwd" +PSQL_HOST= +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="file_write" + +EDGELAKE_DOCKER_RUNNING="True" +EDGELAKE_DOCKER_CONTAINER_NAME="operator" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" +``` +The aggregator env file, +```bash +edgefl/env_files/mnist-docker/mnist-agg.env +``` + +Should be configured like this: +```bash +GITHUB_DIR=/app/edgefl/ + +TRAINING_APPLICATION_DIR=platform_components/data_handlers +MODULE_NAME=MnistDataHandler + +PORT=8080 +SERVER_TYPE=aggregator + +TMP_DIR=tmp_dir/agg/ +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP=":32049" +EXTERNAL_TCP_IP_PORT=":32048" + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="mnist_fl" +PSQL_DB_USER="demo" +PSQL_DB_PASSWORD="passwd" +PSQL_HOST= +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="file_write" +AGG_NAME=agg + +EDGELAKE_DOCKER_RUNNING="True" +EDGELAKE_DOCKER_CONTAINER_NAME=master +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" +``` +To build the image, run the following command from the root directory of the project: + +```bash +docker build -t edgefl:latest -f api-containers/Dockerfile . +``` + +You can run any of the APIs using Docker Compose. The `docker-compose.yml` file in the `api-containers` directory defines the services for the aggregator and nodes. 
+ +To run all of the APIs: +```bash +cd api-containers +docker compose up -d +``` + +To run only specific API services in the `docker-compose.yml` file, you can add the `--no-deps` flag to avoid starting dependent services. This is useful for testing or development purposes. +The template for running a set of services is as follows: +```bash +cd api-containers +docker compose up -d --no-deps <service1> <service2> ... +``` +Where `<service1>`, `<service2>`, etc. are the names of services defined in the `docker-compose.yml` file. + + +Example A: to run only the Aggregator +```bash +cd api-containers +docker compose up --no-deps -d aggregator +``` +Example B: to run the aggregator and two nodes +```bash +cd api-containers +docker compose up --no-deps -d aggregator node1 node2 +``` +You can then add a node by running the following command: +```bash +docker compose up --no-deps -d node3 +``` + +To see the endpoints and interact with the APIs, you can use the following URLs: +```bash +127.0.0.1:8080/docs # aggregator + +127.0.0.1:8081/docs # nodes +127.0.0.1:8082/docs +127.0.0.1:8083/docs +``` + +To take down the containers, simply run: +```bash +docker compose down +``` @@ -467,4 +687,4 @@ mongosh --version ```bash brew services start mongodb-community mongod --dbpath /usr/local/bin/mongodb/var/mongodb --logpath /usr/local/bin/mongodb/log/mongodb/mongo.log -``` \ No newline at end of file +``` diff --git a/anylog_calls/README.md b/anylog_calls/README.md new file mode 100644 index 0000000..9627134 --- /dev/null +++ b/anylog_calls/README.md @@ -0,0 +1,22 @@ +# Store data into AnyLog + +**Steps** +1. 
Connect to database - this can be done by updating the configurations when running deployment-scripts or +running the commands in [database_connection.al](database_connection.al) + +I used docker containers, and updated the [operator configurations](https://github.com/AnyLog-co/docker-compose/tree/os-dev/docker-makefile/operator-configs) +as follows: +* In _basic configs_ + * Enable NoSQL + * Default database to `mnist_fl` + * store data in NoSQL database + * reuse images +* In _advanced configs_ + * Enable using Remote CLI -- this is for querying the data + +2. Declare mapping policy on the blockchain - [anylog_policy.al](anylog_policy.al) + +3. Insert data using [publish_data.py](publish_data.py) + +4. [Query Data](get_data.py) - The example utilizes an EdgeLake REST command to get information about the relevant files +and then accesses them directly from MongoDB. \ No newline at end of file diff --git a/anylog_calls/anylog_policy.al b/anylog_calls/anylog_policy.al new file mode 100644 index 0000000..9f1f6cc --- /dev/null +++ b/anylog_calls/anylog_policy.al @@ -0,0 +1,166 @@ +#----------------------------------------------------------------------------------------------------------------------# +# :steps: +# 1. check if policy exists +# 2. create policy +# 3. declare policy +# 4. 
`run msg client` to accept policies via REST POST +# :sample policy: +# {'mapping': { +# 'id' : 'ai-mnist-fl', +# 'dbms' : 'bring [dbms]', +# 'table' : 'bring [table]', +# 'schema' : { +# 'timestamp' : { +# 'type' : 'timestamp', +# 'default' : 'now()', +# 'bring' : '[timestamp]'}, +# 'round_number' : { +# 'type' : 'int', +# 'default' : -1, +# 'bring' : '[round_number]' +# }, +# 'data_type' : { +# 'type' : 'string', +# 'default' : 'train', +# 'bring' : '[data_type]'}, +# 'label' : { +# 'type' : 'int', +# 'default' : 1, +# 'bring' : '[label]' +# }, +# 'file' : { +# 'blob' : True, +# 'bring' : '[image]', +# 'extension' : 'png', +# 'apply' : 'opencv', +# 'hash' : 'md5', +# 'type' : 'varchar' +# } +# }, +# 'date' : '2025-01-15T03:55:00.280413Z', +# 'ledger' : 'global' +# }} +# +# :sample data: +# { +# 'dbms': 'mnist_fl', +# 'table': 'train_node1', +# 'timestamp': '2025-01-13T19:03:17.780102', +# 'round_number': 1, +# 'data_type': 'train', +# 'image': [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +# ... +# 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], +# 'label': 5 +# } +# } +#----------------------------------------------------------------------------------------------------------------------# + + +on error ignore +set create_policy = false + +:set-params: +policy_id = ai-mnist-fl +policy_id = blockchain get mapping where id = !policy_id +if !policy_id goto run-mqtt +if not !policy_id and !create_policy == true then goto policy-error + + + + +:publish-policy: +blockchain prepare policy !new_policy + +# when using a master node +blockchain insert where policy=!new_policy and local=true and master=!ledger_conn + +# when using a real blockchain (ex. 
optimism) +# blockchain insert where policy=!new_policy and local=true and blockchain=optimism + +if !error_code == 1 then goto sign-policy-error +if !error_code == 2 then goto prepare-policy-error +if !error_code == 3 then goto declare-policy-error + +set create_policy = true +goto set-params + +:run-mqtt: +on error goto msg-error + + +:end-script: +end script + +:terminate-scripts: +exit scripts + +:test-policy-error: +echo "Invalid JSON format, cannot declare policy" +goto end-script + +:sign-policy-error: +print "Failed to sign cluster policy" +goto terminate-scripts + +:prepare-policy-error: +print "Failed to prepare mapping policy for publishing on blockchain" +goto terminate-scripts + +:declare-policy-error: +print "Failed to declare mapping policy on blockchain" +goto terminate-scripts + +:policy-error: +print "Failed to publish mapping policy for an unknown reason" +goto terminate-scripts + + +:msg-error: +echo "Failed to deploy MQTT process" +goto end-script + + + + + + diff --git a/anylog_calls/database_connection.al b/anylog_calls/database_connection.al new file mode 100644 index 0000000..2f9fd16 --- /dev/null +++ b/anylog_calls/database_connection.al @@ -0,0 +1,74 @@ +#----------------------------------------------------------------------------------------------------------------------# +# Connect to database(s) and accept blob data +#----------------------------------------------------------------------------------------------------------------------# +on error ignore + +:set-params: +default_dbms = mnist_fl +db_type = psql +db_user = admin +db_passwd = demo +db_ip = 10.0.0.131 +db_port = 5432 + +partition_column = insert_timestamp +partition_interval = day + +nosql_type = mongo +nosql_ip = 10.0.0.131 +nosql_port = 27017 +nosql_user = admin +nosql_passwd = passwd + +:postgres-conn: +on error goto psql-conn-error + + +:set-partition: +on error call set-partition-error +partition !default_dbms * using !partition_column by day + + +:mongo-conn: +on error 
goto mongo-conn-error + +if !nosql_user and !nosql_passwd then + +else connect dbms !default_dbms where type=!nosql_type and ip=!nosql_ip and port=!nosql_port + +on error call blobs-archiver-error + + +:end-script: +end script + +:terminate-scripts: +exit scripts + +:operator-db-error: +echo "Error: Unable to connect to almgm database with db type: " !db_type ". Cannot continue" +goto terminate-scripts + +:set-partition-error: +echo "Error: Failed to set partitioning" +return + + diff --git a/anylog_calls/declare_policy.py b/anylog_calls/declare_policy.py new file mode 100644 index 0000000..1b9c3ba --- /dev/null +++ b/anylog_calls/declare_policy.py @@ -0,0 +1,110 @@ +import json +import requests +from tensorflow.tools.pip_package.v2.setup import headers + +POLICY_ID = "ai-mnist-fl" +POLICY = { + "mapping": { + "id": POLICY_ID, + "dbms": "bring [dbms]", + "table": "bring [table]", + "schema": { + "timestamp": { + "type": "timestamp", + "default": "now()", + "bring": "[timestamp]" + }, + "round_number": { + "type": "int", + "default": -1, + "bring": "[round_number]" + }, + "data_type": { + "type": "string", + "default": "train", + "bring": "[data_type]" + }, + "label": { + "type": "int", + "default": 1, + "bring": "[label]" + }, + "file": { + "blob": True, + "bring": "[image]", + "extension": "png", + "apply": "opencv", + "hash": "md5", + "type": "varchar" + } + } + } +} +CONN = '10.0.0.131:32149' +LEDGER_CONN = '10.0.0.131:32048' + + +def get_policy(): + headers = { + "command": f"blockchain get mapping where id={POLICY} bring.count", + "User-Agent": "AnyLog/1.23" + } + status = None + try: + r = requests.get(url=f"http://{CONN}", headers=headers) + except Exception as error: + raise Exception(f"Failed to get policy from {CONN} (Error: {error})") + else: + if not 200 <= int(r.status_code) < 300: + raise ConnectionError(f"Failed to get policy from {CONN} (Error: {r.status_code})") + status = True if r.text == 1 else False + return status + + +def declare_policy(): + 
headers = { + "command": "blockchain insert where policy=!new_policy and local=true and master=!ledger_conn", + "User-Agent": "AnyLog/1.23", + "destination": LEDGER_CONN + } + + policy = f"" + try: + r = requests.post(url=f"http://{CONN}", headers=headers, data=policy) + except Exception as error: + raise Exception(f"Failed to declare policy via {CONN} (Error: {error})") + else: + if not 200 <= int(r.status_code) < 300: + raise ConnectionError(f"Failed to declare policy via {CONN} (Error: {r.status_code})") + +def declare_mapping(): + command = f""" + run msg client where broker=rest and port=!anylog_rest_port and user-agent=anylog and log=false and topic=( + name={POLICY_ID} and + policy={POLICY_ID} + ) + """.replace("\n","") + headers = { + "command": command, + "User-Agent": "AnyLog/1.23" + } + + + try: + r = requests.post(url=f"http://{CONN}", headers=headers) + except Exception as error: + raise Exception(f"Failed to declare MQTT client via {CONN} (Error: {error})") + else: + if not 200 <= int(r.status_code) < 300: + raise ConnectionError(f"Failed to declare MQTT client via {CONN} (Error: {r.status_code})") + + +def main(): + status = get_policy() + if status is False: + declare_policy() + declare_mapping() + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/anylog_calls/get_data.py b/anylog_calls/get_data.py new file mode 100644 index 0000000..958c7c1 --- /dev/null +++ b/anylog_calls/get_data.py @@ -0,0 +1,61 @@ +#----------------------------------------------------------------------------------------------------------------------# +# @Roy - in Remote-CLI:djangoProject/views.py +# - line 1311 shows how we pull data for storage +# - line 290 shows how we pull data for streaming +#----------------------------------------------------------------------------------------------------------------------# +import os.path + +import requests +from pymongo import MongoClient +import gridfs + + +# Multiple table(s) +LOCAL_DIR = 
def get_data(query:str=QUERY):
    """Run a query through the node at ``URL`` and return the result rows.

    The query is sent as the ``command`` header with ``destination: network``
    (used here instead of wrapping the query in ``run client ()``).

    :args:
        query:str - command to execute (defaults to the module-level QUERY)
    :return:
        the value stored under the ``Query`` key of the JSON reply
    :raises Exception: when the HTTP request itself cannot be executed
    :raises ConnectionError: when the node replies with a non-2xx status code
    """
    headers = {
        "command": query,
        "User-Agent": "AnyLog/1.23",
        "destination": "network"  # this is instead of `run client ()`
    }

    try:
        r = requests.get(url=f"http://{URL}", headers=headers)
    except Exception as error:
        # Previously a bare `raise Exception`, which dropped both the message
        # and the underlying cause of the failure.
        raise Exception(f"Failed to execute GET against {URL} (Error: {error})") from error

    if not 200 <= int(r.status_code) < 300:
        raise ConnectionError(r.status_code)

    return r.json()['Query']
def insert_round_data(table_name, round_num, images, labels, data_type, db_name="mnist_fl"):
    """Build the JSON payloads for one round; nothing is written to a database.

    One JSON string is produced per (image, label) pair. Pairs are taken in
    order and stop at the shorter of ``images`` / ``labels``.

    :args:
        table_name - logical table name stored under "table"
        round_num - round number stored under "round_number"
        images - iterable of array-likes (torch tensors, numpy arrays, nested lists)
        labels - iterable of values convertible with int()
        data_type - stored under "data_type" (callers pass 'train' or 'test')
        db_name - logical database name stored under "dbms"
    :return:
        list of JSON strings, one per sample
    :raises Exception: wrapping (and chained to) any error raised while preparing a sample
    """
    try:
        # The old fixed-size batching did no per-batch work, so iterating the
        # pairs directly yields the same payloads in the same order.
        return [
            json.dumps({
                "dbms": db_name,
                "table": table_name,
                "timestamp": datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%f'),
                "round_number": round_num,
                "data_type": data_type,
                # np.asarray accepts tensors as well as plain arrays/lists,
                # where `img.numpy()` only worked for tensors.
                "image": np.asarray(img).flatten().tolist(),
                "label": int(lbl)
            })
            for img, lbl in zip(images, labels)
        ]
    except Exception as e:
        raise Exception(f"Error preparing data: {str(e)}") from e
mode 100644 index 0000000..e69de29 diff --git a/api-containers/Dockerfile b/api-containers/Dockerfile new file mode 100644 index 0000000..5137a53 --- /dev/null +++ b/api-containers/Dockerfile @@ -0,0 +1,26 @@ +# Use an official Python runtime as a parent image +FROM python:3.12-slim + +# Set the working directory in the container +WORKDIR /app + +# Copy the requirements file first to leverage Docker cache +COPY requirements.txt /app/requirements.txt + +# Install dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the current directory contents into the container at /app +COPY api-containers/ /app/api-containers + +# Copy the sibling edgefl directory (using a relative path) +COPY edgefl/ /app/edgefl + +# List directories to debug +RUN ls -la /app +RUN ls -la /app/api-containers + +# Define environment variables +ENV PYTHONUNBUFFERED=1 +ENV PYTHONPATH=/app + diff --git a/api-containers/Makefile b/api-containers/Makefile new file mode 100644 index 0000000..b46591e --- /dev/null +++ b/api-containers/Makefile @@ -0,0 +1,24 @@ +# Parameters with defaults +CONTAINER_NAME ?= aggregator_server +IMAGE ?= edgefl:latest +PORT ?= 8080 +ENV_FILE ?= edgefl/env_files/mnist-docker/mnist-agg.env + +# Run container using docker-compose +run-container: + @echo "Deploying container: $(CONTAINER_NAME) with image: $(IMAGE) on port: $(PORT) using env: $(ENV_FILE)" + @mkdir -p docker_makefile + @CONTAINER_NAME=$(CONTAINER_NAME) \ + IMAGE=$(IMAGE) \ + PORT=$(PORT) \ + ENV_FILE=$(ENV_FILE) \ + envsubst < docker-compose-template.yaml > docker-compose.yaml + @docker-compose -f docker-compose.yaml up -d + @echo "Container $(CONTAINER_NAME) started" + @rm -f docker-compose.yaml + +# Stop and remove the container +stop-container: + @echo "Stopping container: $(CONTAINER_NAME)" + @docker stop $(CONTAINER_NAME) + @docker rm $(CONTAINER_NAME) \ No newline at end of file diff --git a/api-containers/README.md b/api-containers/README.md new file mode 100644 index 0000000..d0600a1 
def main():
    """Start the aggregator or node server selected by SERVER_TYPE.

    Command-line options:
        --env-file  path to a .env file to load
        --port      port override (takes precedence over the PORT variable)

    Environment variables read after the .env file is loaded:
        SERVER_TYPE  'aggregator' or 'node' (required)
        PORT         listening port, default 8080
        HOST         bind address, default 0.0.0.0
        RELOAD       'true' enables uvicorn reload

    Exits with status 1 when SERVER_TYPE is missing or invalid.
    """
    parser = argparse.ArgumentParser(description="Run server based on environment variables")
    parser.add_argument('--env-file', type=str, help='Path to .env file to load')
    parser.add_argument('--port', type=int, help='Override port from env file')
    args = parser.parse_args()

    # Load environment variables from file if specified
    if args.env_file and os.path.exists(args.env_file):
        load_dotenv(args.env_file)
    else:
        if args.env_file:
            # Previously a mistyped path was ignored without any notice.
            print(f"WARNING: env file '{args.env_file}' not found, falling back to default .env")
        load_dotenv()  # Load from default .env if it exists

    # Determine which server to run based on SERVER_TYPE
    server_type = os.getenv('SERVER_TYPE', '').lower()

    if not server_type:
        print("ERROR: SERVER_TYPE environment variable not set. Set to 'aggregator' or 'node'")
        sys.exit(1)

    # One lookup table instead of two identical uvicorn.run() branches.
    app_modules = {
        'aggregator': "platform_components.aggregator.aggregator_server:app",
        'node': "platform_components.node.node_server:app",
    }
    app_module = app_modules.get(server_type)
    if app_module is None:
        # Validate before printing the "Starting ..." banner.
        print(f"ERROR: Invalid SERVER_TYPE '{server_type}'. Must be 'aggregator' or 'node'")
        sys.exit(1)

    # Get port from args, env var, or default
    port = args.port if args.port else int(os.getenv('PORT', 8080))
    host = os.getenv('HOST', '0.0.0.0')
    reload = os.getenv('RELOAD', 'False').lower() == 'true'

    print(f"Starting {server_type} server on {host}:{port}")
    uvicorn.run(app_module, host=host, port=port, reload=reload)
file mode 100644 index 0000000..6497ff3 --- /dev/null +++ b/api-containers/docker-compose-template.yaml @@ -0,0 +1,14 @@ +services: + ${NODE_NAME}: + image: ${IMAGE} + restart: always + env_file: + - ${ENV_FILE} + container_name: ${CONTAINER_NAME} + ports: + - ${PORT}:${PORT} + command: ["python", "/app/api-containers/app.py", "--env-file", "/app/edgefl/env_files/mnist-docker/mnist-agg.env"] + + + + diff --git a/api-containers/docker-compose.yaml b/api-containers/docker-compose.yaml new file mode 100644 index 0000000..63b81b3 --- /dev/null +++ b/api-containers/docker-compose.yaml @@ -0,0 +1,109 @@ +services: + # Aggregator Server + aggregator: + image: edgefl:latest + restart: always + env_file: + - ../edgefl/env_files/mnist-docker/mnist-agg.env + container_name: aggregator_server_8080 + ports: + - 8080:8080 + volumes: + - /var/run/docker.sock:/var/run/docker.sock +# deploy: +# resources: +# reservations: +# devices: +# - driver: nvidia +# count: all +# capabilities: [gpu] +# environment: +# - NVIDIA_VISIBLE_DEVICES=all +# - NVIDIA_DRIVER_CAPABILITIES=all +# - TF_FORCE_GPU_ALLOW_GROWTH=false +# - DOCKER_HOST=unix:///var/run/docker.sock +# - PATH=/usr/local/cuda-12.2/bin:$PATH +# - LD_LIBRARY_PATH=/usr/local/cuda-12.2/lib64:$LD_LIBRARY_PATH + command: ["python", "/app/api-containers/app.py", "--env-file", "/app/edgefl/env_files/mnist-docker/mnist-agg.env"] + + # Node Server 1 + node1: + image: edgefl:latest + restart: always + env_file: + - ../edgefl/env_files/mnist-docker/mnist1.env + container_name: node_server_8081 + ports: + - 8081:8081 + volumes: + - /var/run/docker.sock:/var/run/docker.sock +# deploy: +# resources: +# reservations: +# devices: +# - driver: nvidia +# count: all +# capabilities: [gpu] +## environment: +## - NVIDIA_VISIBLE_DEVICES=all +## - NVIDIA_DRIVER_CAPABILITIES=all +## - TF_FORCE_GPU_ALLOW_GROWTH=false +## - DOCKER_HOST=unix:///var/run/docker.sock +## - FILE_WRITE_DESTINATION=edgefl/file_write +## - PATH=/usr/local/cuda-12.2/bin:$PATH +## - 
LD_LIBRARY_PATH=/usr/local/cuda-12.2/lib64:$LD_LIBRARY_PATH + command: ["python", "/app/api-containers/app.py", "--env-file", "/app/edgefl/env_files/mnist-docker/mnist1.env"] + + # Node Server 2 + node2: + image: edgefl:latest + restart: always + env_file: + - ../edgefl/env_files/mnist-docker/mnist2.env + container_name: node_server_8082 + ports: + - 8082:8082 + volumes: + - /var/run/docker.sock:/var/run/docker.sock +# deploy: +# resources: +# reservations: +# devices: +# - driver: nvidia +# count: all +# capabilities: [gpu] +# environment: +# - NVIDIA_VISIBLE_DEVICES=all +# - NVIDIA_DRIVER_CAPABILITIES=all +# - TF_FORCE_GPU_ALLOW_GROWTH=false +# - DOCKER_HOST=unix:///var/run/docker.sock +# - PATH=/usr/local/cuda-12.2/bin:$PATH +# - LD_LIBRARY_PATH=/usr/local/cuda-12.2/lib64:$LD_LIBRARY_PATH + command: ["python", "/app/api-containers/app.py", "--env-file", "/app/edgefl/env_files/mnist-docker/mnist2.env"] + + # Node Server 3 + node3: + image: edgefl:latest + restart: always + env_file: + - ../edgefl/env_files/mnist-docker/mnist3.env + container_name: node_server_8083 + ports: + - 8083:8083 + volumes: + - /var/run/docker.sock:/var/run/docker.sock +# deploy: +# resources: +# reservations: +# devices: +# - driver: nvidia +# count: all +# capabilities: [gpu] +# environment: +# - NVIDIA_VISIBLE_DEVICES=all +# - NVIDIA_DRIVER_CAPABILITIES=all +# - TF_FORCE_GPU_ALLOW_GROWTH=false +# - DOCKER_HOST=unix:///var/run/docker.sock +# - PATH=/usr/local/cuda-12.2/bin:$PATH +# - LD_LIBRARY_PATH=/usr/local/cuda-12.2/lib64:$LD_LIBRARY_PATH + command: ["python", "/app/api-containers/app.py", "--env-file", "/app/edgefl/env_files/mnist-docker/mnist3.env"] diff --git a/api-containers/requirements.txt b/api-containers/requirements.txt new file mode 100644 index 0000000..f47e193 --- /dev/null +++ b/api-containers/requirements.txt @@ -0,0 +1,26 @@ +numpy~=1.26.0 +skorch>=0.15.0 +psycopg2-binary~=2.9.9 +scikit-learn~=1.6.1 +python-dotenv~=1.0.1 +requests~=2.32.3 +FastAPI~=0.115.8 
+uvicorn~=0.34.0 +keras~=3.7.0 +joblib~=1.4.2 +gensim~=4.3.2 +diffprivlib~=0.6.5 +pycloudmessenger~=0.8.2 +pandas~=2.2.1 +PyYAML~=6.0.2 +tqdm~=4.67.1 +python-dateutil~=2.9.0.post0 +matplotlib~=3.9.4 +docker~=7.1.0 + +torch~=2.2.2 +tensorflow~=2.16.2 +cython~=3.0.12 +pydantic~=2.11.0a2 + +torchvision~=0.17.2 \ No newline at end of file diff --git a/edgefl/data/README.md b/edgefl/data/README.md new file mode 100644 index 0000000..ea0834b --- /dev/null +++ b/edgefl/data/README.md @@ -0,0 +1,170 @@ +# Publishing data + +An application can publish data into AnyLog/EdgeLake using an array of southbound connectors, including: +- MQTT +- REST: _put_ or _post_ +- Kafka +- gRPC +- etc. + +## Backend Process +When a user sends data into AnyLog/EdgeLake (Operator node type) the following process occurs: +1. Data is mapped to have correct key/value pairs +When data is sent into AnyLog/EdgeLake, **not** using REST _PUT_, a mapping policy creates an association between the +JSON data coming and user-defined preferred naming logic. This is done based on a mapping policy. +When data is sent via a REST _PUT_, then, the information is stored as-is. + +2. If DNE - Blockchain policies defining the location of the data (cluster) and table definition are created. +The table definition is based on the JSON object(s) being analyzed from the user input. 
+```json +{"cluster" : {"company" : "test", + "name" : "test-cluster1", + "status" : "active", + "id" : "9b7d228178e18fe8512babb228a26912", + "date" : "2025-03-30T04:41:38.202741Z", + "ledger" : "global"}}, + +{"cluster" : {"parent" : "9b7d228178e18fe8512babb228a26912", + "name" : "test-cluster1", + "company" : "test", + "table" : [{"dbms" : "test", + "name" : "rand_data", + "status" : "active"}], + "source" : "Node at 170.187.157.30:32148", + "id" : "1899ef4f168431d454db947a804c7b98", + "date" : "2025-04-03T17:28:48.215004Z", + "status" : "active", + "ledger" : "global"}}, + +{"table" : {"name" : "rand_data", + "dbms" : "test", + "create" : "CREATE TABLE IF NOT EXISTS rand_data( row_id SERIAL PRIMARY KEY" + ", insert_timestamp TIMESTAMP NOT NULL DEFAULT NOW(), tsd_name " + "CHAR(3), tsd_id INT, timestamp timestamp not null default now(" + "), value float ); CREATE INDEX rand_data_timestamp_index ON ran" + "d_data(timestamp); CREATE INDEX rand_data_tsd_index ON rand_data" + "(tsd_name, tsd_id); CREATE INDEX rand_data_insert_timestamp_inde" + "x ON rand_data(insert_timestamp);", + "source" : "Processing JSON file", + "id" : "1ef91f4b46560e289a9e6be869ccfe7c", + "date" : "2025-04-03T16:31:24.861749Z", + "ledger" : "global"}} +``` +3. If DNE - create a table definition, based on the blockchain policy, on the appropriate operator(s). + +4. Convert JSON to SQL insert statement + +5. Store data in a (partitioned) table. + +6. If blobs (ex. images and videos) are included as part of the data, then the stored SQL data will contain a reference +to the location of the blob(s), and store them either in (local) file or in MongoDB. + +7. A reference of the data (row_id, tsd_name, tsd_id) is stored in `almgm.tsd_info`, which then is used for: + * Validation of the data + * HA across operator nodes (AnyLog only) + +## User-Defined Process + +### REST PUT +A _PUT_ command is simply specifying the logical information as the REST HEADERS and pre-mapped data in the payload. 
+An example can be found in [winniio](winniio-rooms/publish_data.py). + +**Sample Call**: +```shell +python3 $HOME/Anylog-Edgelake-Federated-Learning-Platform/edgefl/data/winniio-rooms/publish_data.py 74.207.235.89:32149 $HOME/Anylog-Edgelake-Federated-Learning-Platform/edgefl/data/winniio-rooms/room_12004.csv --db-name new_company +``` + +### non-PUT +All other processes (_POST_, Kafka, MQTT, etc.) require an extra step **before** publishing data - declaring the mapping + +1. Create a mapping policy - the example uses mp4 blob data +```anylog + + +if !blockchain_source == master then blockchain insert where policy=!mapping_policy and local=true and master=!ledger_conn +else blockchain insert where policy=!mapping_policy and local=true and blockchain=optimism +``` + +2. Create a message client (example with REST) +```anylog + +``` + +When data is not complex (i.e. without blobs), users can utilize the command with parameters as part of the message client +```anylog + +``` + +3. Once a message client is set, data can be sent into AnyLog/EdgeLake. An example for publishing via POST can be found +in [mnist](mnist/publish_data.py) + + +### Query examples + +The following is supposed to generate the ability to select images and labels (for viewing) via Remote-CLI. However, +I'm not 100% sure we support `gz` files and/or I'm missing something. Sample publish process for blobs can be found [here](https://github.com/AnyLog-co/Sample-Data-Generator/blob/master/data_generator/video_processing.py). +The other reason is because we're processing a gzip file, which we aren't able to view. 
+```anylog +sql new_company extend=(+node_name, @ip, @port, @dbms_name, @table_name) and format = json and timezone=Europe/Dublin select timestamp, image_name, image, label_name, label from mnist order by timestamp desc --> selection (columns: ip using ip and port using port and dbms using dbms_name and table using table_name and image using file and label using file) +``` + +To view expected behaivor check out the `demo` section in http://23.239.12.151:31800. + diff --git a/edgefl/data/chest_xrays_bbox/chest_xrays_bbox_db_script.py b/edgefl/data/chest_xrays_bbox/chest_xrays_bbox_db_script.py new file mode 100644 index 0000000..19524b6 --- /dev/null +++ b/edgefl/data/chest_xrays_bbox/chest_xrays_bbox_db_script.py @@ -0,0 +1,219 @@ +""" +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +file, You can obtain one at http://mozilla.org/MPL/2.0/ +""" + +""" +Script to create PostgreSQL database and load MNIST data organized by node and round +""" +import os +import json +import numpy as np +import pandas as pd +import kaggle +from sklearn.model_selection import train_test_split +import psycopg2 +from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT +# from dotenv import load_dotenv +import time + +# load_dotenv() + +conn = psycopg2.connect( + database=os.getenv("PSQL_DB_NAME"), + user=os.getenv("PSQL_DB_USER"), + password=os.getenv("PSQL_DB_PASSWORD"), + host=os.getenv("PSQL_HOST"), + port=os.getenv("PSQL_PORT"), +) +conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) +cur = conn.cursor() + +db_name = os.getenv("PSQL_DB_NAME") +dir_path_prefix = os.getenv("GITHUB_DIR") + +def create_database(): + """Create PostgreSQL database if it doesn't exist.""" + + # conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + # cur = conn.cursor() + + cur.execute("SELECT 1 FROM pg_database WHERE datname = %s;", (f'{db_name}',)) + + exists = cur.fetchone() + if not exists: + cur.execute(f"CREATE 
def create_node_table(conn, node_name):
    """Create the single table that holds every round of data for one node.

    Any existing table of the same name is dropped first so that re-running
    the script does not duplicate rows, then the table and an index on
    ``round_number`` are created and the transaction is committed.

    :args:
        conn - open database connection; statements run on a cursor from this connection
        node_name - node identifier; the table is named ``node_<node_name>``
    :return:
        the table name that was created
    """
    table_name = f"node_{node_name}"

    # Use a cursor from the connection that was passed in. The previous code
    # executed on the module-level cursor and only used `conn` to commit, so
    # the function silently ignored any other connection.
    # NOTE(review): table_name is interpolated into the SQL text; callers in
    # this script pass fixed names ("node1", ...) — do not feed it user input.
    cur = conn.cursor()
    try:
        # Prevents duped entries in the tables
        cur.execute(f"DROP TABLE IF EXISTS {table_name}")

        cur.execute(f"""
            CREATE TABLE IF NOT EXISTS {table_name} (
                id SERIAL PRIMARY KEY,
                round_number INTEGER NOT NULL,
                data_type VARCHAR(10) NOT NULL,  -- 'train' or 'test'
                image CHAR(16) NOT NULL, -- path to image in local directory
                width INTEGER NOT NULL DEFAULT 1024,
                height INTEGER NOT NULL DEFAULT 1024,
                class VARCHAR(16) NOT NULL, -- the label
                x_min DOUBLE PRECISION NOT NULL,
                y_min DOUBLE PRECISION NOT NULL,
                x_max DOUBLE PRECISION NOT NULL,
                y_max DOUBLE PRECISION NOT NULL,
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        # Create index on round_number for faster queries
        cur.execute(f"""
            CREATE INDEX IF NOT EXISTS idx_{table_name}_round
            ON {table_name}(round_number)
        """)

        conn.commit()
    finally:
        cur.close()
    return table_name
def verify_round_data(conn, table_name, round_num):
    """Count the stored training and test rows for one round.

    :args:
        conn - open database connection
        table_name - node table to inspect
        round_num - round number to count rows for
    :return:
        (train_count, test_count)
    :raises Exception: any error raised by the database driver is reported and re-raised
    """
    cur = conn.cursor()
    try:
        counts = []
        for data_type in ('train', 'test'):
            cur.execute(f"""
                SELECT COUNT(*) FROM {table_name}
                WHERE round_number = %s AND data_type = %s
            """, (round_num, data_type))
            counts.append(cur.fetchone()[0])
        return counts[0], counts[1]
    except Exception as e:
        # Re-raise after reporting. Swallowing the error made the function
        # return None, and the caller's tuple-unpack then failed with a
        # TypeError that hid the real cause.
        print(f"Exception in verify_round_data: {e}")
        raise
    finally:
        # The cursor was previously never closed.
        cur.close()
train_size=TRAIN_SAMPLES_PER_ROUND) + + df_train = np.array_split(df_train, NUM_ROUNDS) + df_test = np.array_split(df_test, NUM_ROUNDS) + + for round_num in range(1, NUM_ROUNDS + 1): + print(f"\nRound {round_num}") + + # Training and testing data + insert_round_data(conn, table_name, round_num, df_train[round_num - 1], "train") + insert_round_data(conn, table_name, round_num, df_test[round_num - 1], "test") + + # Verify data for this round + train_count, test_count = verify_round_data(conn, table_name, round_num) + print(f"Verification for {node_name} Round {round_num}:") + print(f"Training samples: {train_count}") + print(f"Test samples: {test_count}") + finally: + conn.close() + print("\nData loading complete!") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/edgefl/data/chest_xrays_bbox/chest_xrays_bbox_db_script_v1.py b/edgefl/data/chest_xrays_bbox/chest_xrays_bbox_db_script_v1.py new file mode 100644 index 0000000..8917b95 --- /dev/null +++ b/edgefl/data/chest_xrays_bbox/chest_xrays_bbox_db_script_v1.py @@ -0,0 +1,126 @@ +""" +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. 
def generate_round_data(round_num:int, batch_size:int, df, data_type:str):
    """
    Generate the payload(s) for the current round - one payload per dataframe row
    :args:
        round_num:int - current round
        batch_size:int - kept for backward compatibility; payloads are built per row, so it no longer affects the output
        df - dataframe whose rows are (image, width, height, class, x_min, y_min, x_max, y_max)
        data_type:str - train || test
    :params:
        payloads:list - list of payloads
        timestamp:str - single UTC timestamp shared by every payload of this call
    :return:
        payloads
    """
    # `strfrmt` does not exist on datetime (AttributeError); the method is
    # `strftime`. The format ends in "Z", so take the time in UTC rather than
    # labelling local time as UTC.
    timestamp = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%S.%fZ')

    # Build the payload inside the row loop. Previously it was built after the
    # inner loop, so only the last row of every batch was emitted.
    payloads = []
    for image, width, height, cls, x_min, y_min, x_max, y_max in df.itertuples(index=False, name=None):
        payloads.append({
            'timestamp': timestamp,
            'round_number': round_num,
            'data_type': data_type,
            'image': image,
            'width': width,
            'height': height,
            'class': cls,
            'bbox': [x_min, y_min, x_max, y_max],
        })

    return payloads
default=DB_NAME, help='logical database name') + parse.add_argument('--table-name', type=str, default=None) + parse.add_argument('--num-rounds', type=int, default=10, help='') + parse.add_argument('--train-sample-size', type=int, default=0.8, help='train samples per round') + parse.add_argument('--test-sample-size', type=int, default=0.2, help='test samples per round') + parse.add_argument('--batch-size', type=int, default=10, help='') + args = parse.parse_args() + + # conns = arags.conn.split(',') + + dataframes = [] + dataset = pd.read_csv(os.path.join(DIR_PATH_PREFIX, 'tensorflow.csv')) + batch_size = len(dataset) // len(conns) + for i in range(len(conns)): + start_idx = i * batch_size + end_idx = (i + 1) * batch_size if i != len(conns) - 1 else len(dataset) + batch = dataset.iloc[start_idx:end_idx] + dataframes.append(batch) + + for conn in conns: + df_sets = {'test': None, 'train': None} + + df_train, df_test = train_test_split(dataframes[node - 1], test_size=args.test_samples_size, + train_size=args.train_samples_size) + df_sets['train'] = np.array_split(df_train, args.num_rounds) + df_sets['test'] = np.array_split(df_test, args.num_rounds) + + for crt_round in range(1, args.num_rounds+1): + for data_type in df_sets: + table_name = f"{args.table_name.replace(' ', '_').replace('-', '_')}_{data_type}" if ags.table_name else dataset + payload = generate_round_data(round_num=crt_round, batch_size=args.batch_size, df=df_sets[data_type][crt_round-1], data_type=data_type) + # put_data(conn=conn, db_name=args.db_name, table=table_name, payload=payload) + + + +if __name__ == '__main__': + main() + + + + + + + + + diff --git a/edgefl/data/chest_xrays_bbox/chest_xrays_bbox_db_script_v2.py b/edgefl/data/chest_xrays_bbox/chest_xrays_bbox_db_script_v2.py new file mode 100644 index 0000000..d5ffc80 --- /dev/null +++ b/edgefl/data/chest_xrays_bbox/chest_xrays_bbox_db_script_v2.py @@ -0,0 +1,235 @@ +""" +This Source Code Form is subject to the terms of the Mozilla Public 
def blobs_policy(conn:str, policy_id:str='xray-data'):
    """
    For POST commands, user needs to specify a mapping policy between the data and the ingestion process.
    The policy is declared only when a policy with the same ID does not exist yet.
    :args:
        conn:str - REST connection information
        policy_id:str - policy ID
    :params:
        headers:dict - REST header
        new_policy:dict - Mapping policy
    :raises Exception: when the GET (existence check) or the POST (declaration) fails
    """
    # check if mapping exists
    headers = {
        "command": f"blockchain get mapping where id={policy_id}",
        "User-Agent": "AnyLog/1.23",
    }
    try:
        response = requests.get(url=f"http://{conn}", headers=headers)
        response.raise_for_status()
    except Exception as error:
        raise Exception(f'Failed to execute GET against {conn} (Error: {error})') from error
    else:
        if response.json():
            return

    # create mapping policy if DNE
    new_policy = {"mapping": {
        "id": policy_id,
        "dbms": "bring [dbms]",
        "table": "bring [table]",
        "schema": {
            "timestamp": {
                "type": "timestamp",
                "default": "now()",
                "bring": "[timestamp]"
            },
            "round_number": {
                "type": "int",
                "bring": "[round_number]"
            },
            "data_type": {  # train or test
                "type": "string",
                "bring": "[data_type]"
            },
            "file": {
                "blob": True,
                "bring": "[image]",
                "extension": "jpeg",
                "apply": "opencv",  # I'm guessing the data is in numpy format
                "hash": "md5",
                "type": "varchar"
            },
            "width": {
                "type": "float",
                "bring": "[width]"
            },
            "height": {
                "type": "float",
                "bring": "[height]"
            },
            "class": {
                "type": "string",
                "bring": "[class]"
            },
            "bbox": {
                "type": "string",
                # Bracketed like every other field so the value is read from
                # the payload key; it was the bare text "bbox" before.
                "bring": "[bbox]"
            }
        }}}

    headers = {
        'command': 'blockchain insert where policy=!new_policy and local=true and master=!ledger_conn',
        'User-Agent': 'AnyLog/1.23',
    }
    # The command refers to !new_policy, so the policy has to travel in the
    # request body; an empty body leaves that variable undefined.
    # NOTE(review): the `<new_policy=...>` assignment wrapper follows the
    # AnyLog REST convention — confirm against the node documentation.
    post_data(conn=conn, headers=headers, payload=f"<new_policy={json.dumps(new_policy)}>")
timestamp, + 'round_number': round_num, + 'data_type': data_type, + 'image': image, + 'width': width, + 'height': height, + 'class': cls, + 'bbox': [x_min, y_min, x_max, y_max], + } + + payloads.append(payload) + + return payloads + + +def main(): + parse = argparse.ArgumentParser() + parse.add_argument('conn', type=str, default=None, help='REST connection information') + parse.add_argument('--db-name', type=str, default=DB_NAME, help='logical database name') + parse.add_argument('--table-name', type=str, default=None) + parse.add_argument('--topic', type=str, default='xray-data', help='message client topic name') + parse.add_argument('--num-rounds', type=int, default=10, help='') + parse.add_argument('--train-sample-size', type=int, default=0.8, help='train samples per round') + parse.add_argument('--test-sample-size', type=int, default=0.2, help='test samples per round') + parse.add_argument('--batch-size', type=int, default=10, help='') + args = parse.parse_args() + + conns = arags.conn.split(',') + for conn in conns: + blobs_policy(conn=conn, policy_id=args.topic) + msg_client(conn=conn, policy_id=args.topic) + + headers = { + 'command': 'data', + 'topic': topic, + 'User-Agent': 'AnyLog/1.23', + 'Content-Type': 'text/plain' + } + dataframes = [] + dataset = pd.read_csv(os.path.join(DIR_PATH_PREFIX, 'tensorflow.csv')) + batch_size = len(dataset) // len(conns) + for i in range(len(conns)): + start_idx = i * batch_size + end_idx = (i + 1) * batch_size if i != len(conns) - 1 else len(dataset) + batch = dataset.iloc[start_idx:end_idx] + dataframes.append(batch) + + for conn in conns: + df_sets = {'test': None, 'train': None} + + df_train, df_test = train_test_split(dataframes[node - 1], test_size=args.test_samples_size, + train_size=args.train_samples_size) + df_sets['train'] = np.array_split(df_train, args.num_rounds) + df_sets['test'] = np.array_split(df_test, args.num_rounds) + + for crt_round in range(1, args.num_rounds+1): + for data_type in df_sets: + 
table_name = f"{args.table_name.replace(' ', '_').replace('-', '_')}_{data_type}" if ags.table_name else dataset + payload = generate_round_data(db_name=args.db_name, table_name=table_name, round_num=crt_round, + batch_size=args.batch_size, df=df_sets[data_type][crt_round-1], + data_type=data_type) + post_data(conn=conn, headers=headers, payload=payload) + + + +if __name__ == '__main__': + msg_client(conn='10.0.0.147:32149') + # main() + + + + + + + + + diff --git a/edgefl/data/mnist/publish_data.py b/edgefl/data/mnist/publish_data.py new file mode 100644 index 0000000..b669761 --- /dev/null +++ b/edgefl/data/mnist/publish_data.py @@ -0,0 +1,157 @@ +import argparse +import json +import os.path +import base64 + +import requests + +def __compress_file(file_path:str): + base64_bytes = None + try: + with open(file_path, 'rb') as f: + base64_bytes = base64.b64encode(f.read()) + except Exception as error: + raise Exception(f"Failed to read / compress file {file_path} (Error: {error}) ") + return base64_bytes.decode('utf-8') + +def post_cmd(conn:str, headers:dict, payload:str=None): + """ + Publish into Operator node via POST + """ + try: + response = requests.post(url=f"http://{conn}", headers=headers, data=payload) + response.raise_for_status() + except Exception as error: + raise Exception(f"Failed to run POST against {conn} (Error: {error})") + +def create_mapping(conn:str, policy_id:str, db_name:str, table_name:str): + """ + Create mapping policy + :url: + https://github.com/AnyLog-co/documentation/blob/master/image%20mapping.md + :note: + when declaring a policy - make sure the policy DNE beforehand + :args: + conn:str - REST connection + db_name:str - logical database name + table_name:str - logical table name + :params: + mapping_policy:dict - mapping policy to use + new_policy:str - serialized mapping policy + headers:dict -REST headers + """ + mapping_policy = { + "mapping": { + "id": policy_id, + "dbms": db_name, + "table": table_name, + "schema": { + 
"timestamp": { + "type": "timestamp", + "default": "now()" + }, + "image_name": { + "type": "string", + "default": "", + "bring": "[image_name]" + }, + "image": { + "blob": True, + "bring": "[image]", + "extension": "gz", + "apply": "base64decoding", + "hash": "md5", + "type": "varchar" + }, + "label_name": { + "type": "string", + "default": "", + "bring": "[label_name]" + }, + "label": { + "blob": True, + "bring": "[image]", + "extension": "gz", + "apply": "base64decoding", + "hash": "md5", + "type": "varchar" + } + } + } + } + + new_policy = f"" + headers = { + "command": "blockchain insert where policy=!new_policy and local=true and master=!ledger_conn", + "User-Agent": "AnyLog/1.23" + } + + post_cmd(conn=conn, headers=headers, payload=new_policy) + +def run_msg_client(conn:str, policy_id:str, topic:str): + headers = { + "command": "run msg client where broker=rest and log=false and user-agent=anylog and topic=(name=mnist and policy=mnist)", + "User-Agent": "AnyLog/1.23" + } + + post_cmd(conn=conn, headers=headers, payload=None) + +def publish_data(conn:str, topic:str, image_path:str, label_path:str): + headers = { + 'command': 'data', + 'topic': topic, + 'User-Agent': 'AnyLog/1.23', + 'Content-Type': 'text/plain' + } + + image_file = os.path.basename(image_path) + label_file = os.path.basename(label_path) + image_content = __compress_file(image_path) + label_content = __compress_file(label_path) + + payload = json.dumps( + { + "image_name": image_file, + "image": image_content, + "label_name": label_file, + "label": label_content + } + ) + + post_cmd(conn=conn, headers=headers, payload=payload) + +def main(): + parse = argparse.ArgumentParser() + parse.add_argument('conn', type=str, default='74.207.235.89:32149') + parse.add_argument('--policy-id', type=str, default='mnist') + parse.add_argument('--topic', type=str, default='mnist') + parse.add_argument('--db-name', type=str, default='new_company') + parse.add_argument('--table-name', type=str, default='mnist') 
+ + parse.add_argument('--image', type=str, default=None, required=True) + parse.add_argument('--label', type=str, default=None, required=True) + args = parse.parse_args() + + args.image = os.path.expanduser(os.path.expandvars(args.image)) + if not os.path.isfile(args.image): + raise FileNotFoundError(args.image) + args.label = os.path.expanduser(os.path.expandvars(args.label)) + if not os.path.isfile(args.label): + raise FileNotFoundError(args.label) + + # publish mapping + # create_mapping(conn='74.207.235.89:32149', policy_id=args.policy_id, db_name='new_company', table_name='mnist') + # run_msg_client(conn=args.conn, policy_id=args.policy_id, topic=args.topic) + + publish_data(conn=args.conn, topic=args.topic, image_path=args.image, label_path=args.label) + # payload = { + # "image_name": image_file, + # "image": image_content, + # "label_name": label_file, + # "label": label_content + # } + # + # post_cmd(conn=args.conn, headers=) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/edgefl/data/mnist/publish_data_orig.py b/edgefl/data/mnist/publish_data_orig.py new file mode 100644 index 0000000..72544f0 --- /dev/null +++ b/edgefl/data/mnist/publish_data_orig.py @@ -0,0 +1,254 @@ +import argparse +import datetime + +import ast + +import os +import gzip + +import numpy as np +import struct +import requests +import json + + +FILE_PATH = os.path.expanduser(os.path.expandvars('$HOME/Anylog-Edgelake-Federated-Learning-Platform/edgefl/data/mnist/raw/t10k-labels-idx1-ubyte.gz')) + + +class NumpyEncoder(json.JSONEncoder): + """ Special json encoder for numpy types """ + def default(self, obj): + if isinstance(obj, np.integer): + return int(obj) + elif isinstance(obj, np.floating): + return float(obj) + elif isinstance(obj, np.ndarray): + return obj.tolist() + return json.JSONEncoder.default(self, obj) + + +def __validate_file(file_path:str): + """ + Validate image and label file exists and converted to full path + :args: + file_path: path to 
image file + :params: + full_path: full path to image file + :return: + full_path: full path to image file + """ + full_path = os.path.expanduser(os.path.expandvars(file_path)) + if not os.path.isfile(full_path): + raise IOError('File does not exist: {}'.format(full_path)) + return full_path + + +def __post_data(conn:str, payload:(list or str or dict), headers:dict): + """ + Execute POST command + :args: + conn: name of database connection + payload: list of tuples containing image data + headers: dictionary of headers + :params: + response:requests response object + """ + try: + response = requests.post(url=f"http://{conn}", data=payload, headers=headers) + response.raise_for_status() + except Exception as e: + raise Exception(f"Failed to execute POST against {conn} (Error: {e})") + + +def msg_client(conn:str, topic:str): + """ + Execute `run msg client` to accept data via POST + :args: + conn:str - REST connection string + topic:str - topic name + :params: + headers:dict - REST connection headers + """ + headers = { + "command": f"run msg client where broker=rest and user-agent=anylog and log=false and topic=(name={topic} and policy=mnist-mapping)", + "User-Agent": "AnyLog/1.23" + } + + __post_data(conn=conn, payload=None, headers=headers) + + +def create_policy(conn:str, db_name:str, table_name:str): + """ + Create mapping policy if DNE + :args: + conn:str - REST connection information + db_name:str - logical database name + table_name:str - logical table name + :params: + new_policy:dict - mapping policy + response:requests.Response - HTTP response + headers:dict - HTTP headers + """ + new_policy = { + "mapping": { + "id": "mnist-mapping", + "dbms": db_name, + "table": table_name, + "schema": { + "timestamp": { + "type": "timestamp", + "default": "now()", + "bring": "[timestamp]" + }, + "image_file": { + "type": "string", + "default": "", + "value": "[image_file_name]" + }, + "image": { + "blob": True, + "bring": "[image_content]", + "extension": "ubyte", + 
"apply": "opencv", + "hash": "md5", + "type": "varchar" + }, + "labels_file": { + "type": "string", + "default": "", + "value": "[labels_file_name]" + }, + "label": { + "blob": True, + "bring": "[label_content]", + "extension": "ubyte", + "apply": "opencv", + "hash": "md5", + "type": "varchar" + } + } + } + } + + try: + response = requests.get(url=f"http://{conn}", + headers={"command": "blockchain get mapping where id=mnist-mapping bring.count", + "User-Agent": "AnyLog/1.23"}) + response.raise_for_status() + + except Exception as e: + raise Exception(f"Failed to execute GET against {conn} (Error: {e})") + else: + response = ast.literal_eval(response.text) + + if len(response) == 0: + headers = { + "command": "blockchain insert where policy=!new_policy and local=true and master=!ledger_conn", + "User-Agent": "AnyLog/1.23", + } + payload = f"" + __post_data(conn=conn, payload=payload, headers=headers) + + + +def read_images(file_path)->np.ndarray: + """ + Given a file_path - generate numpy array for label(s) + :args: + file_path:str - file to read content from + :prarams: + images:numpy.ndarray - extract image from file + :return: + images + """ + images = None + try: + with gzip.open(file_path, 'rb') as f: + # Read the header: magic number, number of images, number of rows, number of columns + header = f.read(16) + magic, num_images, rows, cols = struct.unpack(">IIII", header) + + # Read image data as an array of 8-bit unsigned integers + image_data = np.frombuffer(f.read(), dtype=np.uint8) + + # Reshape into (num_images, rows, cols) + images = image_data.reshape(num_images, rows, cols) + except IOError: + raise IOError("Unable to read labels from file %s" % file_path) + + return images + + +def read_labels(file_path)->np.ndarray: + """ + Given a file_path - generate numpy array for label(s) + :args: + file_path:str - file to read content from + :prarams: + labels:numpy.ndarray - extract labels from file + :return: + labels + """ + labels = None + try: + with 
gzip.open(file_path, 'rb') as f: + # Read the header: magic number, number of labels + header = f.read(8) + magic, num_labels = struct.unpack(">II", header) + + # Read the label data (each label is an integer) + labels = np.frombuffer(f.read(), dtype=np.uint8) + except IOError: + raise IOError("Unable to read labels from file %s" % file_path) + + return labels + + +def create_payload(db_name:str, image_file_name:str, image_content:np.ndarray, labels_file_name:str, + labels_content:np.ndarray)->str: + payload = { + "dbms": db_name, + "table": "mnist_data", + "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "image_file": image_file_name, + "image_content": image_content, + "labels_file_name": labels_file_name, + "label_content": labels_content, + } + + return json.dumps(payload, cls=NumpyEncoder) + + +def main(): + parse = argparse.ArgumentParser() + parse.add_argument('conn', type=str, default=None, help='REST connection information') + parse.add_argument('image_file', type=__validate_file, default=None, help='image gz file') + parse.add_argument('label_file', type=__validate_file, default=None, help='label gz file') + parse.add_argument('--db-name', type=str, default='my_db', help='logical database name') + parse.add_argument('--topic-name', type=str, default="mnist-mapping", help="logical topic name for msg client") + args = parse.parse_args() + + # create_policy(conn=args.conn, db_name=args.db_name, table_name="[table_name]") + # msg_client(conn=args.conn, topic=args.topic_name) + + image_content = read_images(args.image_file) + label_content = read_labels(args.label_file) + + content = create_payload(db_name=args.db_name, image_file_name=os.path.basename(args.image_file), + image_content=image_content, labels_file_name=os.path.basename(args.label_file), + labels_content=label_content) + + headers = { + 'command': 'data', + 'topic': args.topic_name, + 'User-Agent': 'AnyLog/1.23', + 'Content-Type': 'text/plain' + } + + # 
__post_data(conn=args.conn, payload=content, headers=headers) + + + +if __name__ == '__main__': + main() + diff --git a/edgefl/data/mnist/store_data.py b/edgefl/data/mnist/store_data.py index 203e506..05d91fe 100644 --- a/edgefl/data/mnist/store_data.py +++ b/edgefl/data/mnist/store_data.py @@ -1,46 +1,28 @@ import argparse import datetime - import ast - import os import gzip +from pydantic_core.core_schema import dataclass_args_schema +from torchvision import datasets +from email.mime.image import MIMEImage +from ipaddress import ip_address +from sqlite3.dbapi2 import paramstyle + import numpy as np import struct import requests import json +TABLE_NAME = 'mnist_data' +DATA_HEADER = { + 'command': 'data', + 'topic': None, + 'User-Agent': 'AnyLog/1.23', + 'Content-Type': 'text/plain' +} -FILE_PATH = os.path.expanduser(os.path.expandvars('$HOME/Anylog-Edgelake-Federated-Learning-Platform/edgefl/data/mnist/raw/t10k-labels-idx1-ubyte.gz')) - - -class NumpyEncoder(json.JSONEncoder): - """ Special json encoder for numpy types """ - def default(self, obj): - if isinstance(obj, np.integer): - return int(obj) - elif isinstance(obj, np.floating): - return float(obj) - elif isinstance(obj, np.ndarray): - return obj.tolist() - return json.JSONEncoder.default(self, obj) - - -def __validate_file(file_path:str): - """ - Validate image and label file exists and converted to full path - :args: - file_path: path to image file - :params: - full_path: full path to image file - :return: - full_path: full path to image file - """ - full_path = os.path.expanduser(os.path.expandvars(file_path)) - if not os.path.isfile(full_path): - raise IOError('File does not exist: {}'.format(full_path)) - return full_path def __post_data(conn:str, payload:(list or str or dict), headers:dict): @@ -77,7 +59,7 @@ def msg_client(conn:str, topic:str): __post_data(conn=conn, payload=None, headers=headers) -def create_policy(conn:str, db_name:str, table_name:str): +def create_policy(conn:str): """ Create 
mapping policy if DNE :args: @@ -88,44 +70,51 @@ def create_policy(conn:str, db_name:str, table_name:str): new_policy:dict - mapping policy response:requests.Response - HTTP response headers:dict - HTTP headers + :table: + CREATE TABLE IF NOT EXISTS {table_name} ( + id SERIAL PRIMARY KEY, + round_number INTEGER NOT NULL, + data_type VARCHAR(10) NOT NULL, -- 'train' or 'test' + image FLOAT[] NOT NULL, + label INTEGER NOT NULL, + timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) """ new_policy = { "mapping": { "id": "mnist-mapping", - "dbms": db_name, - "table": table_name, + "dbms": 'bring [dbms]', + "table": 'bring [table]', "schema": { "timestamp": { "type": "timestamp", "default": "now()", "bring": "[timestamp]" }, - "image_file": { + "round_number": { + "type": "int", + "bring": "[round_number]" + }, + "data_type": { # train or test "type": "string", - "default": "", - "value": "[image_file_name]" + "bring": "[data_type]" }, "image": { - "blob": True, - "bring": "[image_content]", - "extension": "ubyte", - "apply": "opencv", - "hash": "md5", - "type": "varchar" - }, - "labels_file": { - "type": "string", - "default": "", - "value": "[labels_file_name]" + "type": "int", + "bring": "[image]" }, "label": { - "blob": True, - "bring": "[label_content]", - "extension": "ubyte", - "apply": "opencv", - "hash": "md5", - "type": "varchar" - } + "type": "int", + "bring": "[label]" + }, + # "live_image": { # this is what we're able to + # "blob": True, + # "bring": "[live_image]", + # "extension": "png", + # "apply": "opencv", + # "hash": "md5", + # "type": "varchar" + # } } } } @@ -150,102 +139,90 @@ def create_policy(conn:str, db_name:str, table_name:str): __post_data(conn=conn, payload=payload, headers=headers) +class MnistData: + def __init__(self): + self.train_dataset = datasets.MNIST('..', train=True, download=True) + self.test_dataset = datasets.MNIST('..', train=False, download=True) + + def __create_payload(self, db_name:str, table_name:str, round_number:int, 
data_type:str, image, label:int)->dict: + return { + 'dbms': db_name, + 'table': table_name, + 'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%fZ'), + 'round_number': round_number, + 'data_type': data_type, + 'image': image.tolist(), + 'label': label.tolist(), + # "live_image": (image.unsqueeze(1).float() / 255.0).tolist() + } -def read_images(file_path)->np.ndarray: - """ - Given a file_path - generate numpy array for label(s) - :args: - file_path:str - file to read content from - :prarams: - images:numpy.ndarray - extract image from file - :return: - images - """ - images = None - try: - with gzip.open(file_path, 'rb') as f: - # Read the header: magic number, number of images, number of rows, number of columns - header = f.read(16) - magic, num_images, rows, cols = struct.unpack(">IIII", header) + def generate_data(self, conn_id:int, db_name:str, current_round:int, data_type:str, data_size:int, idx:int=0)->(dict, int) or None: + idx_end = idx + data_size + image = self.train_dataset.data[idx:idx_end] if data_type == 'training' else self.test_dataset.data[idx:idx_end] + label = self.train_dataset.targets[idx:idx_end] if data_type == 'training' else self.test_dataset.targets[idx:idx_end] - # Read image data as an array of 8-bit unsigned integers - image_data = np.frombuffer(f.read(), dtype=np.uint8) + # Ensure slicing is valid + if image.nelement() == 0: + return None - # Reshape into (num_images, rows, cols) - images = image_data.reshape(num_images, rows, cols) - except IOError: - raise IOError("Unable to read labels from file %s" % file_path) + payload = self.__create_payload(db_name=db_name, table_name=f'{TABLE_NAME}{conn_id}_{data_type}', + round_number=current_round, data_type=data_type, image=image, label=label) - return images + return payload, idx_end -def read_labels(file_path)->np.ndarray: +def main(): """ - Given a file_path - generate numpy array for label(s) - :args: - file_path:str - file to read content from - :prarams: - 
labels:numpy.ndarray - extract labels from file + Steps: + 1. against Master declare policy + python3 store_data.py 10.0.0.81:32049 --db-name mnist --declare-policy + 2. publish data :return: - labels """ - labels = None - try: - with gzip.open(file_path, 'rb') as f: - # Read the header: magic number, number of labels - header = f.read(8) - magic, num_labels = struct.unpack(">II", header) - - # Read the label data (each label is an integer) - labels = np.frombuffer(f.read(), dtype=np.uint8) - except IOError: - raise IOError("Unable to read labels from file %s" % file_path) - - return labels - - -def create_payload(db_name:str, image_file_name:str, image_content:np.ndarray, labels_file_name:str, - labels_content:np.ndarray)->str: - payload = { - "dbms": db_name, - "table": "mnist_data", - "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - "image_file": image_file_name, - "image_content": image_content, - "labels_file_name": labels_file_name, - "label_content": labels_content, - } - - return json.dumps(payload, cls=NumpyEncoder) - - -def main(): parse = argparse.ArgumentParser() parse.add_argument('conn', type=str, default=None, help='REST connection information') - parse.add_argument('image_file', type=__validate_file, default=None, help='image gz file') - parse.add_argument('label_file', type=__validate_file, default=None, help='label gz file') - parse.add_argument('--db-name', type=str, default='my_db', help='logical database name') + parse.add_argument('--db-name', type=str, default='mnist', help='logical database name') + parse.add_argument('--num-rounds', type=int, default=25, help='') + parse.add_argument('--train-sample-size', type=int, default=600, help='') + parse.add_argument('--test-sample-size', type=int, default=100, help='') + + parse.add_argument('--declare-policy', type=bool, nargs='?', const=True, default=False, help='declare data mapping policy') + parse.add_argument('--declare-mqtt', type=bool, nargs='?', const=True, 
default=False, help='enable MQTT client') + parse.add_argument('--publish-data', type=bool, nargs='?', const=True, default=False, help='Publish data to operator node(s)') + # parse.add_argument('image_file', type=__validate_file, default=None, help='image gz file') + # parse.add_argument('label_file', type=__validate_file, default=None, help='label gz file') parse.add_argument('--topic-name', type=str, default="mnist-mapping", help="logical topic name for msg client") args = parse.parse_args() - # create_policy(conn=args.conn, db_name=args.db_name, table_name="[table_name]") - # msg_client(conn=args.conn, topic=args.topic_name) - - image_content = read_images(args.image_file) - label_content = read_labels(args.label_file) - - content = create_payload(db_name=args.db_name, image_file_name=os.path.basename(args.image_file), - image_content=image_content, labels_file_name=os.path.basename(args.label_file), - labels_content=label_content) - - headers = { - 'command': 'data', - 'topic': args.topic_name, - 'User-Agent': 'AnyLog/1.23', - 'Content-Type': 'text/plain' - } - - __post_data(conn=args.conn, payload=content, headers=headers) + conns = args.conn.split(',') + train_idx = 0 + test_idx = 0 + mnist = MnistData() + DATA_HEADER['topic'] = args.topic_name + + + for conn in conns: + if args.declare_policy is True: # declare mapping policy + create_policy(conn=conn) + if args.declare_mqtt is True: # run mqtt client + msg_client(conn=conn, topic=args.topic_name) + + if args.publish_data is True: + for round_num in range(0, args.num_rounds): + train_payload, train_idx = mnist.generate_data(db_name=args.db_name, conn_id=conns.index(conn) + 1, current_round=round_num, + data_type='train', data_size=args.train_sample_size, + idx=train_idx) + test_payload, test_idx = mnist.generate_data(db_name=args.db_name, conn_id=conns.index(conn) + 1, current_round=round_num, + data_type='test', data_size=args.test_sample_size, + idx=test_idx) + + data = [] + if train_payload is not None: + 
data.append(train_payload) + if test_payload is not None: + data.append(test_payload) + if data: + __post_data(conn=conn, payload=json.dumps(data), headers=DATA_HEADER) diff --git a/edgefl/data/mnist/store_data_old.py b/edgefl/data/mnist/store_data_old.py new file mode 100644 index 0000000..203e506 --- /dev/null +++ b/edgefl/data/mnist/store_data_old.py @@ -0,0 +1,254 @@ +import argparse +import datetime + +import ast + +import os +import gzip + +import numpy as np +import struct +import requests +import json + + +FILE_PATH = os.path.expanduser(os.path.expandvars('$HOME/Anylog-Edgelake-Federated-Learning-Platform/edgefl/data/mnist/raw/t10k-labels-idx1-ubyte.gz')) + + +class NumpyEncoder(json.JSONEncoder): + """ Special json encoder for numpy types """ + def default(self, obj): + if isinstance(obj, np.integer): + return int(obj) + elif isinstance(obj, np.floating): + return float(obj) + elif isinstance(obj, np.ndarray): + return obj.tolist() + return json.JSONEncoder.default(self, obj) + + +def __validate_file(file_path:str): + """ + Validate image and label file exists and converted to full path + :args: + file_path: path to image file + :params: + full_path: full path to image file + :return: + full_path: full path to image file + """ + full_path = os.path.expanduser(os.path.expandvars(file_path)) + if not os.path.isfile(full_path): + raise IOError('File does not exist: {}'.format(full_path)) + return full_path + + +def __post_data(conn:str, payload:(list or str or dict), headers:dict): + """ + Execute POST command + :args: + conn: name of database connection + payload: list of tuples containing image data + headers: dictionary of headers + :params: + response:requests response object + """ + try: + response = requests.post(url=f"http://{conn}", data=payload, headers=headers) + response.raise_for_status() + except Exception as e: + raise Exception(f"Failed to execute POST against {conn} (Error: {e})") + + +def msg_client(conn:str, topic:str): + """ + Execute `run 
msg client` to accept data via POST + :args: + conn:str - REST connection string + topic:str - topic name + :params: + headers:dict - REST connection headers + """ + headers = { + "command": f"run msg client where broker=rest and user-agent=anylog and log=false and topic=(name={topic} and policy=mnist-mapping)", + "User-Agent": "AnyLog/1.23" + } + + __post_data(conn=conn, payload=None, headers=headers) + + +def create_policy(conn:str, db_name:str, table_name:str): + """ + Create mapping policy if DNE + :args: + conn:str - REST connection information + db_name:str - logical database name + table_name:str - logical table name + :params: + new_policy:dict - mapping policy + response:requests.Response - HTTP response + headers:dict - HTTP headers + """ + new_policy = { + "mapping": { + "id": "mnist-mapping", + "dbms": db_name, + "table": table_name, + "schema": { + "timestamp": { + "type": "timestamp", + "default": "now()", + "bring": "[timestamp]" + }, + "image_file": { + "type": "string", + "default": "", + "value": "[image_file_name]" + }, + "image": { + "blob": True, + "bring": "[image_content]", + "extension": "ubyte", + "apply": "opencv", + "hash": "md5", + "type": "varchar" + }, + "labels_file": { + "type": "string", + "default": "", + "value": "[labels_file_name]" + }, + "label": { + "blob": True, + "bring": "[label_content]", + "extension": "ubyte", + "apply": "opencv", + "hash": "md5", + "type": "varchar" + } + } + } + } + + try: + response = requests.get(url=f"http://{conn}", + headers={"command": "blockchain get mapping where id=mnist-mapping bring.count", + "User-Agent": "AnyLog/1.23"}) + response.raise_for_status() + + except Exception as e: + raise Exception(f"Failed to execute GET against {conn} (Error: {e})") + else: + response = ast.literal_eval(response.text) + + if len(response) == 0: + headers = { + "command": "blockchain insert where policy=!new_policy and local=true and master=!ledger_conn", + "User-Agent": "AnyLog/1.23", + } + payload = f"" + 
__post_data(conn=conn, payload=payload, headers=headers) + + + +def read_images(file_path)->np.ndarray: + """ + Given a file_path - generate numpy array for label(s) + :args: + file_path:str - file to read content from + :prarams: + images:numpy.ndarray - extract image from file + :return: + images + """ + images = None + try: + with gzip.open(file_path, 'rb') as f: + # Read the header: magic number, number of images, number of rows, number of columns + header = f.read(16) + magic, num_images, rows, cols = struct.unpack(">IIII", header) + + # Read image data as an array of 8-bit unsigned integers + image_data = np.frombuffer(f.read(), dtype=np.uint8) + + # Reshape into (num_images, rows, cols) + images = image_data.reshape(num_images, rows, cols) + except IOError: + raise IOError("Unable to read labels from file %s" % file_path) + + return images + + +def read_labels(file_path)->np.ndarray: + """ + Given a file_path - generate numpy array for label(s) + :args: + file_path:str - file to read content from + :prarams: + labels:numpy.ndarray - extract labels from file + :return: + labels + """ + labels = None + try: + with gzip.open(file_path, 'rb') as f: + # Read the header: magic number, number of labels + header = f.read(8) + magic, num_labels = struct.unpack(">II", header) + + # Read the label data (each label is an integer) + labels = np.frombuffer(f.read(), dtype=np.uint8) + except IOError: + raise IOError("Unable to read labels from file %s" % file_path) + + return labels + + +def create_payload(db_name:str, image_file_name:str, image_content:np.ndarray, labels_file_name:str, + labels_content:np.ndarray)->str: + payload = { + "dbms": db_name, + "table": "mnist_data", + "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + "image_file": image_file_name, + "image_content": image_content, + "labels_file_name": labels_file_name, + "label_content": labels_content, + } + + return json.dumps(payload, cls=NumpyEncoder) + + +def main(): + parse = 
argparse.ArgumentParser() + parse.add_argument('conn', type=str, default=None, help='REST connection information') + parse.add_argument('image_file', type=__validate_file, default=None, help='image gz file') + parse.add_argument('label_file', type=__validate_file, default=None, help='label gz file') + parse.add_argument('--db-name', type=str, default='my_db', help='logical database name') + parse.add_argument('--topic-name', type=str, default="mnist-mapping", help="logical topic name for msg client") + args = parse.parse_args() + + # create_policy(conn=args.conn, db_name=args.db_name, table_name="[table_name]") + # msg_client(conn=args.conn, topic=args.topic_name) + + image_content = read_images(args.image_file) + label_content = read_labels(args.label_file) + + content = create_payload(db_name=args.db_name, image_file_name=os.path.basename(args.image_file), + image_content=image_content, labels_file_name=os.path.basename(args.label_file), + labels_content=label_content) + + headers = { + 'command': 'data', + 'topic': args.topic_name, + 'User-Agent': 'AnyLog/1.23', + 'Content-Type': 'text/plain' + } + + __post_data(conn=args.conn, payload=content, headers=headers) + + + +if __name__ == '__main__': + main() + diff --git a/edgefl/data/winniio-rooms/publish_data.py b/edgefl/data/winniio-rooms/publish_data.py index 53bfbfc..71e4d97 100644 --- a/edgefl/data/winniio-rooms/publish_data.py +++ b/edgefl/data/winniio-rooms/publish_data.py @@ -1,119 +1,88 @@ import argparse -import csv import os.path -import ast import requests + +import pandas as pd +import numpy as np import json +from sklearn.model_selection import train_test_split +from tensorflow.python.ops.gen_experimental_dataset_ops import dataset_from_graph -def csv2dict(data_file:str)->list: - """ - Convert data in data_file into dictionary - :args: - data_file:str - file to read content from - :params: - csv_data:list - list of data in dictionary format from csv file - :return - csv_data:list - """ - csv_data = [] 
- try: - with open(data_file) as csvfile: - for row in csv.DictReader(csvfile): - csv_data.append(row) - except Exception as e: - raise Exception(f"Failed to extrapolate data from {data_file} (Error: {e})") - - return csv_data - - -def cleanup_data(csv_data:list)->list: - """ - Given csv data, clean it up by converting to correct data types and add logical database and table names - :args: - csv_data:list - list of data in dictionary format from csv file - db_name:str - name of database - table_name:str - name of table - process_type:str - type of process to use for cleanup - :return: - updated csv_data:list - """ - for i in range(len(csv_data)): - for column in csv_data[i].keys(): - try: - csv_data[i][column] = ast.literal_eval(csv_data[i][column]) - except ast.ExceptHandler: - # if fails ignore and keep as is - pass - - return csv_data - - -def put_data(conn:str, csv_data:list, db_name:str, table_name:str): - """ - Publish data via PUT - :args: - conn:str - REST connection string - csv_data:list - list of data in dictionary format from csv file - db_name:str - name of database - table_name:str - name of table - :params: - headers:dict - REST headers - payload:str - PUT payload - response:requests.Response - REST response - """ +def put_data(conn:str, db_name:str, table:str, payload): headers = { 'type': 'json', 'dbms': db_name, - 'table': table_name, + 'table': table, 'mode': 'streaming', 'Content-Type': 'text/plain' } + try: + for row in payload: + response = requests.put(url=f'http://{conn}', headers=headers, data=json.dumps(row)) + response.raise_for_status() + except Exception as error: + raise Exception - payload = json.dumps(csv_data) - try: - response = requests.put(f"http://{conn}", headers=headers, data=payload) - response.raise_for_status() - except Exception as e: - raise Exception(f"Failed to put data to {conn} (Error: {e})") - - -def main(): - """ - The following provides an example for publishing data into AnyLog/EdgeLake via REST PUT - used for 
Winni.io demo - Since the file name is room_12055.csv, I'm removing the .csv and keeping room_12055 as the table name - :positional arguments: - conn REST connection information - data_file comma seperated data files with path - options: - -h, --help show this help message and exit - --db-name DB_NAME logical database name - :params: - data_files:list - comma seperated data files with path - file_path:str - path to data file - csv_data:list - list of data in dictionary format from csv file - :sample-call: - python3.10 edgefl/data/publish_data.py 104.237.130.228:32149 edgefl/data/winniio-rooms/room_12055.csv --db-name new_company - """ + + +# Configuration +def read_file(file_path:str): + full_path = os.path.expanduser(os.path.expandvars(file_path)) + if not os.path.isfile(full_path): + raise FileNotFoundError + + dataset = pd.read_csv(full_path) + dataset['label'] = dataset['temperature'].shift(-2) + dataset.dropna(inplace=True) + + return dataset + + +def generate_data(dataset, train_split, test_split, num_rounds): + json_output = { + "train": [], + "test": [] + } + + df_train, df_test = train_test_split(dataset, train_size=train_split, test_size=test_split) + df_train_rounds = np.array_split(df_train, num_rounds) + df_test_rounds = np.array_split(df_test, num_rounds) + + for round_counter in range(1, num_rounds+1): + train_batch = df_train_rounds[round_counter - 1].copy() + train_batch['round_number'] = round_counter + train_batch['data_type'] = 'train' + + train_json = train_batch.to_dict(orient="records") + json_output["train"].append(train_json) + + test_batch = df_test_rounds[round_counter - 1].copy() + test_batch['round_number'] = round_counter + test_batch['data_type'] = 'test' + + test_json = test_batch.to_dict(orient="records") + json_output["test"].append(test_json) + + return json_output + + +def main(): parse = argparse.ArgumentParser() parse.add_argument('conn', type=str, default=None, help='REST connection information') - 
parse.add_argument('data_file', type=str, default=None, help='comma seperated data files with path') - parse.add_argument('--db-name', type=str, default='my_db', help='logical database name') + parse.add_argument('file_path', type=str, default=None, help='CSV file to pull data from') + parse.add_argument('--db-name', type=str, default='mnist', help='logical database name') + parse.add_argument('--num-rounds', type=int, default=5, help='') + parse.add_argument('--train-split', type=int, default=0.8, help='') + parse.add_argument('--test-split', type=int, default=0.2, help='') args = parse.parse_args() - data_files = args.data_file.split(',') # separate list of files - for data_file in data_files: - file_path = os.path.expanduser(os.path.expandvars(data_file)) - if not os.path.exists(file_path): - raise FIOError(f"File {file_path} does not exist") - csv_data = csv2dict(data_file=file_path) # read csv file - - if csv_data: - # update data in dict(s) to have proper data type (ex. '1.3' --> 1.3) - csv_data = cleanup_data(csv_data=csv_data) + dataset = read_file(file_path=args.file_path) + json_output = generate_data(dataset=dataset, train_split=args.train_split, test_split=args.test_split, num_rounds=args.num_rounds) - # publish data - put_data(conn=args.conn, csv_data=csv_data, db_name=args.db_name, table_name=os.path.basename(data_file).split('.')[0]) + table_name = args.file_path.rsplit('\\')[-1].rsplit('/', 1)[-1].split('.csv')[0] + for batch_type in json_output: + put_data(conn=args.conn, db_name=args.db_name, table=f'{table_name}_{batch_type}', payload=json_output[batch_type]) if __name__ == '__main__': diff --git a/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox-agg.env b/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox-agg.env new file mode 100644 index 0000000..9d51cb0 --- /dev/null +++ b/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox-agg.env @@ -0,0 +1,29 @@ + +### Note that paths are for Mac/Linux. 
If using Windows provide your Windows path +### an confirm that the other paths are correct. You may need to make '/' to '\' +GITHUB_DIR=/Users/roy/Github-Repos/EdgeFL + +TRAINING_APPLICATION_DIR=edgefl/platform_components/data_handlers +MODULE_NAME=ChestXraysBBoxDataHandler + +IMAGE_ROOT_DIR=edgefl/data/chest_xrays_bbox/raw/bbox_img + +TMP_DIR=edgefl/tmp_dir/ +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP="192.168.1.125:32049" +EXTERNAL_TCP_IP_PORT="192.168.1.125:32048" + +AGG_NAME=agg + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="chest_xrays_bbox_fl" +PSQL_DB_USER="demo" +PSQL_DB_PASSWORD="passwd" +PSQL_HOST="192.168.1.125" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="edgefl/file_write" + +EDGELAKE_DOCKER_RUNNING="True" +EDGELAKE_DOCKER_CONTAINER_NAME="master" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" \ No newline at end of file diff --git a/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox-db.env b/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox-db.env new file mode 100644 index 0000000..f0b0230 --- /dev/null +++ b/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox-db.env @@ -0,0 +1,21 @@ + +GITHUB_DIR=/Users/roy/Github-Repos/EdgeFL + +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP="192.168.1.125:32049" +EXTERNAL_TCP_IP_PORT="192.168.1.125:32048" + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="chest_xrays_bbox_fl" +PSQL_DB_USER="demo" +PSQL_DB_PASSWORD="passwd" +PSQL_HOST="127.0.0.1" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="/Users/roy/Github-Repos/Anylog-Edgelake-CSE115D/edgefl/file_write" + +EDGELAKE_DOCKER_RUNNING="False" +EDGELAKE_DOCKER_CONTAINER_NAME="edgelake-operator" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" + +DEBUGGER_ENABLED=True diff --git a/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox1.env b/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox1.env new file mode 100644 index 0000000..66fa713 --- /dev/null +++ b/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox1.env @@ -0,0 +1,28 @@ + +### Note that 
paths are for Mac/Linux. If using Windows provide your Windows path +### an confirm that the other paths are correct. You may need to make '/' to '\' +GITHUB_DIR=/Users/roy/Github-Repos/EdgeFL + +TRAINING_APPLICATION_DIR=edgefl/platform_components/data_handlers +MODULE_NAME=ChestXraysBBoxDataHandler + +IMAGE_ROOT_DIR=edgefl/data/chest_xrays_bbox/raw/bbox_img + +TMP_DIR=edgefl/tmp_dir/ +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP="192.168.1.125:32149" +EXTERNAL_TCP_IP_PORT="192.168.1.125:32148" + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="chest_xrays_bbox_fl" +PSQL_DB_USER="demo" +PSQL_DB_PASSWORD="passwd" +PSQL_HOST="192.168.1.125" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="edgefl/file_write" + +EDGELAKE_DOCKER_RUNNING="True" +EDGELAKE_DOCKER_CONTAINER_NAME="operator1" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" + diff --git a/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox2.env b/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox2.env new file mode 100644 index 0000000..be90be8 --- /dev/null +++ b/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox2.env @@ -0,0 +1,28 @@ + +### Note that paths are for Mac/Linux. If using Windows provide your Windows path +### an confirm that the other paths are correct. 
You may need to make '/' to '\' +GITHUB_DIR=/Users/roy/Github-Repos/EdgeFL + +TRAINING_APPLICATION_DIR=edgefl/platform_components/data_handlers +MODULE_NAME=ChestXraysBBoxDataHandler + +IMAGE_ROOT_DIR=edgefl/data/chest_xrays_bbox/raw/bbox_img + +TMP_DIR=edgefl/tmp_dir/ +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP="192.168.1.125:32249" +EXTERNAL_TCP_IP_PORT="192.168.1.125:32248" + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="chest_xrays_bbox_fl" +PSQL_DB_USER="demo" +PSQL_DB_PASSWORD="passwd" +PSQL_HOST="192.168.1.125" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="edgefl/file_write" + +EDGELAKE_DOCKER_RUNNING="True" +EDGELAKE_DOCKER_CONTAINER_NAME="operator2" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" + diff --git a/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox3.env b/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox3.env new file mode 100644 index 0000000..378f525 --- /dev/null +++ b/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox3.env @@ -0,0 +1,28 @@ + +### Note that paths are for Mac/Linux. If using Windows provide your Windows path +### an confirm that the other paths are correct. 
You may need to make '/' to '\' +GITHUB_DIR=/Users/roy/Github-Repos/EdgeFL + +TRAINING_APPLICATION_DIR=edgefl/platform_components/data_handlers +MODULE_NAME=ChestXraysBBoxDataHandler + +IMAGE_ROOT_DIR=edgefl/data/chest_xrays_bbox/raw/bbox_img + +TMP_DIR=edgefl/tmp_dir/ +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP="192.168.1.125:32349" +EXTERNAL_TCP_IP_PORT="192.168.1.125:32348" + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="chest_xrays_bbox_fl" +PSQL_DB_USER="demo" +PSQL_DB_PASSWORD="passwd" +PSQL_HOST="192.168.1.125" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="edgefl/file_write" + +EDGELAKE_DOCKER_RUNNING="True" +EDGELAKE_DOCKER_CONTAINER_NAME="operator3" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" + diff --git a/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox4.env b/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox4.env new file mode 100644 index 0000000..f21f671 --- /dev/null +++ b/edgefl/env_files/chest_xrays_bbox/chest_xrays_bbox4.env @@ -0,0 +1,27 @@ + +### Note that paths are for Mac/Linux. If using Windows provide your Windows path +### an confirm that the other paths are correct. 
You may need to make '/' to '\' +GITHUB_DIR=/Users/roy/Github-Repos/EdgeFL + +TRAINING_APPLICATION_DIR=edgefl/platform_components/data_handlers +MODULE_NAME=ChestXraysBBoxDataHandler + +IMAGE_ROOT_DIR=edgefl/data/chest_xrays_bbox/raw/bbox_img + +TMP_DIR=edgefl/tmp_dir/ +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP="192.168.1.125:32449" +EXTERNAL_TCP_IP_PORT="192.168.1.125:32448" + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="chest_xrays_bbox_fl" +PSQL_DB_USER="demo" +PSQL_DB_PASSWORD="passwd" +PSQL_HOST="192.168.1.125" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="edgefl/file_write" + +EDGELAKE_DOCKER_RUNNING="True" +EDGELAKE_DOCKER_CONTAINER_NAME="operator4" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" diff --git a/edgefl/env_files/mnist-docker/mnist-agg.env b/edgefl/env_files/mnist-docker/mnist-agg.env new file mode 100644 index 0000000..a1127f3 --- /dev/null +++ b/edgefl/env_files/mnist-docker/mnist-agg.env @@ -0,0 +1,30 @@ + + + + +GITHUB_DIR=/app/edgefl/ + +TRAINING_APPLICATION_DIR=platform_components/data_handlers +MODULE_NAME=MnistDataHandler + +PORT=8080 +SERVER_TYPE=aggregator + +TMP_DIR=tmp_dir/agg +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP="10.0.0.147:32049" +EXTERNAL_TCP_IP_PORT="10.0.0.147:32048" + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="mnist_fl" +PSQL_DB_USER="demo" +PSQL_DB_PASSWORD="passwd" +PSQL_HOST="10.0.0.147" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="file_write" +AGG_NAME=agg + +EDGELAKE_DOCKER_RUNNING="True" +EDGELAKE_DOCKER_CONTAINER_NAME=master +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" \ No newline at end of file diff --git a/edgefl/env_files/mnist-docker/mnist-db.env b/edgefl/env_files/mnist-docker/mnist-db.env new file mode 100644 index 0000000..d322d58 --- /dev/null +++ b/edgefl/env_files/mnist-docker/mnist-db.env @@ -0,0 +1,18 @@ +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP="192.168.1.125:32049" +EXTERNAL_TCP_IP_PORT="192.168.1.125:32048" + +# LOCAL PSQL DB NAME 
+PSQL_DB_NAME="mnist_fl" +PSQL_DB_USER="roy" +PSQL_DB_PASSWORD="" +PSQL_HOST="127.0.0.1" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="/Users/roy/Github-Repos/Anylog-Edgelake-CSE115D/edgefl/file_write" + +EDGELAKE_DOCKER_RUNNING="False" +EDGELAKE_DOCKER_CONTAINER_NAME="edgelake-operator" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" + +DEBUGGER_ENABLED=True diff --git a/edgefl/env_files/mnist-docker/mnist1.env b/edgefl/env_files/mnist-docker/mnist1.env new file mode 100644 index 0000000..81e49bf --- /dev/null +++ b/edgefl/env_files/mnist-docker/mnist1.env @@ -0,0 +1,27 @@ + + + +GITHUB_DIR=/app/edgefl + +TRAINING_APPLICATION_DIR=platform_components/data_handlers +MODULE_NAME=MnistDataHandler + +PORT=8081 +SERVER_TYPE=node +TMP_DIR=tmp_dir/node1 +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP="10.0.0.147:32149" +EXTERNAL_TCP_IP_PORT="10.0.0.147:32148" + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="mnist_fl" +PSQL_DB_USER="demo" +PSQL_DB_PASSWORD="passwd" +PSQL_HOST="10.0.0.147" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="file_write" + +EDGELAKE_DOCKER_RUNNING="True" +EDGELAKE_DOCKER_CONTAINER_NAME="operator1" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" diff --git a/edgefl/env_files/mnist-docker/mnist2.env b/edgefl/env_files/mnist-docker/mnist2.env new file mode 100644 index 0000000..b0ab18f --- /dev/null +++ b/edgefl/env_files/mnist-docker/mnist2.env @@ -0,0 +1,26 @@ + + +GITHUB_DIR=/app/edgefl + +TRAINING_APPLICATION_DIR=platform_components/data_handlers +MODULE_NAME=MnistDataHandler + +PORT=8082 +SERVER_TYPE=node +TMP_DIR=tmp_dir/node2 +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP="10.0.0.147:32159" +EXTERNAL_TCP_IP_PORT="10.0.0.147:32158" + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="mnist_fl" +PSQL_DB_USER="demo" +PSQL_DB_PASSWORD="passwd" +PSQL_HOST="10.0.0.147" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="file_write" + +EDGELAKE_DOCKER_RUNNING="True" +EDGELAKE_DOCKER_CONTAINER_NAME="operator2" 
+DOCKER_FILE_WRITE_DESTINATION="/app/file_write" diff --git a/edgefl/env_files/mnist-docker/mnist3.env b/edgefl/env_files/mnist-docker/mnist3.env new file mode 100644 index 0000000..48905d3 --- /dev/null +++ b/edgefl/env_files/mnist-docker/mnist3.env @@ -0,0 +1,26 @@ + + +GITHUB_DIR=/app/edgefl + +TRAINING_APPLICATION_DIR=platform_components/data_handlers +MODULE_NAME=MnistDataHandler + +PORT=8083 +SERVER_TYPE=node +TMP_DIR=tmp_dir/node3 +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP="10.0.0.147:32169" +EXTERNAL_TCP_IP_PORT="10.0.0.147:32168" + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="mnist_fl" +PSQL_DB_USER="demo" +PSQL_DB_PASSWORD="passwd" +PSQL_HOST="10.0.0.147" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="file_write" + +EDGELAKE_DOCKER_RUNNING="True" +EDGELAKE_DOCKER_CONTAINER_NAME="operator3" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" diff --git a/edgefl/env_files/mnist-docker/mnist4.env b/edgefl/env_files/mnist-docker/mnist4.env new file mode 100644 index 0000000..78d97bd --- /dev/null +++ b/edgefl/env_files/mnist-docker/mnist4.env @@ -0,0 +1,26 @@ + + +GITHUB_DIR=/app/edgefl + +TRAINING_APPLICATION_DIR=platform_components/data_handlers +MODULE_NAME=MnistDataHandler + +PORT=8084 +SERVER_TYPE=node +TMP_DIR=tmp_dir/node4 +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP="192.168.1.148:32049" +EXTERNAL_TCP_IP_PORT="104.60.100.77:32048" + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="mnist_fl" +PSQL_DB_USER="demo" +PSQL_DB_PASSWORD="passwd" +PSQL_HOST="192.168.1.148" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="edgefl/file_write" + +EDGELAKE_DOCKER_RUNNING="True" +EDGELAKE_DOCKER_CONTAINER_NAME="operator4" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" diff --git a/edgefl/env_files/mnist/mnist-agg.env b/edgefl/env_files/mnist/mnist-agg.env index 35f996e..ca93a8a 100644 --- a/edgefl/env_files/mnist/mnist-agg.env +++ b/edgefl/env_files/mnist/mnist-agg.env @@ -1,17 +1,28 @@ +### Note that paths are for Mac/Linux. 
If using Windows provide your Windows path +### an confirm that the other paths are correct. You may need to make '/' to '\' GITHUB_DIR=/Users/roy/Github-Repos/Anylog-Edgelake-CSE115D -TRAINING_APPLICATION_PATH=edgefl/platform_components/data_handlers/custom_data_handler.py +TRAINING_APPLICATION_DIR=edgefl/platform_components/data_handlers MODULE_NAME=MnistDataHandler - +TMP_DIR=edgefl/tmp_dir/ # External IP Address for CURL commands to Edgelake EXTERNAL_IP="192.168.1.125:32049" EXTERNAL_TCP_IP_PORT="192.168.1.125:32048" +AGG_NAME=agg + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="mnist_fl" +PSQL_DB_USER="demo" +PSQL_DB_PASSWORD="passwd" +PSQL_HOST="192.168.56.1" +PSQL_PORT="5432" + FILE_WRITE_DESTINATION="edgefl/file_write" EDGELAKE_DOCKER_RUNNING="True" EDGELAKE_DOCKER_CONTAINER_NAME="master" -DOCKER_FILE_WRITE_DESTINATION="/app/file_write" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" \ No newline at end of file diff --git a/edgefl/env_files/mnist/mnist1.env b/edgefl/env_files/mnist/mnist1.env index 07ae40a..5c0e084 100644 --- a/edgefl/env_files/mnist/mnist1.env +++ b/edgefl/env_files/mnist/mnist1.env @@ -1,24 +1,25 @@ - +### Note that paths are for Mac/Linux. If using Windows provide your Windows path +### an confirm that the other paths are correct. 
You may need to make '/' to '\' GITHUB_DIR=/Users/roy/Github-Repos/Anylog-Edgelake-CSE115D -TRAINING_APPLICATION_PATH=edgefl/platform_components/data_handlers/custom_data_handler.py +TRAINING_APPLICATION_DIR=edgefl/platform_components/data_handlers MODULE_NAME=MnistDataHandler - +TMP_DIR=edgefl/tmp_dir/ # External IP Address for CURL commands to Edgelake EXTERNAL_IP="192.168.1.125:32149" EXTERNAL_TCP_IP_PORT="192.168.1.125:32148" # LOCAL PSQL DB NAME -PSQL_DB_NAME="mnist_fl" -PSQL_DB_USER="demo" -PSQL_DB_PASSWORD="passwd" -PSQL_HOST="192.168.1.125" -PSQL_PORT="5432" +PSQL_DB_NAME=mnist_fl +PSQL_DB_USER=demo +PSQL_DB_PASSWORD=passwd +PSQL_HOST=192.168.56.1 +PSQL_PORT=5432 FILE_WRITE_DESTINATION="edgefl/file_write" EDGELAKE_DOCKER_RUNNING="True" EDGELAKE_DOCKER_CONTAINER_NAME="operator1" -DOCKER_FILE_WRITE_DESTINATION="/app/file_write" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" \ No newline at end of file diff --git a/edgefl/env_files/mnist/mnist2.env b/edgefl/env_files/mnist/mnist2.env index b0a43e1..025fbf8 100644 --- a/edgefl/env_files/mnist/mnist2.env +++ b/edgefl/env_files/mnist/mnist2.env @@ -1,11 +1,13 @@ +### Note that paths are for Mac/Linux. If using Windows provide your Windows path +### an confirm that the other paths are correct. 
You may need to make '/' to '\' GITHUB_DIR=/Users/roy/Github-Repos/Anylog-Edgelake-CSE115D -TRAINING_APPLICATION_PATH=edgefl/platform_components/data_handlers/custom_data_handler.py +TRAINING_APPLICATION_DIR=edgefl/platform_components/data_handlers MODULE_NAME=MnistDataHandler - +TMP_DIR=edgefl/tmp_dir/ # External IP Address for CURL commands to Edgelake EXTERNAL_IP="192.168.1.125:32249" EXTERNAL_TCP_IP_PORT="192.168.1.125:32248" @@ -14,11 +16,11 @@ EXTERNAL_TCP_IP_PORT="192.168.1.125:32248" PSQL_DB_NAME="mnist_fl" PSQL_DB_USER="demo" PSQL_DB_PASSWORD="passwd" -PSQL_HOST="192.168.1.125" +PSQL_HOST="172.30.176.90" PSQL_PORT="5432" FILE_WRITE_DESTINATION="edgefl/file_write" EDGELAKE_DOCKER_RUNNING="True" EDGELAKE_DOCKER_CONTAINER_NAME="operator2" -DOCKER_FILE_WRITE_DESTINATION="/app/file_write" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" \ No newline at end of file diff --git a/edgefl/env_files/mnist/mnist3.env b/edgefl/env_files/mnist/mnist3.env index 78d01ef..3f0df9e 100644 --- a/edgefl/env_files/mnist/mnist3.env +++ b/edgefl/env_files/mnist/mnist3.env @@ -1,11 +1,13 @@ +### Note that paths are for Mac/Linux. If using Windows provide your Windows path +### an confirm that the other paths are correct. 
You may need to make '/' to '\' GITHUB_DIR=/Users/roy/Github-Repos/Anylog-Edgelake-CSE115D -TRAINING_APPLICATION_PATH=edgefl/platform_components/data_handlers/custom_data_handler.py +TRAINING_APPLICATION_DIR=edgefl/platform_components/data_handlers MODULE_NAME=MnistDataHandler - +TMP_DIR=edgefl/tmp_dir # External IP Address for CURL commands to Edgelake EXTERNAL_IP="192.168.1.125:32349" EXTERNAL_TCP_IP_PORT="192.168.1.125:32348" @@ -14,11 +16,11 @@ EXTERNAL_TCP_IP_PORT="192.168.1.125:32348" PSQL_DB_NAME="mnist_fl" PSQL_DB_USER="demo" PSQL_DB_PASSWORD="passwd" -PSQL_HOST="192.168.1.125" +PSQL_HOST="172.30.176.90" PSQL_PORT="5432" FILE_WRITE_DESTINATION="edgefl/file_write" EDGELAKE_DOCKER_RUNNING="True" EDGELAKE_DOCKER_CONTAINER_NAME="operator3" -DOCKER_FILE_WRITE_DESTINATION="/app/file_write" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" \ No newline at end of file diff --git a/edgefl/env_files/mnist/mnist4.env b/edgefl/env_files/mnist/mnist4.env index 742e3ad..695aa66 100644 --- a/edgefl/env_files/mnist/mnist4.env +++ b/edgefl/env_files/mnist/mnist4.env @@ -1,8 +1,8 @@ -GITHUB_DIR=/home/roy/Anylog-Edgelake-Federated-Learning-Platform +GITHUB_DIR=/Users/roy/Github-Repos/Anylog-Edgelake-CSE115D -TRAINING_APPLICATION_PATH=edgefl/platform_components/data_handlers/custom_data_handler.py +TRAINING_APPLICATION_DIR=edgefl/platform_components/data_handlers MODULE_NAME=MnistDataHandler diff --git a/edgefl/env_files/winniio-docker/winniio-agg.env b/edgefl/env_files/winniio-docker/winniio-agg.env new file mode 100644 index 0000000..6af042d --- /dev/null +++ b/edgefl/env_files/winniio-docker/winniio-agg.env @@ -0,0 +1,26 @@ +GITHUB_DIR=/app/edgefl/ + +TRAINING_APPLICATION_DIR=platform_components/data_handlers +MODULE_NAME=WinniioDataHandler + +PORT=8080 +SERVER_TYPE=aggregator +TMP_DIR=tmp_dir/agg + +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP="66.175.223.243:32049" +EXTERNAL_TCP_IP_PORT="66.175.223.243:32048" + +# LOCAL PSQL DB NAME 
+PSQL_DB_NAME="mnist_fl" +PSQL_DB_USER="admin" +PSQL_DB_PASSWORD="demo" +PSQL_HOST="66.175.223.243" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="file_write" +AGG_NAME=agg + +EDGELAKE_DOCKER_RUNNING="True" +EDGELAKE_DOCKER_CONTAINER_NAME=master +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" \ No newline at end of file diff --git a/edgefl/env_files/winniio-docker/winniio-db.env b/edgefl/env_files/winniio-docker/winniio-db.env new file mode 100644 index 0000000..9eb695c --- /dev/null +++ b/edgefl/env_files/winniio-docker/winniio-db.env @@ -0,0 +1,25 @@ + +GITHUB_DIR=$HOME/Anylog-Edgelake-Federated-Learning-Platform + +TRAINING_APPLICATION_DIR=edgefl/platform_components/data_handlers +MODULE_NAME=WinniioDataHandler + + +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP="10.0.0.147:32049" +EXTERNAL_TCP_IP_PORT="10.0.0.147:32048" + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="mnist_fl" +PSQL_DB_USER="demo" +PSQL_DB_PASSWORD="passwd" +PSQL_HOST="192.168.1.125" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="edgefl/file_write" + +EDGELAKE_DOCKER_RUNNING="False" +EDGELAKE_DOCKER_CONTAINER_NAME="operator1" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" + +DEBUGGER_ENABLED=True diff --git a/edgefl/env_files/winniio-docker/winniio1.env b/edgefl/env_files/winniio-docker/winniio1.env new file mode 100644 index 0000000..95065ec --- /dev/null +++ b/edgefl/env_files/winniio-docker/winniio1.env @@ -0,0 +1,36 @@ +GITHUB_DIR=/app/edgefl + +TRAINING_APPLICATION_DIR=platform_components/data_handlers +MODULE_NAME=WinniioDataHandler + +PORT=8081 +SERVER_TYPE=node +TMP_DIR=tmp_dir/node1 + +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP="97.107.131.74:32149" +EXTERNAL_TCP_IP_PORT="97.107.131.74:32148" + +# node that system_query resides on +QUERY_NODE_URL="97.107.131.74:32149" +# Edge Node containing data +EDGE_NODE_URL="97.107.131.74:32148" +# Logical database name +LOGICAL_DATABASE=mnist_fl +# Table containing trained data +TRAIN_TABLE=room_12004_train +# Table 
containing test data +TEST_TABLE=room_12004_test + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="mnist_fl" +PSQL_DB_USER="admin" +PSQL_DB_PASSWORD="demo" +PSQL_HOST="97.107.131.74" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="file_write" + +EDGELAKE_DOCKER_RUNNING="True" +EDGELAKE_DOCKER_CONTAINER_NAME="operator1" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" diff --git a/edgefl/env_files/winniio-docker/winniio2.env b/edgefl/env_files/winniio-docker/winniio2.env new file mode 100644 index 0000000..89b3ee2 --- /dev/null +++ b/edgefl/env_files/winniio-docker/winniio2.env @@ -0,0 +1,36 @@ +GITHUB_DIR=/app/edgefl + +TRAINING_APPLICATION_DIR=platform_components/data_handlers +MODULE_NAME=WinniioDataHandler + +PORT=808 +SERVER_TYPE=node +TMP_DIR=tmp_dir/node2 + +# External IP Address for CURL commands to Edgelake +EXTERNAL_IP="172.232.182.97:32159" +EXTERNAL_TCP_IP_PORT="172.232.182.97:32158" + +# node that system_query resides on +QUERY_NODE_URL="172.232.182.97:32159" +# Edge Node containing data +EDGE_NODE_URL="172.232.182.97:32158" +# Logical database name +LOGICAL_DATABASE=mnist_fl +# Table containing trained data +TRAIN_TABLE=room_12055_train +# Table containing test data +TEST_TABLE=room_12055_test + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="mnist_fl" +PSQL_DB_USER="admin" +PSQL_DB_PASSWORD="demo" +PSQL_HOST="172.232.182.97" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="file_write" + +EDGELAKE_DOCKER_RUNNING="True" +EDGELAKE_DOCKER_CONTAINER_NAME="operator2" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" diff --git a/edgefl/env_files/winniio-docker/winniio3.env b/edgefl/env_files/winniio-docker/winniio3.env new file mode 100644 index 0000000..dec6a8d --- /dev/null +++ b/edgefl/env_files/winniio-docker/winniio3.env @@ -0,0 +1,36 @@ +GITHUB_DIR=/app/edgefl + +TRAINING_APPLICATION_DIR=platform_components/data_handlers +MODULE_NAME=WinniioDataHandler + +PORT=8083 +SERVER_TYPE=node +TMP_DIR=tmp_dir/node3 + +# External IP Address for CURL commands to Edgelake 
+EXTERNAL_IP="172.105.28.42:32169" +EXTERNAL_TCP_IP_PORT="172.105.28.42:32168" + +# node that system_query resides on +QUERY_NODE_URL="172.105.28.42:32169" +# Edge Node containing data +EDGE_NODE_URL="172.105.28.42:32168" +# Logical database name +LOGICAL_DATABASE=mnist_fl +# Table containing trained data +TRAIN_TABLE=room_12090_train +# Table containing test data +TEST_TABLE=room_12090_test + +# LOCAL PSQL DB NAME +PSQL_DB_NAME="mnist_fl" +PSQL_DB_USER="admin" +PSQL_DB_PASSWORD="demo" +PSQL_HOST="172.105.28.42" +PSQL_PORT="5432" + +FILE_WRITE_DESTINATION="file_write" + +EDGELAKE_DOCKER_RUNNING="True" +EDGELAKE_DOCKER_CONTAINER_NAME="operator3" +DOCKER_FILE_WRITE_DESTINATION="/app/file_write" \ No newline at end of file diff --git a/edgefl/env_files/winniio/winniio-agg.env b/edgefl/env_files/winniio/winniio-agg.env index a641952..89ce2a1 100644 --- a/edgefl/env_files/winniio/winniio-agg.env +++ b/edgefl/env_files/winniio/winniio-agg.env @@ -1,17 +1,20 @@ -GITHUB_DIR=/Users/roy/Github-Repos/Anylog-Edgelake-CSE115D +GITHUB_DIR=$HOME/Anylog-Edgelake-Federated-Learning-Platform -TRAINING_APPLICATION_PATH=edgefl/platform_components/data_handlers/winniio_data_handler.py +TRAINING_APPLICATION_DIR=edgefl/platform_components/data_handlers MODULE_NAME=WinniioDataHandler - +SERVER_TYPE=aggregator # External IP Address for CURL commands to Edgelake -EXTERNAL_IP="192.168.1.125:32049" -EXTERNAL_TCP_IP_PORT="192.168.1.125:32048" +EXTERNAL_IP="10.0.0.147:32049" +EXTERNAL_TCP_IP_PORT="10.0.0.147:32048" FILE_WRITE_DESTINATION="edgefl/file_write" EDGELAKE_DOCKER_RUNNING="True" EDGELAKE_DOCKER_CONTAINER_NAME="master" DOCKER_FILE_WRITE_DESTINATION="/app/file_write" + +AGG_NAME=agg +TMP_DIR=edgefl/tmp_dir/ \ No newline at end of file diff --git a/edgefl/env_files/winniio/winniio-db.env b/edgefl/env_files/winniio/winniio-db.env index 972e93c..9eb695c 100644 --- a/edgefl/env_files/winniio/winniio-db.env +++ b/edgefl/env_files/winniio/winniio-db.env @@ -1,16 +1,16 @@ 
-GITHUB_DIR=/Users/roy/Github-Repos/Anylog-Edgelake-CSE115D +GITHUB_DIR=$HOME/Anylog-Edgelake-Federated-Learning-Platform -TRAINING_APPLICATION_PATH=edgefl/platform_components/data_handlers/custom_data_handler.py +TRAINING_APPLICATION_DIR=edgefl/platform_components/data_handlers MODULE_NAME=WinniioDataHandler # External IP Address for CURL commands to Edgelake -EXTERNAL_IP="192.168.1.125:32049" -EXTERNAL_TCP_IP_PORT="192.168.1.125:32048" +EXTERNAL_IP="10.0.0.147:32049" +EXTERNAL_TCP_IP_PORT="10.0.0.147:32048" # LOCAL PSQL DB NAME -PSQL_DB_NAME="winniio_fl" +PSQL_DB_NAME="mnist_fl" PSQL_DB_USER="demo" PSQL_DB_PASSWORD="passwd" PSQL_HOST="192.168.1.125" diff --git a/edgefl/env_files/winniio/winniio1.env b/edgefl/env_files/winniio/winniio1.env index cac15a9..99dbbc9 100644 --- a/edgefl/env_files/winniio/winniio1.env +++ b/edgefl/env_files/winniio/winniio1.env @@ -1,17 +1,17 @@ -GITHUB_DIR=/Users/roy/Github-Repos/Anylog-Edgelake-CSE115D +GITHUB_DIR=$HOME/Anylog-Edgelake-Federated-Learning-Platform -TRAINING_APPLICATION_PATH=edgefl/platform_components/data_handlers/winniio_data_handler.py +TRAINING_APPLICATION_DIR=edgefl/platform_components/data_handlers MODULE_NAME=WinniioDataHandler # External IP Address for CURL commands to Edgelake -EXTERNAL_IP="192.168.1.125:32149" -EXTERNAL_TCP_IP_PORT="192.168.1.125:32148" +EXTERNAL_IP="10.0.0.147:32149" +EXTERNAL_TCP_IP_PORT="10.0.0.147:32148" # LOCAL PSQL DB NAME -PSQL_DB_NAME="winniio_fl" +PSQL_DB_NAME="mnist_fl" PSQL_DB_USER="demo" PSQL_DB_PASSWORD="passwd" PSQL_HOST="192.168.1.125" diff --git a/edgefl/env_files/winniio/winniio2.env b/edgefl/env_files/winniio/winniio2.env index 6714e77..55f6f93 100644 --- a/edgefl/env_files/winniio/winniio2.env +++ b/edgefl/env_files/winniio/winniio2.env @@ -1,17 +1,17 @@ -GITHUB_DIR=/Users/roy/Github-Repos/Anylog-Edgelake-CSE115D +GITHUB_DIR=$HOME/Anylog-Edgelake-Federated-Learning-Platform -TRAINING_APPLICATION_PATH=edgefl/platform_components/data_handlers/winniio_data_handler.py 
+TRAINING_APPLICATION_DIR=edgefl/platform_components/data_handlers MODULE_NAME=WinniioDataHandler # External IP Address for CURL commands to Edgelake -EXTERNAL_IP="192.168.1.125:32249" -EXTERNAL_TCP_IP_PORT="192.168.1.125:32248" +EXTERNAL_IP="10.0.0.147:32159" +EXTERNAL_TCP_IP_PORT="10.0.0.147:32158" # LOCAL PSQL DB NAME -PSQL_DB_NAME="winniio_fl" +PSQL_DB_NAME="mnist_fl" PSQL_DB_USER="demo" PSQL_DB_PASSWORD="passwd" PSQL_HOST="192.168.1.125" @@ -20,5 +20,5 @@ PSQL_PORT="5432" FILE_WRITE_DESTINATION="edgefl/file_write" EDGELAKE_DOCKER_RUNNING="True" -EDGELAKE_DOCKER_CONTAINER_NAME="operator2" +EDGELAKE_DOCKER_CONTAINER_NAME="operator1" DOCKER_FILE_WRITE_DESTINATION="/app/file_write" diff --git a/edgefl/env_files/winniio/winniio3.env b/edgefl/env_files/winniio/winniio3.env index 3cb0dca..8d40073 100644 --- a/edgefl/env_files/winniio/winniio3.env +++ b/edgefl/env_files/winniio/winniio3.env @@ -1,17 +1,17 @@ -GITHUB_DIR=/Users/roy/Github-Repos/Anylog-Edgelake-CSE115D +GITHUB_DIR=$HOME/Anylog-Edgelake-Federated-Learning-Platform -TRAINING_APPLICATION_PATH=edgefl/platform_components/data_handlers/winniio_data_handler.py +TRAINING_APPLICATION_DIR=edgefl/platform_components/data_handlers MODULE_NAME=WinniioDataHandler # External IP Address for CURL commands to Edgelake -EXTERNAL_IP="192.168.1.125:32349" -EXTERNAL_TCP_IP_PORT="192.168.1.125:32348" +EXTERNAL_IP="10.0.0.147:32169" +EXTERNAL_TCP_IP_PORT="10.0.0.147:32168" # LOCAL PSQL DB NAME -PSQL_DB_NAME="winniio_fl" +PSQL_DB_NAME="mnist_fl" PSQL_DB_USER="demo" PSQL_DB_PASSWORD="passwd" PSQL_HOST="192.168.1.125" @@ -20,5 +20,5 @@ PSQL_PORT="5432" FILE_WRITE_DESTINATION="edgefl/file_write" EDGELAKE_DOCKER_RUNNING="True" -EDGELAKE_DOCKER_CONTAINER_NAME="operator3" +EDGELAKE_DOCKER_CONTAINER_NAME="operator1" DOCKER_FILE_WRITE_DESTINATION="/app/file_write" diff --git a/edgefl/mnist.npz b/edgefl/mnist.npz new file mode 100644 index 0000000..e7baa20 Binary files /dev/null and b/edgefl/mnist.npz differ diff --git 
a/edgefl/platform_components/EdgeLake_functions/blockchain_EL_functions.py b/edgefl/platform_components/EdgeLake_functions/blockchain_EL_functions.py index 2ab8974..20aa2a5 100644 --- a/edgefl/platform_components/EdgeLake_functions/blockchain_EL_functions.py +++ b/edgefl/platform_components/EdgeLake_functions/blockchain_EL_functions.py @@ -11,6 +11,9 @@ import socket import json +from requests import RequestException + + def insert_policy(el_url, policy): headers = { 'User-Agent': 'AnyLog/1.23', @@ -21,6 +24,16 @@ def insert_policy(el_url, policy): response = requests.post(el_url, headers=headers, data=policy) return response +# TODO: fix proper blockchain update command +# def update_policy(el_url, policy_id, policy): +# headers = { +# 'User-Agent': 'AnyLog/1.23', +# 'Content-Type': 'text/plain', +# 'command': f'blockchain update to master {policy_id} !my_policy' +# } +# response = requests.post(el_url, headers=headers, data=policy) +# return response + def delete_policy(el_url, policy_id): headers = { 'User-Agent': 'AnyLog/1.23', @@ -46,14 +59,13 @@ def check_policy_inserted(el_url, policy): response = requests.post(el_url, headers=headers, data=policy) - headers = { 'User-Agent': 'AnyLog/1.23', 'Content-Type': 'text/plain', 'command': 'get !my_policy' } - print(response.status_code) - + # print(f"check_policy_inserted: {response.status_code}") + # print(response.status_code) response = requests.get(el_url, headers=headers) retrieved_policy = json.loads(response.content.decode('utf-8')) @@ -63,6 +75,184 @@ def check_policy_inserted(el_url, policy): return False +def get_policies(el_url, policy_type='*', condition=None): + command = f'blockchain get {policy_type} {condition if condition else ""}' + headers = { + 'User-Agent': 'AnyLog/1.23', + 'Content-Type': 'text/plain', + 'command': command + } + response = requests.get(el_url, headers=headers) + if response.status_code != 200: + raise Exception(f"Request failed with status code {response.status_code}: 
{response.reason}. Command: {command}") + + data = response.json() # [{policy_name: {..., 'id': ..., ...}}] + policies = [] + for policy in data: + policies.append(policy[policy_type]) + return policies # [{'attr1': ..., 'attr2': ..., ...}, {'attr1': ..., 'attr2': ..., ...}, ...] + +def get_policies(el_url, policy_type='*', condition=None): + command = f'blockchain get {policy_type} {condition if condition else ""}' + headers = { + 'User-Agent': 'AnyLog/1.23', + 'Content-Type': 'text/plain', + 'command': command + } + response = requests.get(el_url, headers=headers) + if response.status_code != 200: + raise Exception(f"Request failed with status code {response.status_code}: {response.reason}. Command: {command}") + + data = response.json() # [{policy_name: {..., 'id': ..., ...}}] + policies = [] + for policy in data: + policies.append(policy[policy_type]) + return policies # [{'attr1': ..., 'attr2': ..., ...}, {'attr1': ..., 'attr2': ..., ...}, ...] + + +def get_policy_id_by_name(el_url, policy_name): + headers = { + 'User-Agent': 'AnyLog/1.23', + 'Content-Type': 'text/plain', + 'command': f'blockchain get {policy_name}' + } + response = requests.get(el_url, headers=headers) + data = response.json() + if not data: + return None + policy_id = data[0][policy_name]['id'] # [{policy_name: {..., 'id': ..., ...}}] + return policy_id + +def get_all_databases(edgelake_node_url): + """ + Gets all databases that the specified EdgeLake node is connected to. + + :param edgelake_node_url: The URL of the EdgeLake node to fetch the databases list. + :return: Set of all the connected databases. 
+ :rtype: set() + """ + command = "get databases" + headers = { + 'User-Agent': 'AnyLog/1.23', + 'command': command, + } + + try: + # Send the POST request + response = requests.get(edgelake_node_url, headers=headers) + + # Raise an HTTPError if the response code indicates failure + response.raise_for_status() + + # Parsing response content + response_string = response.content.decode('utf-8') # Initially in bytes + lines = response_string.strip().split('\r\n') + data_lines = lines[3:] + + database_names = set() + for line in data_lines: + parts = [part.strip() for part in line.split('|')] + if parts and parts[-1] == '': + parts = parts[:-1] + if len(parts) == 6: + database_names.add(parts[0]) + + return database_names + except requests.exceptions.RequestException as e: + raise RequestException(e) + + +def connect_to_db(edgelake_node_url, db_name, user, password, ip, port): + """ + Connect to the database on the given EdgeLake node. + + :param edgelake_node_url: The URL of the EdgeLake node with the database to connect. + :param command: The Anylog command to connect databases. + """ + # TODO: add db types if necessary + command = (f"connect dbms {db_name} where type = psql" + + f" and user = {user} and password = {password}" + + f" and ip = {ip} and port = {port} and memory = true") + + headers = { + 'User-Agent': 'AnyLog/1.23', + 'command': command, + } + try: + # Send the POST request + response = requests.post(edgelake_node_url, headers=headers) + + # Raise an HTTPError if the response code indicates failure + response.raise_for_status() + + except requests.exceptions.ConnectionError as e: + raise ConnectionError(f"Unable to connect to database: {e}") + +def get_all_databases(edgelake_node_url): + """ + Gets all databases that the specified EdgeLake node is connected to. + + :param edgelake_node_url: The URL of the EdgeLake node to fetch the databases list. + :return: Set of all the connected databases. 
+ :rtype: set() + """ + command = "get databases" + headers = { + 'User-Agent': 'AnyLog/1.23', + 'command': command, + } + + try: + # Send the POST request + response = requests.get(edgelake_node_url, headers=headers) + + # Raise an HTTPError if the response code indicates failure + response.raise_for_status() + + # Parsing response content + response_string = response.content.decode('utf-8') # Initially in bytes + lines = response_string.strip().split('\r\n') + data_lines = lines[3:] + + database_names = set() + for line in data_lines: + parts = [part.strip() for part in line.split('|')] + if parts and parts[-1] == '': + parts = parts[:-1] + if len(parts) == 6: + database_names.add(parts[0]) + + return database_names + except requests.exceptions.RequestException as e: + raise RequestException(e) + + +def connect_to_db(edgelake_node_url, db_name, user, password, ip, port): + """ + Connect to the database on the given EdgeLake node. + + :param edgelake_node_url: The URL of the EdgeLake node with the database to connect. + :param command: The Anylog command to connect databases. 
+ """ + # TODO: add db types if necessary + command = (f"connect dbms {db_name} where type = psql" + + f" and user = {user} and password = {password}" + + f" and ip = {ip} and port = {port} and memory = true") + + headers = { + 'User-Agent': 'AnyLog/1.23', + 'command': command, + } + try: + # Send the POST request + response = requests.post(edgelake_node_url, headers=headers) + + # Raise an HTTPError if the response code indicates failure + response.raise_for_status() + + except requests.exceptions.ConnectionError as e: + raise ConnectionError(f"Unable to connect to database: {e}") + def fetch_data_from_db(edgelake_node_url, query): """ diff --git a/edgefl/platform_components/EdgeLake_functions/blockchain_EL_functions_new.py b/edgefl/platform_components/EdgeLake_functions/blockchain_EL_functions_new.py new file mode 100644 index 0000000..50c51bf --- /dev/null +++ b/edgefl/platform_components/EdgeLake_functions/blockchain_EL_functions_new.py @@ -0,0 +1,146 @@ +import ast +import requests + + +# Basic AnyLog / EdgeLake commands - we could replace this with AnyLog-API\ +def __get_cmd(node_conn:str, command:str, destination:str=None): + """ + Execute GET commands against EdgeLake + - get data nodes + - blockchain get + - sql query (requires destination) + - etc. + :args: + node_conn:str - query node connection information + command:str - query to execute (sql [db_name] ... ) + destination:str - specific TCP connection info to send request to. 
If not provied, send request to network + :params: + headers:dict - REST header information + response:requests.get - REST request response + :return: + query result + """ + headers = { + 'command': command, + 'User-Agent': 'AnyLog/1.23' + } + + if destination: + headers['destination'] = destination + + try: + # Send the GET request + response = requests.get(url=node_conn, headers=headers) + + # Raise an HTTPError if the response code indicates failure + response.raise_for_status() + except requests.exceptions.RequestException as e: + raise IOError(f"Failed to execute SQL query: {e}") + except json.JSONDecodeError: + raise ValueError("The response from the request is not valid JSON.") + + try: + return response.json() + except requests.exceptions.JSONDecodeError: + try: + return ast.literal_eval(response.text) + except ValueError: + return response.text + + +def get_dbms_tables(query_node=str): + """ + From blockchain metdata (table policies) get logical daatabase and table name(s) + :args: + query_node:str - REST connection information to the Query node + :params: + db_tables:str - comma seperated list of tables + databases (db_name.table_name) + :return: + db_tables + :code-replace: + node_server.py - lines 91-96 + """ + blockchain_get = "blockchain get table bring [*][dbms] . 
[*][name] separator=," + db_tables = __get_cmd(node_conn=query_node, command=blockchain_get, destination=None) + return db_tables + + +def get_columns(query_node:str, db_name:str, table:str): + """ + get list of columns for a given table + :args: + query_node:str - REST connection information to the Query node + db_name:str - logical database name + table:str - logical table name + :params: + command:str - command to execute + columns:list - list of columns in table + :return: + columns + """ + command = f"get columns where dbms={db_name} and table={table} and format=json" + output = __get_cmd(node_conn=query_node, command=command, destination=None) + # remove EdgeLake defined columns + columns = [x for x in output.keys() if x not in ['row_id', 'insert_timestamp', 'tsd_name', 'tsd_id']] + return columns + +def check_tables_and_databases(query_node:str): + """ + Using blockchain metadata, check whether tables actually contain data + :args: + query_node:str - REST connection information to the Query node + :params: + db_tables:str - comma seperated list of tables + databases (db_name.table_name) + query:str - SQL query + :return: + if Success returns True, when data DNE raises ValueError + :code-replace: + node_server.py - lines 99-102 + """ + db_tables = get_dbms_tables(query_node=query_node) + query = f"sql %s format=json and stat=false select count(*) as count from %s" + + for db_table in db_tables.split(','): + db_name, table_name = db_table.split('.') + output = __get_cmd(node_conn=query_node, command=query % (db_name, table_name), destination='network') + try: + result = output['Query'][0] + if 'count' not in result or int(result['count']) <= 0: + raise ValueError(f"No data found in {db_table}.") + except Exception: + raise ValueError(f"No data found in {db_table}.") + + return True + + +def sample_queries(query_node:str, db_name:str, table_name:str, destination:str='network'): + """ + Query data basing on the columns in table + :args: + query_node:str - REST 
connection information to the Query node + db_name:str - logical database name + table:str - logical table name + destination:str - specific TCP connection info to send request to. If not provied, send request to network + :params: + columns:;list - list of columns in a given database.table + query:str - query to execute + output:str - raw result from query + result:list - extracted result from query + + :print: + result + :code-replace: + get_all_test_data function + """ + columns = get_columns(query_node=query_node, db_name=db_name, table=table_name) + query = f"sql {db_name} format=json and stat=false select {','.join(columns)} FROM {table_name} LIMIT 10" + output = __get_cmd(node_conn=query_node, command=query, destination=destination) + try: + result = output['Query'] + except Exception: + raise ValueError(f"No data found in {db_table}.") + + print(result) + +# check_tables_and_databases(query_node='http://10.0.0.147:32049') +sample_queries(query_node='http://10.0.0.147:32049', db_name='mnist_fl', table_name='room_12004_train', destination='network') \ No newline at end of file diff --git a/edgefl/platform_components/EdgeLake_functions/mongo_file_store.py b/edgefl/platform_components/EdgeLake_functions/mongo_file_store.py index 85b918a..0b84632 100644 --- a/edgefl/platform_components/EdgeLake_functions/mongo_file_store.py +++ b/edgefl/platform_components/EdgeLake_functions/mongo_file_store.py @@ -24,11 +24,12 @@ def write_file(edgelake_node_url, dbms, table, filename): 'User-Agent': 'AnyLog/1.23', 'Content-Type': 'application/octet-stream', # Specify binary content 'command': f'file store where dbms = {dbms} and table = {table} and dest = {filename.split("/")[-1]}' + # 'command': f'file store where dbms = {dbms} and table = {table} and dest = {filename}' } with open(filename, 'rb') as f: binary_data = f.read() - response = requests.post(edgelake_node_url, headers=headers, data=binary_data) + response = requests.post(edgelake_node_url, headers=headers, 
data=binary_data, verify=False) return response @@ -53,7 +54,7 @@ def create_directory_in_container(container_name, directory_path): print(f"Failed to create directory. Error: {output.decode('utf-8')}") -def copy_file_to_container(container_name, src_path, dest_path): +def copy_file_to_container(tmp_dir, container_name, src_path, dest_path): """ Copies a file from the host to a container. @@ -65,50 +66,28 @@ def copy_file_to_container(container_name, src_path, dest_path): container = client.containers.get(container_name) # Create a tar archive for the file - with tarfile.open("/tmp/temp.tar", mode="w") as tar: + with tarfile.open(f"{tmp_dir}/temp.tar", mode="w") as tar: tar.add(src_path, arcname=os.path.basename(src_path)) # Open the tar file and send it to the container - with open("/tmp/temp.tar", "rb") as tar_file: + with open(f"{tmp_dir}/temp.tar", "rb") as tar_file: container.put_archive(os.path.dirname(dest_path), tar_file) # Clean up the temporary tar file - os.remove("/tmp/temp.tar") + os.remove(f"{tmp_dir}/temp.tar") # print(f"Copied {src_path} to {container_name}:{dest_path}") # print(f"Received model params") - -def file_exists_in_container(container_name, file_path): +def copy_file_from_container(tmp_dir, container_name, src_path, dest_path): """ - Checks if a file exists inside a Docker container. + Copies a file from a container to the host machine. :param container_name: Name or ID of the container - :param file_path: Path to the file inside the container - :return: True if file exists, False otherwise - """ - client = docker.from_env() - container = client.containers.get(container_name) - - exec_result = container.exec_run(f"ls {file_path}") - - return not "No such file or directory" in exec_result.output.decode() - - -def copy_file_from_container(container_name, src_path, dest_path, max_retries=5, wait_time=1): - """ - Copies a file from a Docker container to the host machine and ensures it is fully copied. 
- - :param container_name: Name or ID of the Docker container :param src_path: Path of the source file inside the container - :param dest_path: Destination path on the host - :param max_retries: Number of retries for file verification - :param wait_time: Time (seconds) to wait between retries - :return: True if successful, False otherwise + :param dest_path: Destination path on the host (directory or full path) """ client = docker.from_env() container = client.containers.get(container_name) - temp_tar_path = "/tmp/temp_copy.tar" - try: # Step 1: Get file size inside the container for verification exec_result = container.exec_run(f"stat -c %s {src_path}") @@ -117,48 +96,43 @@ def copy_file_from_container(container_name, src_path, dest_path, max_retries=5, return False container_file_size = int(exec_result.output.decode().strip()) - # Step 2: Retrieve file as tar stream + # Step 2: Get the file from the container as a tar stream tar_stream, _ = container.get_archive(src_path) - # Step 3: Write tar stream to a temp file - with open(temp_tar_path, "wb") as temp_tar: + # Step 3: Extract the tar stream to the host + with open(f"{tmp_dir}/temp.tar", "wb") as temp_tar: for chunk in tar_stream: temp_tar.write(chunk) temp_tar.flush() - os.fsync(temp_tar.fileno()) # Ensure data is physically written to disk + os.fsync(temp_tar.fileno()) # Ensure data is physically written to disk # Step 4: Extract tar file - with tarfile.open(temp_tar_path, "r") as tar: - extracted_files = tar.getnames() - tar.extractall(path=os.path.dirname(dest_path)) + with tarfile.open(f"{tmp_dir}/temp.tar", mode="r") as tar: + # Extract the file to the desired host location + tar.extractall(path=os.path.dirname(dest_path), filter='fully_trusted') - # Step 5: Move extracted file to the final destination - extracted_file = os.path.join(os.path.dirname(dest_path), extracted_files[0]) - os.rename(extracted_file, dest_path) + # Step 5: Move extracted file to final destination + extracted_file = 
os.path.join(os.path.dirname(dest_path), os.path.basename(src_path)) + os.rename(extracted_file, dest_path) # rename file - # Step 6: Verify file integrity by checking size - for _ in range(max_retries): + # Step 6: Verify file integrity + for _ in range(5): if os.path.exists(dest_path) and os.path.getsize(dest_path) == container_file_size: print(f"✅ Successfully copied {src_path} from {container_name} to {dest_path}") - os.remove(temp_tar_path) # Cleanup temporary tar file + os.remove(f"{tmp_dir}/temp.tar") # Cleanup temporary tar file return True - time.sleep(wait_time) - + time.sleep(0.5) print( f"❌ Verification failed: Expected {container_file_size} bytes, Got {os.path.getsize(dest_path) if os.path.exists(dest_path) else 'Missing'}") - return False - except Exception as e: print(f"❌ Error: {e}") - return False - finally: - # Clean up temp tar file if it exists - if os.path.exists(temp_tar_path): - os.remove(temp_tar_path) + # Clean up the temporary tar file + if os.path.exists(f"{tmp_dir}/temp.tar"): + os.remove(f"{tmp_dir}/temp.tar") -def read_file(edgelake_node_url, file_path, dest, ip_port, container_name = None): +def read_file(edgelake_node_url, file_path, dest, ip_port): filename = file_path.split('/')[-1] headers = { 'User-Agent': 'AnyLog/1.23', @@ -171,13 +145,7 @@ def read_file(edgelake_node_url, file_path, dest, ip_port, container_name = None # print(f"FILE GET COMMAND: headers: {headers['command']}") try: response = requests.post(edgelake_node_url, headers=headers, data='') - if response.status_code == 200: - if container_name: - while not file_exists_in_container(container_name, dest): - time.sleep(3) - return response - else: - raise + return response except: errno, value = sys.exc_info()[:2] print(f'Error: {errno}: {value}') @@ -185,11 +153,6 @@ def read_file(edgelake_node_url, file_path, dest, ip_port, container_name = None def read_file_mongo(edgelake_node_url, dbms, table, filename, dest, ip_port): - # headers = { - # 'User-Agent': 
'AnyLog/1.23', - # 'Content-Type': 'text/plain', - # 'command': f'file retrieve where dbms = {dbms} and table = {table} and id = {filename} and dest = {dest}' - # } headers = { 'User-Agent': 'AnyLog/1.23', @@ -209,8 +172,10 @@ def read_file_mongo(edgelake_node_url, dbms, table, filename, dest, ip_port): if __name__ == '__main__': - file_write_destination = os.getenv("FILE_WRITE_DESTINATION") - # write_file(f"http://192.168.1.118:32049", "blobs_admin", "my_table", - # f"{file_write_destination}/node1/1-replica-node1.pkl") - response = read_file(f"http://192.168.1.118:32048", "blobs_winniio_fl", "node_model_updates", "1-replica-node3.pkl", f"{file_write_destination}/aggregator/1-replica-node1.pkl", "192.53.121.36:32148") + # file_write_destination = os.getenv("FILE_WRITE_DESTINATION") + response = write_file(f"http://192.168.1.125:32049", "blobs_mydb", "admin", + f"/Users/roy/test2.txt") print(response.status_code) + # the last ip_port variable is the node in which you want to get the file from. 
edgelake_node_url is the node that acts as your gateway to the anylog network + # after the below command, test2.txt should be in /Users/roy/new_dir + response = read_file_mongo(f"http://192.168.1.125:32049", "blobs_mydb", "admin", "test2.txt", f"/Users/roy/my_file2.txt", "192.168.1.125:32049") diff --git a/edgefl/platform_components/aggregator/aggregator.py b/edgefl/platform_components/aggregator/aggregator.py index fc98dcd..f190717 100644 --- a/edgefl/platform_components/aggregator/aggregator.py +++ b/edgefl/platform_components/aggregator/aggregator.py @@ -11,83 +11,252 @@ import numpy as np import requests import pickle +from threading import Lock from dotenv import load_dotenv from platform_components.EdgeLake_functions.mongo_file_store import copy_file_to_container, create_directory_in_container from platform_components.EdgeLake_functions.blockchain_EL_functions import insert_policy, \ - check_policy_inserted + check_policy_inserted, delete_policy, get_policy_id_by_name, get_policies from platform_components.EdgeLake_functions.mongo_file_store import read_file, write_file, copy_file_from_container from platform_components.lib.modules.local_model_update import LocalModelUpdate from platform_components.helpers.LoadClassFromFile import load_class_from_file -CONTRACT_ADDRESS = os.getenv('CONTRACT_ADDRESS') load_dotenv() class Aggregator: - def __init__(self, ip, port): + def __init__(self, ip, port, logger): self.github_dir = os.getenv('GITHUB_DIR') - self.file_write_destination = os.path.join(self.github_dir, os.getenv("FILE_WRITE_DESTINATION")) + # self.module_name = os.getenv('MODULE_NAME') + self.edgelake_node_url = f'http://{os.getenv("EXTERNAL_IP")}' + self.edgelake_tcp_node_ip_port = f'{os.getenv("EXTERNAL_TCP_IP_PORT")}' + self.training_app_dir = os.getenv('TRAINING_APPLICATION_DIR') + + self.agg_name = os.getenv("AGG_NAME") + self.server_ip = ip self.server_port = port - # Initialize Firebase database connection - self.database_url = 
os.getenv('DATABASE_URL') - # init training application class reference - training_app_path = os.path.join(self.github_dir, os.getenv('TRAINING_APPLICATION_PATH')) - module_name = os.getenv('MODULE_NAME') - TrainingApp_class = load_class_from_file(training_app_path, module_name) - self.training_app = TrainingApp_class('aggregator') # Create an instance + self.logger = logger + self.logger.debug("Aggregator initializing") - self.edgelake_node_url = f'http://{os.getenv("EXTERNAL_IP")}' - self.edgelake_tcp_node_ip_port = f'{os.getenv("EXTERNAL_TCP_IP_PORT")}' + # ===== Index-specific data + self.indexes = set() + self.node_urls = {} + self.node_count = {} + self.lock = Lock() + self.minParams = {} + self.round_number = {} + + self.module_names = {} + self.module_paths = {} + self.training_apps = {} + self.databases = {} + + self.end_round = {} + # self.fetch_indexes_and_modules() + + self.file_write_destination = os.path.join(self.github_dir, os.getenv("FILE_WRITE_DESTINATION"), self.agg_name) + self.tmp_dir = os.path.join(self.github_dir, os.getenv("TMP_DIR"), self.agg_name) + self.docker_file_write_destination = None + # ===== + + # Initialize Firebase database connection + self.database_url = os.getenv('DATABASE_URL') if os.getenv("EDGELAKE_DOCKER_RUNNING").lower() == "false": self.docker_running = False else: self.docker_running = True - self.docker_file_write_destination = os.getenv("DOCKER_FILE_WRITE_DESTINATION") + + + def initialize_file_write_paths_on_index(self, index): + # Each index has only one module, so they'll also have only one file_write_path for them + if not os.path.exists(os.path.join(self.file_write_destination, index)): + os.makedirs(os.path.dirname( + f"{self.file_write_destination}/{index}/"), + exist_ok=True) + + if not os.path.exists(os.path.join(self.tmp_dir, index)): + os.makedirs(os.path.join(self.tmp_dir, index), exist_ok=True) + + if self.docker_running: + self.docker_file_write_destination = 
os.path.join(os.getenv("DOCKER_FILE_WRITE_DESTINATION"), self.agg_name) self.docker_container_name = os.getenv("EDGELAKE_DOCKER_CONTAINER_NAME") - create_directory_in_container(self.docker_container_name, self.docker_file_write_destination) - create_directory_in_container(self.docker_container_name,f"{self.docker_file_write_destination}/aggregator/") - - # Correctly instantiate the Fusion model here (using IterAvg as placeholder for now) - # Define or obtain hyperparameters and protocol handler for the fusion model - hyperparams = {} # Replace with actual hyperparameters as required - # protocol_handler = None # Replace with an appropriate protocol handler instance or object - - def get_contract_address(self): - headers = { - 'User-Agent': 'AnyLog/1.23', - 'Content-Type': 'text/plain', - 'command': 'get !contract' - } + create_directory_in_container(self.docker_container_name, os.path.join(self.docker_file_write_destination, index)) + # create_directory_in_container(self.docker_container_name, + # f"{self.docker_file_write_destination}/aggregator/") - response = requests.get(self.edgelake_node_url, headers=headers, data="") - if response.status_code == 200: - print(f"Contract address: {response.text}") - return response.text - else: - print(f"Failed to retrieve contract, check for active EdgeLake node") - exit(-1) - # function to call the start round function from the smart contract - def start_round(self, initParamsLink, roundNumber): + def initialize_index_on_blockchain(self, index, module_name, module_path, db_name): + if self.get_index_data_in_blockchain(index): + return { + 'status': 'error', + 'message': 'index already initialized on the blockchain' + } + + try: + data = f'''''' + success = False + while not success: + response = insert_policy(self.edgelake_node_url, data) + if response.status_code == 200: + success = True + else: + sleep(np.random.randint(2, 5)) + + if check_policy_inserted(self.edgelake_node_url, data): + success = True + + if success: + 
return { + 'status': 'success', + 'message': 'index initialized onto the blockchain' + } + else: + return { + 'status': 'error', + 'message': f'Request failed with status code: {response.status_code}' + } + except Exception as e: + return { + 'status': 'error', + 'message': str(e) + } + + def initialize_training_app_on_index(self, index): + try: + training_app_path = os.path.join(self.github_dir, self.module_paths[index]) + TrainingApp_class = load_class_from_file(training_app_path, self.module_names[index]) + self.training_apps[index] = TrainingApp_class('aggregator', self.databases[index]) # Create an instance at index + except Exception as e: + return { + 'status': 'error', + 'message': str(e) + } + + # On startup, indexes, modules, and module_paths caches are empty, so refill + def fetch_indexes_and_modules(self): + policies = get_policies(self.edgelake_node_url, 'index') + for policy in policies: # policy = {'attr1': ..., 'attr2': ..., ...} + index = policy['name'] + self.indexes.add(index) + self.module_names[index] = policy['module_name'] + self.module_paths[index] = policy['module_path'] + + # Each index has one training app model + def set_module_at_index(self, index, module_name, module_path): try: - # headers = { - # 'User-Agent': 'AnyLog/1.23', - # 'Content-Type': 'text/plain', - # 'command': 'edgefl insert where policy = !my_policy and local = true and edgefl = optimism' - # } + index_data = self.get_index_data_in_blockchain(index) + if index in self.module_names: # already cached module at index, don't do anything + self.logger.info(f'Index "{index}" already has a module: "{self.module_names[index]}"') + return { + 'status': 'error', + 'message': f'Index "{index}" already has a module: "{self.module_names[index]}"' + } + elif index_data: # module already stored in blockchain but not cache, so fetch + self.logger.info( + f'Index "{index}" already has a module in the blockchain: "{index_data['module_name']}". 
Fetching now.') + self.module_names[index] = index_data['module_name'] + self.module_paths[index] = index_data['module_path'] # self.databases[index] = index_data['db_name'] # done in the server for now + return { + 'status': 'error', + 'message': f'Index "{index}" already has a module in the blockchain: "{index_data['module_name']}". Fetching now.' + } + + # New index, so set new module + self.module_names[index] = module_name + self.module_paths[index] = module_path + # self.databases[index] = index_data['db_name'] # done in the server for now + self.logger.info(f'Added module "{module_name}" to index "{index}"') + return { + 'status': 'success', + 'message': f'Added module "{module_name}" to index {index}' + } + except Exception as e: + return { + 'status': 'error', + 'message': str(e) + } + # Gets data of specified index in blockchain if it exists, otherwise returns None + def get_index_data_in_blockchain(self, index): + where_condition = f"where name = {index}" + policies = get_policies(self.edgelake_node_url, "index", where_condition) + if not policies: + return None + if len(policies) > 1: # dev check + raise Exception(f"Multiple instances of index {index} found in the blockchain") + + return policies[0] # attributes: name, module_name, module_path, id, date, ledger + + # Deletes and inserts index-rx with updated initParams ('blockchain update to' not working) + def store_most_recent_agg_params(self, initParams_link, index): + try: + policy_name = f"{index}-r" + old_policy_id = get_policy_id_by_name(self.edgelake_node_url, policy_name) + + # Deleting old policy + delete_success = False + while old_policy_id and not delete_success: + response = delete_policy(self.edgelake_node_url, old_policy_id) + if response.status_code == 200: + delete_success = True + else: + sleep(np.random.randint(1,3)) + + # Inserting policy back in with updated initParams link + data = f'''''' + insert_success = False + while not insert_success: + response = 
insert_policy(self.edgelake_node_url, data) + if response.status_code == 200: + insert_success = True + else: + sleep(np.random.randint(2, 5)) + + if check_policy_inserted(self.edgelake_node_url, data): + insert_success = True + + if insert_success: + return { + 'status': 'success', + 'message': f'Successfully updated most recent aggregated model file at policy {index}-r' + } + else: + return { + 'status': 'error', + 'message': f'Request failed with status code: {response.status_code}' + } + except Exception as e: + return { + 'status': 'error', + 'message': str(e) + } + + # function to call the start round function + def start_round(self, initParams_link, round_number, index): + try: # Format data exactly like the example curl command but with your values # NOTE: ask why are we adding the node num from agg - data = f'''''' success = False while not success: @@ -100,10 +269,6 @@ def start_round(self, initParamsLink, roundNumber): if check_policy_inserted(self.edgelake_node_url, data): success = True - - # print(f"Training initialized with {roundNumber} rounds") - - # response = requests.post(self.edgelake_node_url, headers=headers, data=data) if success: return { 'status': 'success', @@ -120,62 +285,53 @@ def start_round(self, initParamsLink, roundNumber): 'message': str(e) } - def aggregate_model_params(self, node_param_download_links, ip_ports, round_number): + def fetch_decoded_params(self, decoded_params_dict, node_param_download_links, ip_ports, index): # use the node_param_download_links to get all the file # in the form of tuples, like ["('blobs_admin', 'node_model_updates', '1-replica-node1.pkl')"] # node_ref = db.reference('node_model_updates') - decoded_params = [] # Loop through each provided download link to retrieve node parameter objects for i, path in enumerate(node_param_download_links): + # Don't fetch for existing paths + if path in decoded_params_dict: + continue + try: - # make sure directory exists + # Make sure the directory exists filename = 
path.split('/')[-1] - os.makedirs(os.path.dirname( - f"{self.file_write_destination}/aggregator/"), - exist_ok=True) - + local_path = f'{self.file_write_destination}/{index}/{filename}' if self.docker_running: + docker_file_path = f'{self.docker_file_write_destination}/{index}/{filename}' response = read_file(self.edgelake_node_url, path, - f'{self.docker_file_write_destination}/aggregator/{filename}', ip_ports[i], self.docker_container_name) - copy_file_from_container(self.docker_container_name, - f'{self.docker_file_write_destination}/aggregator/{filename}', - f'{self.file_write_destination}/aggregator/{filename}') + docker_file_path, ip_ports[i]) + copy_file_from_container(os.path.join(self.tmp_dir, index), self.docker_container_name, + docker_file_path, + local_path) else: response = read_file(self.edgelake_node_url, path, - f'{self.file_write_destination}/aggregator/{filename}', ip_ports[i]) + local_path, ip_ports[i]) - if response.status_code == 200: - sleep(1) - # with open(f'{self.file_write_destination}/aggregator/{filename}', 'rb') as f: - # data = pickle.load(f) - - with open(f'{self.file_write_destination}/aggregator/{filename}', 'rb') as f: - data = bytearray() - while chunk := f.read(1024): - data.extend(chunk) - data = pickle.loads(data) - - if not data: - raise ValueError(f"Missing model_weights in data from file: {filename}") - # decoded_params.append(data) - - # decoded_params.append({'weights': pickle.dumps(data)}) - decoded_params.append(LocalModelUpdate(weights=data)) - # decoded_params.append(LocalModelUpdate(weights=data[0].detach().numpy())) - else: + if response.status_code != 200: raise ValueError( - f"Failed to retrieve node params from link: {filename}. 
HTTP Status: {response.status_code}") - except Exception as e: - if os.path.exists(f'{self.file_write_destination}/aggregator/{filename}'): - os.remove(f'{self.file_write_destination}/aggregator/{filename}') - raise ValueError(f"Error retrieving data from link {filename}: {str(e)}") + f"Failed to retrieve node params from link: {filename}. HTTP Status: {response.status_code}" + ) + + # Decode the model weights from the file + sleep(1) + with open(local_path, 'rb') as f: + data = pickle.load(f) + if not data: + raise ValueError(f"Missing model_weights in data from file: {filename}") + decoded_params_dict[path] = LocalModelUpdate(weights=data) - aggregate_params_weights = self.training_app.aggregate_model_weights(decoded_params) + except Exception as e: + self.logger.error(f"Error retrieving data from link {filename}: {str(e)}") + continue - # aggregate_params_weights = [np.array(aggregate_params_weights[0], dtype=np.float32)] + def aggregate_model_params(self, decoded_params, round_number, index): + aggregate_params_weights = self.training_apps[index].aggregate_model_weights(decoded_params) aggregate_model_update = LocalModelUpdate(weights=aggregate_params_weights) # encode params back to string @@ -185,29 +341,32 @@ def aggregate_model_params(self, node_param_download_links, ip_ports, round_numb 'newUpdates': encoded_params } - # push agg data - with open(f'{self.file_write_destination}/aggregator/{round_number}-agg_update.json', 'wb') as f: - f.write(self.encode_params(data_entry)) - f.flush() # Ensure data is written to buffer - os.fsync(f.fileno()) # Ensure data is written to disk + # TODO: will this work on windows? 
+ file_write_path = f'{self.file_write_destination}/{index}/{round_number}-{self.agg_name}_update.json' + with open(file_write_path, 'wb') as f: + f.write(self.encode_params(data_entry)) - # print(f"Model aggregation for round {round_number} complete") if self.docker_running: - # print(f'Writing to container at {f"{self.docker_file_write_destination}/aggregator/{round_number}-agg_update.json"}') - copy_file_to_container(self.docker_container_name, f'{self.file_write_destination}/aggregator/{round_number}-agg_update.json', f'{self.docker_file_write_destination}/aggregator/{round_number}-agg_update.json') - return f'{self.docker_file_write_destination}/aggregator/{round_number}-agg_update.json' + docker_file_write_path = f'{self.docker_file_write_destination}/{index}/{round_number}-{self.agg_name}_update.json' + copy_file_to_container(os.path.join(self.tmp_dir,index), self.docker_container_name, + file_write_path, + docker_file_write_path) + return docker_file_write_path - return f'{self.file_write_destination}/aggregator/{round_number}-agg_update.json' + return file_write_path def encode_params(self, new_model_weights): serialized_data = pickle.dumps(new_model_weights) - return serialized_data def decode_params(self, encoded_model_update): - print(f"encoded_model_update: {encoded_model_update}") model_weights = pickle.loads(encoded_model_update) - print(f"model_weights: {model_weights}") return model_weights + + def inference(self, index): + return self.training_apps[index].run_inference() + + def direct_inference(self, index, data, labels): + return self.training_apps[index].direct_inference(data, labels) \ No newline at end of file diff --git a/edgefl/platform_components/aggregator/aggregator_server.py b/edgefl/platform_components/aggregator/aggregator_server.py index 0aa6fe1..65a9820 100644 --- a/edgefl/platform_components/aggregator/aggregator_server.py +++ b/edgefl/platform_components/aggregator/aggregator_server.py @@ -7,11 +7,18 @@ import argparse from dotenv 
import load_dotenv -import asyncio +from starlette.responses import PlainTextResponse + from platform_components.aggregator.aggregator import Aggregator +import asyncio import logging +import numpy as np +import pickle import requests import os +import threading +import time +import socket import uvicorn from fastapi import FastAPI, HTTPException, status @@ -21,7 +28,8 @@ import warnings from platform_components.lib.logger.logger_config import configure_logging - +from platform_components.lib.modules.exceptions import NodeInitializationError +import json warnings.filterwarnings("ignore") @@ -35,68 +43,221 @@ # Initialize the Aggregator instance ip = get_local_ip() port = os.getenv("SERVER_PORT", "8080") -aggregator = Aggregator(ip, port) +aggregator = Aggregator(ip, port, logger) + +# Track the training process of each index so that they can join once they're done +training_processes = {} ####### FASTAPI IMPLEMENTATION ####### class InitRequest(BaseModel): nodeUrls: list[str] + index: str + module: str + module_file: str + db_name: str class TrainingRequest(BaseModel): totalRounds: int minParams: int + index: str + +class UpdatedMinParamsRequest(BaseModel): + updatedMinParams: int + index: str + +class ContinueTrainingRequest(BaseModel): + additionalRounds: int + minParams: int + index: str + +class InferenceRequest(BaseModel): + input: list # each element in here is one data value to test + labels: list # check element type within direct_inference + # @app.route('/init', methods=['POST']) -@app.post("/init") +@app.post("/init", response_class=PlainTextResponse) def init(request: InitRequest): """Deploy the smart contract with predefined nodes.""" try: - # Initialize the nodes and send the contract address - initialize_nodes(request.nodeUrls) - logger.info(f"Initialized nodes: {request.nodeUrls}") - return { - "status": "success" - } + # Initialize the nodes on specified index and send the contract address + node_urls, index = request.nodeUrls, request.index + 
module_name, module_file = request.module, request.module_file + db_name = request.db_name + + # Verify filepath exists + module_path = os.path.join(aggregator.training_app_dir, module_file) + if not os.path.exists(os.path.join(os.getenv("GITHUB_DIR"), module_path)): + raise FileNotFoundError(f"Module '{module_file}' does not exist within the given path: '{module_path}'.") + + # Set up index and specific data + aggregator.indexes.add(index) + if index not in aggregator.databases: + aggregator.databases[index] = db_name + if not index in aggregator.round_number: + aggregator.round_number[index] = 1 + + initialize_nodes(node_urls, index, module_name, module_path, db_name) + + aggregator.set_module_at_index(index, module_name, module_path) + aggregator.initialize_index_on_blockchain(index, module_name, module_path, db_name) + aggregator.initialize_training_app_on_index(index) + aggregator.initialize_file_write_paths_on_index(index) + + initialized_nodes = [url for url in node_urls if url in aggregator.node_urls[index]] + failed_nodes = [url for url in node_urls if url not in aggregator.node_urls[index]] + + logger.info(f"Initialized nodes with index ({index}): {aggregator.node_urls[index]}") + return (f"{{\n'status': 'success',\n" + f" 'message': 'Initialization request finished.',\n" + f" 'initialized nodes': '{initialized_nodes}',\n" + f" 'failed nodes': '{failed_nodes}'\n" + f"}}\n") + except FileNotFoundError as e: + logger.error(f"{str(e)}") + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail={str(e)} + ) except Exception as e: - logger.error(f"Failed to initialize nodes: {str(e)}") + logger.error(f"Failed to initialize nodes with index ({index}): {str(e)}") raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=str(e) ) -def initialize_nodes(node_urls: list[str]): +def is_node_online(node_conn:tuple): + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + 
s.settimeout(2) + try: + s.connect(node_conn) + s.close() + return True + except (socket.timeout, socket.error): + return False + + # try: + # response = requests.get(node_url, timeout=2) + # return True + # except requests.exceptions.RequestException: + # return False + +def initialize_nodes(node_urls: list[str], index, module_name, module_path, db_name): """Send the deployed contract address to multiple node servers.""" - for urlCount, url in enumerate(node_urls): + def init_node(node_url: str): try: - my_url = node_urls[urlCount].split('/')[-1] - my_url = my_url.split(':') - logger.info(f"Initializing model at {url}") - - response = requests.post(f'{url}/init-node', json={ - 'replica_ip': my_url[0], - 'replica_port': my_url[1], - 'replica_name': f"node{urlCount+1}", + ip_port = node_url.split('/')[-1].split(':') + ip_port[1] = int(ip_port[1]) + logger.info(f"Initializing model at {node_url}") + + # Check that node is online; if it's not, then remove it from node_urls and decrement + # node_count + + if not is_node_online(tuple(ip_port)): + with aggregator.lock: + if node_url in aggregator.node_urls[index]: + aggregator.node_urls[index].remove(node_url) + aggregator.node_count[index] -= 1 + logger.warning(f"Node {node_url} is offline; skipping initialization.") + return + + with aggregator.lock: + if node_url in aggregator.node_urls[index]: # skip already initialized nodes + logger.info(f"Model at {url} already exists for index {index}.") + return + + # Reserve a replica number + replica_number = aggregator.node_count[index] + 1 + replica_name = f"node{replica_number}" + aggregator.node_count[index] = replica_number + + response = requests.post(f'{node_url}/init-node', json={ + 'replica_ip': ip_port[0], + 'replica_port': ip_port[1], + 'replica_name': replica_name, + 'replica_index': index, + 'round_number': aggregator.round_number[index], + 'module_name': module_name, + 'module_path': module_path, + 'db_name': db_name }) - if response.status_code == 200: - 
logger.info(f"Node at {url} initialized successfully.") + # init end_round + with aggregator.lock: + if response.status_code == 200: + aggregator.node_urls[index].add(node_url) + logger.info(f"Node at {node_url} initialized successfully.") + else: + # Rollback node count if request fails + aggregator.node_count[index] -= 1 + #---------- Sanity check pre-ss --------- # + gather_info = { + 'name': aggregator.agg_name, + 'urls':aggregator.node_urls, + 'database': aggregator.databases, + + } + logger.debug(f"Aggregator Info: {gather_info}") + # ---------- Sanity check pre-ss --------- # + raise NodeInitializationError( + status_code=response.status_code, + detail=f"Failed to initialize node at {node_url}." + ) + except Exception as e: + with aggregator.lock: + aggregator.node_count[index] -= 1 # Rollback on exception + logger.critical(f"{str(e)}") + if isinstance(e, NodeInitializationError): + raise e else: - logger.error( - f"Failed to initialize node at {url}. HTTP Status: {response.status_code}. Response: {response.text}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=str(e) + ) - except Exception as e: - logger.critical(f"Error initializing node: {str(e)}") + if index not in aggregator.node_count: + aggregator.node_count[index] = 0 + + if index not in aggregator.node_urls: + aggregator.node_urls[index] = set() + # TODO: if a node gets re-init'ed because the node server re-opened, shut down the corresponding thread and start again + threads = [] + for url in node_urls: + thread = threading.Thread(name=f"agg/init--{url}", target=init_node, args=(url,), daemon=True) + thread.start() + threads.append(thread) + time.sleep(0.1) + for i, thread in enumerate(threads): + thread.join(timeout=180) # Adjust timeout as necessary + if thread.is_alive(): + logger.warning(f"Node {i} thread timed out. 
Failed to initialize a node.") @app.post('/start-training') async def init_training(request: TrainingRequest): """Start the training process by setting the number of rounds.""" try: + index = request.index + if index not in aggregator.indexes: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Index {index} not found (not yet initialized)." + ) + + node_count = aggregator.node_count[index] num_rounds = request.totalRounds - min_params = request.minParams + aggregator.minParams[index] = request.minParams + + if aggregator.minParams[index] > node_count: # prevents stalling when minParams > # of active nodes; warns user + logger.info( + f"[{index}] minParams ({aggregator.minParams[index]}) is greater than number of active nodes ({node_count}). Using active nodes as minParams." + ) + aggregator.minParams[index] = node_count if num_rounds <= 0: raise HTTPException( @@ -104,89 +265,376 @@ async def init_training(request: TrainingRequest): detail="Number of rounds must be positive" ) - logger.info(f"{num_rounds} rounds of training started.") + # TODO: if a training process is in-progress, do not allow another call to /start-training + + # TODO: add a manual way to stop training (if needed) + starting_round = 1 + end_round = num_rounds initial_params = '' + logger.info(f"[{index}] {num_rounds} {'round' if num_rounds == 1 else 'rounds'} of training started.") + # Allow for independent training processes + training_thread = threading.Thread( + name=f"agg/start-training--{index}", + target=start_training, + args=(aggregator, initial_params, starting_round, end_round, index), + daemon=True + ) + training_thread.start() + + return { + "status": "success", + "message": f"Started training at index: {index}" + } + except Exception as e: + raise HTTPException( + status_code=500, + detail=str(e) + ) - for r in range(1, num_rounds + 1): - logger.info(f"Starting training round {r}") - aggregator.start_round(initial_params, r) - logger.debug("Sent initial 
parameters to nodes") +def start_training(aggregator, initial_params, starting_round, end_round, index): + try: + aggregator.end_round[index] = end_round + # for r in range(starting_round, end_round + 1): + while starting_round <= aggregator.end_round[index]: + r = starting_round + aggregator.round_number[index] = r + logger.info(f"[{index}] Starting training round {r}") + aggregator.start_round(initial_params, r, index) + logger.debug(f"[{index}] Sent initial parameters to nodes") # Listen for updates from nodes - new_aggregator_params = await listen_for_update_agg(min_params, r) - logger.debug("Received aggregated parameters") + new_aggregator_params = asyncio.run( + listen_for_update_agg(aggregator.minParams[index], r, index) + ) + logger.debug(f"[{index}] Received aggregated parameters") # Set initial params to newly aggregated params for the next round - initial_params = new_aggregator_params - logger.info(f"[Round {r}] Step 4 Complete: model parameters aggregated") + initial_params = new_aggregator_params # docker: /app/file_write/agg/{index}/1-agg_update.json + # print(initial_params) # debugging + logger.info(f"[{index}][Round {r}] Step 4 Complete: model parameters aggregated") + + # Track the last agg model file because it's not stored in a policy after the last round + aggregator.store_most_recent_agg_params(initial_params, index) + + # Then, update aggregator's model at 'index' + local_path_of_initial_params = f"{aggregator.file_write_destination}/{index}/{r}-{aggregator.agg_name}_update.json" + with open(local_path_of_initial_params, "rb") as f: + data = pickle.load(f) + + if data and 'newUpdates' in data: + weights = aggregator.decode_params(data['newUpdates']) + else: + aggregator.logger.error(f"[{index}] Invalid data or 'newUpdates' missing in Firestore response: {data}") + raise ValueError(f"[{index}] Invalid data or 'newUpdates' missing in Firestore response: {data}") + + aggregator.training_apps[index].update_model(weights) + + starting_round += 1 
+ + logger.info(f"[{index}] Training completed successfully") return { "status": "success", "message": "Training completed successfully" } + except Exception as e: + if isinstance(e, ValueError): + raise ValueError(f"[{index}] Invalid data or 'newUpdates' missing in Firestore response: {data}") + else: + raise RuntimeError(f"An error occurred during training: {str(e)}") + +@app.post('/update-minParams') +async def update_minParams(request: UpdatedMinParamsRequest): + """Update minParams at an existing index. Note that indices are specified on node initialization.""" + url = f'http://{os.getenv("EXTERNAL_IP")}' + # TODO: Rare bug, when training two different models and both are in-progress, one of them may stop when this endpoint is called...or if a node is added mid-way...not sure + try: + index = request.index + if index not in aggregator.indexes: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Index {index} not found (not yet initialized)." + ) + + check_index_response = requests.get(url, headers={ + 'User-Agent': 'AnyLog/1.23', + "command": f"blockchain get index where name = {index}" + }) + + if check_index_response.status_code != 200: + raise HTTPException( + status_code=check_index_response.status_code, + detail=check_index_response.text + ) + + index_data = check_index_response.json() + if not index_data: + raise HTTPException( + status_code=404, + detail=f"Index {index} not found in the blockchain." + ) + + node_count = aggregator.node_count[index] + aggregator.minParams[index] = request.updatedMinParams + + if aggregator.minParams[index] > node_count: # prevents stalling when minParams > # of active nodes; warns user + logger.info( + f"[{index}] minParams ({aggregator.minParams[index]}) is greater than number of active nodes ({node_count}). Using active nodes as minParams." 
+ ) + aggregator.minParams[index] = node_count + return { + "status": "success", + "message": f"minParams successfully updated to {aggregator.minParams[index]}" + } except Exception as e: raise HTTPException( status_code=500, - detail=str(e) + detail=f"Unable to set minParams at index {index}. Have the nodes and index been initialized?" ) - -async def listen_for_update_agg(min_params, roundNumber): + +async def listen_for_update_agg(min_params, round_number, index): """Asynchronously poll for aggregated parameters from the blockchain.""" - logger.info("listening for updates...") + logger.info(f"[{index}] listening for updates...") url = f'http://{os.getenv("EXTERNAL_IP")}' + # TODO: update min_params here with aggregator.min_params since the update_minParams request doesn't affect here + # as of now + decoded_params = {} # { 'node_params_link': 'decoded_param' } + check_chances = 5 # Once this reaches <= 0, we will ignore min_params and handle accordingly while True: try: - # Check parameter count - count_response = requests.get(url, headers={ + # Fetch policies containing the node models at index and round number + params_response = requests.get(url, headers={ 'User-Agent': 'AnyLog/1.23', - "command": f"blockchain get a{roundNumber} count" + "command": f"blockchain get {index}-a{round_number}" }) - - if count_response.status_code == 200: - count_data = count_response.json() - count = len(count_data) if isinstance(count_data, list) else int(count_data) - - # If enough parameters, get the URL - if count >= min_params: - params_response = requests.get(url, headers={ - 'User-Agent': 'AnyLog/1.23', - "command": f"blockchain get a{roundNumber}" - }) - - if params_response.status_code == 200: - result = params_response.json() - if result and len(result) > 0: - # Extract all trained_params into a list - - node_params_links = [ - item[f'a{roundNumber}']['trained_params_local_path'] - for item in result - if f'a{roundNumber}' in item - ] - - ip_ports = [ - 
item[f'a{roundNumber}']['ip_port'] - for item in result - if f'a{roundNumber}' in item - ] - - # Aggregate the parameters - aggregated_params_link = aggregator.aggregate_model_params( - node_param_download_links=node_params_links, - ip_ports=ip_ports, - round_number=roundNumber - ) - return aggregated_params_link + params_response.raise_for_status() # != 200 + + result = params_response.json() + if result: + # Extract all trained_params into a list + node_params_links = [ + item[f'{index}-a{round_number}']['trained_params_local_path'] + for item in result + if f'{index}-a{round_number}' in item + ] + ip_ports = [ + item[f'{index}-a{round_number}']['ip_port'] + for item in result + if f'{index}-a{round_number}' in item + ] + + # Updates decoded_params with newly fetched decoded params (with node link as key) + aggregator.fetch_decoded_params( + decoded_params_dict=decoded_params, + node_param_download_links=node_params_links, + ip_ports=ip_ports, + index=index + ) + + # If enough parameters or not getting ALL parameters in time, get the URL + if len(decoded_params) >= min_params or (decoded_params and not check_chances): + aggregated_params_link = aggregator.aggregate_model_params( + decoded_params=list(decoded_params.values()), + round_number=round_number, + index=index + ) + return aggregated_params_link + + # TODO: Adjust this to decrement with >=0 decoded params, but based on nodes' training process + # Only decrement the counter when there is at least 1 decoded params + if decoded_params and check_chances: + check_chances -= 1 + + # Use most recent aggregated model link if failed to pull any node models + if not decoded_params and not check_chances: + aggregated_params_link = get_last_aggregated_params(index) + if aggregated_params_link: # but only fetch if there exists one + return aggregated_params_link + check_chances = 5 # If none, then reset and try to fetch node model links again except Exception as e: - logger.error(f"Aggregator_server.py --> Waiting for 
file: {e}") + logger.error(f"[{index}] Aggregator_server.py --> Waiting for file: {e}") + # TODO: see there's an alternative to this sleep e.g. sleeping for less time or using another function await asyncio.sleep(2) +@app.post('/continue-training') +async def continue_training(request: ContinueTrainingRequest): + """Continue training from the last completed round.""" + try: + index = request.index + if index not in aggregator.indexes: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Index {index} not found (not yet initialized)." + ) + + node_count = aggregator.node_count[index] + additional_rounds = request.additionalRounds + aggregator.minParams[index] = request.minParams + + if aggregator.minParams[index] > node_count: # prevents stalling when minParams > # of active nodes; warns user + logger.info( + f"[{index}] minParams ({aggregator.minParams[index]}) is greater than number of active nodes ({node_count}). Using active nodes as minParams." + ) + aggregator.minParams[index] = node_count + + if additional_rounds <= 0: + raise HTTPException( + status_code=400, + detail=f"[{index}] Invalid number of additional rounds" + ) + + # Get the last round number from the blockchain layer + last_round = get_last_round_number(index) + if last_round is None: + raise HTTPException( + status_code=400, + detail=f"[{index}] No previous training found" + ) + + # if mid training, we don't need to do anything but update the end_round value + if aggregator.round_number[index] < aggregator.end_round[index]: + aggregator.end_round[index] = aggregator.end_round[index] + additional_rounds + return { + "status": "success", + "message": f"Extended training at index {index} to round {aggregator.end_round[index]}: current round is {aggregator.round_number[index]}" + } + + + # Fetch the most recent aggregated model parameters + initial_params = get_last_aggregated_params(index) + if not initial_params: + raise HTTPException( + status_code=500, + detail=f"[{index}] 
Failed to fetch aggregated parameters from round {last_round}" + ) + + # TODO: if a training process is in-progress, do not allow another call to /continue-training + + # TODO: add a manual way to stop training (if needed) + + starting_round = last_round + 1 + end_round = last_round + additional_rounds + logger.info(f"[{index}] Continuing training from round {last_round}, adding {additional_rounds} more {'round' if additional_rounds == 1 else 'rounds'}.") + # Allow for independent training processes + training_thread = threading.Thread( + name=f"agg/continue-training--{index}", + target=start_training, + args=(aggregator, initial_params, starting_round, end_round, index), + daemon=True + ) + training_thread.start() + + return { + "status": "success", + "message": f"Continuing training at index from round {starting_round} to {end_round}: {index}" + } + except Exception as e: + raise HTTPException( + status_code=500, + detail=str(e) + ) + + +def get_last_round_number(index): + """Get the last completed round number from the blockchain.""" + url = f'http://{os.getenv("EXTERNAL_IP")}' + + try: + # Query the blockchain for all 'r' prefixed keys to find the highest round number + response = requests.get(url, headers={ + 'User-Agent': 'AnyLog/1.23', + "command": f"blockchain get * where [index] = {index} and [node_type] = aggregator" + }) + + if response.status_code == 200: + policies = response.json() + if not policies or not isinstance(policies, list): + return None + + # Extract round numbers from keys like '{index}-r1', '{index}-r2', etc. 
+ highest_round_number = 0 + for policy in policies: + # if key.startswith('a') and key[1:].isdigit(): + # round_numbers.append(int(key[1:])) + key = next(iter(policy)) # it is dict form at first + if key[-1] == 'r': + break + + _, number = key.rsplit("-r", 1) + highest_round_number = max(highest_round_number, int(number)) + + if highest_round_number == 0: + return None + + return highest_round_number + else: + logger.error(f"[{index}] Error fetching keys: {response.status_code}") + return None + + except Exception as e: + logger.error(f"[{index}] Error fetching last round number: {str(e)}") + return None + + +def get_last_aggregated_params(index): + """Get the aggregated parameters from the specified round.""" + url = f'http://{os.getenv("EXTERNAL_IP")}' + try: + # Get the aggregated parameters from index-r + response = requests.get(url, headers={ + 'User-Agent': 'AnyLog/1.23', + "command": f"blockchain get {index}-r" + }) + + if response.status_code == 200: + result = response.json() + if result and isinstance(result, list) and len(result) > 0: + for item in result: + if f'{index}-r' in item and 'initParams' in item[f'{index}-r']: + return item[f'{index}-r']['initParams'] + + logger.info(f"[{index}] No aggregated parameters found in policy {index}-r") + return None + + else: + logger.error(f"[{index}] Error fetching aggregated parameters: {response.status_code}") + return None + + except Exception as e: + logger.error(f"[{index}] Error fetching aggregated parameters: {str(e)}") + return None + + +# TODO: make labels optional (...maybe user doesn't feel like getting the accuracy?) 
+@app.post("/direct-inference/{index}", response_class=PlainTextResponse) +async def direct_inference(index, request: InferenceRequest): + try: + results = aggregator.direct_inference(index, request.input, request.labels) + response = (f"{{" + f"'index': '{index}'," + f" 'status': 'success'," + f" 'message': 'Inference completed successfully'," + f" 'accuracy': '{str(results)}'" + f"}}\n") + return response + except ValueError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e) + ) + except Exception as e: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Inference failed: {str(e)}" + ) + + if __name__ == '__main__': # Add argument parsing to make the port configurable parser = argparse.ArgumentParser(description="Run the Aggregator Server.") diff --git a/edgefl/platform_components/data_handlers/chest_xrays_bbox_data_handler.py b/edgefl/platform_components/data_handlers/chest_xrays_bbox_data_handler.py new file mode 100644 index 0000000..d92d54c --- /dev/null +++ b/edgefl/platform_components/data_handlers/chest_xrays_bbox_data_handler.py @@ -0,0 +1,278 @@ +""" +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. 
# Ordered label set; the position of a name is its class index in the
# generators, the softmax output and the confusion matrix.
CLASSES = [
    "Infiltrate",
    "Atelectasis",
    "Pneumonia",
    "Cardiomegaly",
    "Effusion",
    "Pneumothorax",
    "Mass",
    "Nodule"
]


class ChestXraysBBoxDataHandler:
    """
    Data handler for the chest x-ray bounding-box dataset.

    Builds a small CNN classifier over ``CLASSES``, fetches per-round
    train/test rows through ``fetch_data_from_db`` and wraps them in Keras
    image generators for training and evaluation.
    """

    # Columns selected from the node table, in query order.
    _COLUMNS = ["image", "width", "height", "class", "x_min", "y_min", "x_max", "y_max"]

    def __init__(self, node_name, db_name):
        """
        Initialize the handler and build the (untrained) model.

        :param node_name: Node identifier; data is read from ``node_<node_name>``.
        :param db_name: Logical database name used in the SQL commands.
        """
        # configure_logging(f"node_server_{port}")
        configure_logging("node_server_data_handler")
        self.logger = logging.getLogger(__name__)
        self.edgelake_node_url = f'http://{os.getenv("EXTERNAL_IP")}'
        self.db_name = db_name

        # Data Handler Initialization
        self.image_root_dir = os.path.join(os.getenv("GITHUB_DIR"), os.getenv("IMAGE_ROOT_DIR"))
        self.train_df = None
        self.test_df = None

        self.data_generator = None
        self.training_generator = None
        self.testing_generator = None

        self.preprocessor = None

        self.node_name = node_name
        self.fl_model = self.model_def()
        # self.initialize_model()

    def initialize_model(self):
        """Load the round-1 dataset for this node and rebuild the model."""
        self.load_dataset(self.node_name, 1)
        self.fl_model = self.model_def()

    def model_def(self):
        """
        Build and compile the CNN classifier.

        :return: compiled Keras ``Sequential`` model with one softmax output
                 per entry of ``CLASSES``.
        """
        model = Sequential([
            Conv2D(32, (3, 3), activation="relu", input_shape=(224, 224, 3)),
            MaxPooling2D((2, 2)),
            Conv2D(64, (3, 3), activation="relu"),
            MaxPooling2D((2, 2)),
            Conv2D(128, (3, 3), activation="relu"),
            MaxPooling2D((2, 2)),
            Flatten(),
            Dense(128, activation="relu"),
            Dropout(0.5),
            # One output unit per label so the head always matches CLASSES.
            Dense(len(CLASSES), activation="softmax")
        ])

        model.compile(
            optimizer="adam",
            loss="categorical_crossentropy",
            metrics=["accuracy"]
        )
        return model

    def load_dataset(self, node_name, round_number):
        """
        Fetch the train and test rows of one round and build the generators.

        :param node_name: Node whose table (``node_<node_name>``) is queried.
        :param round_number: Federated round whose rows are selected.
        :raises IOError: if fetching or converting the data fails.
        """
        select = ", ".join(self._COLUMNS)

        def build_query(data_type):
            # Same statement for both splits; only data_type differs.
            return (
                f"sql {self.db_name} SELECT {select} FROM node_{node_name} "
                f"WHERE round_number = {round_number} AND data_type = '{data_type}'"
            )

        try:
            train_data = fetch_data_from_db(self.edgelake_node_url, build_query("train"))
            test_data = fetch_data_from_db(self.edgelake_node_url, build_query("test"))

            # Convert the data into dataframes suitable for the data generator
            train_df = pd.DataFrame(train_data["Query"], columns=self._COLUMNS)
            test_df = pd.DataFrame(test_data["Query"], columns=self._COLUMNS)

            # Add the full path of the image; currently, image is the filename
            for frame in (train_df, test_df):
                frame["file_path"] = frame["image"].apply(
                    lambda name: os.path.join(self.image_root_dir, name)
                )

            self.train_df = train_df
            self.test_df = test_df
            self.set_generators(train_df, test_df, 32)

        except Exception as e:
            raise IOError(f"Error fetching datasets: {str(e)}") from e

    def get_data(self):
        """
        Return the dataframes produced by the last ``load_dataset`` call.

        :return: ``(train_df, test_df)``; both are ``None`` before loading.
        :rtype: `tuple`
        """
        return self.train_df, self.test_df

    def get_weights(self):
        """Return the model's weight variables (``fl_model.weights``)."""
        # NOTE(review): this returns variable objects, not numpy arrays as
        # ``fl_model.get_weights()`` would - confirm what the aggregator and
        # the serialization step expect.
        return self.fl_model.weights

    def update_model(self, weights):
        """
        Load new weights into the model.

        :param weights: list of weights, or a ``LocalModelUpdate`` carrying
                        them under the ``"weights"`` key.
        """
        if isinstance(weights, LocalModelUpdate):
            weights = weights.get("weights")
        self.fl_model.set_weights(weights)

    def train(self, round_number):
        """
        Load the data of ``round_number`` and train for one epoch.

        :param round_number: Federated round whose data is used.
        :return: the model weights after training (see ``get_weights``).
        """
        self.load_dataset(node_name=self.node_name, round_number=round_number)

        early_stopping = EarlyStopping(
            monitor="loss",
            patience=10,
            restore_best_weights=True
        )

        self.fl_model.fit(
            self.training_generator,
            callbacks=[early_stopping],
            steps_per_epoch=30,
            epochs=1,
            verbose=1
        )

        return self.get_weights()

    def set_generators(self, training_data, testing_data, batch_size):
        """
        Create the augmenting image generator and the train/test iterators.

        :param training_data: dataframe with ``file_path`` and ``class`` columns.
        :param testing_data: dataframe with ``file_path`` and ``class`` columns.
        :param batch_size: number of images per batch.
        """
        self.data_generator = ImageDataGenerator(
            rescale=1.0/255.0,
            rotation_range=15,
            width_shift_range=0.1,
            horizontal_flip=True
        )

        shared = dict(
            directory=self.image_root_dir,
            x_col="file_path",
            y_col="class",
            target_size=(224, 224),
            batch_size=batch_size,
            class_mode="categorical",
            classes=CLASSES,
            verbose=0
        )

        self.training_generator = self.data_generator.flow_from_dataframe(
            dataframe=training_data,
            **shared
        )

        # Evaluation compares predictions with ``generator.classes``, which is
        # in dataframe order, so the test iterator must not shuffle.
        # NOTE(review): the test iterator still applies the random
        # augmentations configured above - confirm that is intended.
        self.testing_generator = self.data_generator.flow_from_dataframe(
            dataframe=testing_data,
            shuffle=False,
            **shared
        )

    # def get_all_test_data(self, node_name):
    #     pass

    def aggregate_model_weights(self, weights):
        """Aggregate a collection of weight updates with ``FedAvg_aggregate``."""
        aggregated_params = FedAvg_aggregate(weights)
        return aggregated_params

    # TODO: bbox.direct_inference()
    def direct_inference(self, data, labels: list[float]):
        """
        Validate raw inference input; the inference itself is not implemented yet.

        :param data: sequence of inputs, one per label.
        :param labels: sequence of int/float/str labels.
        :raises ValueError: if either sequence is empty or the lengths differ.
        :raises TypeError: if a label is not an int, float or str.
        :return: ``None`` (placeholder).
        """
        n_data = 0 if data is None else len(data)
        n_labels = 0 if labels is None else len(labels)

        # Validate existence and check there is same number of data inputs as number of labels
        if n_data == 0 or n_labels == 0:
            raise ValueError("Data and labels lists must both be non-empty.")
        if n_data != n_labels:
            raise ValueError(f"Data and labels lists must have the same length ({n_data} != {n_labels}).")

        # Validate labels/predictions
        if not all(isinstance(label, (int, float, str)) for label in labels):
            raise TypeError(
                f"Labels must be a list of floats, ints, or str."
            )

    def run_inference(self):
        """
        Evaluate the model on the test generator.

        :return: accuracy in percent.
        """
        y_true = self.testing_generator.classes
        y_pred = np.argmax(self.fl_model.predict(self.testing_generator), axis=1)

        acc = accuracy_score(y_true, y_pred) * 100
        cm = confusion_matrix(y_true, y_pred, len(CLASSES))
        self.logger.info("Confusion matrix:\n%s", cm)

        return acc

    # TODO: bbox.regression_accuracy() (maybe)
    def regression_accuracy(self, y_true, y_pred, threshold=0.1):
        """
        Return the fraction of predictions within ``threshold`` relative error.

        :param y_true: numpy array of reference values (zeros divide by zero).
        :param y_pred: numpy array of predictions, same shape as ``y_true``.
        :param threshold: maximum relative error counted as correct.
        """
        correct = np.abs(y_true - y_pred) / y_true < threshold
        return np.mean(correct)

    # TODO: bbox.validate_data() (maybe)
    @staticmethod
    def validate_data(values):
        """
        Placeholder for input validation; currently accepts anything.

        :param values: input to validate (unused).
        :return: ``None``.
        """
        pass
+ """ + # TODO: add another input type that allows for raw images to work (would be converted properly) + + # Validate existence and check that there is the same number of data inputs as number of labels + if not data and not labels and len(data) != len(labels): + raise ValueError(f"Data and labels lists must have the same length ({len(data)} != {len(labels)}).") + + # Validate labels/predictions and convert labels into a numpy array + if not isinstance(labels[0], int): + raise TypeError( + f"Labels must be a list of integers." + ) + + # Set up the test data properly + test_images = [] + test_labels = [] + for image, label in zip(data, labels): + # Convert data type to required numpy array + image = np.array(image, dtype=np.float32) + # Validate input dimensions and reshape + if image.ndim not in [1, 3]: + raise ValueError( + f"Invalid input dimensions ({image.ndim}D). " + "Expected 1D (784 elements) or 3D (28x28x1) array." + ) + if image.size != 784: + raise ValueError( + f"1D input must contain exactly 784 elements. Got {image.size}." + ) + test_images.append(image) + test_labels.append(label) + + # Convert test data into final numpy arrays + img_rows, img_cols = 28, 28 + test_images_final = np.array(test_images, dtype=np.float32).reshape(-1, img_rows, img_cols, 1) + test_labels_final = np.array(test_labels, dtype=np.int64) + + # Get predictions + with tf.device(device): + predictions = self.fl_model.predict(test_images_final) + y_pred = np.argmax(predictions, axis=1) + + # Calculate accuracy + acc = accuracy_score(test_labels_final, y_pred) * 100 + + return acc + def train(self, round_number): (x_train, y_train), (x_test, y_test) = self.load_dataset( node_name=self.node_name, round_number=round_number) @@ -167,21 +220,21 @@ def get_all_test_data(self, node_name): # 2. check if number returned equals number in db # 3. 
return test data batch_amount = 50 # TODO: make this parameterized - db_name = os.getenv("PSQL_DB_NAME") + # db_name = os.getenv("PSQL_DB_NAME") # Get number of rows - row_count_query = f"sql {db_name} SELECT count(*) FROM node_{node_name} WHERE data_type = 'test'" + row_count_query = f"sql {self.db_name} SELECT count(*) FROM node_{node_name} WHERE data_type = 'test'" row_count = fetch_data_from_db(self.edgelake_node_url, row_count_query) num_rows = row_count["Query"][0].get('count(*)') # fetch in offsets of 50 # TODO: Get row offset queries to work for offset in range(1): # for offset in range(0, num_rows, batch_amount): - query_test = f"sql {db_name} SELECT image, label FROM node_{node_name} WHERE data_type = 'test' LIMIT 50" + query_test = f"sql {self.db_name} SELECT image, label FROM node_{node_name} WHERE data_type = 'test' LIMIT 50" test_data = fetch_data_from_db(self.edgelake_node_url, query_test) # Assuming the data is returned as dictionaries with keys 'x' and 'y' - query_test_result = np.array(test_data["Query"]) + query_test_result = np.array(test_data["Query"]) # TODO: watch out when exceeding max rounds stored in the db x_test_images = [] y_test_labels = [] for i in range(len(query_test_result)): @@ -222,9 +275,9 @@ def load_dataset(self, node_name, round_number): # self.logger.debug(query_train) # query_test = f"SELECT * FROM test-{node_name}-{round_number}" - db_name = os.getenv("PSQL_DB_NAME") - query_train = f"sql {db_name} SELECT image, label FROM node_{node_name} WHERE round_number = {round_number} AND data_type = 'train'" - query_test = f"sql {db_name} SELECT image, label FROM node_{node_name} WHERE round_number = {round_number} AND data_type = 'test'" + # db_name = os.getenv("PSQL_DB_NAME") + query_train = f"sql {self.db_name} SELECT image, label FROM node_{node_name} WHERE round_number = {round_number} AND data_type = 'train'" + query_test = f"sql {self.db_name} SELECT image, label FROM node_{node_name} WHERE round_number = {round_number} AND 
def get_data(query: str, is_query: bool = True):
    """
    Send a command to the query node over REST and return the reply.

    :param query: command placed in the ``command`` header.
    :param is_query: when exactly ``True``, a ``destination`` header set to
                     ``EDGE_NODE_URL`` is added.
    :return: the decoded JSON body, or the raw response text when the body
             is not valid JSON.
    :raises requests.RequestException: if the request itself fails (no
             response was received).
    """
    logger.debug(f'Query: {query}')
    headers = {
        'command': query,
        "User-Agent": 'AnyLog/1.23'
    }
    if is_query is True:
        headers['destination'] = EDGE_NODE_URL

    try:
        response = requests.get(url=QUERY_NODE_URL, headers=headers)
    except requests.RequestException as error:
        # Without a response there is nothing to parse; log and propagate
        # rather than failing later on an unbound ``response``.
        logger.error(f"Failed to execute GET against {QUERY_NODE_URL} (Error: {error})")
        raise

    try:
        response.raise_for_status()
    except requests.HTTPError as error:
        # Best effort, as before: report the bad status and still hand the
        # body back to the caller.
        logger.error(f"Failed to execute GET against {QUERY_NODE_URL} (Error: {error})")

    try:
        return response.json()
    except ValueError:
        # Body is not JSON; return it verbatim.
        return response.text
__init__(self, node_name): + def __init__(self, node_name, db_name): """ Initialize. @@ -40,6 +74,7 @@ def __init__(self, node_name): configure_logging("node_server_data_handler") self.logger = logging.getLogger(__name__) self.edgelake_node_url = f'http://{os.getenv("EXTERNAL_IP")}' + self.db_name = db_name # Data Handler Initialization self.x_train = None @@ -53,8 +88,7 @@ def __init__(self, node_name): # (self.x_train, self.y_train), (self.x_test, self.y_test) = self.load_dataset(node_name, 1) # self.logger.debug("AFTER LOAD DATASET") self.node_name = node_name - if self.node_name != 'aggregator': - self.fl_model = self.model_def() + self.fl_model = self.model_def() def model_def(self): time_steps = 1 @@ -92,16 +126,18 @@ def load_dataset(self, node_name, round_number): # self.logger.debug(query_train) # query_test = f"SELECT * FROM test-{node_name}-{round_number}" - db_name = os.getenv("PSQL_DB_NAME") - query_train = f"sql {db_name} SELECT actuatorState, co2Value, eventCount, humidity, switchStatus, temperature, label FROM node_{node_name} WHERE round_number = {round_number} AND data_type = 'train'" - query_test = f"sql {db_name} SELECT actuatorState, co2Value, eventCount, humidity, switchStatus, temperature, label FROM node_{node_name} WHERE round_number = {round_number} AND data_type = 'test'" + # db_name = os.getenv("PSQL_DB_NAME") + query_train = f"sql {LOGICAL_DATABASE} format=json and stat=false SELECT actuatorState, co2Value, eventCount, humidity, switchStatus, temperature, label FROM {TRAIN_TABLE} WHERE round_number = {round_number} AND data_type = 'train'" + query_test = f"sql {LOGICAL_DATABASE} format=json and stat=false SELECT actuatorState, co2Value, eventCount, humidity, switchStatus, temperature, label FROM {TEST_TABLE} WHERE round_number = {round_number} AND data_type = 'test'" try: - train_data = fetch_data_from_db(self.edgelake_node_url, query_train) - test_data = fetch_data_from_db(self.edgelake_node_url, query_test) + # train_data = 
def direct_inference(self, data, labels: list[float]):
    """
    Run inference on raw input data against given labels (already in WINNIIO format).
    Handles data conversion and validation internally.

    :param data: sequence of dicts, each holding exactly the six sensor keys.
    :param labels: sequence of int/float/str labels, one per data item.
    :raises ValueError: if either sequence is empty, the lengths differ, or a
                        data item has missing/extra keys.
    :raises TypeError: if a label is not an int, float or str, or a data item
                       is not a dict.
    :return: dict with the first ten "prediction --> label" pairs (as a
             string) and the mae, mse, rmse, r2 and reg_accuracy metrics.
    """
    # Fixed feature order, matching the column order of the SELECT used to
    # load training data, so the result does not depend on the key order of
    # the incoming dicts.
    feature_order = (
        "actuatorState", "co2Value", "eventCount",
        "humidity", "switchStatus", "temperature"
    )

    n_data = 0 if data is None else len(data)
    n_labels = 0 if labels is None else len(labels)

    # Validate existence and check there is same number of data inputs as number of labels
    if n_data == 0 or n_labels == 0:
        raise ValueError("Data and labels lists must both be non-empty.")
    if n_data != n_labels:
        raise ValueError(f"Data and labels lists must have the same length ({n_data} != {n_labels}).")

    # Validate labels/predictions
    if not all(isinstance(label, (int, float, str)) for label in labels):
        raise TypeError(
            f"Labels must be a list of floats, ints, or str."
        )

    # Set up the test data properly
    test_data = []
    for values in data:
        self.validate_sensor_data(values)  # values: {'exact_key1': str, 'exact_key2': str, ...}
        test_data.append([values[key] for key in feature_order])

    # Convert test data into final numpy arrays
    test_data_final = np.array(test_data, dtype=np.float32).reshape(-1, 1, len(feature_order))
    test_labels_final = np.array(labels, dtype=np.float32)

    # Get prediction (on-batch) and reshape
    predictions = self.fl_model.predict_on_batch(test_data_final)
    predictions = predictions.reshape(-1)

    # Set up prediction results (only the first ten pairs are reported)
    res = {
        position: f"{prediction} --> {label}"
        for position, (prediction, label)
        in enumerate(zip(predictions[:10], test_labels_final), start=1)
    }

    # Do data calculations to send along with results
    mae = mean_absolute_error(test_labels_final, predictions)
    mse = mean_squared_error(test_labels_final, predictions)
    rmse = np.sqrt(mse)
    r2 = r2_score(test_labels_final, predictions)
    reg_accuracy = self.regression_accuracy(test_labels_final, predictions, threshold=0.1)
    self.logger.info(f"[Inference] Step 5: Edge inference complete")
    return {"results": str(res), "mae": mae, "mse": mse, "rmse": rmse, "r2": r2, "reg_accuracy": reg_accuracy}
-272,4 +354,55 @@ def run_inference(self): def regression_accuracy(self, y_true, y_pred, threshold=0.1): correct = np.abs(y_true - y_pred) / y_true < threshold - return np.mean(correct) \ No newline at end of file + return np.mean(correct) + + @staticmethod + def validate_sensor_data(values): + """ + Validates that the input is a dictionary with exactly six specific keys + required for sensor data. Checks for missing or extra keys and raises + a ValueError with details if the validation fails. + + Parameters: + values (dict): The dictionary to validate. Expected keys are: + - 'actuatorState' + - 'co2Value' + - 'eventCount' + - 'humidity' + - 'switchStatus' + - 'temperature' + + Raises: + TypeError: If the input is not a dictionary. + ValueError: If there are missing or extra keys, or if the dictionary + does not have exactly six keys. + + Returns: + bool: True if validation passes. + """ + required_keys = { + "actuatorState", "co2Value", "eventCount", + "humidity", "switchStatus", "temperature" + } + + if not isinstance(values, dict): + raise TypeError(f"Expected dict, got {type(values).__name__} for test data: {values}") + + keys = set(values.keys()) + missing = required_keys - keys + extra = keys - required_keys + + if missing or extra: + error = [] + if missing: + error.append(f"Missing keys: {sorted(missing)}") + if extra: + error.append(f"Extra keys: {sorted(extra)}") + raise ValueError(". 
".join(error)) + + if len(values) != 6: + raise ValueError( + f"Expected 6 keys, got {len(values)} keys: {values}" + ) + + return True \ No newline at end of file diff --git a/edgefl/platform_components/lib/logger/logger_config.py b/edgefl/platform_components/lib/logger/logger_config.py index 2eccf77..c9c4dff 100644 --- a/edgefl/platform_components/lib/logger/logger_config.py +++ b/edgefl/platform_components/lib/logger/logger_config.py @@ -27,11 +27,11 @@ def configure_logging(file_type="app"): format="[%(levelname)s] %(message)s", handlers=[ logging.FileHandler(f"{log_directory}/{file_type}.log"), - logging.StreamHandler() + logging.StreamHandler(sys.stdout) ] ) # Adjust console handler level separately (since basicConfig doesn't let us set different levels) for handler in logging.getLogger().handlers: if isinstance(handler, logging.StreamHandler) and not isinstance(handler, logging.FileHandler): - handler.setLevel(console_level) + handler.setLevel(console_level) \ No newline at end of file diff --git a/edgefl/platform_components/lib/modules/exceptions.py b/edgefl/platform_components/lib/modules/exceptions.py new file mode 100644 index 0000000..422ea0e --- /dev/null +++ b/edgefl/platform_components/lib/modules/exceptions.py @@ -0,0 +1,7 @@ +class NodeInitializationError(Exception): + """Raised when a node fails to initialize properly.""" + def __init__(self, status_code, detail): + self.status_code = status_code + self.detail = detail + super().__init__(f"Node initialization failed ({status_code}): {detail}") + diff --git a/edgefl/platform_components/node/node.py b/edgefl/platform_components/node/node.py index ed7d61a..02e0397 100644 --- a/edgefl/platform_components/node/node.py +++ b/edgefl/platform_components/node/node.py @@ -13,7 +13,8 @@ import numpy as np # from keras import layers, optimizers, models -from platform_components.EdgeLake_functions.blockchain_EL_functions import insert_policy, check_policy_inserted +from 
platform_components.EdgeLake_functions.blockchain_EL_functions import insert_policy, check_policy_inserted, \ + get_policies from platform_components.EdgeLake_functions.mongo_file_store import copy_file_to_container, create_directory_in_container from platform_components.EdgeLake_functions.mongo_file_store import read_file, write_file, copy_file_from_container from platform_components.lib.logger.logger_config import configure_logging @@ -24,40 +25,133 @@ class Node: - def __init__(self, replica_name, ip, port): + def __init__(self, replica_name, ip, port, logger): self.github_dir = os.getenv('GITHUB_DIR') - self.file_write_destination = os.path.join(self.github_dir, os.getenv("FILE_WRITE_DESTINATION")) + self.edgelake_node_url = f'http://{os.getenv("EXTERNAL_IP")}' + self.edgelake_tcp_node_ip_port = f'{os.getenv("EXTERNAL_TCP_IP_PORT")}' + + self.replica_name = replica_name self.node_ip = ip self.node_port = port - configure_logging(f"node_server_{port}") - self.logger = logging.getLogger(__name__) + self.logger = logger self.logger.debug("Node initializing") + # ===== Index-specific data + self.indexes = set() + # self.indexes.add(index) + # self.replica_names = {} + self.data_batches = {} # {'index1': [], 'index2': [], ...} + self.round_number = {} + + self.module_names = {} + self.module_paths = {} + self.data_handlers = {} + self.databases = {} + # self.fetch_indexes_and_modules() + + self.file_write_destination = os.path.join(self.github_dir, os.getenv("FILE_WRITE_DESTINATION"), self.replica_name) + self.tmp_dir = os.path.join(self.github_dir, os.getenv("TMP_DIR"), self.replica_name) + self.docker_file_write_destination = None + # ===== + + # Initialize Firebase database connection self.database_url = os.getenv("DATABASE_URL") - self.edgelake_node_url = f'http://{os.getenv("EXTERNAL_IP")}' - self.edgelake_tcp_node_ip_port = f'{os.getenv("EXTERNAL_TCP_IP_PORT")}' self.mongo_db_name = os.getenv('MONGO_DB_NAME') - self.replicaName = replica_name - - # init 
training application class reference - training_app_path = os.path.join(self.github_dir, os.getenv('TRAINING_APPLICATION_PATH')) - module_name = os.getenv('MODULE_NAME') - TrainingApp_class = load_class_from_file(training_app_path, module_name) - self.data_handler = TrainingApp_class(self.replicaName) # Create an instance if os.getenv("EDGELAKE_DOCKER_RUNNING").lower() == "false": self.docker_running = False else: self.docker_running = True - self.docker_file_write_destination = os.getenv("DOCKER_FILE_WRITE_DESTINATION") + + def initialize_specific_node_on_index(self, index, module_name, module_path): + # Initializing index specific data in this node + self.initialize_index(index) + self.set_module_at_index(index, module_name, module_path) + self.initialize_training_app_on_index(index) + self.initialize_file_write_paths_on_index(index) + + def initialize_index(self, index): + self.indexes.add(index) + + def initialize_file_write_paths_on_index(self, index): + if not os.path.exists(os.path.join(self.file_write_destination, index)): + os.makedirs(os.path.dirname( + f"{self.file_write_destination}/{index}/"), + exist_ok=True) + + if not os.path.exists(os.path.join(self.tmp_dir, index)): + os.makedirs(os.path.join(self.tmp_dir, index), exist_ok=True) + + if self.docker_running: + self.docker_file_write_destination = os.path.join(os.getenv("DOCKER_FILE_WRITE_DESTINATION"), self.replica_name) self.docker_container_name = os.getenv("EDGELAKE_DOCKER_CONTAINER_NAME") - create_directory_in_container(self.docker_container_name, self.docker_file_write_destination) - create_directory_in_container(self.docker_container_name, f"{self.docker_file_write_destination}/{self.replicaName}/") + create_directory_in_container(self.docker_container_name, os.path.join(self.docker_file_write_destination, index)) + # create_directory_in_container(self.docker_container_name, f"{self.docker_file_write_destination}/{self.replica_name}/{self.index}/") - # Node local data batches - 
# Each index has one training app model
def set_module_at_index(self, index, module_name, module_path):
    """
    Register the training module for ``index`` in the local cache.

    Resolution order: a module already cached for the index is kept (error
    result); otherwise a module already recorded in the blockchain is cached
    and used; otherwise ``module_name`` / ``module_path`` are cached.

    :param index: index name.
    :param module_name: class name of the training application.
    :param module_path: path of the file defining the class.
    :return: dict with ``status`` ('success' or 'error') and ``message``.
             Exceptions are reported through the result, not raised.
    """
    try:
        # Already cached module at index, don't do anything. Checked first
        # so no blockchain query is made when the cache already answers.
        if index in self.module_names:
            message = f'Index "{index}" already has a module: "{self.module_names[index]}"'
            self.logger.info(message)
            return {
                'status': 'error',
                'message': message
            }

        index_data = self.get_index_data_in_blockchain(index)
        if index_data:  # module already stored in blockchain but not cache, so fetch
            stored_name = index_data['module_name']
            # Built from a local name: reusing the enclosing quote inside a
            # replacement field is a SyntaxError before Python 3.12.
            message = f'Index "{index}" already has a module in the blockchain: "{stored_name}". Fetching now.'
            self.logger.info(message)
            self.module_names[index] = stored_name
            self.module_paths[index] = index_data['module_path']
            return {
                'status': 'success',
                'message': message
            }

        # New index, so set new module
        self.module_names[index] = module_name
        self.module_paths[index] = module_path
        self.logger.info(f'Added module "{module_name}" to index "{index}"')
        return {
            'status': 'success',
            'message': f'Added module "{module_name}" to index {index}'
        }
    except Exception as e:
        # Keep the error-dict contract, but leave a trace in the log.
        self.logger.error(f'Failed to set module for index "{index}": {e}')
        return {
            'status': 'error',
            'message': str(e)
        }
self.logger.debug(f"Submitting results for round {round_number}") + self.logger.debug(f"[{index}] Submitting results for round {round_number}") # response = requests.post(self.edgelake_node_url, headers=headers, data=data) # TODO: add error check here @@ -108,77 +204,69 @@ def add_node_params(self, round_number, model_metadata): ''' train_model_params(aggregator_model_params) - - Uses updated aggreagtor model params and updates local model + - Uses updated aggregator model params and updates local model - Gets local data and runs training on updated model ''' - def train_model_params(self, aggregator_model_params_db_link, round_number, ip_ports): - self.logger.debug(f"in train_model_params for round {round_number}") + def train_model_params(self, aggregator_model_params_db_link, round_number, ip_ports, index): + self.logger.debug(f"[{index}] in train_model_params for round {round_number}") # First round initialization - if round_number == 1: + if round_number == 1 and not aggregator_model_params_db_link: # weights = self.local_training_handler.fl_model.get_model_update() - weights = self.data_handler.get_weights() - # model_update = self.data_handler.get_model_update() + weights = self.data_handlers[index].get_weights() + # model_update = self.data_handlers[index].get_model_update() else: try: # Extract the key from the URL filename = aggregator_model_params_db_link.split('/')[-1] if self.docker_running: response = read_file(self.edgelake_node_url, aggregator_model_params_db_link, - f'{self.docker_file_write_destination}/{self.replicaName}/{filename}', ip_ports, self.docker_container_name) - copy_file_from_container(self.docker_container_name, f'{self.docker_file_write_destination}/{self.replicaName}/{filename}', f'{self.file_write_destination}/{self.replicaName}/{filename}') + f'{self.docker_file_write_destination}/{index}/{filename}', ip_ports) + copy_file_from_container(os.path.join(self.tmp_dir, index), self.docker_container_name, 
f'{self.docker_file_write_destination}/{index}/{filename}', f'{self.file_write_destination}/{index}/{filename}') else: - response = read_file(self.edgelake_node_url, aggregator_model_params_db_link,f'{self.file_write_destination}/{self.replicaName}/{filename}', ip_ports) + response = read_file(self.edgelake_node_url, aggregator_model_params_db_link, f'{self.file_write_destination}/{index}/{filename}', ip_ports) + # response = requests.get(link) if response.status_code == 200: sleep(1) - # with open( - # f'{self.file_write_destination}/{self.replicaName}/{filename}', - # 'rb') as f: - # data = pickle.load(f) - with open(f'{self.file_write_destination}/{self.replicaName}/{filename}','rb') as f: - data = bytearray() - while chunk := f.read(1024): - data.extend(chunk) - data = pickle.loads(data) - + with open( + f'{self.file_write_destination}/{index}/{filename}', + 'rb') as f: + data = pickle.load(f) # Ensure the data is valid and decode the parameters if data and 'newUpdates' in data: weights = self.decode_params(data['newUpdates']) else: - self.logger.error(f"Invalid data or 'newUpdates' missing in Firestore response: {data}") - raise ValueError(f"Invalid data or 'newUpdates' missing in Firestore response: {data}") + self.logger.error(f"[{index}] Invalid data or 'newUpdates' missing in Firestore response: {data}") + raise ValueError(f"[{index}] Invalid data or 'newUpdates' missing in Firestore response: {data}") except Exception as e: - if os.path.exists(f'{self.file_write_destination}/{self.replicaName}/{filename}'): - os.remove(f'{self.file_write_destination}/{self.replicaName}/{filename}') - self.logger.error(f"Error getting weights: {str(e)}") + self.logger.error(f"[{index}] Error getting weights: {str(e)}") raise # Update model with weights - self.data_handler.update_model(weights) + self.data_handlers[index].update_model(weights) # Train model # model_update = self.local_training_handler.train({}) - model_params = self.data_handler.train(round_number) + # 
print(f"[INFO] [{index}][Round {round_number}] ========== Model training progress ==========") + model_params = self.data_handlers[index].train(round_number) + self.logger.info(f"[{index}][Round {round_number}] Step 2 Complete: Model training done") # Save and return new weights encoded_params = self.encode_model(model_params) - file = f"{round_number}-replica-{self.replicaName}.pkl" + file = f"{round_number}-replica-{self.replica_name}.pkl" # make sure directory exists - os.makedirs(os.path.dirname(f"{self.file_write_destination}/{self.replicaName}/"), exist_ok=True) - file_name = f"{self.file_write_destination}/{self.replicaName}/{file}" + os.makedirs(os.path.dirname(f"{self.file_write_destination}/{index}/"), exist_ok=True) + file_name = f"{self.file_write_destination}/{index}/{file}" with open(f"{file_name}", "wb") as f: f.write(encoded_params) - f.flush() - os.fsync(f.fileno()) - self.logger.info(f"[Round {round_number}] Step 2 Complete: model training done") if self.docker_running: - self.logger.debug(f'written to container at {f"{self.docker_file_write_destination}/{self.replicaName}/{file}"}') - copy_file_to_container(self.docker_container_name, file_name, f"{self.docker_file_write_destination}/{self.replicaName}/{file}") - return f'{self.docker_file_write_destination}/{self.replicaName}/{file}' + self.logger.debug(f'[{index}] written to container at {f"{self.docker_file_write_destination}/{index}/{file}"}') + copy_file_to_container(os.path.join(self.tmp_dir, index), self.docker_container_name, file_name, f"{self.docker_file_write_destination}/{index}/{file}") + return f'{self.docker_file_write_destination}/{index}/{file}' return file_name def encode_model(self, model_update): @@ -189,8 +277,8 @@ def decode_params(self, encoded_model_update): model_weights = pickle.loads(encoded_model_update) return model_weights - def inference(self): - return self.data_handler.run_inference() + def inference(self, index): + return self.data_handlers[index].run_inference() 
- def direct_inference(self, data): - return self.data_handler.direct_inference(data) + def direct_inference(self, index, data): + return self.data_handlers[index].direct_inference(data) \ No newline at end of file diff --git a/edgefl/platform_components/node/node_server.py b/edgefl/platform_components/node/node_server.py index 358081f..f684c09 100644 --- a/edgefl/platform_components/node/node_server.py +++ b/edgefl/platform_components/node/node_server.py @@ -3,9 +3,11 @@ License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/ """ +from starlette.responses import PlainTextResponse # from dotenv import load_dotenv -from platform_components.EdgeLake_functions.blockchain_EL_functions import get_local_ip +from platform_components.EdgeLake_functions.blockchain_EL_functions import get_local_ip, fetch_data_from_db, \ + connect_to_db, get_all_databases from platform_components.node.node import Node import numpy as np import logging @@ -19,6 +21,7 @@ from uvicorn import run from fastapi import FastAPI, HTTPException, status +from contextlib import asynccontextmanager from pydantic import BaseModel from platform_components.lib.logger.logger_config import configure_logging @@ -26,9 +29,29 @@ warnings.filterwarnings("ignore") -app = FastAPI() load_dotenv() +# node that system_query resides on +QUERY_NODE_URL=f"htt://{os.getenv('QUERY_NODE_URL')}" +# Edge Node conntaining data +EDGE_NODE_CONN=os.getenv('QUERY_NODE_URL') +# Logical database name +LOGICAL_DATABASE=[os.getenv('LOGICAL_DATABASE')] +# Table containing trained data +TRAIN_TABLE=os.getenv('TRAIN_TABLE') +# Table containing test data +TEST_TABLE=os.getenv('TEST_TABLE') + + +db_password = os.getenv("PSQL_DB_PASSWORD") +db_host = os.getenv("PSQL_HOST") +db_port = os.getenv("PSQL_PORT") + +db_list = os.getenv('LOGICAL_DATABASE').split(',') +edgelake_node_url = f'http://{os.getenv("EXTERNAL_IP")}' +edgelake_node_port = edgelake_node_url.split(":")[2] + 
+configure_logging(f"node_server_{edgelake_node_port}") logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) # Excludes WARNING, ERROR, CRITICAL @@ -37,59 +60,120 @@ listener_thread = None stop_listening_thread = False +def check_data(): + sql_stmt = f'sql {LOGICAL_DATABASE} format=json and stat=false "select count(*) as count from %s"' + headers = { + 'command': None, + 'User-Agent': 'AnyLog/1.23', + 'destination': EDGE_NODE_CONN + } + + for table in [TRAIN_TABLE, TEST_TABLE]: + headers['command'] = sql_stmt % table + try: + response = requests.get(url=QUERY_NODE, headers=headers) + response.raise_for_status() + if int(response.json()['Query'][0]['count']) < 1: + raise ValueError(f"No data found in the database: {LOGICAL_DATABASE}.{table}") + except Exception as error: + raise Exception(f"Failed to execute GET for table {LOGICAL_DATABASE}.{table} (Error: {error})") + + +@asynccontextmanager +async def lifespan(app: FastAPI): + global db_list + # logger.info(f"Node server on port {edgelake_node_port} starting up.") + + # Get all connected databases from the EdgeLake node + db_list = LOGICAL_DATABASE + + yield + # logger.info("Node server shutting down.") + +app = FastAPI(lifespan=lifespan) -edgelake_node_url = f'http://{os.getenv("EXTERNAL_IP")}' class InitNodeRequest(BaseModel): replica_name: str replica_ip: str replica_port: str + replica_index: str + round_number: int + module_name: str + module_path: str + LOGICAL_DATABASE: str @app.post('/init-node') def init_node(request: InitNodeRequest): - """Receive the contract address from the aggregator server.""" global node_instance, listener_thread, stop_listening_thread try: ip = get_local_ip() + most_recent_round = request.round_number port = request.replica_port - replica_name = request.replica_name + index = request.replica_index + module_name = request.module_name + module_path = request.module_path - # logger.debug(f"Replica name " + replica_name) + LOGICAL_DATABASE = request.LOGICAL_DATABASE # 
testing winniio_fl + mnist_fl DBs - if listener_thread and listener_thread.is_alive(): - stop_listening_thread = True - listener_thread.join(timeout=1) + # Connect to DB if it's not in the EdgeLake node + # if LOGICAL_DATABASE not in db_list: + # connect_to_db(edgelake_node_url, LOGICAL_DATABASE, db_user, db_password, db_host, db_port) + # db_list.add(LOGICAL_DATABASE) - # Reset the stop flag - stop_listening_thread = False + # # Fetch and check for existing data + # query = f"sql {LOGICAL_DATABASE} SELECT * FROM node_{replica_name} LIMIT 1" + # check_data = fetch_data_from_db(edgelake_node_url, query) + # if not check_data: + # raise ValueError(f"No data found in the database: {LOGICAL_DATABASE}.") + check_data() # Instantiate the Node class logger.info(f"{replica_name} before initialized") - node_instance = Node(replica_name, ip, port) - configure_logging(f"node_server_{port}") - node_instance.currentRound = 1 + if not node_instance: + node_instance = Node(replica_name, ip, port, logger) + + if index not in node_instance.databases: + node_instance.databases[index] = LOGICAL_DATABASE - logger.info(f"{replica_name} successfully initialized") + node_instance.initialize_specific_node_on_index(index, module_name, module_path) + node_instance.round_number[index] = most_recent_round # 1 or current round + + logger.info(f"{replica_name} successfully initialized for ({index})") + # print(f"indexes: {node_instance.indexes}") + # print(f"module names: {node_instance.module_names}") + # print(f"module paths: {node_instance.module_paths}") + # print(f"starting round numbers: {node_instance.round_number}") + # print(f"training apps: {node_instance.data_handlers}") # Start event listener for start round listener_thread = threading.Thread( + name=f"{replica_name}--{index}", target=listen_for_start_round, - args=(node_instance, lambda: stop_listening_thread) + args=(node_instance, index, lambda: stop_listening_thread) ) listener_thread.daemon = True # Make thread daemon so it 
exits when main thread exits listener_thread.start() return { 'status': 'success', - 'message': 'Contract address set and Node initialized successfully' + 'message': 'Node initialized successfully' } - except Exception as e: + except ValueError as e: + raise ValueError( + f"No data found in the database: {LOGICAL_DATABASE}" + ) + except HTTPException as e: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=str(e) + detail=f"/init-node - {str(e)}" + ) + except ConnectionError as e: + raise ConnectionError( + f"Unable to access the database tables: {str(e)}" ) ''' @@ -97,6 +181,7 @@ def init_node(request: InitNodeRequest): - Endpoint to receive data block from the simulated data stream ''' class ReceiveDataRequest(BaseModel): + index: str data: list # @app.route('/receive_data', methods=['POST']) @@ -108,7 +193,7 @@ def receive_data(request: ReceiveDataRequest): detail="Node instance not initialized" ) if request.data: - node_instance.add_data_batch(np.array(request.data)) + node_instance.add_data_batch(request.index, np.array(request.data)) return { "status": "data_received", "batch_size": len(request.data) @@ -118,19 +203,17 @@ def receive_data(request: ReceiveDataRequest): detail="No Data Provided" ) -def listen_for_start_round(nodeInstance, stop_event): - logger.debug(f"listening for start round {nodeInstance.currentRound}") +def listen_for_start_round(nodeInstance, index, stop_event): + current_round = nodeInstance.round_number[index] + + logger.debug(f"[{index}] listening for start round {current_round}") while True: try: - # next_round = nodeInstance.currentRound + 1 - - # logger.debug(f"listening for start round {nodeInstance.currentRound}") - headers = { 'User-Agent': 'AnyLog/1.23', - 'command': f'blockchain get r{nodeInstance.currentRound}' + 'command': f'blockchain get {index}-r{current_round}' } - response = requests.get(edgelake_node_url, headers=headers) + response = requests.get(QUERY_NODE_URL, headers=headers) if 
response.status_code == 200: data = response.json() @@ -139,39 +222,67 @@ def listen_for_start_round(nodeInstance, stop_event): round_data = None for item in data: # Check if the key exists in the current dictionary - if f'r{nodeInstance.currentRound}' in item: - round_data = item[f'r{nodeInstance.currentRound}'] + if f'{index}-r{current_round}' in item: + round_data = item[f'{index}-r{current_round}'] break # Stop searching once the current round's data is found if round_data: - logger.debug(f"Round Data: {round_data}") # Debugging line + logger.debug(f"[{index}] Round Data: {round_data}") # Debugging line paramsLink = round_data.get('initParams', '') ip_port = round_data.get('ip_port', '') - modelUpdate_metadata = nodeInstance.train_model_params(paramsLink, nodeInstance.currentRound, ip_port) - nodeInstance.add_node_params(nodeInstance.currentRound, modelUpdate_metadata) - logger.info(f"[Round {nodeInstance.currentRound}] Step 3 Complete: Model parameters published") - nodeInstance.currentRound += 1 - # else: # Debugging line - # logger.error(f"No data found for round r{nodeInstance.currentRound}") + modelUpdate_metadata = nodeInstance.train_model_params(paramsLink, current_round, ip_port, index) + nodeInstance.add_node_params(current_round, modelUpdate_metadata, index) + logger.info(f"[{index}][Round {current_round}] Step 3 Complete: Model parameters published") + current_round += 1 time.sleep(5) # Poll every 2 seconds except Exception as e: - logger.error(f"Error in listener thread: {str(e)}") + logger.error(f"[{index}] Error in listener thread: {str(e)}") time.sleep(2) +# TODO: move to a (helper) file (i.e. 'node_helpers.py'?) 
+# Extracts initParams from the policy 'index-r' at the specified index +def get_most_recent_agg_params(index): + policy_name = f"{index}-r" + agg_params = None + + try: + headers = { + 'User-Agent': 'AnyLog/1.23', + 'command': f'blockchain get {policy_name}' + } + response = requests.get(QUERY_NODE_URL, headers=headers) + + if response.status_code == 200: + data = response.json() + + if data: + policy = data[0] + policy_data = policy[policy_name] + agg_params = policy_data["initParams"] + + return agg_params + except Exception as e: + logger.error(f"[{index}] Error in extracting round number: {str(e)}") + # @app.route('/inference', methods=['POST']) -@app.post('/inference') -def inference(): +@app.post('/inference/{index}', response_class=PlainTextResponse) +def inference(index): """Inference on current model w/ data passed in.""" try: - - logger.info("received inference request") - results = node_instance.inference() - response = { - 'status': 'success', - 'message': 'Inference completed successfully', - 'model_accuracy': str(results), - } + logger.info(f"[{index}] received inference request") + if not index: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Index must be specified." 
+ ) + results = node_instance.inference(index) + response = (f"{{" + f"'index': '{index}'," + f" 'status': 'success'," + f" 'message': 'Inference completed successfully'," + f" 'model_accuracy': '{str(results)}'" + f"}}\n") return response except Exception as e: raise HTTPException( @@ -182,6 +293,7 @@ def inference(): class InferenceRequest(BaseModel): input: list[float] = [244.46153846153845, 453, 0, 52.29666666666667, 0.0375170724933045, 20.515] +# TODO: add index and reformat response to FastAPI PlainTextResponse # @app.route('/infer', methods=['POST']) @app.post('/infer') def direct_inference(request: InferenceRequest): diff --git a/edgefl/requirements.txt b/edgefl/requirements.txt new file mode 100644 index 0000000..f47e193 --- /dev/null +++ b/edgefl/requirements.txt @@ -0,0 +1,26 @@ +numpy~=1.26.0 +skorch>=0.15.0 +psycopg2-binary~=2.9.9 +scikit-learn~=1.6.1 +python-dotenv~=1.0.1 +requests~=2.32.3 +FastAPI~=0.115.8 +uvicorn~=0.34.0 +keras~=3.7.0 +joblib~=1.4.2 +gensim~=4.3.2 +diffprivlib~=0.6.5 +pycloudmessenger~=0.8.2 +pandas~=2.2.1 +PyYAML~=6.0.2 +tqdm~=4.67.1 +python-dateutil~=2.9.0.post0 +matplotlib~=3.9.4 +docker~=7.1.0 + +torch~=2.2.2 +tensorflow~=2.16.2 +cython~=3.0.12 +pydantic~=2.11.0a2 + +torchvision~=0.17.2 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index c67b98c..f47e193 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,5 @@ torch~=2.2.2 tensorflow~=2.16.2 cython~=3.0.12 pydantic~=2.11.0a2 + +torchvision~=0.17.2 \ No newline at end of file