From 388841e3fd85894758b24c70c6566c824351d3c6 Mon Sep 17 00:00:00 2001
From: Alexander Blinov
Date: Mon, 7 Jul 2025 21:16:10 +0300
Subject: [PATCH] feat(db/views.py,db/urls.py,db/shortcuts.py,db/docs.py): Added uploading data to Postgres DB from csv file

---
 backend/core/db/docs.py       |  28 +++++++
 backend/core/db/shortcuts.py  |  69 ++++++++++++++++
 backend/core/db/urls.py       |   3 +-
 backend/core/db/views.py      | 147 +++++++++++++++++++++++++++++++++-
 backend/core/requirements.txt |   7 ++
 5 files changed, 251 insertions(+), 3 deletions(-)

diff --git a/backend/core/db/docs.py b/backend/core/db/docs.py
index e6da7d6b..996acdea 100644
--- a/backend/core/db/docs.py
+++ b/backend/core/db/docs.py
@@ -9,6 +9,14 @@
     description="Session ID passed as a query parameter.",
 )
 
+table_name_param = openapi.Parameter(
+    name="table_name",
+    in_=openapi.IN_QUERY,
+    type=openapi.TYPE_STRING,
+    required=True,
+    description="Name of the table where data from csv will be inserted",
+)
+
 get_db_schema_doc = swagger_auto_schema(
     manual_parameters=[session_id_query_param],
 )
@@ -23,3 +31,23 @@
         type=openapi.TYPE_STRING, description="Raw plain text query"
     ),
 )
+
+post_csv_upload_doc = swagger_auto_schema(
+    manual_parameters=[
+        session_id_query_param,
+        table_name_param,
+        openapi.Parameter(
+            name="file",
+            in_=openapi.IN_FORM,
+            type=openapi.TYPE_FILE,
+            required=True,
+            description="CSV file to upload (must use colon as separator)",
+        ),
+    ],
+    consumes=['multipart/form-data'],
+    responses={
+        200: openapi.Response("CSV file processed successfully"),
+        400: openapi.Response("Invalid CSV file or request"),
+        500: openapi.Response("Server error while processing file"),
+    }
+)
diff --git a/backend/core/db/shortcuts.py b/backend/core/db/shortcuts.py
index 8a63f1be..f936d442 100644
--- a/backend/core/db/shortcuts.py
+++ b/backend/core/db/shortcuts.py
@@ -1,3 +1,8 @@
+import pandas as pd
+from typing import List, Any
+from datetime import datetime, date
+import numpy as np
+
 from engines import postgres_engine, mongo_engine, DBEngine
 from templates.models import DBType
 
@@ -9,3 +14,67 @@ def get_db_engine(type: str) -> DBEngine | None:
         case DBType.MONGODB.value:
             return mongo_engine
     return None
+
+
+def _sql_identifier(name: Any) -> str:
+    """Return ``name`` as a double-quoted Postgres identifier.
+
+    Embedded double quotes are doubled so that a table or column name
+    taken from untrusted input (a query parameter, a CSV header) cannot
+    terminate the identifier and inject SQL.
+    """
+    escaped = str(name).replace('"', '""')
+    return f'"{escaped}"'
+
+
+def _sql_literal(value: Any) -> str:
+    """Convert a Python/pandas scalar to a Postgres SQL literal.
+
+    Missing values (``None``, ``NaN``, ``NaT``, ``pd.NA``) and non-finite
+    floats become ``NULL``; booleans become ``TRUE``/``FALSE``; numbers are
+    emitted unquoted; dates and timestamps are emitted as quoted strings.
+    Anything else is stringified and single-quoted, with embedded single
+    quotes doubled.
+    """
+    if pd.isna(value):
+        return "NULL"
+
+    # bool must be tested before int: ``bool`` is a subclass of ``int``.
+    if isinstance(value, (bool, np.bool_)):
+        return "TRUE" if value else "FALSE"
+
+    if isinstance(value, (int, np.integer)):
+        return str(value)
+
+    if isinstance(value, (float, np.floating)):
+        return str(value) if np.isfinite(value) else "NULL"
+
+    # datetime must be tested before date: ``datetime`` subclasses ``date``.
+    if isinstance(value, (datetime, pd.Timestamp)):
+        return f"'{value.strftime('%Y-%m-%d %H:%M:%S')}'"
+    if isinstance(value, date):
+        return f"'{value.isoformat()}'"
+
+    escaped = str(value).replace("'", "''")
+    return f"'{escaped}'"
+
+
+def df_to_insert_queries(df: pd.DataFrame, table: str, engine: DBEngine, db_name: str) -> List[str]:
+    """Build one ``INSERT`` statement per row of ``df``.
+
+    ``table`` and the column names of ``df`` are quoted as identifiers and
+    every cell is rendered with ``_sql_literal``. ``engine`` and ``db_name``
+    are accepted for call-site compatibility but are not used here.
+    An empty frame yields an empty list.
+    """
+    table_sql = _sql_identifier(table)
+    cols_sql = ', '.join(_sql_identifier(c) for c in df.columns)
+
+    # itertuples keeps each column's own dtype; iterrows would upcast a row
+    # mixing int and float columns to float and render integers as "1.0".
+    queries: List[str] = []
+    for row in df.itertuples(index=False, name=None):
+        vals_sql = ', '.join(_sql_literal(v) for v in row)
+        queries.append(f'INSERT INTO {table_sql} ({cols_sql}) VALUES ({vals_sql});')
+
+    return queries
diff --git a/backend/core/db/urls.py b/backend/core/db/urls.py
index 68525207..1daba2b5 100644
--- a/backend/core/db/urls.py
+++ b/backend/core/db/urls.py
@@ -1,9 +1,10 @@
 from django.urls import path
 
-from .views import PutView, QueryView, SchemaView
+from .views import PutView, QueryView, SchemaView, CSVView
 
 urlpatterns = [
     path("", PutView.as_view()),
     path("schema/", SchemaView.as_view()),
     path("query/", QueryView.as_view()),
+    path("csv/", CSVView.as_view()),
 ]
diff --git a/backend/core/db/views.py b/backend/core/db/views.py
index 5e825cd2..6cf2f99d 100644
--- a/backend/core/db/views.py
+++ b/backend/core/db/views.py
@@ -1,5 +1,12 @@
+import tempfile
+import os
+
+from django.core.files.uploadedfile import UploadedFile
+
+from engines import postgres_engine
 from engines.exceptions import QueryError
 from engines.shortcuts import db_exists
+from rest_framework.parsers import MultiPartParser
 from rest_framework.parsers import BaseParser
 from rest_framework.request import Request
 from rest_framework.response import Response
@@ -7,9 +14,10 @@
 from session.models import Session, SessionInfo
 from session.shortcuts import resolve_session_id
 
-from .docs import get_db_schema_doc, post_db_query_doc, put_db_schema_doc
-from .shortcuts import get_db_engine
+from .docs import get_db_schema_doc, post_db_query_doc, put_db_schema_doc, post_csv_upload_doc
+from .shortcuts import get_db_engine, df_to_insert_queries
 
+import pandas as pd
 
 class PlainTextParser(BaseParser):
     media_type = "text/plain"
@@ -111,3 +119,138 @@ def post(self, request: Request):
         json_schema = schema.to_json()
 
         return Response({"results": json_results, "schema": json_schema})
+
+
+# Base Postgres type names (lower-case, without a "(...)" modifier) whose
+# CSV values are coerced to numbers before the INSERT statements are built.
+_NUMERIC_TYPES = frozenset({
+    "smallint", "integer", "int", "bigint", "int2", "int4", "int8",
+    "smallserial", "serial", "bigserial", "serial2", "serial4", "serial8",
+    "numeric", "decimal", "real", "double precision",
+    "float", "float4", "float8", "money",
+})
+
+
+def _is_numeric_type(type_name: str) -> bool:
+    """Tell whether ``type_name`` names a numeric column type.
+
+    The whole base name is compared rather than searching for substrings:
+    a substring test for "int" also matches "interval" and "point", whose
+    values would then be coerced to NaN and inserted as NULL.
+    """
+    base = type_name.lower().split("(", 1)[0].strip()
+    return base in _NUMERIC_TYPES
+
+
+def _read_uploaded_csv(uploaded_file: UploadedFile) -> pd.DataFrame:
+    """Spool the upload to a temporary file and parse it with ``sep=':'``.
+
+    The temporary file is removed whether or not writing and parsing
+    succeed. Exceptions raised by pandas propagate to the caller.
+    """
+    temp_path = None
+    try:
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
+            # Remember the path first so cleanup still runs if a write fails.
+            temp_path = temp_file.name
+            for chunk in uploaded_file.chunks():
+                temp_file.write(chunk)
+        # Parse only after the handle is closed so all data is on disk.
+        return pd.read_csv(temp_path, sep=':')
+    finally:
+        if temp_path is not None and os.path.exists(temp_path):
+            os.unlink(temp_path)
+
+
+class CSVView(APIView):
+    """Insert the rows of an uploaded colon-separated CSV file into a table."""
+
+    parser_classes = [MultiPartParser]
+
+    @post_csv_upload_doc
+    def post(self, request: Request):
+        """Handle the upload.
+
+        Expects a multipart ``file`` field (name ending in ``.csv``, at most
+        10MB) and a ``table_name`` query parameter naming an existing table.
+        Returns 400 for invalid input or a failed query, 418 when the
+        template's engine type is unknown and 500 for unexpected errors.
+        """
+        session_id, err_response = resolve_session_id(request)
+        if err_response:
+            return err_response
+
+        session = Session.objects.get(id=session_id)
+        session_info = SessionInfo.objects.get(session=session_id)
+
+        if not session_info.template:
+            return Response({"detail": "Template not chosen"}, status=400)
+
+        if 'file' not in request.FILES:
+            return Response({"detail": "No file provided"}, status=400)
+
+        uploaded_file: UploadedFile = request.FILES['file']
+
+        if not (uploaded_file.name or '').lower().endswith('.csv'):
+            return Response({"detail": "Only CSV files are allowed"}, status=400)
+
+        max_size = 10 * 1024 * 1024  # 10MB in bytes
+        if uploaded_file.size > max_size:
+            return Response({"detail": "File size exceeds 10MB limit"}, status=400)
+
+        # Check the request parameters before spending work on the file.
+        table_name = request.query_params.get("table_name")
+        if not table_name:
+            return Response({"detail": "Missing `table_name` query parameter"}, status=400)
+
+        engine = get_db_engine(session_info.template.type)
+        if not engine:
+            return Response({"detail": "Unknown engine type"}, status=418)
+
+        db_name = session.get_unauth_dbname()
+
+        try:
+            try:
+                db_info = engine.get_db(db_name)
+            except QueryError as e:
+                return Response({"detail": "QueryError while getting DB schema: " + str(e)}, status=400)
+
+            table_info = next((t for t in db_info.tables if t.name == table_name), None)
+            if table_info is None:
+                return Response({"detail": f"Table \"{table_name}\" not found"}, status=400)
+
+            try:
+                df = _read_uploaded_csv(uploaded_file)
+            except pd.errors.EmptyDataError:
+                return Response({"detail": "Empty CSV file"}, status=400)
+            except (pd.errors.ParserError, UnicodeDecodeError):
+                return Response({"detail": "Invalid CSV format"}, status=400)
+
+            df.columns = df.columns.str.strip()
+
+            # Header names are interpolated into the generated SQL, so accept
+            # only names that are real columns of the target table.
+            known_cols = {col.name for col in table_info.columns}
+            unknown_cols = [str(c) for c in df.columns if c not in known_cols]
+            if unknown_cols:
+                return Response(
+                    {"detail": "Unknown columns in CSV: " + ", ".join(unknown_cols)},
+                    status=400,
+                )
+
+            for col in table_info.columns:
+                if col.name in df.columns and _is_numeric_type(col.type):
+                    df[col.name] = pd.to_numeric(df[col.name], errors='coerce')
+
+            queries = df_to_insert_queries(df, table_name, engine, db_name)
+            if queries:
+                try:
+                    engine.send_query(db_name, "\n".join(queries))
+                except QueryError as e:
+                    return Response({"detail": "QueryError: " + str(e)}, status=400)
+
+            return Response({"detail": f"Successfully inserted {len(df)} rows into {table_name}"}, status=200)
+
+        except Exception as e:
+            return Response({"detail": f"Error processing file: {str(e)}"},
+                            status=500)
diff --git a/backend/core/requirements.txt b/backend/core/requirements.txt
index 8b6d4658..be3afde5 100644
--- a/backend/core/requirements.txt
+++ b/backend/core/requirements.txt
@@ -4,6 +4,7 @@ Django==5.2.1
 django-cors-headers==4.7.0
 django-filter==25.1
 djangorestframework==3.16.0
+djangorestframework_simplejwt==5.5.0
 dnspython==2.7.0
 drf-yasg==1.21.10
 flake8==7.3.0
@@ -11,18 +12,24 @@ inflection==0.5.1
 iniconfig==2.1.0
 Markdown==3.8
 mccabe==0.7.0
+numpy==2.3.1
 packaging==25.0
+pandas==2.3.0
 pillow==11.2.1
 pluggy==1.6.0
 psycopg2-binary==2.9.10
 pycodestyle==2.14.0
 pyflakes==3.4.0
 Pygments==2.19.1
+PyJWT==2.9.0
 pymongo==4.13.2
 pyparsing==3.2.3
 pytest==8.4.0
+python-dateutil==2.9.0.post0
 python-decouple==3.8
 pytz==2025.2
 PyYAML==6.0.2
+six==1.17.0
 sqlparse==0.5.3
+tzdata==2025.2
 uritemplate==4.2.0