Skip to content
Closed

CSV #125

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions backend/core/db/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@
description="Session ID passed as a query parameter.",
)

# Required string query parameter `table_name`, shared by endpoint docs that
# need to name the destination table for CSV data (see its description text).
table_name_param = openapi.Parameter(
name="table_name",
in_=openapi.IN_QUERY,
type=openapi.TYPE_STRING,
required=True,
description="Name of the table where data from csv will be inserted",
)

# Documentation decorator for the schema GET handler: the only documented
# input is the session id query parameter.
get_db_schema_doc = swagger_auto_schema(
manual_parameters=[session_id_query_param],
)
Expand All @@ -23,3 +31,23 @@
type=openapi.TYPE_STRING, description="Raw plain text query"
),
)

# Documentation decorator for the CSV upload POST handler.
# Inputs: the session id and `table_name` query parameters plus a required
# multipart form field named "file" that carries the CSV itself.
post_csv_upload_doc = swagger_auto_schema(
manual_parameters=[
session_id_query_param,
table_name_param,
openapi.Parameter(
name="file",
in_=openapi.IN_FORM,
type=openapi.TYPE_FILE,
required=True,
description="CSV file to upload (must use colon as separator)",
),
],
# File uploads are sent as multipart form data, not JSON.
consumes=['multipart/form-data'],
# Documented outcomes only.
# NOTE(review): confirm this list matches every status the view can return.
responses={
200: openapi.Response("CSV file processed successfully"),
400: openapi.Response("Invalid CSV file or request"),
500: openapi.Response("Server error while processing file"),
}
)
43 changes: 42 additions & 1 deletion backend/core/db/shortcuts.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
from engines import DBEngine, mongo_engine, postgres_engine
import pandas as pd
from typing import List, Any
from datetime import datetime, date
import numpy as np

from engines import postgres_engine, mongo_engine, DBEngine
from templates.models import DBType


Expand All @@ -9,3 +14,39 @@ def get_db_engine(type: str) -> DBEngine | None:
case DBType.MONGODB.value:
return mongo_engine
return None


def _sql_literal(value: Any) -> str:
    """Render a Python/pandas scalar as a Postgres SQL literal.

    Conversion rules, checked in this order:

    * missing values (``None``, ``NaN``, ``pd.NA``, ``pd.NaT``) -> ``NULL``
    * booleans -> ``TRUE`` / ``FALSE``
    * integers -> their decimal text
    * floats -> their decimal text, or ``NULL`` when not finite
    * datetimes / timestamps -> quoted ``YYYY-MM-DD HH:MM:SS[.ffffff][+HH:MM]``
    * dates -> quoted ISO date
    * anything else -> ``str(value)`` quoted, with ``'`` doubled

    Args:
        value: A single cell value (not a list or array).

    Returns:
        The SQL text for the value, ready to be placed in a VALUES list.
    """
    if pd.isna(value):
        return "NULL"

    # bool must be tested before int: bool is a subclass of int in Python.
    if isinstance(value, (bool, np.bool_)):
        return "TRUE" if value else "FALSE"

    if isinstance(value, (int, np.integer)):
        return str(value)

    if isinstance(value, (float, np.floating)):
        # +/-inf has no unquoted numeric literal form; store it as NULL.
        return str(value) if np.isfinite(value) else "NULL"

    # datetime must be tested before date: datetime is a subclass of date.
    if isinstance(value, (datetime, pd.Timestamp)):
        # isoformat keeps fractional seconds and any UTC offset, which a
        # fixed '%Y-%m-%d %H:%M:%S' format would silently drop. For naive
        # whole-second values the output is identical to that format.
        return f"'{value.isoformat(sep=' ')}'"
    if isinstance(value, date):
        return f"'{value.isoformat()}'"

    # Standard SQL escaping: a single quote inside a string is doubled.
    escaped = str(value).replace("'", "''")
    return f"'{escaped}'"


def df_to_insert_queries(df: pd.DataFrame, table: str, engine: DBEngine, db_name: str) -> List[str]:
    """Build one ``INSERT`` statement per DataFrame row.

    Args:
        df: Data to insert; column labels are used as the target column names.
        table: Name of the destination table.
        engine: Unused here; kept so existing callers keep working.
        db_name: Unused here; kept so existing callers keep working.

    Returns:
        A list of SQL strings of the form
        ``INSERT INTO "table" ("c1", "c2") VALUES (v1, v2);``, one per row
        and in row order. The list is empty when ``df`` has no rows.
    """

    def quote_ident(name: Any) -> str:
        # Column labels come from the CSV header, i.e. from the client. An
        # embedded double quote must be doubled, otherwise it would close
        # the quoted identifier and let the rest be parsed as SQL.
        return '"' + str(name).replace('"', '""') + '"'

    cols_sql = ', '.join(quote_ident(c) for c in df.columns)
    prefix = f'INSERT INTO {quote_ident(table)} ({cols_sql}) VALUES ('

    # itertuples keeps each column's own dtype. iterrows would first squeeze
    # every row into a single-dtype Series, e.g. turning the int 1 into 1.0
    # whenever the frame also has a float column.
    return [
        prefix + ', '.join(_sql_literal(v) for v in row) + ');'
        for row in df.itertuples(index=False, name=None)
    ]
3 changes: 2 additions & 1 deletion backend/core/db/urls.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from django.urls import path

from .views import PutView, QueryView, SchemaView
from .views import PutView, QueryView, SchemaView, CSVView

# Routes of this app, relative to wherever this URLconf is included.
# Each path is served by the class-based view named on the same line.
urlpatterns = [
path("", PutView.as_view()),
path("schema/", SchemaView.as_view()),
path("query/", QueryView.as_view()),
path("csv/", CSVView.as_view()),
]
114 changes: 112 additions & 2 deletions backend/core/db/views.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
import tempfile
import os

from django.core.files.uploadedfile import UploadedFile

from engines import postgres_engine
from rest_framework.parsers import MultiPartParser

from rest_framework.parsers import BaseParser
from rest_framework.request import Request
from rest_framework.response import Response
Expand All @@ -8,9 +16,10 @@
from session.models import Session, SessionInfo
from session.shortcuts import resolve_session_id

from .docs import get_db_schema_doc, post_db_query_doc, put_db_schema_doc
from .shortcuts import get_db_engine
from .docs import get_db_schema_doc, post_db_query_doc, put_db_schema_doc, post_csv_upload_doc
from .shortcuts import get_db_engine, df_to_insert_queries

import pandas as pd

class PlainTextParser(BaseParser):
media_type = "text/plain"
Expand Down Expand Up @@ -114,3 +123,104 @@ def post(self, request: Request):
json_schema = schema.to_json()

return Response({"results": json_results, "schema": json_schema})


class CSVView(APIView):
    """Insert the rows of an uploaded colon-separated CSV file into a table.

    The request must carry the file in the multipart field ``file`` and the
    destination table in the ``table_name`` query parameter. The table must
    already exist in the session's database.
    """

    parser_classes = [MultiPartParser]

    # Upload size cap: 10 MB, in bytes.
    MAX_UPLOAD_SIZE = 10 * 1024 * 1024

    # Whole-word type names (trailing digits removed) treated as numeric.
    # "double precision" is two words and is handled separately.
    _NUMERIC_TYPE_WORDS = frozenset({
        "int", "integer", "smallint", "bigint",
        "serial", "smallserial", "bigserial",
        "numeric", "decimal", "real", "float", "money",
    })

    @classmethod
    def _is_numeric_type(cls, type_name: str) -> bool:
        """Return True when a column type string names a numeric type.

        Matching is done on whole words. Plain substring matching would
        treat ``interval`` and ``point`` as numeric because both contain
        ``int``, and their values would then be coerced to NULL.
        """
        lowered = type_name.lower()
        if "double precision" in lowered:
            return True
        # Split on anything that is not a letter or digit, so that
        # "numeric(10,2)" yields the words "numeric", "10" and "2".
        words = "".join(ch if ch.isalnum() else " " for ch in lowered).split()
        # rstrip digits so "int4" / "float8" match "int" / "float".
        return any(
            word.rstrip("0123456789") in cls._NUMERIC_TYPE_WORDS
            for word in words
        )

    @post_csv_upload_doc
    def post(self, request: Request):
        """Validate the upload, parse it and insert its rows.

        Returns:
            200 with the number of inserted rows on success.
            400 for a missing template, file or ``table_name``, a non-CSV
            name, an oversized, empty or unparsable file, an unknown table,
            or a ``QueryError`` from the engine.
            418 when the session template maps to no known engine.
            500 for any other error raised while processing the file.
            The error response of ``resolve_session_id`` when it yields one.
        """
        session_id, err_response = resolve_session_id(request)
        if err_response:
            return err_response

        session = Session.objects.get(id=session_id)
        session_info = SessionInfo.objects.get(session=session_id)

        if not session_info.template:
            return Response({"detail": "Template not chosen"}, status=400)

        if 'file' not in request.FILES:
            return Response({"detail": "No file provided"}, status=400)

        uploaded_file: UploadedFile = request.FILES['file']

        # `name` may be missing on an upload; treat that as "not a CSV".
        if not (uploaded_file.name or "").lower().endswith('.csv'):
            return Response({"detail": "Only CSV files are allowed"}, status=400)

        if uploaded_file.size > self.MAX_UPLOAD_SIZE:
            return Response({"detail": "File size exceeds 10MB limit"}, status=400)

        # Cheap request checks come first, so that a bad request never
        # causes the upload to be written to disk and parsed.
        table_name = request.query_params.get("table_name")
        if not table_name:
            return Response({"detail": "Missing `table_name` query parameter"}, status=400)

        engine = get_db_engine(session_info.template.type)
        if not engine:
            return Response({"detail": "Unknown engine type"}, status=418)

        db_name = session.get_unauth_dbname()

        # Tracked separately from the file object so that the cleanup in
        # `finally` is safe even if creating the temp file itself fails.
        temp_path = None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as temp_file:
                temp_path = temp_file.name
                for chunk in uploaded_file.chunks():
                    temp_file.write(chunk)
            # The handle is closed (and therefore flushed) here; the file is
            # reopened by name below, which is not portable while the
            # original handle is still open.

            try:
                df = pd.read_csv(temp_path, sep=':')
            except pd.errors.EmptyDataError:
                return Response({"detail": "Empty CSV file"}, status=400)
            except pd.errors.ParserError:
                return Response({"detail": "Invalid CSV format"}, status=400)

            # Header-only file: nothing to insert, and joining zero
            # statements would hand the engine an empty query string.
            if df.empty:
                return Response({"detail": "CSV file contains no data rows"}, status=400)

            try:
                db_info = engine.get_db(db_name)
            except QueryError as e:
                return Response({"detail": "QueryError while getting DB schema: " + str(e)}, status=400)

            table_info = next((t for t in db_info.tables if t.name == table_name), None)
            if table_info is None:
                return Response({"detail": f"Table \"{table_name}\" not found"}, status=400)

            numeric_cols = {
                col.name for col in table_info.columns
                if self._is_numeric_type(col.type)
            }

            # Header cells may carry stray spaces around the separator.
            df.columns = df.columns.str.strip()
            for col in numeric_cols:
                if col in df.columns:
                    # Unparsable cells become NaN and are inserted as NULL.
                    df[col] = pd.to_numeric(df[col], errors='coerce')

            # NOTE(review): the generated statements are Postgres SQL; confirm
            # that only SQL engines can reach this point.
            queries = df_to_insert_queries(df, table_name, engine, db_name)
            full_query = "\n".join(queries)

            try:
                engine.send_query(db_name, full_query)
            except QueryError as e:
                return Response({"detail": "QueryError: " + str(e)}, status=400)

            return Response({"detail": f"Successfully inserted {len(df)} rows into {table_name}"}, status=200)

        except Exception as e:
            # Top-level boundary of the request: report instead of crashing.
            return Response({"detail": f"Error processing file: {str(e)}"},
                            status=500)
        finally:
            if temp_path and os.path.exists(temp_path):
                os.unlink(temp_path)


7 changes: 7 additions & 0 deletions backend/core/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,32 @@ Django==5.2.1
django-cors-headers==4.7.0
django-filter==25.1
djangorestframework==3.16.0
djangorestframework_simplejwt==5.5.0
dnspython==2.7.0
drf-yasg==1.21.10
flake8==7.3.0
inflection==0.5.1
iniconfig==2.1.0
Markdown==3.8
mccabe==0.7.0
numpy==2.3.1
packaging==25.0
pandas==2.3.0
pillow==11.2.1
pluggy==1.6.0
psycopg2-binary==2.9.10
pycodestyle==2.14.0
pyflakes==3.4.0
Pygments==2.19.1
PyJWT==2.9.0
pymongo==4.13.2
pyparsing==3.2.3
pytest==8.4.0
python-dateutil==2.9.0.post0
python-decouple==3.8
pytz==2025.2
PyYAML==6.0.2
six==1.17.0
sqlparse==0.5.3
tzdata==2025.2
uritemplate==4.2.0
Loading