Skip to content

Commit a5a1ff8

Browse files
authored
Allow CSVs with no headers (#310)
1 parent 0645c74 commit a5a1ff8

File tree

6 files changed

+80
-23
lines changed

6 files changed

+80
-23
lines changed

pgml-dashboard/app/models.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -318,17 +318,20 @@ class UploadedData(models.Model):
318318
created_at = models.DateTimeField(auto_now_add=True)
319319
updated_at = models.DateTimeField(auto_now=True)
320320

321-
def create_table(self, file):
321+
def create_table(self, file, has_header=False):
322322
if file.content_type == "text/csv":
323323
reader = csv.reader(codecs.iterdecode(file, "utf-8"))
324324
headers = next(reader)
325-
columns = ", ".join(map(lambda x: f"{x.replace(' ', '_').lower()} FLOAT4", headers))
325+
326+
if has_header:
327+
columns = ", ".join(map(lambda x: f"{x.replace(' ', '_').lower()} TEXT", headers))
328+
else:
329+
columns = ", ".join(map(lambda x: f"column_{x} TEXT", range(len(headers))))
326330

327331
with transaction.atomic():
328332
sql = f"CREATE TABLE data_{self.pk} (" + columns + ")"
329333

330334
with connection.cursor() as cursor:
331335
cursor.execute(sql)
332-
333336
file.seek(0)
334-
cursor.copy_expert(f"COPY data_{self.pk} FROM STDIN CSV HEADER", file)
337+
cursor.copy_expert(f"COPY data_{self.pk} FROM STDIN CSV {'HEADER' if has_header else ''}", file)

pgml-dashboard/app/static/css/base.css

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -685,3 +685,39 @@ body.uploader section li {
685685
body.uploader strong {
686686
font-weight: bold;
687687
}
688+
689+
body.uploader label {
690+
user-select: none;
691+
cursor: pointer;
692+
}
693+
694+
/*
695+
* Checkbox
696+
*/
697+
input[type=checkbox] {
698+
/* Reset style */
699+
appearance: none;
700+
701+
background: transparent;
702+
border: 1px solid var(--gray-5);
703+
704+
height: 1.6em;
705+
width: 1.6em;
706+
707+
border-radius: 3px;
708+
709+
display: inline-flex;
710+
align-items: center;
711+
justify-content: center;
712+
position: relative;
713+
714+
cursor: pointer;
715+
}
716+
717+
input[type=checkbox]:checked:after {
718+
content: '\2714';
719+
font-size: 1em;
720+
position: absolute;
721+
color: var(--highlite-green);
722+
filter: brightness(0.75);
723+
}

pgml-dashboard/app/templates/uploader/index.html

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,13 @@ <h1><span class="material-symbols-outlined">cloud_upload</span>Upload Data</h1>
1111
{% endif %}
1212

1313
{% if error %}
14-
<p style="margin-bottom: 1rem;">Hmm, something went wrong. Please make sure:</p>
14+
<p>Hmm, something went wrong. Make sure:</p>
1515
{% else %}
16-
<p style="margin-bottom: 1rem;">You can upload your datasets using the CSV format. Before uploading, please make sure:</p>
16+
<p>You can upload your datasets using the CSV format. Before uploading, make sure:</p>
1717
{% endif %}
1818

1919
<ol>
20-
<li>The data is numeric (i.e. only floats or integers and no text)</li>
21-
<li>The CSV includes headers on the first line</li>
22-
<li>The headers are alphanumeric, contain no spaces and don't start with a number</li>
20+
<li>If the CSV has a header, the column names are alphanumeric, contain no spaces and don't start with a number</li>
2321
<li>The CSV is comma (<code>,</code>) delimited</li>
2422
</ol>
2523

@@ -30,7 +28,7 @@ <h4>Error: </h4>
3028
</div>
3129
{% endif %}
3230

33-
<p>If you are exporting data from a PostgreSQL database, you can use <code>psql</code> to generate a valid CSV file:</p>
31+
<p>If you are exporting data from a PostgreSQL database, you can use <code>psql</code> to generate a valid CSV file with a header:</p>
3432
<div class="markdown-body">
3533
<pre><code class="language-sql">\copy your_table_name TO 'output.csv' CSV HEADER</code></pre>
3634
</div>
@@ -40,8 +38,12 @@ <h4>Error: </h4>
4038
<section>
4139
<form action="{% url 'uploader' %}" method="post" enctype="multipart/form-data">
4240
{% csrf_token %}
43-
<div class="flex">
41+
<div class="flex flex-center">
4442
<input id="file" type="file" name="file" accept="text/csv,application/json" required="true" />
43+
<div class="flex flex-center" style="margin-left: 2rem;">
44+
<input type="checkbox" name="has_header" id="has_header" style="margin-right: 0.5rem;" checked />
45+
<label for="has_header">My CSV has a header</label>
46+
</div>
4547
</div>
4648

4749
<div class="button-container">

pgml-dashboard/app/templates/uploader/uploaded.html

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,21 @@ <h2><span class="material-symbols-outlined">data_array</span>Preview</h2>
1515

1616
<section>
1717
<h2><span class="material-symbols-outlined">table_rows</span>Next Steps</h2>
18-
<p>Your data has been saved in <strong>pgml.{{ table_name }}</strong> table.</p>
19-
<p>You can now build a model using a <a href="{% url 'notebooks' %}">Notebook</a> or browse the data in the <a href="{% url 'console' %}">Console</a>:</p>
18+
<p>Your data has been saved in the <strong>pgml.{{ table_name }}</strong> table. You can explore the data in the <a href="{% url 'console' %}">Console</a>:</p>
2019
<div class="markdown-body">
2120
<pre><code class="language-sql">SELECT * FROM pgml.{{ table_name }}
21+
LIMIT 10</code></pre>
22+
</div>
23+
<p>All columns were converted to text, so you'll need to cast them to the appropriate data type before training a model, for example:</p>
24+
<div class="markdown-body">
25+
<pre><code class="language-sql">CREATE MATERIALIZED VIEW pgml.{{ table_name }}_view AS
26+
SELECT {% for column in columns %}
27+
CAST({{ column }} AS FLOAT4){% if not forloop.last %},{% else %}
28+
{% endif %}{% endfor %}FROM pgml.{{ table_name }}</code></pre>
29+
</div>
30+
<p>You can now run experiments and build models using <a href="{% url 'notebooks' %}">Notebooks</a>:</p>
31+
<div class="markdown-body">
32+
<pre><code class="language-sql">SELECT * FROM pgml.{{ table_name }}_view
2233
LIMIT 10</code></pre>
2334
</section>
2435

pgml-dashboard/app/views/console.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,16 +48,20 @@ def run_sql(request):
4848
try:
4949
cursor.execute("SET statement_timeout = '30s'")
5050
cursor.execute(query)
51-
results = cursor.fetchall()
5251

53-
return render(
54-
request,
55-
"projects/sample.html",
56-
{
57-
"columns": [desc[0] for desc in cursor.description],
58-
"rows": results,
59-
},
60-
)
52+
if cursor.description:
53+
results = cursor.fetchall()
54+
55+
return render(
56+
request,
57+
"projects/sample.html",
58+
{
59+
"columns": [desc[0] for desc in cursor.description],
60+
"rows": results,
61+
},
62+
)
63+
else:
64+
raise Exception(str(cursor.statusmessage))
6165
except Exception as e:
6266
return HttpResponse(
6367
f"""

pgml-dashboard/app/views/uploader.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
class UploadForm(forms.Form):
1616
file = forms.FileField()
17+
has_header = forms.BooleanField(required=False)
1718

1819

1920
def index(request):
@@ -31,7 +32,7 @@ def index(request):
3132
file_type=1 if file.content_type == "text/csv" else 2,
3233
)
3334

34-
upload.create_table(file)
35+
upload.create_table(file, form.cleaned_data.get("has_header", False))
3536
except Exception as e:
3637
return render(
3738
request,

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy