Skip to content

Commit db2dcf5

Browse files
committed
Make some marginal performance improvements in reportErrorPosition(), which turns out to be a dominant part of the runtime in scenarios involving lots of parse-time warnings (such as Stephen Frost's example of an INSERT with a lot of backslash-containing strings). There's not a whole lot we can do about the character-at-a-time scanning, but we can at least avoid traversing the query twice.
1 parent 8e4fe3b commit db2dcf5

File tree

1 file changed: +86 -66 lines changed

1 file changed

+86
-66
lines changed

src/interfaces/libpq/fe-protocol3.c

Lines changed: 86 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/interfaces/libpq/fe-protocol3.c,v 1.27 2006/08/18 19:52:39 tgl Exp $
11+
* $PostgreSQL: pgsql/src/interfaces/libpq/fe-protocol3.c,v 1.28 2006/10/01 22:25:48 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -883,20 +883,25 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
883883
#define MIN_RIGHT_CUT 10 /* try to keep this far away from EOL */
884884

885885
char *wquery;
886-
int clen,
887-
slen,
886+
int slen,
887+
cno,
888888
i,
889-
w,
890889
*qidx,
891890
*scridx,
892891
qoffset,
893892
scroffset,
894893
ibeg,
895894
iend,
896895
loc_line;
897-
bool beg_trunc,
896+
bool mb_encoding,
897+
beg_trunc,
898898
end_trunc;
899899

900+
/* Convert loc from 1-based to 0-based; no-op if out of range */
901+
loc--;
902+
if (loc < 0)
903+
return;
904+
900905
/* Need a writable copy of the query */
901906
wquery = strdup(query);
902907
if (wquery == NULL)
@@ -905,13 +910,13 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
905910
/*
906911
* Each character might occupy multiple physical bytes in the string, and
907912
* in some Far Eastern character sets it might take more than one screen
908-
* column as well. We compute the starting byte offset and starting
913+
* column as well. We compute the starting byte offset and starting
909914
* screen column of each logical character, and store these in qidx[] and
910915
* scridx[] respectively.
911916
*/
912917

913918
/* we need a safe allocation size... */
914-
slen = strlen(query) + 1;
919+
slen = strlen(wquery) + 1;
915920

916921
qidx = (int *) malloc(slen * sizeof(int));
917922
if (qidx == NULL)
@@ -927,79 +932,93 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
927932
return;
928933
}
929934

935+
/* We can optimize a bit if it's a single-byte encoding */
936+
mb_encoding = (pg_encoding_max_length(encoding) != 1);
937+
938+
/*
939+
* Within the scanning loop, cno is the current character's logical number,
940+
* qoffset is its offset in wquery, and scroffset is its starting logical
941+
* screen column (all indexed from 0). "loc" is the logical character
942+
* number of the error location. We scan to determine loc_line (the
943+
* 1-based line number containing loc) and ibeg/iend (first character
944+
* number and last+1 character number of the line containing loc).
945+
* Note that qidx[] and scridx[] are filled only as far as iend.
946+
*/
930947
qoffset = 0;
931948
scroffset = 0;
932-
for (i = 0; query[qoffset] != '\0'; i++)
933-
{
934-
qidx[i] = qoffset;
935-
scridx[i] = scroffset;
936-
w = pg_encoding_dsplen(encoding, &query[qoffset]);
937-
/* treat control chars as width 1; see tab hack below */
938-
if (w <= 0)
939-
w = 1;
940-
scroffset += w;
941-
qoffset += pg_encoding_mblen(encoding, &query[qoffset]);
942-
}
943-
qidx[i] = qoffset;
944-
scridx[i] = scroffset;
945-
clen = i;
949+
loc_line = 1;
950+
ibeg = 0;
951+
iend = -1; /* -1 means not set yet */
946952

947-
/* convert loc to zero-based offset in qidx/scridx arrays */
948-
loc--;
949-
950-
/* do we have something to show? */
951-
if (loc >= 0 && loc <= clen)
953+
for (cno = 0; wquery[qoffset] != '\0'; cno++)
952954
{
953-
/* input line number of our syntax error. */
954-
loc_line = 1;
955-
/* first included char of extract. */
956-
ibeg = 0;
957-
/* last-plus-1 included char of extract. */
958-
iend = clen;
955+
char ch = wquery[qoffset];
956+
957+
qidx[cno] = qoffset;
958+
scridx[cno] = scroffset;
959959

960960
/*
961961
* Replace tabs with spaces in the writable copy. (Later we might
962962
* want to think about coping with their variable screen width, but
963963
* not today.)
964-
*
965-
* Extract line number and begin and end indexes of line containing
966-
* error location. There will not be any newlines or carriage returns
967-
* in the selected extract.
968964
*/
969-
for (i = 0; i < clen; i++)
965+
if (ch == '\t')
966+
wquery[qoffset] = ' ';
967+
968+
/*
969+
* If end-of-line, count lines and mark positions. Each \r or \n counts
970+
* as a line except when \r \n appear together.
971+
*/
972+
else if (ch == '\r' || ch == '\n')
970973
{
971-
/* character length must be 1 or it's not ASCII */
972-
if ((qidx[i + 1] - qidx[i]) == 1)
974+
if (cno < loc)
973975
{
974-
if (wquery[qidx[i]] == '\t')
975-
wquery[qidx[i]] = ' ';
976-
else if (wquery[qidx[i]] == '\r' || wquery[qidx[i]] == '\n')
977-
{
978-
if (i < loc)
979-
{
980-
/*
981-
* count lines before loc. Each \r or \n counts
982-
* as a line except when \r \n appear together.
983-
*/
984-
if (wquery[qidx[i]] == '\r' ||
985-
i == 0 ||
986-
(qidx[i] - qidx[i - 1]) != 1 ||
987-
wquery[qidx[i - 1]] != '\r')
988-
loc_line++;
989-
/* extract beginning = last line start before loc. */
990-
ibeg = i + 1;
991-
}
992-
else
993-
{
994-
/* set extract end. */
995-
iend = i;
996-
/* done scanning. */
997-
break;
998-
}
999-
}
976+
if (ch == '\r' ||
977+
cno == 0 ||
978+
wquery[qidx[cno - 1]] != '\r')
979+
loc_line++;
980+
/* extract beginning = last line start before loc. */
981+
ibeg = cno + 1;
982+
}
983+
else
984+
{
985+
/* set extract end. */
986+
iend = cno;
987+
/* done scanning. */
988+
break;
1000989
}
1001990
}
1002991

992+
/* Advance */
993+
if (mb_encoding)
994+
{
995+
int w;
996+
997+
w = pg_encoding_dsplen(encoding, &wquery[qoffset]);
998+
/* treat any non-tab control chars as width 1 */
999+
if (w <= 0)
1000+
w = 1;
1001+
scroffset += w;
1002+
qoffset += pg_encoding_mblen(encoding, &wquery[qoffset]);
1003+
}
1004+
else
1005+
{
1006+
/* We assume wide chars only exist in multibyte encodings */
1007+
scroffset++;
1008+
qoffset++;
1009+
}
1010+
}
1011+
/* Fix up if we didn't find an end-of-line after loc */
1012+
if (iend < 0)
1013+
{
1014+
iend = cno; /* query length in chars, +1 */
1015+
qidx[iend] = qoffset;
1016+
scridx[iend] = scroffset;
1017+
}
1018+
1019+
/* Print only if loc is within computed query length */
1020+
if (loc <= cno)
1021+
{
10031022
/* If the line extracted is too long, we truncate it. */
10041023
beg_trunc = false;
10051024
end_trunc = false;
@@ -1050,7 +1069,8 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
10501069
scroffset = 0;
10511070
for (; i < msg->len; i += pg_encoding_mblen(encoding, &msg->data[i]))
10521071
{
1053-
w = pg_encoding_dsplen(encoding, &msg->data[i]);
1072+
int w = pg_encoding_dsplen(encoding, &msg->data[i]);
1073+
10541074
if (w <= 0)
10551075
w = 1;
10561076
scroffset += w;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy