diff --git a/Makefile b/Makefile
index 0717592f5e..dcfd883319 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ OBJS = src/rumsort.o src/rum_ts_utils.o src/rumtsquery.o \
 	src/rumbtree.o src/rumbulk.o src/rumdatapage.o \
 	src/rumentrypage.o src/rumget.o src/ruminsert.o \
 	src/rumscan.o src/rumutil.o src/rumvacuum.o src/rumvalidate.o \
-	src/btree_rum.o $(WIN32RES)
+	src/btree_rum.o src/tf_idf.o $(WIN32RES)
 
 EXTENSION = rum
 DATA = rum--1.0.sql rum--1.0--1.1.sql rum--1.1.sql
diff --git a/src/rum.h b/src/rum.h
index 78cb8db439..3f48ed4fa7 100644
--- a/src/rum.h
+++ b/src/rum.h
@@ -19,6 +19,7 @@
 #include "access/sdir.h"
 #include "lib/rbtree.h"
 #include "storage/bufmgr.h"
+#include "utils/guc.h"
 
 #include "rumsort.h"
 
@@ -1008,4 +1009,10 @@ extern Datum FunctionCall10Coll(FmgrInfo *flinfo, Oid collation,
 				   Datum arg6, Datum arg7, Datum arg8,
 				   Datum arg9, Datum arg10);
 
+/* tf_idf.c: GUC hooks for the TF/IDF statistics source and IDF estimation */
+extern char *TFIDFSource;		/* string value of the source GUC */
+extern bool check_tf_idf_source(char **newval, void **extra, GucSource source);
+extern void assign_tf_idf_source(const char *newval, void *extra);
+extern float4 estimate_idf(char *lexeme, int length);
+
 #endif   /* __RUM_H__ */
diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c
index 07faabe42c..d9f79423b2 100644
--- a/src/rum_ts_utils.c
+++ b/src/rum_ts_utils.c
@@ -96,12 +96,13 @@ typedef struct
 		} key;
 	} data;
 	uint8		wclass;
+	float4		idf;
 	int32		pos;
 } DocRepresentation;
 
 typedef struct
 {
-	bool	operandexist;
+	bool			operandexist;
 	WordEntryPos	pos;
 }
 QueryRepresentationOperand;
@@ -140,6 +141,7 @@ static WordEntryPosVector POSNULL = {
 #define RANK_NORM_UNIQ			0x08
 #define RANK_NORM_LOGUNIQ		0x10
 #define RANK_NORM_RDIVRPLUS1	0x20
+#define RANK_NORM_IDF			0x40
 #define DEF_NORM_METHOD			RANK_NO_NORM
 
 #define QR_GET_OPERAND(q, v)	\
@@ -1090,7 +1092,7 @@ find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
 }
 
 static DocRepresentation *
-get_docrep(TSVector txt, QueryRepresentation *qr, uint32 *doclen)
+get_docrep(TSVector txt, QueryRepresentation *qr, uint32 *doclen, bool load_idf)
 {
 	QueryItem  *item = GETQUERY(qr->query);
 	WordEntry  *entry,
@@ -1126,6 +1128,8 @@ get_docrep(TSVector txt, QueryRepresentation *qr, uint32 *doclen)
 
 		while (entry - firstentry < nitem)
 		{
+			float4		idf;
+
 			if (entry->haspos)
 			{
 				dimt = POSDATALEN(txt, entry);
@@ -1179,12 +1183,18 @@ get_docrep(TSVector txt, QueryRepresentation *qr, uint32 *doclen)
 
 						}
 					}
+
+					if (load_idf)
+						idf = estimate_idf(STRPTR(txt) + entry->pos, entry->len);
+					else
+						idf = 1.0f;
 				}
 				else
 				{
 					doc[cur].data.item.nitem = doc[cur - 1].data.item.nitem;
 					doc[cur].data.item.item = doc[cur - 1].data.item.item;
 				}
+				doc[cur].idf = idf;
 				doc[cur].pos = WEP_GETPOS(post[j]);
 				doc[cur].wclass = WEP_GETWEIGHT(post[j]);
 				cur++;
@@ -1229,6 +1239,7 @@ calc_score_docr(float4 *arrdata, DocRepresentation *doc, uint32 doclen,
 	{
 		double		Cpos = 0.0;
 		double		InvSum = 0.0;
+		double		Idf = 0.0;
 		int			nNoise;
 		DocRepresentation *ptr = ext.begin;
 		/* Added by SK */
@@ -1247,6 +1258,7 @@ calc_score_docr(float4 *arrdata, DocRepresentation *doc, uint32 doclen,
 			/* For rum_tsquery_distance() */
 			else
 				new_cover_key += (int)(uintptr_t)ptr->data.key.item_first;
+			Idf += ptr->idf;
 			ptr++;
 		}
 
@@ -1286,6 +1298,9 @@ calc_score_docr(float4 *arrdata, DocRepresentation *doc, uint32 doclen,
 		if (nitems > 0)
 			Cpos *= nitems;
 
+		if (method & RANK_NORM_IDF)
+			Cpos *= Idf;
+
 		/*
 		 * if doc are big enough then ext.q may be equal to ext.p due to limit
 		 * of posional information. In this case we approximate number of
@@ -1371,7 +1386,7 @@ calc_score(float4 *arrdata, TSVector txt, TSQuery query, int method)
 	qr.operandData = palloc0(sizeof(qr.operandData[0]) * query->size);
 	qr.length = query->size;
 
-	doc = get_docrep(txt, &qr, &doclen);
+	doc = get_docrep(txt, &qr, &doclen, (method & RANK_NORM_IDF) ? true : false);
 	if (!doc)
 	{
 		pfree(qr.operandData);
diff --git a/src/rumutil.c b/src/rumutil.c
index 25eaaedddd..e67209b578 100644
--- a/src/rumutil.c
+++ b/src/rumutil.c
@@ -49,6 +49,17 @@ _PG_init(void)
 							PGC_USERSET, 0,
 							NULL, NULL, NULL);
 
+	/* "[schema.]relation.attribute" whose statistics feed IDF estimation */
+	DefineCustomStringVariable("tf_idf_source",
+							   "Source statistics for TF/IDF calculation.",
+							   "",
+							   &TFIDFSource,
+							   "",
+							   PGC_USERSET, 0,
+							   check_tf_idf_source,
+							   assign_tf_idf_source,
+							   NULL);
+
 	rum_relopt_kind = add_reloption_kind();
 
 	add_string_reloption(rum_relopt_kind, "attach",
diff --git a/src/tf_idf.c b/src/tf_idf.c
new file mode 100644
index 0000000000..1c14ef2d04
--- /dev/null
+++ b/src/tf_idf.c
@@ -0,0 +1,346 @@
+/*-------------------------------------------------------------------------
+ *
+ * tf_idf.c
+ *		Implementation of TF/IDF statistics calculation.
+ *
+ * Portions Copyright (c) 2017, Postgres Professional
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/namespace.h"
+#include "catalog/pg_statistic.h"
+#include "catalog/pg_type.h"
+#include "nodes/nodeFuncs.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/syscache.h"
+#include "utils/varlena.h"
+
+#include "rum.h"
+
+/*
+ * FIXME:
+ *  * cache IDF for ts_query (non-prefix search?)
+ *  * calculate IDF from RUM index
+ */
+
+/* lookup table type for binary searching through MCELEMs */
+typedef struct
+{
+	text	   *element;		/* lexeme from the MCELEM values array */
+	float4		frequency;		/* its entry in the MCELEM numbers array */
+} TextFreq;
+
+/* type of keys for bsearch'ing through an array of TextFreqs */
+typedef struct
+{
+	char	   *lexeme;			/* lexeme bytes, not NUL-terminated */
+	int			length;			/* number of bytes in lexeme */
+} LexemeKey;
+
+typedef struct					/* MCELEM statistics loaded from the source */
+{
+	TextFreq   *lookup;			/* nmcelem entries, searched with bsearch() */
+	int			nmcelem;		/* number of entries in lookup */
+	float4		minfreq;		/* halved to rate lexemes missing from lookup */
+} MCelemStats;
+
+typedef struct					/* parsed form of the statistics source GUC */
+{
+	Oid			relId;			/* OID of the source relation */
+	AttrNumber	attrno;			/* column number or index attribute number */
+} RelAttrInfo;
+
+char				   *TFIDFSource;		/* string value of the source GUC */
+static RelAttrInfo		TFIDFSourceParsed;	/* source as set by the assign hook */
+static bool				TDIDFLoaded = false;	/* does TDIDFStats hold loaded data? */
+static MemoryContext	TFIDFContext = NULL;	/* owns the data TDIDFStats points to */
+static MCelemStats		TDIDFStats;	/* statistics loaded from the source */
+
+/* Bail out of check_tf_idf_source(); behaves as one statement, add the ';'. */
+#define EXIT_CHECK_TF_IDF_SOURCE(error) \
+	do { \
+		GUC_check_errdetail(error); \
+		pfree(rawname); \
+		list_free(namelist); \
+		if (rel) RelationClose(rel); \
+		return false; \
+	} while (false)
+
+static void load_tf_idf_source(void);			/* (re)load the statistics */
+static void check_load_tf_idf_source(void);		/* load them unless cached */
+static void forget_tf_idf_stats(void);			/* drop the cached statistics */
+static int	compare_lexeme_textfreq(const void *e1, const void *e2);	/* bsearch() comparator */
+
+/*
+ * GUC check hook for the TF/IDF statistics source.  Accepts an empty value,
+ * "[schema.]table.column" naming a tsvector column, or "[schema.]index.N"
+ * naming the N-th (1-based) index attribute, a tsvector expression.  On
+ * success the resolved relation OID / attribute number go to *extra.
+ */
+bool
+check_tf_idf_source(char **newval, void **extra, GucSource source)
+{
+	char	   *rawname, *attname;
+	List	   *namelist;
+	Oid			namespaceId, relId;
+	Relation	rel = NULL;
+	AttrNumber	attrno;
+	int			i;
+	RelAttrInfo *myextra;
+
+	/* Need a modifiable copy of string to parse it into identifiers */
+	rawname = pstrdup(*newval);
+	if (!SplitIdentifierString(rawname, '.', &namelist))
+		EXIT_CHECK_TF_IDF_SOURCE("List syntax is invalid.");
+
+	switch (list_length(namelist))
+	{
+		case 0:
+			/* empty value: accepted, but don't leak the parsing buffers */
+			pfree(rawname);
+			list_free(namelist);
+			return true;
+		case 1:
+			EXIT_CHECK_TF_IDF_SOURCE("improper column name (there should be at least 2 dotted names)");
+		case 2:
+			relId = RelnameGetRelid(linitial(namelist));
+			attname = lsecond(namelist);
+			break;
+		case 3:
+			namespaceId = LookupExplicitNamespace(linitial(namelist), true);
+			relId = OidIsValid(namespaceId) ?
+				get_relname_relid(lsecond(namelist), namespaceId) : InvalidOid;
+			attname = lthird(namelist);
+			break;
+		default:
+			EXIT_CHECK_TF_IDF_SOURCE("improper column name (too many dotted names)");
+	}
+
+	if (!OidIsValid(relId))
+		EXIT_CHECK_TF_IDF_SOURCE("relation not found");
+	/* RelationIdGetRelation() returns NULL if the relation is already gone */
+	rel = RelationIdGetRelation(relId);
+	if (rel == NULL)
+		EXIT_CHECK_TF_IDF_SOURCE("relation not found");
+
+	if (rel->rd_rel->relkind == RELKIND_INDEX)
+	{
+		int			exprnum = 0;
+
+		/* pg_atoi()'s third argument is a terminator character, not a base */
+		attrno = pg_atoi(attname, sizeof(attrno), '\0');
+		if (attrno <= 0 || attrno > rel->rd_index->indnatts)
+			EXIT_CHECK_TF_IDF_SOURCE("wrong index attribute number");
+		if (rel->rd_index->indkey.values[attrno - 1] != InvalidAttrNumber)
+			EXIT_CHECK_TF_IDF_SOURCE("regular indexed column is specified");
+		/* find the expression's position: count expression columns before it */
+		for (i = 0; i < attrno - 1; i++)
+			if (rel->rd_index->indkey.values[i] == InvalidAttrNumber)
+				exprnum++;
+		if (exprType((Node *) list_nth(RelationGetIndexExpressions(rel), exprnum)) != TSVECTOROID)
+			EXIT_CHECK_TF_IDF_SOURCE("indexed expression should be of tsvector type");
+	}
+	else
+	{
+		TupleDesc	tupDesc = rel->rd_att;
+
+		attrno = InvalidAttrNumber;
+		for (i = 0; i < tupDesc->natts; i++)
+			if (namestrcmp(&(tupDesc->attrs[i]->attname), attname) == 0)
+			{
+				attrno = tupDesc->attrs[i]->attnum;
+				break;
+			}
+		if (attrno == InvalidAttrNumber)
+			EXIT_CHECK_TF_IDF_SOURCE("attribute not found");
+		if (tupDesc->attrs[attrno - 1]->atttypid != TSVECTOROID)
+			EXIT_CHECK_TF_IDF_SOURCE("attribute should be of tsvector type");
+	}
+
+	/* GUC "extra" data must be malloc'ed; fail cleanly if that is impossible */
+	myextra = (RelAttrInfo *) malloc(sizeof(RelAttrInfo));
+	if (myextra == NULL)
+		EXIT_CHECK_TF_IDF_SOURCE("out of memory");
+	myextra->relId = relId;
+	myextra->attrno = attrno;
+	*extra = (void *) myextra;
+	pfree(rawname);
+	list_free(namelist);
+	RelationClose(rel);
+	return true;
+}
+
+
+/*
+ * GUC assign hook for the TF/IDF statistics source.
+ *
+ * Adopts the relation and attribute resolved by the check hook, or marks the
+ * source as unset when no extra data was supplied, then drops any statistics
+ * cached for the previous source.
+ */
+void
+assign_tf_idf_source(const char *newval, void *extra)
+{
+	const RelAttrInfo *parsed = (const RelAttrInfo *) extra;
+
+	TFIDFSourceParsed.relId = parsed ? parsed->relId : InvalidOid;
+	TFIDFSourceParsed.attrno = parsed ? parsed->attrno : InvalidAttrNumber;
+
+	forget_tf_idf_stats();
+}
+
+/*
+ * Load the source's MCELEM statistics into TDIDFStats.  The data lives in
+ * TFIDFContext, which is emptied first.  Raises an ERROR when no source is
+ * configured or no suitable statistics exist for it.
+ */
+static void
+load_tf_idf_source(void)
+{
+	HeapTuple	statsTuple;
+	AttStatsSlot sslot;
+	MemoryContext oldContext;
+	int			i;
+
+	if (!OidIsValid(TFIDFSourceParsed.relId)
+		|| TFIDFSourceParsed.attrno == InvalidAttrNumber)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("statistics for TF/IDF is not defined"),
+				 errhint("consider setting tf_idf_source GUC")));
+
+	if (!TFIDFContext)
+		TFIDFContext = AllocSetContextCreate(TopMemoryContext,
+											 "Memory context for TF/IDF statistics",
+											 ALLOCSET_DEFAULT_SIZES);
+
+	/* look up with the third cache key true first, then with it false */
+	statsTuple = SearchSysCache3(STATRELATTINH,
+								 ObjectIdGetDatum(TFIDFSourceParsed.relId),
+								 Int16GetDatum(TFIDFSourceParsed.attrno),
+								 BoolGetDatum(true));
+	if (!statsTuple)
+		statsTuple = SearchSysCache3(STATRELATTINH,
+									 ObjectIdGetDatum(TFIDFSourceParsed.relId),
+									 Int16GetDatum(TFIDFSourceParsed.attrno),
+									 BoolGetDatum(false));
+
+	/* forget the old data; the slot data we keep is allocated in our context */
+	MemoryContextReset(TFIDFContext);
+	TDIDFLoaded = false;
+	oldContext = MemoryContextSwitchTo(TFIDFContext);
+
+	if (!statsTuple
+		|| !get_attstatsslot(&sslot, statsTuple,
+							 STATISTIC_KIND_MCELEM, InvalidOid,
+							 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)
+		|| sslot.nnumbers != sslot.nvalues + 2)
+	{
+		/* don't error out with our context current or the tuple still held */
+		MemoryContextSwitchTo(oldContext);
+		if (statsTuple)
+			ReleaseSysCache(statsTuple);
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("statistics for TF/IDF is not found"),
+				 errhint("consider running ANALYZE")));
+	}
+
+	TDIDFStats.nmcelem = sslot.nvalues;
+	/* of the two extra trailing numbers, the first serves as minfreq */
+	TDIDFStats.minfreq = sslot.numbers[sslot.nnumbers - 2];
+	/* Transpose the data into a single array so we can use bsearch(). */
+	TDIDFStats.lookup = (TextFreq *) palloc(sizeof(TextFreq) * TDIDFStats.nmcelem);
+	for (i = 0; i < TDIDFStats.nmcelem; i++)
+	{
+		/* array elements are never compressed or stored out-of-line */
+		Assert(!VARATT_IS_COMPRESSED(sslot.values[i]) && !VARATT_IS_EXTERNAL(sslot.values[i]));
+		TDIDFStats.lookup[i].element = (text *) DatumGetPointer(sslot.values[i]);
+		TDIDFStats.lookup[i].frequency = sslot.numbers[i];
+	}
+	MemoryContextSwitchTo(oldContext);
+	ReleaseSysCache(statsTuple);
+	TDIDFLoaded = true;
+}
+
+static void
+check_load_tf_idf_source(void)
+{
+	if (TDIDFLoaded == false)	/* statistics are loaded lazily */
+		load_tf_idf_source();
+}
+
+static void
+forget_tf_idf_stats(void)
+{
+	TDIDFLoaded = false;		/* forces a reload on the next estimate_idf() */
+	if (TFIDFContext != NULL)
+		MemoryContextReset(TFIDFContext);
+}
+
+/*
+ * bsearch() comparator for a LexemeKey (a lexeme that is not NUL-terminated,
+ * given together with its length) and a TextFreq.
+ *
+ * Use length, then byte-for-byte comparison, because that's how ANALYZE
+ * code sorted data before storing it in a statistic tuple.
+ * See ts_typanalyze.c for details.
+ * Returns a negative, zero or positive value, as bsearch() expects.
+ */
+static int
+compare_lexeme_textfreq(const void *e1, const void *e2)
+{
+	const LexemeKey *probe = (const LexemeKey *) e1;
+	const TextFreq *entry = (const TextFreq *) e2;
+	int			probelen = probe->length;
+	int			entrylen = VARSIZE_ANY_EXHDR(entry->element);
+
+	/* Unequal lengths settle the order, possibly avoiding a strncmp call */
+	if (probelen > entrylen)
+		return 1;
+	if (probelen < entrylen)
+		return -1;
+
+	/* Same length: fall back on byte-for-byte comparison */
+	return strncmp(probe->lexeme, VARDATA_ANY(entry->element), probelen);
+}
+
+/*
+ * Estimate the inverse document frequency of a lexeme.
+ *
+ * 'lexeme' points at 'length' bytes and need not be NUL-terminated.  The
+ * source statistics are loaded on first use.  The result is the reciprocal
+ * of the lexeme's frequency in the MCELEM statistics, or of half the
+ * recorded minimum frequency if the lexeme is not listed there.
+ */
+float4
+estimate_idf(char *lexeme, int length)
+{
+	LexemeKey	probe;
+	TextFreq   *hit;
+
+	check_load_tf_idf_source();
+
+	probe.lexeme = lexeme;
+	probe.length = length;
+
+	hit = (TextFreq *) bsearch(&probe, TDIDFStats.lookup, TDIDFStats.nmcelem,
+							   sizeof(TextFreq), compare_lexeme_textfreq);
+
+	/*
+	 * The element is in MCELEM.  Use its precise selectivity (or at least
+	 * as precise as ANALYZE could find out).
+	 */
+	if (hit != NULL)
+		return 1.0f / hit->frequency;
+
+	/*
+	 * The element is not in MCELEM.  Punt, but assume that the selectivity
+	 * cannot be more than minfreq / 2.
+	 */
+	return 1.0f / (TDIDFStats.minfreq / 2);
+}

<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Transitional//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'>
<html xmlns='http://www.w3.org/1999/xhtml'>
<head>
<title>pFad - Phonifier reborn</title>
<meta http-equiv='Content-Type' content='text/html; charset=utf-8' />
</head>
<body>
<h1>Pfad - The Proxy pFad of &#169; 2024 Garber Painting. All rights reserved.</h1>


<!-- Disclaimer -->
<p>Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.</p>
<br>
<p>Alternative Proxies:</p><p><a href="http://rainy.clevelandohioweatherforecast.com/php-proxy/index.php?q=https://github.com/postgrespro/rum/compare/master...tf_idf.diff" target="_blank">Alternative Proxy</a></p><p><a href="http://rainy.clevelandohioweatherforecast.com/pFad/index.php?u=https://github.com/postgrespro/rum/compare/master...tf_idf.diff" target="_blank">pFad Proxy</a></p><p><a href="http://rainy.clevelandohioweatherforecast.com/pFad/v3index.php?u=https://github.com/postgrespro/rum/compare/master...tf_idf.diff" target="_blank">pFad v3 Proxy</a></p><p><a href="http://rainy.clevelandohioweatherforecast.com/pFad/v4index.php?u=https://github.com/postgrespro/rum/compare/master...tf_idf.diff" target="_blank">pFad v4 Proxy</a></p></body>
</html>