diff --git a/pgml-cms/docs/SUMMARY.md b/pgml-cms/docs/SUMMARY.md
index 6c8d97eb8..d93e0ac0a 100644
--- a/pgml-cms/docs/SUMMARY.md
+++ b/pgml-cms/docs/SUMMARY.md
@@ -18,10 +18,10 @@
* [SQL extension](api/sql-extension/README.md)
* [pgml.embed()](api/sql-extension/pgml.embed.md)
* [pgml.transform()](api/sql-extension/pgml.transform/README.md)
- * [Fill Mask](api/sql-extension/pgml.transform/fill-mask.md)
- * [Question Answering](api/sql-extension/pgml.transform/question-answering.md)
+ * [Fill-Mask](api/sql-extension/pgml.transform/fill-mask.md)
+ * [Question answering](api/sql-extension/pgml.transform/question-answering.md)
* [Summarization](api/sql-extension/pgml.transform/summarization.md)
- * [Text Classification](api/sql-extension/pgml.transform/text-classification.md)
+ * [Text classification](api/sql-extension/pgml.transform/text-classification.md)
* [Text Generation](api/sql-extension/pgml.transform/text-generation.md)
* [Text-to-Text Generation](api/sql-extension/pgml.transform/text-to-text-generation.md)
* [Token Classification](api/sql-extension/pgml.transform/token-classification.md)
diff --git a/pgml-cms/docs/api/sql-extension/pgml.embed.md b/pgml-cms/docs/api/sql-extension/pgml.embed.md
index a8f57f9dc..b31c944b3 100644
--- a/pgml-cms/docs/api/sql-extension/pgml.embed.md
+++ b/pgml-cms/docs/api/sql-extension/pgml.embed.md
@@ -22,7 +22,7 @@ pgml.embed(
|----------|-------------|---------|
| transformer | The name of a Hugging Face embedding model. | `intfloat/e5-large-v2` |
| text | The text to embed. This can be a string or the name of a column from a PostgreSQL table. | `'I am your father, Luke'` |
-| kwargs | Additional arguments that are passed to the model. | |
+| kwargs | Additional arguments that are passed to the model during inference. | |
### Examples
@@ -43,7 +43,7 @@ SELECT * FROM pgml.embed(
{% endtab %}
{% endtabs %}
-#### Generate embeddings from a table
+#### Generate embeddings inside a table
SQL functions can be used as part of a query to insert, update, or even automatically generate column values of any table:
@@ -96,9 +96,3 @@ LIMIT 1;
{% endtabs %}
This query will return the quote with the most similar meaning to `'Feel the force!'` by generating an embedding of that quote and comparing it to all other embeddings in the table, using vector cosine similarity as the measure of distance.
-
-## Performance
-
-First time `pgml.embed()` is called with a new model, it is downloaded from Hugging Face and saved in the cache directory. Subsequent calls will use the cached model, which is faster, and if the connection to the database is kept open, the model will be reused across multiple queries without being unloaded from memory.
-
-If a GPU is available, the model will be automatically loaded onto the GPU and the embedding generation will be even faster.
diff --git a/pgml-cms/docs/api/sql-extension/pgml.transform/README.md b/pgml-cms/docs/api/sql-extension/pgml.transform/README.md
index 101ee0828..9e13f5c2a 100644
--- a/pgml-cms/docs/api/sql-extension/pgml.transform/README.md
+++ b/pgml-cms/docs/api/sql-extension/pgml.transform/README.md
@@ -27,7 +27,7 @@ The `pgml.transform()` function comes in two flavors, task-based and model-based
### Task-based API
-The task-based API automatically chooses a model to use based on the task:
+The task-based API automatically chooses a model based on the task:
```postgresql
pgml.transform(
@@ -37,22 +37,34 @@ pgml.transform(
)
```
-| Argument | Description | Example |
-|----------|-------------|---------|
-| task | The name of a natural language processing task. | `text-generation` |
-| args | Additional kwargs to pass to the pipeline. | `{"max_new_tokens": 50}` |
-| inputs | Array of prompts to pass to the model for inference. | `['Once upon a time...']` |
+| Argument | Description | Example | Required |
+|----------|-------------|---------|----------|
+| task | The name of a natural language processing task. | `'text-generation'` | Required |
+| args | Additional kwargs to pass to the pipeline. | `'{"max_new_tokens": 50}'::JSONB` | Optional |
+| inputs | Array of prompts to pass to the model for inference. Each prompt is evaluated independently and a separate result is returned. | `ARRAY['Once upon a time...']` | Required |
-#### Example
+#### Examples
{% tabs %}
-{% tab title="SQL" %}
+{% tab title="Text generation" %}
+
+```postgresql
+SELECT *
+FROM pgml.transform(
+ task => 'text-generation',
+ inputs => ARRAY['In a galaxy far far away']
+);
+```
+
+{% endtab %}
+{% tab title="Translation" %}
```postgresql
SELECT *
-FROM pgml.transform (
- 'translation_en_to_fr',
- 'How do I say hello in French?',
+FROM pgml.transform(
+ task => 'translation_en_to_fr',
+ inputs => ARRAY['How do I say hello in French?']
);
```
@@ -61,7 +73,7 @@ FROM pgml.transform (
### Model-based API
-The model-based API requires the name of the model and the task, passed as a JSON object, which allows it to be more generic:
+The model-based API requires the name of the model and the task, passed as a JSON object. This allows it to be more generic and support more models:
```postgresql
pgml.transform(
@@ -71,16 +83,41 @@ pgml.transform(
)
```
-| Argument | Description | Example |
-|----------|-------------|---------|
-| task | Model configuration, including name and task. | `{"task": "text-generation", "model": "mistralai/Mixtral-8x7B-v0.1"}` |
-| args | Additional kwargs to pass to the pipeline. | `{"max_new_tokens": 50}` |
-| inputs | Array of prompts to pass to the model for inference. | `['Once upon a time...']` |
+
+<table>
+  <thead>
+    <tr>
+      <th>Argument</th>
+      <th>Description</th>
+      <th>Example</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>model</td>
+      <td>Model configuration, including name and task.</td>
+      <td>
+<pre class="code-multi-line">
+'{
+  "task": "text-generation",
+  "model": "mistralai/Mixtral-8x7B-v0.1"
+}'::JSONB
+</pre>
+      </td>
+    </tr>
+    <tr>
+      <td>args</td>
+      <td>Additional kwargs to pass to the pipeline.</td>
+      <td><code>'{"max_new_tokens": 50}'::JSONB</code></td>
+    </tr>
+    <tr>
+      <td>inputs</td>
+      <td>Array of prompts to pass to the model for inference. Each prompt is evaluated independently.</td>
+      <td><code>ARRAY['Once upon a time...']</code></td>
+    </tr>
+  </tbody>
+</table>
+
#### Example
{% tabs %}
-{% tab title="SQL" %}
+{% tab title="PostgresML SQL" %}
```postgresql
SELECT pgml.transform(
@@ -89,8 +126,9 @@ SELECT pgml.transform(
"model": "TheBloke/zephyr-7B-beta-GPTQ",
"model_type": "mistral",
"revision": "main",
+ "device_map": "auto"
}'::JSONB,
- inputs => ['AI is going to change the world in the following ways:'],
+ inputs => ARRAY['AI is going to'],
args => '{
"max_new_tokens": 100
}'::JSONB
@@ -138,11 +176,12 @@ PostgresML currently supports most NLP tasks available on Hugging Face:
| [Token classification](token-classification) | `token-classification` | Classify tokens in a text. |
| [Translation](translation) | `translation` | Translate text from one language to another. |
| [Zero-shot classification](zero-shot-classification) | `zero-shot-classification` | Classify a text without training data. |
+| Conversational | `conversational` | Engage in a conversation with the model, e.g. chatbot. |
+### Structured inputs
-## Performance
+Both flavors of the `pgml.transform()` function also support structured inputs formatted as JSON. Structured inputs are used with the conversational task, e.g. to differentiate between the system and user prompts. Simply replace the text array argument with an array of JSONB objects, as shown below.
-Much like `pgml.embed()`, the models used in `pgml.transform()` are downloaded from Hugging Face and cached locally. If the connection to the database is kept open, the model remains in memory, which allows for faster inference on subsequent calls. If you want to free up memory, you can close the connection.
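+
+For example, here is a minimal sketch of a conversational call. The model name and the `role`/`content` message keys are assumptions for illustration; substitute any chat model from Hugging Face that supports the `conversational` task:
+
+```postgresql
+SELECT pgml.transform(
+    task   => '{
+        "task": "conversational",
+        "model": "HuggingFaceH4/zephyr-7b-beta"
+    }'::JSONB, -- assumed model; any conversational chat model can be used
+    inputs => ARRAY[
+        '{"role": "system", "content": "You are a friendly and helpful chatbot."}'::JSONB,
+        '{"role": "user", "content": "How do I say hello in French?"}'::JSONB
+    ],
+    args   => '{"max_new_tokens": 25}'::JSONB
+);
+```
+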
## Additional resources
diff --git a/pgml-cms/docs/api/sql-extension/pgml.transform/fill-mask.md b/pgml-cms/docs/api/sql-extension/pgml.transform/fill-mask.md
index 7d7ed9948..07775258f 100644
--- a/pgml-cms/docs/api/sql-extension/pgml.transform/fill-mask.md
+++ b/pgml-cms/docs/api/sql-extension/pgml.transform/fill-mask.md
@@ -2,9 +2,14 @@
description: Task to fill words in a sentence that are hidden
---
-# Fill Mask
+# Fill-Mask
-Fill-mask refers to a task where certain words in a sentence are hidden or "masked", and the objective is to predict what words should fill in those masked positions. Such models are valuable when we want to gain statistical insights about the language used to train the model.
+Fill-Mask is a task where certain words in a sentence are hidden or "masked", and the objective for the model is to predict what words should fill in those masked positions. Such models are valuable when we want to gain statistical insights about the language used to train the model.
+
+## Example
+
+{% tabs %}
+{% tab title="SQL" %}
```sql
SELECT pgml.transform(
@@ -12,19 +17,54 @@ SELECT pgml.transform(
"task" : "fill-mask"
}'::JSONB,
inputs => ARRAY[
- 'Paris is the of France.'
+ 'Paris is the <mask> of France.'
]
) AS answer;
```
-_Result_
+{% endtab %}
+
+{% tab title="Result" %}
```json
[
- {"score": 0.679, "token": 812, "sequence": "Paris is the capital of France.", "token_str": " capital"},
- {"score": 0.051, "token": 32357, "sequence": "Paris is the birthplace of France.", "token_str": " birthplace"},
- {"score": 0.038, "token": 1144, "sequence": "Paris is the heart of France.", "token_str": " heart"},
- {"score": 0.024, "token": 29778, "sequence": "Paris is the envy of France.", "token_str": " envy"},
- {"score": 0.022, "token": 1867, "sequence": "Paris is the Capital of France.", "token_str": " Capital"}]
+ {
+ "score": 0.6811484098434448,
+ "token": 812,
+ "sequence": "Paris is the capital of France.",
+ "token_str": " capital"
+ },
+ {
+ "score": 0.050908513367176056,
+ "token": 32357,
+ "sequence": "Paris is the birthplace of France.",
+ "token_str": " birthplace"
+ },
+ {
+ "score": 0.03812871500849724,
+ "token": 1144,
+ "sequence": "Paris is the heart of France.",
+ "token_str": " heart"
+ },
+ {
+ "score": 0.024047480896115303,
+ "token": 29778,
+ "sequence": "Paris is the envy of France.",
+ "token_str": " envy"
+ },
+ {
+ "score": 0.022767696529626846,
+ "token": 1867,
+ "sequence": "Paris is the Capital of France.",
+ "token_str": " Capital"
+ }
+]
```
+
+{% endtab %}
+{% endtabs %}
+
+### Additional resources
+
+- [Hugging Face documentation](https://huggingface.co/tasks/fill-mask)
diff --git a/pgml-cms/docs/api/sql-extension/pgml.transform/question-answering.md b/pgml-cms/docs/api/sql-extension/pgml.transform/question-answering.md
index 5118327a4..9dfd41246 100644
--- a/pgml-cms/docs/api/sql-extension/pgml.transform/question-answering.md
+++ b/pgml-cms/docs/api/sql-extension/pgml.transform/question-answering.md
@@ -1,10 +1,15 @@
---
-description: Retrieve the answer to a question from a given text
+description: Retrieve the answer to a question from a given text.
---
-# Question Answering
+# Question answering
-Question Answering models are designed to retrieve the answer to a question from a given text, which can be particularly useful for searching for information within a document. It's worth noting that some question answering models are capable of generating answers even without any contextual information.
+Question answering models are designed to retrieve the answer to a question from a given text, which can be particularly useful for searching for information within a document. It's worth noting that some question answering models are capable of generating answers even without any contextual information.
+
+## Example
+
+{% tabs %}
+{% tab title="SQL" %}
```sql
SELECT pgml.transform(
@@ -18,7 +23,9 @@ SELECT pgml.transform(
) AS answer;
```
-_Result_
+{% endtab %}
+
+{% tab title="Result" %}
```json
{
@@ -28,3 +35,11 @@ _Result_
"answer": "İstanbul"
}
```
+
+{% endtab %}
+{% endtabs %}
+
+
+### Additional resources
+
+- [Hugging Face documentation](https://huggingface.co/tasks/question-answering)
diff --git a/pgml-cms/docs/api/sql-extension/pgml.transform/summarization.md b/pgml-cms/docs/api/sql-extension/pgml.transform/summarization.md
index 90c303cd8..b37a406ec 100644
--- a/pgml-cms/docs/api/sql-extension/pgml.transform/summarization.md
+++ b/pgml-cms/docs/api/sql-extension/pgml.transform/summarization.md
@@ -1,53 +1,46 @@
---
-description: Task of creating a condensed version of a document
+description: Task of creating a condensed version of a document.
---
# Summarization
Summarization involves creating a condensed version of a document that includes the important information while reducing its length. Different models can be used for this task, with some models extracting the most relevant text from the original document, while other models generate completely new text that captures the essence of the original content.
+## Example
+
+{% tabs %}
+{% tab title="SQL" %}
+
```sql
SELECT pgml.transform(
- task => '{"task": "summarization",
- "model": "sshleifer/distilbart-cnn-12-6"
+ task => '{
+ "task": "summarization",
+ "model": "google/pegasus-xsum"
}'::JSONB,
- inputs => array[
- 'Paris is the capital and most populous city of France, with an estimated population of 2,175,601 residents as of 2018, in an area of more than 105 square kilometres (41 square miles). The City of Paris is the centre and seat of government of the region and province of Île-de-France, or Paris Region, which has an estimated population of 12,174,880, or about 18 percent of the population of France as of 2017.'
- ]
+ inputs => array[
+ 'Paris is the capital and most populous city of France, with an estimated population of 2,175,601 residents as of 2018,
+ in an area of more than 105 square kilometres (41 square miles). The City of Paris is the centre and seat of government
+ of the region and province of Île-de-France, or Paris Region, which has an estimated population of 12,174,880,
+ or about 18 percent of the population of France as of 2017.'
+ ]
);
```
-_Result_
+{% endtab %}
+{% tab title="Result" %}
```json
[
- {
- "summary_text": "Paris is the capital and most populous city of France, with an estimated population of 2,175,601 residents as of 2018 . The city is the centre and seat of government of the region and province of Île-de-France, or Paris Region . Paris Region has an estimated 18 percent of the population of France as of 2017 ."
- }
+ {
+ "summary_text": "The City of Paris is the centre and seat of government of the region and province of le-de-France, or Paris Region, which has an estimated population of 12,174,880, or about 18 percent of the population of France as of 2017."
+ }
]
```
-You can control the length of summary\_text by passing `min_length` and `max_length` as arguments to the SQL query.
+{% endtab %}
+{% endtabs %}
-```sql
-SELECT pgml.transform(
- task => '{"task": "summarization",
- "model": "sshleifer/distilbart-cnn-12-6"
- }'::JSONB,
- inputs => array[
- 'Paris is the capital and most populous city of France, with an estimated population of 2,175,601 residents as of 2018, in an area of more than 105 square kilometres (41 square miles). The City of Paris is the centre and seat of government of the region and province of Île-de-France, or Paris Region, which has an estimated population of 12,174,880, or about 18 percent of the population of France as of 2017.'
- ],
- args => '{
- "min_length" : 20,
- "max_length" : 70
- }'::JSONB
-);
-```
+### Additional resources
-```json
-[
- {
- "summary_text": " Paris is the capital and most populous city of France, with an estimated population of 2,175,601 residents as of 2018 . City of Paris is centre and seat of government of the region and province of Île-de-France, or Paris Region, which has an estimated 12,174,880, or about 18 percent"
- }
-]
-```
+- [Hugging Face documentation](https://huggingface.co/tasks/summarization)
+- [google/pegasus-xsum](https://huggingface.co/google/pegasus-xsum)
diff --git a/pgml-cms/docs/api/sql-extension/pgml.transform/text-classification.md b/pgml-cms/docs/api/sql-extension/pgml.transform/text-classification.md
index 2a378e3f1..eb670b267 100644
--- a/pgml-cms/docs/api/sql-extension/pgml.transform/text-classification.md
+++ b/pgml-cms/docs/api/sql-extension/pgml.transform/text-classification.md
@@ -2,15 +2,18 @@
description: Task that involves assigning a label or category to a given text.
---
-# Text Classification
+# Text classification
-Common use cases include sentiment analysis, natural language inference, and the assessment of grammatical correctness. It has a wide range of applications in fields such as marketing, customer service, and political analysis
+Text classification is a task that includes sentiment analysis, natural language inference, and the assessment of grammatical correctness. It has a wide range of applications in fields such as marketing, customer service, and political analysis.
-### Sentiment Analysis
+### Sentiment analysis
-Sentiment analysis is a type of natural language processing technique that involves analyzing a piece of text to determine the sentiment or emotion expressed within it. It can be used to classify a text as positive, negative, or neutral.
+Sentiment analysis is a type of natural language processing technique which analyzes a piece of text to determine the sentiment or emotion expressed within it. It can be used to classify a text as positive, negative, or neutral.
-_Basic usage_
+#### Example
+
+{% tabs %}
+{% tab title="SQL" %}
```sql
SELECT pgml.transform(
@@ -22,7 +25,8 @@ SELECT pgml.transform(
) AS positivity;
```
-_Result_
+{% endtab %}
+{% tab title="Result" %}
```json
[
@@ -31,25 +35,37 @@ _Result_
]
```
-The default [model](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english) used for text classification is a fine-tuned version of DistilBERT-base-uncased that has been specifically optimized for the Stanford Sentiment Treebank dataset (sst2).
+{% endtab %}
+{% endtabs %}
+
+
+Currently, the default model used for text classification is a [fine-tuned version](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english) of DistilBERT-base-uncased that has been specifically optimized for the [Stanford Sentiment Treebank dataset (sst2)](https://huggingface.co/datasets/stanfordnlp/sst2).
+
+#### Using a specific model
-#### _Using specific model_
+To use one of the [thousands of models](https://huggingface.co/models?pipeline\_tag=text-classification) available on Hugging Face, include the name of the desired model and `text-classification` task as a JSONB object in the SQL query.
-To use one of the over 19,000 models available on Hugging Face, include the name of the desired model and `text-classification` task as a JSONB object in the SQL query. For example, if you want to use a RoBERTa [model](https://huggingface.co/models?pipeline\_tag=text-classification) trained on around 40,000 English tweets and that has POS (positive), NEG (negative), and NEU (neutral) labels for its classes, include this information in the JSONB object when making your query.
+For example, if you want to use a RoBERTa model trained on around 40,000 English tweets, with POS (positive), NEG (negative), and NEU (neutral) labels for its classes, include it in the query:
+
+{% tabs %}
+{% tab title="SQL" %}
```sql
SELECT pgml.transform(
+ task => '{
+ "task": "text-classification",
+ "model": "finiteautomata/bertweet-base-sentiment-analysis"
+ }'::JSONB,
inputs => ARRAY[
'I love how amazingly simple ML has become!',
'I hate doing mundane and thankless tasks. ☹️'
- ],
- task => '{"task": "text-classification",
- "model": "finiteautomata/bertweet-base-sentiment-analysis"
- }'::JSONB
+ ]
) AS positivity;
```
-_Result_
+{% endtab %}
+{% tab title="Result" %}
```json
[
@@ -58,23 +74,33 @@ _Result_
]
```
-#### _Using industry specific model_
+{% endtab %}
+{% endtabs %}
+
-By selecting a model that has been specifically designed for a particular industry, you can achieve more accurate and relevant text classification. An example of such a model is [FinBERT](https://huggingface.co/ProsusAI/finbert), a pre-trained NLP model that has been optimized for analyzing sentiment in financial text. FinBERT was created by training the BERT language model on a large financial corpus, and fine-tuning it to specifically classify financial sentiment. When using FinBERT, the model will provide softmax outputs for three different labels: positive, negative, or neutral.
+
+#### Using an industry-specific model
+
+By selecting a model that has been specifically designed for a particular subject, you can achieve more accurate and relevant text classification. An example of such a model is [FinBERT](https://huggingface.co/ProsusAI/finbert), a pre-trained NLP model that has been optimized for analyzing sentiment in financial text. FinBERT was created by training the BERT language model on a large financial corpus, and fine-tuning it to specifically classify financial sentiment. When using FinBERT, the model will provide softmax outputs for three different labels: positive, negative, or neutral.
+
+{% tabs %}
+{% tab title="SQL" %}
```sql
SELECT pgml.transform(
+ task => '{
+ "task": "text-classification",
+ "model": "ProsusAI/finbert"
+ }'::JSONB,
inputs => ARRAY[
'Stocks rallied and the British pound gained.',
'Stocks making the biggest moves midday: Nvidia, Palantir and more'
- ],
- task => '{"task": "text-classification",
- "model": "ProsusAI/finbert"
- }'::JSONB
+ ]
) AS market_sentiment;
```
-_Result_
+{% endtab %}
+{% tab title="Result" %}
```json
[
@@ -83,30 +109,42 @@ _Result_
]
```
+{% endtab %}
+{% endtabs %}
+
+
### Natural Language Inference (NLI)
NLI, or Natural Language Inference, is a type of model that determines the relationship between two texts. The model takes a premise and a hypothesis as inputs and returns a class, which can be one of three types:
-* Entailment: This means that the hypothesis is true based on the premise.
-* Contradiction: This means that the hypothesis is false based on the premise.
-* Neutral: This means that there is no relationship between the hypothesis and the premise.
+| Class | Description |
+|-------|-------------|
+| Entailment | The hypothesis is true based on the premise. |
+| Contradiction | The hypothesis is false based on the premise. |
+| Neutral | There is no relationship between the hypothesis and the premise. |
-The GLUE dataset is the benchmark dataset for evaluating NLI models. There are different variants of NLI models, such as Multi-Genre NLI, Question NLI, and Winograd NLI.
-If you want to use an NLI model, you can find them on the :hugs: Hugging Face model hub. Look for models with "mnli".
+The [GLUE dataset](https://huggingface.co/datasets/nyu-mll/glue) is the benchmark dataset for evaluating NLI models. There are different variants of NLI models, such as Multi-Genre NLI, Question NLI, and Winograd NLI.
+
+If you want to use an NLI model, you can find them on the Hugging Face Hub by looking for models with "mnli" in their name, for example:
+
+{% tabs %}
+{% tab title="SQL" %}
```sql
SELECT pgml.transform(
+ task => '{
+ "task": "text-classification",
+ "model": "roberta-large-mnli"
+ }'::JSONB,
inputs => ARRAY[
'A soccer game with multiple males playing. Some men are playing a sport.'
- ],
- task => '{"task": "text-classification",
- "model": "roberta-large-mnli"
- }'::JSONB
+ ]
) AS nli;
```
-_Result_
+{% endtab %}
+{% tab title="Result" %}
```json
[
@@ -114,24 +152,32 @@ _Result_
]
```
+{% endtab %}
+{% endtabs %}
+
### Question Natural Language Inference (QNLI)
The QNLI task involves determining whether a given question can be answered by the information in a provided document. If the answer can be found in the document, the label assigned is "entailment". Conversely, if the answer cannot be found in the document, the label assigned is "not entailment".
-If you want to use an QNLI model, you can find them on the :hugs: Hugging Face model hub. Look for models with "qnli".
+If you want to use a QNLI model, you can find them on the Hugging Face Hub by looking for models with "qnli" in their name, for example:
+
+{% tabs %}
+{% tab title="SQL" %}
```sql
SELECT pgml.transform(
+ task => '{
+ "task": "text-classification",
+ "model": "cross-encoder/qnli-electra-base"
+ }'::JSONB,
inputs => ARRAY[
- 'Where is the capital of France?, Paris is the capital of France.'
- ],
- task => '{"task": "text-classification",
- "model": "cross-encoder/qnli-electra-base"
- }'::JSONB
+ 'Where is the capital of France? Paris is the capital of France.'
+ ]
) AS qnli;
```
-_Result_
+{% endtab %}
+{% tab title="Result" %}
```json
[
@@ -139,24 +185,32 @@ _Result_
]
```
+{% endtab %}
+{% endtabs %}
+
### Quora Question Pairs (QQP)
-The Quora Question Pairs model is designed to evaluate whether two given questions are paraphrases of each other. This model takes the two questions and assigns a binary value as output. LABEL\_0 indicates that the questions are paraphrases of each other and LABEL\_1 indicates that the questions are not paraphrases. The benchmark dataset used for this task is the Quora Question Pairs dataset within the GLUE benchmark, which contains a collection of question pairs and their corresponding labels.
+The Quora Question Pairs model is designed to evaluate whether two given questions are paraphrases of each other. This model takes the two questions and assigns a binary value as output. `LABEL_0` indicates that the questions are paraphrases of each other and `LABEL_1` indicates that the questions are not paraphrases. The benchmark dataset used for this task is the [Quora Question Pairs](https://huggingface.co/datasets/quora) dataset within the GLUE benchmark, which contains a collection of question pairs and their corresponding labels.
-If you want to use an QQP model, you can find them on the :hugs: Hugging Face model hub. Look for models with `qqp`.
+If you want to use a QQP model, you can find them on the Hugging Face Hub by looking for models with `qqp` in their name, for example:
+
+{% tabs %}
+{% tab title="SQL" %}
```sql
SELECT pgml.transform(
+ task => '{
+ "task": "text-classification",
+ "model": "textattack/bert-base-uncased-QQP"
+ }'::JSONB,
inputs => ARRAY[
- 'Which city is the capital of France?, Where is the capital of France?'
- ],
- task => '{"task": "text-classification",
- "model": "textattack/bert-base-uncased-QQP"
- }'::JSONB
+ 'Which city is the capital of France? Where is the capital of France?'
+ ]
) AS qqp;
```
-_Result_
+{% endtab %}
+{% tab title="Result" %}
```json
[
@@ -164,27 +218,38 @@ _Result_
]
```
-### Grammatical Correctness
+{% endtab %}
+{% endtabs %}
+
+### Grammatical correctness
-Linguistic Acceptability is a task that involves evaluating the grammatical correctness of a sentence. The model used for this task assigns one of two classes to the sentence, either "acceptable" or "unacceptable". LABEL\_0 indicates acceptable and LABEL\_1 indicates unacceptable. The benchmark dataset used for training and evaluating models for this task is the Corpus of Linguistic Acceptability (CoLA), which consists of a collection of texts along with their corresponding labels.
+Linguistic Acceptability is a task that involves evaluating the grammatical correctness of a sentence. The model used for this task assigns one of two classes to the sentence, either "acceptable" or "unacceptable". `LABEL_0` indicates acceptable and `LABEL_1` indicates unacceptable. The benchmark dataset used for training and evaluating models for this task is the [Corpus of Linguistic Acceptability (CoLA)](https://huggingface.co/datasets/nyu-mll/glue), which consists of a collection of texts along with their corresponding labels.
-If you want to use a grammatical correctness model, you can find them on the :hugs: Hugging Face model hub. Look for models with `cola`.
+If you want to use a grammatical correctness model, you can find them on the Hugging Face Hub by looking for models with "cola" in their name, for example:
+
+{% tabs %}
+{% tab title="SQL" %}
```sql
SELECT pgml.transform(
+ task => '{
+ "task": "text-classification",
+ "model": "textattack/distilbert-base-uncased-CoLA"
+ }'::JSONB,
inputs => ARRAY[
'I will walk to home when I went through the bus.'
- ],
- task => '{"task": "text-classification",
- "model": "textattack/distilbert-base-uncased-CoLA"
- }'::JSONB
+ ]
) AS grammatical_correctness;
```
-_Result_
+{% endtab %}
+{% tab title="Result" %}
```json
[
{"label": "LABEL_1", "score": 0.9576480388641356}
]
```
+
+{% endtab %}
+{% endtabs %}
diff --git a/pgml-dashboard/package-lock.json b/pgml-dashboard/package-lock.json
index b4b3d4667..d90ae9e7f 100644
--- a/pgml-dashboard/package-lock.json
+++ b/pgml-dashboard/package-lock.json
@@ -15,7 +15,7 @@
"codemirror": "^6.0.1",
"dompurify": "^3.0.6",
"marked": "^9.1.0",
- "postgresml-lang-sql": "^6.6.3-4"
+ "postgresml-lang-sql": "^6.6.3-5"
}
},
"node_modules/@codemirror/autocomplete": {
@@ -237,9 +237,9 @@
}
},
"node_modules/postgresml-lang-sql": {
- "version": "6.6.3-4",
- "resolved": "https://registry.npmjs.org/postgresml-lang-sql/-/postgresml-lang-sql-6.6.3-4.tgz",
- "integrity": "sha512-ybmwlgRVXP5eEjXJ37aoiG+sa2mOO6deVsRkyfPFnAq4JVCtUOuGwvoNtwEJerx54wSF020lgaSjWhbaWEDXpA==",
+ "version": "6.6.3-5",
+ "resolved": "https://registry.npmjs.org/postgresml-lang-sql/-/postgresml-lang-sql-6.6.3-5.tgz",
+ "integrity": "sha512-S90WPsqfmau/Z2HPgLh0tGP07w9HLYighBGjtngNwa0K88ZHBAa8YY2qE83DwBLHVXCEJt7INI28MM9qE5CH0g==",
"dependencies": {
"@codemirror/autocomplete": "^6.0.0",
"@codemirror/language": "^6.0.0",
diff --git a/pgml-dashboard/package.json b/pgml-dashboard/package.json
index ad1f2a0e5..bc2860eaa 100644
--- a/pgml-dashboard/package.json
+++ b/pgml-dashboard/package.json
@@ -3,7 +3,7 @@
"@codemirror/lang-javascript": "^6.2.1",
"@codemirror/lang-python": "^6.1.3",
"@codemirror/lang-rust": "^6.0.1",
- "postgresml-lang-sql": "^6.6.3-4",
+ "postgresml-lang-sql": "^6.6.3-5",
"@codemirror/lang-json": "^6.0.1",
"@codemirror/state": "^6.2.1",
"@codemirror/view": "^6.21.0",
diff --git a/pgml-dashboard/static/css/scss/pages/_docs.scss b/pgml-dashboard/static/css/scss/pages/_docs.scss
index 7741ce643..2bf785658 100644
--- a/pgml-dashboard/static/css/scss/pages/_docs.scss
+++ b/pgml-dashboard/static/css/scss/pages/_docs.scss
@@ -113,7 +113,7 @@
}
}
- code {
+ code, .code-multi-line {
@extend .rounded-1;
color: #{$gray-100};
@@ -121,6 +121,7 @@
border: 1px solid #{$slate-tint-1000};
padding: 2px;
white-space: nowrap;
+ font-size: 0.875em;
}
img {