Skip to content

Commit b6d6cb3

Browse files
authored
Added Rust and C docs for Client SDKs (#1506)
1 parent fb2426f commit b6d6cb3

File tree

9 files changed

+1030
-31
lines changed

9 files changed

+1030
-31
lines changed

pgml-cms/docs/api/client-sdk/README.md

Lines changed: 153 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,39 @@ The client SDK can be installed using standard package managers for JavaScript,
1212
Installing the SDK into your project is as simple as:
1313

1414
{% tabs %}
15-
{% tab title="JavaScript " %}
15+
{% tab title="JavaScript" %}
1616
```bash
1717
npm i pgml
1818
```
1919
{% endtab %}
2020

21-
{% tab title="Python " %}
21+
{% tab title="Python" %}
2222
```bash
2323
pip install pgml
2424
```
2525
{% endtab %}
26+
27+
{% tab title="Rust" %}
28+
```bash
29+
cargo add pgml
30+
```
31+
{% endtab %}
32+
33+
{% tab title="C" %}
34+
35+
First clone the `postgresml` repository and navigate to the `pgml-sdks/pgml/c` directory:
36+
```bash
37+
git clone https://github.com/postgresml/postgresml
38+
cd postgresml/pgml-sdks/pgml/c
39+
```
40+
41+
Then build the bindings:
42+
```bash
43+
make bindings
44+
```
45+
46+
This will generate the `pgml.h` file and a `.so` on Linux or a `.dylib` on macOS.
47+
{% endtab %}
2648
{% endtabs %}
2749

2850
## Getting started
@@ -41,10 +63,10 @@ export PGML_DATABASE_URL=postgres://user:password@sql.cloud.postgresml.org:6432/
4163

4264
### Create a collection
4365

44-
The SDK is written in asynchronous code, so you need to run it inside an async runtime. Both Python and JavaScript support async functions natively.
66+
The SDK is written in asynchronous code, so you need to run it inside an async runtime. Python, JavaScript, and Rust all support async functions natively.
4567

4668
{% tabs %}
47-
{% tab title="JavaScript " %}
69+
{% tab title="JavaScript" %}
4870
```javascript
4971
const pgml = require("pgml");
5072

@@ -63,6 +85,29 @@ async def main():
6385
collection = Collection("sample_collection")
6486
```
6587
{% endtab %}
88+
89+
{% tab title="Rust" %}
90+
```rust
91+
use pgml::{Collection, Pipeline};
92+
use anyhow::Error;
93+
94+
#[tokio::main]
95+
async fn main() -> Result<(), Error> {
96+
let mut collection = Collection::new("sample_collection", None)?;
97+
}
98+
```
99+
{% endtab %}
100+
101+
{% tab title="C" %}
102+
```c
103+
#include <stdio.h>
104+
#include "pgml.h"
105+
106+
int main() {
107+
CollectionC * collection = pgml_collectionc_new("sample_collection", NULL);
108+
}
109+
```
110+
{% endtab %}
66111
{% endtabs %}
67112

68113
The above example imports the `pgml` module and creates a collection object. By itself, the collection only tracks document contents and identifiers, but once we add a pipeline, we can instruct the SDK to perform additional tasks when documents are inserted and retrieved.
@@ -93,7 +138,7 @@ await collection.add_pipeline(pipeline);
93138
```python
94139
# Add this code to the end of the main function from the above example.
95140
pipeline = Pipeline(
96-
"test_pipeline",
141+
"sample_pipeline",
97142
{
98143
"text": {
99144
"splitter": { "model": "recursive_character" },
@@ -107,6 +152,37 @@ pipeline = Pipeline(
107152
await collection.add_pipeline(pipeline)
108153
```
109154
{% endtab %}
155+
156+
{% tab title="Rust" %}
157+
```rust
158+
// Add this code to the end of the main function from the above example.
159+
let mut pipeline = Pipeline::new(
160+
"sample_pipeline",
161+
Some(
162+
serde_json::json!({
163+
"text": {
164+
"splitter": { "model": "recursive_character" },
165+
"semantic_search": {
166+
"model": "Alibaba-NLP/gte-base-en-v1.5",
167+
},
168+
},
169+
})
170+
.into(),
171+
),
172+
)?;
173+
174+
collection.add_pipeline(&mut pipeline).await?;
175+
```
176+
{% endtab %}
177+
178+
{% tab title="C" %}
179+
```c
180+
// Add this code to the end of the main function from the above example.
181+
PipelineC * pipeline = pgml_pipelinec_new("sample_pipeline", "{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"Alibaba-NLP/gte-base-en-v1.5\"}}}");
182+
183+
pgml_collectionc_add_pipeline(collection, pipeline);
184+
```
185+
{% endtab %}
110186
{% endtabs %}
111187
112188
The pipeline configuration is a key/value object, where the key is the name of a column in a document, and the value is the action the SDK should perform on that column.
@@ -153,9 +229,36 @@ documents = [
153229
await collection.upsert_documents(documents)
154230
```
155231
{% endtab %}
156-
{% endtabs %}
157232

158-
If the same document `id` is used, the SDK computes the difference between existing and new documents and only updates the chunks that have changed.
233+
{% tab title="Rust" %}
234+
```rust
235+
// Add this code to the end of the main function in the above example.
236+
let documents = vec![
237+
serde_json::json!({
238+
"id": "Document One",
239+
"text": "document one contents...",
240+
})
241+
.into(),
242+
serde_json::json!({
243+
"id": "Document Two",
244+
"text": "document two contents...",
245+
})
246+
.into(),
247+
];
248+
249+
collection.upsert_documents(documents, None).await?;
250+
```
251+
{% endtab %}
252+
253+
{% tab title="C" %}
254+
```c
255+
// Add this code to the end of the main function in the above example.
256+
char * documents_to_upsert[2] = {"{\"id\": \"Document One\", \"text\": \"document one contents...\"}", "{\"id\": \"Document Two\", \"text\": \"document two contents...\"}"};
257+
258+
pgml_collectionc_upsert_documents(collection, documents_to_upsert, 2, NULL);
259+
```
260+
{% endtab %}
261+
{% endtabs %}
159262
160263
### Search documents
161264
@@ -203,6 +306,47 @@ results = await collection.vector_search(
203306
print(results)
204307
```
205308
{% endtab %}
309+
310+
{% tab title="Rust" %}
311+
```rust
312+
// Add this code to the end of the main function in the above example.
313+
let results = collection
314+
.vector_search(
315+
serde_json::json!({
316+
"query": {
317+
"fields": {
318+
"text": {
319+
"query": "Something about a document...",
320+
},
321+
},
322+
},
323+
"limit": 2,
324+
})
325+
.into(),
326+
&mut pipeline,
327+
)
328+
.await?;
329+
330+
println!("{:?}", results);
331+
332+
Ok(())
333+
```
334+
{% endtab %}
335+
336+
{% tab title="C" %}
337+
```c
338+
// Add this code to the end of the main function in the above example.
339+
r_size = 0;
340+
char** results = pgml_collectionc_vector_search(collection, "{\"query\": {\"fields\": {\"text\": {\"query\": \"Something about a document...\"}}}, \"limit\": 2}", pipeline, &r_size);
341+
printf("\n\nPrinting results:\n");
342+
for (i = 0; i < r_size; ++i) {
343+
printf("Result %u -> %s\n", i, results[i]);
344+
}
345+
346+
pgml_pipelinec_delete(pipeline);
347+
pgml_collectionc_delete(collection);
348+
```
349+
{% endtab %}
206350
{% endtabs %}
207351
208352
We are using built-in vector search, powered by embeddings and the PostgresML [pgml.embed()](../sql-extension/pgml.embed) function, which embeds the `query` argument, compares it to the embeddings stored in the database, and returns the top two results, ranked by cosine similarity.
@@ -228,6 +372,8 @@ if __name__ == "__main__":
228372
{% endtab %}
229373
{% endtabs %}
230374

375+
Note that the `Rust` and `C` examples do not require any additional code to run correctly.
376+
231377
Once you run the example, you should see something like this in the terminal:
232378

233379
```bash

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy