Skip to content

Commit e2f2668

Browse files
committed
Update find and Rerank, start reworking the tests
1 parent cd00bbb commit e2f2668

File tree

105 files changed

+2090
-1273
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

105 files changed

+2090
-1273
lines changed

TEST.MD

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
2+
## Run against Local HCD Instance
3+
(_to run locally you must have the Data API running locally on top of HCD or DSE_)
4+
5+
```bash
6+
mvn clean test -Ptest_local
7+
```
8+
9+
> **Why does it work?**
10+
>
11+
> - In `pom.xml` the environment variable `ASTRA_DB_JAVA_TEST_ENV` is set to `local`
12+
> - Tests are annotated with
13+
>
14+
> ```java
15+
> @EnabledIfSystemProperty(named = "ASTRA_DB_JAVA_TEST_ENV", matches = "local")
16+
> @DisabledIfSystemProperty(named = "ASTRA_DB_JAVA_TEST_ENV", matches = "(?!local)")
17+
> public class MyTest {
18+
> }
19+
> ```
20+
21+
- To run from an IDE, make sure to set `ASTRA_DB_JAVA_TEST_ENV` to `local` in your Run configuration (the annotations above read it as a JVM system property, e.g. `-DASTRA_DB_JAVA_TEST_ENV=local`)
22+
23+
24+
## Run against Astra DEV
25+
26+
- `ASTRA_DB_APPLICATION_TOKEN_DEV` should be defined in your environment variables; it is deliberately not stored in the POM files for security reasons.
27+
28+
- To run with Maven (AWS,`eu-west-2`)
29+
30+
```bash
31+
mvn clean test -Ptest_astra_dev
32+
```
33+
34+
- To run with Maven on one particular region
35+
36+
```bash
37+
mvn clean test -Ptest_astra_dev \
38+
-DASTRA_CLOUD_PROVIDER_DEV=GCP \
39+
-DASTRA_CLOUD_REGION_DEV=us-central-1
40+
```
41+
42+
- To run overriding also the token
43+
44+
```bash
45+
mvn clean test \
46+
-Ptest_astra_dev \
47+
-DASTRA_CLOUD_PROVIDER_DEV=GCP \
48+
-DASTRA_CLOUD_REGION_DEV=us-central-1 \
49+
-DASTRA_DB_APPLICATION_TOKEN_DEV=AstraCS:... \
50+
-Dtest="com.datastax.astra.test.integration.*.*Test"
51+
```
52+

astra-db-java-tools/src/test/java/com/datastax/astra/samples/CsvPhilosophers.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ public static void main(String[] args) throws Exception {
3030
CsvLoader.load(csvFilename, collection, new CsvRowMapper() {
3131
@Override
3232
public Document map(Document csvRow) {
33+
3334
// Tags should be an Array
3435
csvRow.vectorize(csvRow.getString("quote"));
3536
csvRow.append("tags",csvRow.getString("tags").split(";"));

astra-db-java/pom.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,15 @@
8686
<artifactId>awaitility</artifactId>
8787
<scope>test</scope>
8888
</dependency>
89+
90+
<!-- Embedding model in memory for testing -->
91+
<dependency>
92+
<groupId>dev.langchain4j</groupId>
93+
<artifactId>langchain4j-embeddings-all-minilm-l6-v2</artifactId>
94+
<version>1.0.0-beta2</version>
95+
<scope>test</scope>
96+
</dependency>
97+
8998
</dependencies>
9099

91100
<build>

astra-db-java/src/main/java/com/datastax/astra/client/admin/AstraDBDatabaseAdmin.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import com.datastax.astra.client.core.options.DataAPIClientOptions;
2626
import com.datastax.astra.client.databases.commands.results.FindEmbeddingProvidersResult;
2727
import com.datastax.astra.client.databases.DatabaseOptions;
28+
import com.datastax.astra.client.databases.commands.results.FindRerankingProvidersResult;
2829
import com.datastax.astra.internal.api.AstraApiEndpoint;
2930
import com.datastax.astra.internal.command.AbstractCommandRunner;
3031
import com.datastax.astra.internal.utils.Assert;
@@ -192,6 +193,14 @@ public FindEmbeddingProvidersResult findEmbeddingProviders() {
192193
return new FindEmbeddingProvidersResult(admin.findEmbeddingProviders().getEmbeddingProviders());
193194
}
194195

196+
/** {@inheritDoc} */
197+
@Override
198+
public FindRerankingProvidersResult findRerankingProviders() {
199+
log.debug("findRerankingProviders");
200+
DataAPIDatabaseAdmin admin = new DataAPIDatabaseAdmin(db, this.options);
201+
return new FindRerankingProvidersResult(admin.findRerankingProviders().getRerankingProviders());
202+
}
203+
195204
/** {@inheritDoc} */
196205
@Override
197206
public void createKeyspace(String keyspace, boolean updateDBKeyspace) {

astra-db-java/src/main/java/com/datastax/astra/client/admin/DataAPIDatabaseAdmin.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@
2323
import com.datastax.astra.client.core.options.BaseOptions;
2424
import com.datastax.astra.client.core.commands.Command;
2525
import com.datastax.astra.client.core.commands.CommandType;
26+
import com.datastax.astra.client.core.rerank.RerankProvider;
2627
import com.datastax.astra.client.databases.commands.results.FindEmbeddingProvidersResult;
2728
import com.datastax.astra.client.core.vectorize.EmbeddingProvider;
2829
import com.datastax.astra.client.databases.Database;
30+
import com.datastax.astra.client.databases.commands.results.FindRerankingProvidersResult;
2931
import com.datastax.astra.client.databases.definition.keyspaces.KeyspaceOptions;
3032
import com.datastax.astra.internal.api.DataAPIResponse;
3133
import com.datastax.astra.internal.command.AbstractCommandRunner;
@@ -109,6 +111,14 @@ public FindEmbeddingProvidersResult findEmbeddingProviders() {
109111
EmbeddingProvider.class));
110112
}
111113

114+
@Override
115+
public FindRerankingProvidersResult findRerankingProviders() {
116+
DataAPIResponse res = runCommand(Command.create("findRerankingProviders"));
117+
return new FindRerankingProvidersResult(
118+
res.getStatusKeyAsMap("rerankingProviders",
119+
RerankProvider.class));
120+
}
121+
112122
/** {@inheritDoc} */
113123
@Override
114124
public Database getDatabase() {

astra-db-java/src/main/java/com/datastax/astra/client/admin/DatabaseAdmin.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,12 @@
2121
*/
2222

2323
import com.datastax.astra.client.core.options.BaseOptions;
24+
import com.datastax.astra.client.core.rerank.RerankProvider;
2425
import com.datastax.astra.client.databases.Database;
2526
import com.datastax.astra.client.core.commands.CommandRunner;
2627
import com.datastax.astra.client.core.vectorize.EmbeddingProvider;
2728
import com.datastax.astra.client.databases.commands.results.FindEmbeddingProvidersResult;
29+
import com.datastax.astra.client.databases.commands.results.FindRerankingProvidersResult;
2830
import com.datastax.astra.internal.utils.Assert;
2931

3032
import java.util.Set;
@@ -81,6 +83,24 @@ public interface DatabaseAdmin {
8183
*/
8284
FindEmbeddingProvidersResult findEmbeddingProviders();
8385

86+
/**
87+
* Retrieve the list of reranking providers available in the current database. Reranking providers are services
88+
* that sort a list of records based on an algorithm (e.g. bm25). This method returns a map of provider names to
89+
* {@link RerankProvider} instances, allowing applications
90+
* to access and utilize the reranking services.
91+
*
92+
* <p>Example usage:</p>
93+
* <pre>
94+
* {@code
95+
* // Assuming 'admin' is an instance of DatabaseAdmin
96+
* FindRerankingProvidersResult providers = admin.findRerankingProviders();
97+
* }
98+
* </pre>
99+
* @return
100+
* list of available providers
101+
*/
102+
FindRerankingProvidersResult findRerankingProviders();
103+
84104
/**
85105
* Asynchronously retrieves a stream of keyspaces names available in the current database. This method facilitates
86106
* non-blocking operations by allowing the application to continue executing other tasks while the list of keyspace

astra-db-java/src/main/java/com/datastax/astra/client/collections/Collection.java

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
import com.datastax.astra.client.core.query.Filter;
6060
import com.datastax.astra.client.core.query.Filters;
6161
import com.datastax.astra.client.core.query.Projection;
62-
import com.datastax.astra.client.core.reranking.RerankResult;
62+
import com.datastax.astra.client.core.rerank.RerankResult;
6363
import com.datastax.astra.client.core.vector.DataAPIVector;
6464
import com.datastax.astra.client.databases.Database;
6565
import com.datastax.astra.client.exceptions.DataAPIException;
@@ -73,6 +73,7 @@
7373
import com.datastax.astra.internal.serdes.collections.DocumentSerializer;
7474
import com.datastax.astra.internal.serdes.tables.RowMapper;
7575
import com.datastax.astra.internal.utils.Assert;
76+
import com.datastax.astra.internal.utils.BetaPreview;
7677
import com.datastax.astra.internal.utils.EscapeUtils;
7778
import lombok.Getter;
7879
import lombok.extern.slf4j.Slf4j;
@@ -1082,31 +1083,32 @@ public Page<T> findPage(Filter filter, CollectionFindOptions options) {
10821083
* @return
10831084
* the find iterable interface
10841085
*/
1086+
@BetaPreview
10851087
public CollectionFindAndRerankCursor<T,T> findAndRerank(Filter filter, CollectionFindAndRerankOptions options) {
10861088
return findAndRerank(filter, options, getDocumentClass());
10871089
}
10881090

1091+
@BetaPreview
10891092
public <R> CollectionFindAndRerankCursor<T, R> findAndRerank(Filter filter, CollectionFindAndRerankOptions options, Class<R> newRowType) {
10901093
return new CollectionFindAndRerankCursor<>(this, filter, options, newRowType);
10911094
}
10921095

1096+
@BetaPreview
10931097
public <R> Page<RerankResult<R>> findAndRerankPage(Filter filter, CollectionFindAndRerankOptions options, Class<R> newRowType) {
10941098
Command findAndRerankCommand = Command
10951099
.create("findAndRerank")
10961100
.withFilter(filter);
10971101
if (options != null) {
10981102
findAndRerankCommand
1099-
.withSort(options.getSortArray())
1100-
.withProjection(options.getProjectionArray())
1101-
.withOptions(new Document()
1102-
.appendIfNotNull("rerankOn", options.rerankOn())
1103-
.appendIfNotNull("limit", options.limit())
1104-
.appendIfNotNull("hybridProjection", options.hybridProjection().getValue())
1105-
.appendIfNotNull("hybridLimits", options.hybridLimits())
1106-
.appendIfNotNull(INPUT_INCLUDE_SORT_VECTOR, options.includeSortVector())
1107-
.appendIfNotNull(INPUT_INCLUDE_SIMILARITY, options.includeSimilarity())
1108-
)
1109-
;
1103+
.withSort(options.getSortArray())
1104+
.withProjection(options.getProjectionArray())
1105+
.withOptions(new Document()
1106+
.appendIfNotNull("rerankOn", options.rerankOn())
1107+
.appendIfNotNull("limit", options.limit())
1108+
.appendIfNotNull("hybridLimits", options.hybridLimits())
1109+
.appendIfNotNull(INPUT_INCLUDE_SORT_VECTOR, options.includeSortVector())
1110+
.appendIfNotNull(INPUT_INCLUDE_SCORES, options.includeScores())
1111+
.appendIfNotNull(INPUT_INCLUDE_SIMILARITY, options.includeSimilarity()));
11101112
}
11111113

11121114
// Responses MOCK for now
@@ -1139,10 +1141,6 @@ public <R> Page<RerankResult<R>> findAndRerankPage(Filter filter, CollectionFind
11391141
DocumentSerializer serializer = new DocumentSerializer();
11401142
R results1 = serializer.convertValue(document, newRowType);
11411143

1142-
// MAP WITH ROW FUNCTION
1143-
Row row = RowMapper.mapAsRow(document);
1144-
R result = RowMapper.mapFromRow(row, getSerializer(), newRowType);
1145-
11461144
// Getting associated document response
11471145
Document documentResponse = documentResponses.get(i);
11481146
Map<String, Double> scores = documentResponse.getMap("scores", String.class, Double.class);
@@ -1199,12 +1197,7 @@ public <R> Page<R> findPage(Filter filter, CollectionFindOptions options, Class<
11991197
return new Page<>(
12001198
apiResponse.getData().getNextPageState(),
12011199
apiResponse.getData().getDocuments().stream()
1202-
.map(d -> {
1203-
Row row = RowMapper.mapAsRow(d);
1204-
return mapFromRow(row, getSerializer(), newRowType);
1205-
})
1206-
// .map(d -> d.map(newRowType))
1207-
//.map(d -> RowMapper.mapFromRow(d, getSerializer(), newRowType))
1200+
.map(d -> d.map(newRowType))
12081201
.collect(Collectors.toList()), sortVector);
12091202
}
12101203

astra-db-java/src/main/java/com/datastax/astra/client/collections/commands/cursor/CollectionFindAndRerankCursor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
import com.datastax.astra.client.core.query.Filter;
2727
import com.datastax.astra.client.core.query.Projection;
2828
import com.datastax.astra.client.core.query.Sort;
29-
import com.datastax.astra.client.core.reranking.RerankResult;
29+
import com.datastax.astra.client.core.rerank.RerankResult;
3030
import com.datastax.astra.internal.command.AbstractCursor;
3131
import lombok.Getter;
3232

astra-db-java/src/main/java/com/datastax/astra/client/collections/commands/cursor/CollectionFindCursor.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
import com.datastax.astra.client.collections.Collection;
2424
import com.datastax.astra.client.collections.commands.options.CollectionFindOptions;
25+
import com.datastax.astra.client.collections.definition.documents.Document;
2526
import com.datastax.astra.client.core.paging.CursorState;
2627
import com.datastax.astra.client.core.query.Filter;
2728
import com.datastax.astra.client.core.query.Projection;

astra-db-java/src/main/java/com/datastax/astra/client/collections/commands/options/CollectionFindAndRerankOptions.java

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import com.datastax.astra.client.core.commands.CommandType;
2424
import com.datastax.astra.client.core.hybrid.Hybrid;
2525
import com.datastax.astra.client.core.hybrid.HybridLimits;
26-
import com.datastax.astra.client.core.hybrid.HybridProjection;
2726
import com.datastax.astra.client.core.options.BaseOptions;
2827
import com.datastax.astra.client.core.query.Projection;
2928
import com.datastax.astra.client.core.query.Sort;
@@ -70,7 +69,7 @@ public class CollectionFindAndRerankOptions extends BaseOptions<CollectionFindAn
7069
/**
7170
* Flag to include the scores in the result.
7271
*/
73-
HybridProjection hybridProjection;
72+
Boolean includeScores;
7473

7574
/**
7675
* Flag to include sortVector in the result when operating a semantic search.
@@ -187,15 +186,5 @@ public CollectionFindAndRerankOptions rerankOn(String rerankOn) {
187186
return this;
188187
}
189188

190-
/**
191-
* Add a hybridProjection clause in the find block
192-
*
193-
* @param hybridProjection value for hybridProjection options
194-
* @return current command
195-
*/
196-
public CollectionFindAndRerankOptions hybridProjection(HybridProjection hybridProjection) {
197-
this.hybridProjection = hybridProjection;
198-
return this;
199-
}
200189

201190
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy