Skip to content

Commit e9d50af

Browse files
authored
feat(misc): embeddings script and list sources in ai response (#18455)
1 parent 0c0e61e commit e9d50af

File tree

15 files changed

+1185
-319
lines changed

15 files changed

+1185
-319
lines changed
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Regenerates the documentation embeddings on a schedule (or on demand) by
# running the `tools-documentation-create-embeddings:run-node` Nx target.
name: Generate embeddings

on:
  schedule:
    # GitHub evaluates cron schedules in UTC.
    - cron: "0 5 * * 0,4" # sunday, thursday 5AM
  # Allows the workflow to be started manually.
  workflow_dispatch:
jobs:
  cache-and-install:
    # Only run in the upstream repository, never in forks.
    if: github.repository == 'nrwl/nx'
    runs-on: ubuntu-latest
    strategy:
      matrix:
        node-version: [18]

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Install Node.js
        uses: actions/setup-node@v3
        with:
          # Use the matrix value so the declared matrix and the installed
          # Node.js version cannot drift apart.
          node-version: ${{ matrix.node-version }}

      - name: Install pnpm
        uses: pnpm/action-setup@v2
        id: pnpm-install
        with:
          version: 7
          run_install: false

      - name: Get pnpm store directory
        id: pnpm-cache
        shell: bash
        run: |
          echo "STORE_PATH=$(pnpm store path)" >> "$GITHUB_OUTPUT"

      - name: Setup pnpm cache
        uses: actions/cache@v3
        with:
          path: ${{ steps.pnpm-cache.outputs.STORE_PATH }}
          key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }}
          restore-keys: |
            ${{ runner.os }}-pnpm-store-

      - name: Install dependencies
        # NOTE(review): --no-frozen-lockfile lets CI resolve versions that differ
        # from pnpm-lock.yaml; confirm this is intentional for this job.
        run: pnpm install --no-frozen-lockfile

      - name: Run embeddings script
        run: pnpm exec nx run tools-documentation-create-embeddings:run-node
        env:
          NX_NEXT_PUBLIC_SUPABASE_URL: ${{ secrets.NX_NEXT_PUBLIC_SUPABASE_URL }}
          NX_SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.NX_SUPABASE_SERVICE_ROLE_KEY }}
          NX_OPENAI_KEY: ${{ secrets.NX_OPENAI_KEY }}

nx-dev/data-access-ai/src/lib/data-access-ai.ts

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,13 @@ import {
1111
ChatCompletionRequestMessageRoleEnum,
1212
CreateCompletionResponseUsage,
1313
} from 'openai';
14-
import { getMessageFromResponse, sanitizeLinksInResponse } from './utils';
14+
import {
15+
PageSection,
16+
getListOfSources,
17+
getMessageFromResponse,
18+
sanitizeLinksInResponse,
19+
toMarkdownList,
20+
} from './utils';
1521

1622
const openAiKey = process.env['NX_OPENAI_KEY'];
1723
const supabaseUrl = process.env['NX_NEXT_PUBLIC_SUPABASE_URL'];
@@ -21,9 +27,12 @@ const config = new Configuration({
2127
});
2228
const openai = new OpenAIApi(config);
2329

24-
export async function nxDevDataAccessAi(
25-
query: string
26-
): Promise<{ textResponse: string; usage?: CreateCompletionResponseUsage }> {
30+
export async function nxDevDataAccessAi(query: string): Promise<{
31+
textResponse: string;
32+
usage?: CreateCompletionResponseUsage;
33+
sources: { heading: string; url: string }[];
34+
sourcesMarkdown: string;
35+
}> {
2736
try {
2837
if (!openAiKey) {
2938
throw new ApplicationError('Missing environment variable NX_OPENAI_KEY');
@@ -80,11 +89,11 @@ export async function nxDevDataAccessAi(
8089
}: CreateEmbeddingResponse = embeddingResponse.data;
8190

8291
const { error: matchError, data: pageSections } = await supabaseClient.rpc(
83-
'match_page_sections',
92+
'match_page_sections_2',
8493
{
8594
embedding,
8695
match_threshold: 0.78,
87-
match_count: 10,
96+
match_count: 15,
8897
min_content_length: 50,
8998
}
9099
);
@@ -97,13 +106,13 @@ export async function nxDevDataAccessAi(
97106
let tokenCount = 0;
98107
let contextText = '';
99108

100-
for (let i = 0; i < pageSections.length; i++) {
101-
const pageSection = pageSections[i];
109+
for (let i = 0; i < (pageSections as PageSection[]).length; i++) {
110+
const pageSection: PageSection = pageSections[i];
102111
const content = pageSection.content;
103112
const encoded = tokenizer.encode(content);
104113
tokenCount += encoded.text.length;
105114

106-
if (tokenCount >= 1500) {
115+
if (tokenCount >= 2500) {
107116
break;
108117
}
109118

@@ -163,9 +172,13 @@ export async function nxDevDataAccessAi(
163172

164173
const responseWithoutBadLinks = await sanitizeLinksInResponse(message);
165174

175+
const sources = getListOfSources(pageSections);
176+
166177
return {
167178
textResponse: responseWithoutBadLinks,
168179
usage: response.data.usage,
180+
sources,
181+
sourcesMarkdown: toMarkdownList(sources),
169182
};
170183
} catch (err: unknown) {
171184
if (err instanceof UserError) {

nx-dev/data-access-ai/src/lib/utils.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,13 @@
11
import { CreateChatCompletionResponse } from 'openai';
2+
/**
 * Shape of one matched documentation section.
 *
 * `nxDevDataAccessAi` treats the rows returned by its page-section matching
 * RPC as `PageSection[]`. Of these fields, the visible code reads:
 * - `content`: tokenized and counted against the context budget;
 * - `heading`: used as the link label in the list of sources;
 * - `url_partial`: appended to `https://nx.dev` to build the source URL;
 *   sections where it is `null` or empty are left out of the sources.
 *
 * NOTE(review): `id`, `page_id`, `similarity` and `slug` are not read by the
 * code visible here; presumably they mirror the RPC's result columns — confirm
 * against the database function.
 */
export interface PageSection {
3+
id: number;
4+
page_id: number;
5+
content: string;
6+
heading: string;
7+
similarity: number;
8+
slug: string;
9+
url_partial: string | null;
10+
}
211

312
export function getMessageFromResponse(
413
response: CreateChatCompletionResponse
@@ -11,6 +20,34 @@ export function getMessageFromResponse(
1120
return response.choices[0].message?.content ?? '';
1221
}
1322

23+
/**
 * Builds the de-duplicated list of documentation pages behind a set of
 * matched sections.
 *
 * Sections whose `url_partial` is `null` or empty are skipped. When several
 * sections point at the same page, only the first one (in input order) is
 * kept, so its heading becomes the label for that page.
 *
 * @param pageSections - Matched sections; only `heading` and `url_partial`
 *   are read. `null` or `undefined` is treated as an empty list so a missing
 *   query result does not throw.
 * @returns One `{ heading, url }` entry per distinct page, in input order,
 *   with `url` being an absolute `https://nx.dev/...` address.
 */
export function getListOfSources(
  pageSections:
    | ReadonlyArray<{ heading: string; url_partial: string | null }>
    | null
    | undefined
): { heading: string; url: string }[] {
  const seenPaths = new Set<string>();
  const sources: { heading: string; url: string }[] = [];

  for (const { heading, url_partial: urlPartial } of pageSections ?? []) {
    if (!urlPartial) {
      continue;
    }

    // Guarantee exactly one separator between the host and the path, so a
    // partial stored without a leading slash does not yield "nx.devfoo".
    const path = urlPartial.startsWith('/') ? urlPartial : `/${urlPartial}`;
    if (seenPaths.has(path)) {
      continue;
    }

    seenPaths.add(path);
    sources.push({ heading, url: `https://nx.dev${path}` });
  }

  return sources;
}
42+
43+
/**
 * Renders sources as a Markdown bullet list of links, one per line.
 *
 * Characters that would terminate the link early are neutralised: `\`, `[`
 * and `]` in the heading are backslash-escaped, and `(`, `)` and whitespace
 * in the URL are percent-encoded.
 *
 * @param sections - Entries to render; `heading` is the link text and `url`
 *   the link target.
 * @returns The list joined with `\n`, or an empty string for an empty input.
 */
export function toMarkdownList(
  sections: { heading: string; url: string }[]
): string {
  return sections
    .map(({ heading, url }) => {
      // Backslash-escape characters that delimit the link text.
      const label = heading.replace(/[\\[\]]/g, '\\$&');
      // encodeURIComponent leaves parentheses untouched, so encode by hand.
      const target = url.replace(
        /[()\s]/g,
        (ch) =>
          `%${ch.charCodeAt(0).toString(16).toUpperCase().padStart(2, '0')}`
      );
      return `- [${label}](${target})`;
    })
    .join('\n');
}
50+
1451
export async function sanitizeLinksInResponse(
1552
response: string
1653
): Promise<string> {

nx-dev/feature-ai/src/lib/feature-ai.tsx

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ export function FeatureAi(): JSX.Element {
1111
const [query, setSearchTerm] = useState('');
1212
const [loading, setLoading] = useState(false);
1313
const [feedbackSent, setFeedbackSent] = useState<boolean>(false);
14+
const [sources, setSources] = useState('');
1415

1516
const warning = `
1617
{% callout type="warning" title="Always double check!" %}
@@ -23,19 +24,33 @@ export function FeatureAi(): JSX.Element {
2324
setLoading(true);
2425
let completeText = '';
2526
let usage;
27+
let sourcesMarkdown = '';
2628
try {
2729
const aiResponse = await nxDevDataAccessAi(query);
2830
completeText = aiResponse.textResponse;
2931
usage = aiResponse.usage;
32+
setSources(
33+
JSON.stringify(aiResponse.sources?.map((source) => source.url))
34+
);
35+
sourcesMarkdown = aiResponse.sourcesMarkdown;
3036
setLoading(false);
3137
} catch (error) {
3238
setError(error as any);
3339
setLoading(false);
3440
}
35-
sendCustomEvent('ai_query', 'ai', 'query', undefined, { query, ...usage });
41+
sendCustomEvent('ai_query', 'ai', 'query', undefined, {
42+
query,
43+
...usage,
44+
});
3645
setFeedbackSent(false);
46+
47+
const sourcesMd = `
48+
{% callout type="info" title="Sources" %}
49+
${sourcesMarkdown}
50+
{% /callout %}`;
51+
3752
setFinalResult(
38-
renderMarkdown(warning + completeText, { filePath: '' }).node
53+
renderMarkdown(warning + completeText + sourcesMd, { filePath: '' }).node
3954
);
4055
};
4156

@@ -44,6 +59,7 @@ export function FeatureAi(): JSX.Element {
4459
sendCustomEvent('ai_feedback', 'ai', type, undefined, {
4560
query,
4661
result: finalResult,
62+
sources,
4763
});
4864
setFeedbackSent(true);
4965
} catch (error) {

package.json

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,7 @@
175175
"flat": "^5.0.2",
176176
"fork-ts-checker-webpack-plugin": "7.2.13",
177177
"fs-extra": "^11.1.0",
178+
"github-slugger": "^2.0.0",
178179
"gpt3-tokenizer": "^1.1.5",
179180
"html-webpack-plugin": "5.5.0",
180181
"http-server": "14.1.0",
@@ -191,6 +192,7 @@
191192
"jest": "29.4.3",
192193
"jest-config": "^29.4.1",
193194
"jest-environment-jsdom": "29.4.3",
195+
"jest-environment-node": "^29.4.1",
194196
"jest-resolve": "^29.4.1",
195197
"jest-util": "^29.4.1",
196198
"js-tokens": "^4.0.0",
@@ -206,6 +208,9 @@
206208
"loader-utils": "2.0.3",
207209
"magic-string": "~0.30.2",
208210
"markdown-factory": "^0.0.6",
211+
"mdast-util-from-markdown": "^1.3.1",
212+
"mdast-util-to-markdown": "^1.5.0",
213+
"mdast-util-to-string": "^3.2.0",
209214
"memfs": "^3.0.1",
210215
"metro-config": "0.76.7",
211216
"metro-resolver": "0.76.7",
@@ -267,6 +272,7 @@
267272
"typedoc": "0.24.8",
268273
"typedoc-plugin-markdown": "3.15.3",
269274
"typescript": "~5.1.3",
275+
"unist-builder": "^4.0.0",
270276
"unzipper": "^0.10.11",
271277
"url-loader": "^4.1.1",
272278
"use-sync-external-store": "^1.2.0",
@@ -359,4 +365,3 @@
359365
}
360366
}
361367
}
362-

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy