Skip to content
This repository was archived by the owner on Jan 27, 2025. It is now read-only.

Commit 56affdc

Browse files
author
Mark Shields
committed
feat(glean-backend): working indexer
1 parent c23233e commit 56affdc

21 files changed

Lines changed: 905 additions & 696 deletions

plugins/glean-backend/README.md

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
# Glean Backend
22

3-
> NOTE: This is currently not in use anymore. However, it could be re-purposed
4-
> to index other parts of Backstage (e.g. catalog entities, tools, etc.)
5-
63
Welcome to the Glean backend plugin!
74

85
This backend plugin is used to make our Backstage content available in

plugins/glean-backend/config.d.ts

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,30 @@
1+
import { SchedulerServiceTaskScheduleDefinitionConfig } from '@backstage/backend-plugin-api';
2+
13
export interface Config {
24
/**
35
* Glean plugin configuration.
46
*/
57
glean?: {
68
/**
7-
* The base url of the Glean API
9+
* The index url of the Glean API
10+
*/
11+
apiIndexUrl: string;
12+
13+
/**
14+
* The data source of the Glean API to use
15+
* See: https://support.glean.com/hc/en-us/articles/30038992119451-Data-Sources
816
*/
9-
apiBaseUrl: string;
17+
datasource: string;
1018

1119
/**
1220
* The api token
1321
* @visibility secret
1422
*/
1523
token: string;
24+
25+
/**
26+
* The schedule for how often to run Glean indexing
27+
*/
28+
schedule?: SchedulerServiceTaskScheduleDefinitionConfig;
1629
};
1730
}

plugins/glean-backend/dev/index.js

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// This package should be installed as a `dev` dependency
2+
import { createBackend } from '@backstage/backend-defaults';
3+
4+
const backend = createBackend();
5+
// Path to the file where the plugin is exported as default
6+
backend.add(import('../src'));
7+
backend.start();

plugins/glean-backend/package.json

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,16 @@
2323
"postpack": "backstage-cli package postpack"
2424
},
2525
"dependencies": {
26-
"@backstage/backend-common": "^0.21.7",
27-
"@backstage/catalog-model": "^1.4.5",
28-
"@backstage/config": "^1.2.0",
29-
"@backstage/core-plugin-api": "^1.9.2",
30-
"@backstage/plugin-techdocs-backend": "^1.10.13",
26+
"@backstage/backend-common": "^0.25.0",
27+
"@backstage/backend-plugin-api": "^1.0.2",
28+
"@backstage/catalog-client": "^1.8.0",
29+
"@backstage/catalog-model": "^1.7.1",
30+
"@backstage/config": "^1.3.0",
31+
"@backstage/core-plugin-api": "^1.10.1",
32+
"@backstage/errors": "^1.2.5",
33+
"@backstage/plugin-catalog-node": "^1.14.0",
34+
"@backstage/plugin-techdocs": "^1.11.2",
35+
"@backstage/plugin-techdocs-backend": "^1.11.3",
3136
"@types/express": "*",
3237
"@types/supertest": "^6.0.2",
3338
"@types/uuid": "^9.0.8",
@@ -36,12 +41,22 @@
3641
"lodash": "^4.17.21",
3742
"node-fetch": "^2.7.0",
3843
"node-html-parser": "^6.1.13",
44+
"react": "^19.0.0",
45+
"react-dom": "^19.0.0",
46+
"react-router-dom": "^7.0.2",
3947
"uuid": "^9.0.1",
40-
"winston": "^3.13.0",
4148
"yn": "^5.0.0"
4249
},
4350
"devDependencies": {
44-
"@backstage/cli": "^0.26.4",
51+
"@backstage/backend-defaults": "^0.5.3",
52+
"@backstage/backend-test-utils": "^1.1.0",
53+
"@backstage/cli": "^0.29.2",
54+
"@backstage/test-utils": "^1.7.2",
55+
"@testing-library/dom": "^10.4.0",
56+
"@testing-library/react": "^16.1.0",
57+
"@types/jest": "^29.5.14",
58+
"@types/react": "^19",
59+
"@types/react-dom": "^19",
4560
"msw": "^1.3.3",
4661
"supertest": "^6.3.4"
4762
},
Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
import { getVoidLogger } from '@backstage/backend-common';
2+
import { mockServices } from '@backstage/backend-test-utils';
3+
import { catalogServiceMock } from '@backstage/plugin-catalog-node/testUtils';
4+
import { Entity } from '@backstage/catalog-model';
5+
import { ConfigReader } from '@backstage/config';
6+
import { TechDocsMetadata } from '@backstage/plugin-techdocs-backend';
7+
import { rest } from 'msw';
8+
import { setupServer } from 'msw/node';
9+
import { GleanIndexClient } from './GleanIndexClient';
10+
import { htmlFixture } from './fixtures/staticTechDocsHtml';
11+
import { GleanDocument } from './types';
12+
13+
describe('GleanIndexClient', () => {
14+
let gleanIndexClient: GleanIndexClient;
15+
const server = setupServer();
16+
const discoveryApi = { getBaseUrl: jest.fn() };
17+
const gleanApiIndexUrl =
18+
'https://customer-be.glean.com/api/index/v1/bulkindexdocuments';
19+
const auth = mockServices.auth();
20+
21+
const config = new ConfigReader({
22+
backend: {
23+
baseUrl: 'http://localhost',
24+
listen: { port: 7000 },
25+
},
26+
app: {
27+
baseUrl: 'http://localhost',
28+
listen: { port: 3000 },
29+
},
30+
glean: {
31+
apiIndexUrl: gleanApiIndexUrl,
32+
token: 'I-am-a-token',
33+
datasource: 'I-am-a-datasource',
34+
},
35+
});
36+
37+
const entityWithUrlRef: Entity = {
38+
apiVersion: 'backstage.io/v1alpha1',
39+
kind: 'Component',
40+
metadata: {
41+
name: 'some-handbook-with-url-ref',
42+
namespace: 'default',
43+
annotations: {
44+
'backstage.io/techdocs-ref': 'url:some_url',
45+
},
46+
spec: {},
47+
},
48+
};
49+
const entityWithDirRef: Entity = {
50+
apiVersion: 'backstage.io/v1alpha1',
51+
kind: 'Component',
52+
metadata: {
53+
name: 'some-handbook-with-dir-ref',
54+
namespace: 'default',
55+
annotations: {
56+
'backstage.io/techdocs-ref': 'dir:.',
57+
},
58+
spec: {},
59+
},
60+
};
61+
const entities = [entityWithUrlRef, entityWithDirRef];
62+
const catalogApi = catalogServiceMock({ entities });
63+
64+
beforeAll(() => server.listen());
65+
66+
beforeEach(() => {
67+
gleanIndexClient = GleanIndexClient.create({
68+
auth,
69+
catalogApi,
70+
config,
71+
discoveryApi,
72+
logger: getVoidLogger(),
73+
});
74+
});
75+
76+
afterEach(() => {
77+
jest.resetAllMocks();
78+
server.resetHandlers();
79+
});
80+
81+
afterAll(() => server.close());
82+
83+
describe('create', () => {
84+
it('returns a new instance of GleanIndexClient', () => {
85+
expect(
86+
GleanIndexClient.create({
87+
auth,
88+
catalogApi,
89+
config,
90+
discoveryApi,
91+
logger: getVoidLogger(),
92+
}),
93+
).toBeInstanceOf(GleanIndexClient);
94+
});
95+
});
96+
97+
describe('parseMainContent', () => {
98+
it('removes all nav elements from HTML', () => {
99+
expect(htmlFixture).toEqual(expect.stringContaining('<nav'));
100+
// eslint-disable-next-line dot-notation
101+
expect(gleanIndexClient['parseMainContent'](htmlFixture)).toEqual(
102+
expect.not.stringContaining('<nav'),
103+
);
104+
});
105+
});
106+
107+
describe('buildDocument', () => {
108+
beforeEach(() => {
109+
// eslint-disable-next-line dot-notation
110+
gleanIndexClient['techDocsClient'].getTechDocsStaticFile = jest
111+
.fn()
112+
.mockResolvedValue(htmlFixture);
113+
});
114+
115+
it('returns a document object', async () => {
116+
expect(
117+
await gleanIndexClient.buildDocument(
118+
entityWithUrlRef,
119+
'foo/index.html',
120+
),
121+
).toEqual({
122+
id: 'default/component/some-handbook-with-url-ref/foo/index.html',
123+
title: 'Engineering Handbook',
124+
container: 'some-handbook-with-url-ref',
125+
datasource: 'I-am-a-datasource',
126+
viewURL:
127+
'http://localhost/docs/default/component/some-handbook-with-url-ref/foo',
128+
body: {
129+
mimeType: 'HTML',
130+
textContent: expect.stringContaining(
131+
"Welcome to Company's Engineering Handbook!",
132+
),
133+
},
134+
updatedAt: Math.floor(new Date('April 6, 2022').getTime() / 1000),
135+
permissions: { allowAnonymousAccess: true },
136+
});
137+
});
138+
});
139+
140+
describe('batchIndexTechDocs', () => {
141+
const mockDocument: GleanDocument = {
142+
id: 'document-1',
143+
title: 'I am a document',
144+
container: 'some-handbook',
145+
datasource: 'I-am-a-datasource',
146+
viewURL: 'http://backstage.w10e.com',
147+
body: {
148+
mimeType: 'HTML',
149+
textContent: 'I am some text content',
150+
},
151+
updatedAt: 1652818028,
152+
permissions: { allowAnonymousAccess: true },
153+
};
154+
155+
const mockTechDocsMetadata: TechDocsMetadata = {
156+
site_name: 'some-handbook',
157+
site_description: 'Company&#x27,s Engineering Handbook',
158+
etag: '38cf6ed97f8c501426a0e311b76d67c69fc46df3',
159+
build_timestamp: 1652796973948,
160+
files: ['index.html', 'interviewing/index.html', 'onboarding.html'],
161+
};
162+
163+
beforeEach(() => {
164+
jest
165+
.spyOn(gleanIndexClient, 'buildDocument')
166+
.mockResolvedValue(mockDocument);
167+
jest
168+
.spyOn(gleanIndexClient, 'indexDocuments')
169+
.mockResolvedValue('response');
170+
171+
// eslint-disable-next-line dot-notation
172+
gleanIndexClient['techDocsClient'].getTechDocsMetadata = jest
173+
.fn()
174+
.mockResolvedValue(mockTechDocsMetadata);
175+
176+
server.use(
177+
rest.post(`${gleanApiIndexUrl}`, (_req, res, ctx) => {
178+
return res(ctx.status(200));
179+
}),
180+
);
181+
});
182+
183+
it('uploads the Glean documents', async () => {
184+
const indexTechDocs = await gleanIndexClient.batchIndexDocuments(
185+
'upload-',
186+
[mockDocument],
187+
);
188+
expect(gleanIndexClient.indexDocuments).toHaveBeenCalledTimes(1);
189+
expect(indexTechDocs).toEqual(1);
190+
});
191+
192+
it('builds and uploads the Glean documents for all entities', async () => {
193+
const batchIndexTechDocs = await gleanIndexClient.batchIndexTechDocs(
194+
entities,
195+
);
196+
expect(batchIndexTechDocs.uploadId).toContain('upload-');
197+
expect(batchIndexTechDocs.batchCount).toEqual(1);
198+
});
199+
200+
describe('when there are no files to index', () => {
201+
beforeEach(() => {
202+
// eslint-disable-next-line dot-notation
203+
gleanIndexClient['techDocsClient'].getTechDocsMetadata = jest
204+
.fn()
205+
.mockResolvedValue({ ...mockTechDocsMetadata, files: [] });
206+
});
207+
208+
it('does not index tech docs with Glean', async () => {
209+
const batchIndexTechDocs = await gleanIndexClient.batchIndexTechDocs(
210+
[],
211+
);
212+
expect(gleanIndexClient.buildDocument).not.toHaveBeenCalled();
213+
expect(batchIndexTechDocs.uploadId).toContain('upload-');
214+
expect(batchIndexTechDocs.batchCount).toEqual(0);
215+
});
216+
});
217+
});
218+
219+
describe('batchIndex', () => {
220+
beforeEach(() => {
221+
jest.spyOn(gleanIndexClient, 'batchIndexTechDocs').mockResolvedValue({
222+
uploadId: 'upload-7bbf4c41-b73a-4ca2-8245-a23a0c4f37e7',
223+
batchCount: 1,
224+
});
225+
});
226+
227+
it('indexes the TechDocs entities', async () => {
228+
await gleanIndexClient.batchIndex(entities);
229+
expect(gleanIndexClient.batchIndexTechDocs).toHaveBeenCalledTimes(1);
230+
});
231+
});
232+
});

0 commit comments

Comments
 (0)