diff --git a/content/copilot/concepts/tools/ai-tools.md b/content/copilot/concepts/tools/ai-tools.md
index 5748e82c6bdd..8154c61bc734 100644
--- a/content/copilot/concepts/tools/ai-tools.md
+++ b/content/copilot/concepts/tools/ai-tools.md
@@ -18,7 +18,7 @@ category:
 
 ## Overview
 
-The use of AI tools is increasingly becoming a standard part of a software developer's daily workflow. To be competitive in the job market, it's important to to know which AI tools to use for each task you face as a developer.
+The use of AI tools is increasingly becoming a standard part of a software developer's daily workflow. To be competitive in the job market, it's important to know which AI tools to use for each task you face as a developer.
 
 {% data variables.product.github %}'s AI tools assist with every phase of the software development lifecycle (SDLC):
 
diff --git a/src/landings/components/shared/LandingArticleGridWithFilter.tsx b/src/landings/components/shared/LandingArticleGridWithFilter.tsx
index d3e1fcb87efd..6d0ac66a4d3e 100644
--- a/src/landings/components/shared/LandingArticleGridWithFilter.tsx
+++ b/src/landings/components/shared/LandingArticleGridWithFilter.tsx
@@ -8,6 +8,7 @@ import { useTranslation } from '@/languages/components/useTranslation'
 import { ArticleCardItems, ChildTocItem, TocItem } from '@/landings/types'
 import { LandingType } from '@/landings/context/LandingContext'
 import type { QueryParams } from '@/search/components/hooks/useMultiQueryParams'
+import { fuzzyMatchScore } from '@/landings/lib/fuzzy-match'
 
 import styles from './LandingArticleGridWithFilter.module.scss'
 
@@ -151,20 +152,27 @@ export const ArticleGrid = ({
     let results = filteredArticlesByLandingType
 
     if (searchQuery) {
-      results = results.filter((token) => {
-        return Object.values(token).some((value) => {
-          if (typeof value === 'string') {
-            return value.toLowerCase().includes(searchQuery.toLowerCase())
-          } else if (Array.isArray(value)) {
-            return value.some((item) => {
-              if (typeof item === 'string') {
-                return item.toLowerCase().includes(searchQuery.toLowerCase())
+      // Calculate match scores for each article
+      const scoredResults = results
+        .map((token) => {
+          let maxScore = -1
+          for (const value of Object.values(token)) {
+            if (typeof value === 'string') {
+              maxScore = Math.max(maxScore, fuzzyMatchScore(value, searchQuery))
+            } else if (Array.isArray(value)) {
+              for (const item of value) {
+                if (typeof item === 'string') {
+                  maxScore = Math.max(maxScore, fuzzyMatchScore(item, searchQuery))
+                }
               }
-            })
+            }
           }
-          return false
+          return { token, score: maxScore }
         })
-      })
+        .filter(({ score }) => score >= 0)
+        .sort((a, b) => b.score - a.score)
+
+      results = scoredResults.map(({ token }) => token)
     }
 
     if (selectedCategory !== ALL_CATEGORIES) {
diff --git a/src/landings/lib/fuzzy-match.ts b/src/landings/lib/fuzzy-match.ts
new file mode 100644
index 000000000000..9d83a0faeb25
--- /dev/null
+++ b/src/landings/lib/fuzzy-match.ts
@@ -0,0 +1,100 @@
+/**
+ * Minimum share of the query's bigrams that must appear in the text for a
+ * fuzzy (non-substring) match to count.
+ *
+ * 60% threshold: Empirically chosen to balance precision vs recall.
+ * Lower values (e.g., 40%) match too loosely (e.g., "agent" matches "urgent").
+ * Higher values (e.g., 80%) miss reasonable matches like singular/plural variations.
+ * 60% captures most typo corrections and word form variations while filtering noise.
+ */
+const BIGRAM_COVERAGE_THRESHOLD = 0.6
+
+// Upper bound on memoized strings. The cache lives at module level, so without
+// a cap every distinct article string and every query typed would be retained.
+const BIGRAM_CACHE_MAX_ENTRIES = 2000
+
+// Memoization cache for bigram computation, keyed by the lowercased input
+const bigramCache = new Map<string, Set<string>>()
+
+/**
+ * Extract the set of character bigrams from a string, ignoring case and
+ * whitespace (e.g., "agent" → {"ag", "ge", "en", "nt"}).
+ *
+ * Strings with fewer than two non-whitespace characters yield an empty set.
+ * The returned set is shared with the cache and must not be mutated.
+ */
+const getBigrams = (str: string): Set<string> => {
+  const key = str.toLowerCase()
+  const cached = bigramCache.get(key)
+  if (cached) return cached
+
+  const s = key.replace(/\s+/g, '')
+  const bigrams = new Set<string>()
+  for (let i = 0; i < s.length - 1; i++) {
+    bigrams.add(s.slice(i, i + 2))
+  }
+
+  // Map iterates in insertion order, so the first key is the oldest entry
+  if (bigramCache.size >= BIGRAM_CACHE_MAX_ENTRIES) {
+    const oldest = bigramCache.keys().next()
+    if (!oldest.done) bigramCache.delete(oldest.value)
+  }
+
+  bigramCache.set(key, bigrams)
+  return bigrams
+}
+
+/**
+ * Coverage: the fraction (0 to 1) of the search term's distinct bigrams that
+ * are also found in the text. Better for matching short queries against long
+ * text than a symmetric similarity measure.
+ *
+ * @param text - The text being searched
+ * @param search - The query whose bigrams are looked up in the text
+ * @returns The covered fraction, or 0 when the query has no bigrams (fewer
+ *   than two non-whitespace characters)
+ */
+export const bigramCoverage = (text: string, search: string): number => {
+  const textBigrams = getBigrams(text)
+  const searchBigrams = getBigrams(search)
+
+  if (searchBigrams.size === 0) return 0
+
+  let found = 0
+  for (const bigram of searchBigrams) {
+    if (textBigrams.has(bigram)) found++
+  }
+  return found / searchBigrams.size
+}
+
+/**
+ * Score how well a text matches a search term (case-insensitive).
+ *
+ * @param text - The text being searched
+ * @param searchTerm - The query to look for
+ * @returns 1 when the text contains the search term as a substring (an empty
+ *   search term therefore always scores 1); otherwise the bigram coverage when
+ *   it reaches BIGRAM_COVERAGE_THRESHOLD (0.6 to 1, where full coverage ties
+ *   with a substring match); otherwise -1 for no match
+ */
+export const fuzzyMatchScore = (text: string, searchTerm: string): number => {
+  const lowerText = text.toLowerCase()
+  const lowerSearch = searchTerm.toLowerCase()
+
+  // Exact substring match gets highest score
+  if (lowerText.includes(lowerSearch)) return 1
+
+  // Bigram coverage: what % of search bigrams appear in text
+  // This works better than Jaccard when text is much longer than search
+  const score = bigramCoverage(text, searchTerm)
+  return score >= BIGRAM_COVERAGE_THRESHOLD ? score : -1
+}
+
+/**
+ * Check if searchTerm matches text (for filtering).
+ *
+ * @returns true when fuzzyMatchScore is non-negative
+ */
+export const fuzzyMatch = (text: string, searchTerm: string): boolean => {
+  return fuzzyMatchScore(text, searchTerm) >= 0
+}
diff --git a/src/landings/tests/fuzzy-match.ts b/src/landings/tests/fuzzy-match.ts
new file mode 100644
index 000000000000..fb68835fdaa1
--- /dev/null
+++ b/src/landings/tests/fuzzy-match.ts
@@ -0,0 +1,119 @@
+import { describe, expect, test } from 'vitest'
+
+import { fuzzyMatch, fuzzyMatchScore, bigramCoverage } from '@/landings/lib/fuzzy-match'
+
+describe('fuzzyMatch', () => {
+  test('matches exact substrings', () => {
+    expect(fuzzyMatch('GitHub Copilot agents', 'agent')).toBe(true)
+    expect(fuzzyMatch('GitHub Copilot agents', 'copilot')).toBe(true)
+  })
+
+  test('matches singular vs plural via bigrams', () => {
+    expect(fuzzyMatch('GitHub Copilot agent', 'agents')).toBe(true)
+    expect(fuzzyMatch('Managing your repository', 'repositories')).toBe(true)
+  })
+
+  test('is case insensitive', () => {
+    expect(fuzzyMatch('GitHub Copilot', 'COPILOT')).toBe(true)
+    expect(fuzzyMatch('AGENTS', 'agents')).toBe(true)
+  })
+
+  test('returns false for non-matching text', () => {
+    expect(fuzzyMatch('GitHub Copilot', 'xyz')).toBe(false)
+    expect(fuzzyMatch('Repository settings', 'workflow')).toBe(false)
+  })
+
+  test('matches multi-word queries via bigram coverage', () => {
+    expect(fuzzyMatch('About GitHub Copilot agent features', 'copilot agent')).toBe(true)
+    expect(fuzzyMatch('Using agent in Copilot', 'copilot agent')).toBe(true)
+  })
+
+  test('multi-word queries require sufficient bigram overlap', () => {
+    expect(fuzzyMatch('xyz abc', 'copilot agents')).toBe(false)
+  })
+
+  test('handles edge cases gracefully', () => {
+    // Empty strings
+    expect(fuzzyMatch('GitHub Copilot', '')).toBe(true) // empty search matches anything
+    expect(fuzzyMatch('', 'copilot')).toBe(false)
+    expect(fuzzyMatch('', '')).toBe(true)
+
+    // Whitespace-only queries
+    expect(fuzzyMatch('GitHub Copilot', '   ')).toBe(false)
+
+    // Multiple consecutive spaces in query
+    expect(fuzzyMatch('GitHub Copilot agent', 'copilot   agent')).toBe(true)
+  })
+})
+
+describe('fuzzyMatchScore', () => {
+  test('returns 1 for exact substring match', () => {
+    expect(fuzzyMatchScore('GitHub Copilot agents', 'copilot')).toBe(1)
+  })
+
+  test('returns -1 for no match', () => {
+    expect(fuzzyMatchScore('GitHub Copilot', 'xyz')).toBe(-1)
+  })
+
+  test('returns bigram coverage score for fuzzy matches', () => {
+    // Bigram coverage should give a score between 0.6 and 1
+    const score = fuzzyMatchScore('About Copilot memory features', 'memory copilot')
+    expect(score).toBeGreaterThan(0.6)
+    expect(score).toBeLessThan(1)
+  })
+
+  test('matches singular vs plural via bigrams', () => {
+    // "agents" bigrams: ag, ge, en, nt, ts (5)
+    // "agent" in text has: ag, ge, en, nt (4)
+    // Coverage: 4/5 = 0.8, which is > 0.6 threshold
+    const score = fuzzyMatchScore('GitHub Copilot agent', 'agents')
+    expect(score).toBeGreaterThan(0.6)
+  })
+
+  test('exact substring matches score higher than fuzzy matches', () => {
+    const exactScore = fuzzyMatchScore('copilot agent guide', 'copilot agent')
+    const fuzzyScore = fuzzyMatchScore('About Copilot memory features', 'memory copilot')
+    expect(exactScore).toBe(1)
+    expect(fuzzyScore).toBeLessThan(1)
+  })
+})
+
+describe('bigramCoverage', () => {
+  test('returns 1.0 when all search bigrams are found in text', () => {
+    expect(bigramCoverage('copilot agent', 'agent')).toBe(1)
+  })
+
+  test('returns 0 for completely different texts', () => {
+    expect(bigramCoverage('xyz', 'abc')).toBe(0)
+  })
+
+  test('returns 0 for empty search string', () => {
+    expect(bigramCoverage('some text', '')).toBe(0)
+  })
+
+  test('handles singular vs plural with high coverage', () => {
+    // "agents" bigrams: ag, ge, en, nt, ts (5)
+    // "agent" in text has: ag, ge, en, nt (4)
+    // Coverage: 4/5 = 0.8
+    const coverage = bigramCoverage('agent', 'agents')
+    expect(coverage).toBeCloseTo(4 / 5, 2)
+  })
+
+  test('calculates partial coverage correctly', () => {
+    // Text "hello" has bigrams: he, el, ll, lo
+    // Search "help" has bigrams: he, el, lp
+    // Found: he, el (2 of 3) = 0.67
+    const coverage = bigramCoverage('hello', 'help')
+    expect(coverage).toBeCloseTo(2 / 3, 2)
+  })
+
+  test('is case insensitive', () => {
+    expect(bigramCoverage('COPILOT', 'copilot')).toBe(1)
+    expect(bigramCoverage('copilot', 'COPILOT')).toBe(1)
+  })
+
+  test('ignores whitespace in both text and search', () => {
+    expect(bigramCoverage('co pi lot', 'copilot')).toBe(1)
+    expect(bigramCoverage('copilot', 'co pi lot')).toBe(1)
+  })
+})