From e60335087979e5d6afe7ac9d5b261acc326dd06a Mon Sep 17 00:00:00 2001 From: chen__h Date: Mon, 12 Dec 2022 23:32:59 +0800 Subject: [PATCH 1/3] Respect ignoreCase flag in CommonGramsFilterFactory --- .../CommonGrams/CommonGramsFilterFactory.cs | 4 ++-- .../TestCommonGramsFilterFactory.cs | 18 +++++++++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs index 560b3a8c20..b18756c2ce 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs @@ -36,7 +36,7 @@ namespace Lucene.Net.Analysis.CommonGrams /// public class CommonGramsFilterFactory : TokenFilterFactory, IResourceLoaderAware { - // TODO: shared base class for Stop/Keep/CommonGrams? + // TODO: shared base class for Stop/Keep/CommonGrams? private CharArraySet commonWords; private readonly string commonWordFiles; private readonly string format; @@ -71,7 +71,7 @@ public virtual void Inform(IResourceLoader loader) } else { - commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET; + commonWords = new CharArraySet(m_luceneMatchVersion,StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase); } } diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs index 9655b6bd0a..5e75c52c1e 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs @@ -28,7 +28,7 @@ namespace Lucene.Net.Analysis.CommonGrams /// Tests pretty much copied from StopFilterFactoryTest We use the test files /// used by the StopFilterFactoryTest TODO: consider creating separate test files /// so this won't break if stop filter test files change - /// + /// /// public class TestCommonGramsFilterFactory : BaseTokenStreamFactoryTestCase { @@ -79,6 +79,22 @@ public virtual void TestDefaults() AssertTokenStreamContents(stream, new string[] { "testing", "testing_the", "the", "the_factory", "factory" }); } + [Test] + public void TestIgnoreCase() + { + IResourceLoader loader = new ClasspathResourceLoader(typeof(TestAnalyzers)); + CommonGramsFilterFactory factory = + (CommonGramsFilterFactory) + TokenFilterFactory("CommonGrams", TEST_VERSION_CURRENT, loader, "ignoreCase", "true"); + CharArraySet words = factory.CommonWords; + assertTrue("words is null and it shouldn't be", words != null); + assertTrue(words.contains("the")); + assertTrue(words.contains("The")); + Tokenizer tokenizer = new MockTokenizer(new StringReader("testing The factory"),MockTokenizer.WHITESPACE, false); + TokenStream stream = factory.Create(tokenizer); + AssertTokenStreamContents( + stream, new String[] {"testing", "testing_The", "The", "The_factory", "factory"}); + } /// /// Test that bogus arguments result in exception [Test] From 9ee498d4e773d4d09a26f393dc915208b4eea4a0 Mon Sep 17 00:00:00 2001 From: Paul Irwin Date: Mon, 28 Oct 2024 16:52:35 -0600 Subject: [PATCH 2/3] Add LUCENENET-specific backport comment --- .../Analysis/CommonGrams/CommonGramsFilterFactory.cs | 5 +++-- .../Analysis/Commongrams/TestCommonGramsFilterFactory.cs | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs index b18756c2ce..670151703d 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/CommonGrams/CommonGramsFilterFactory.cs @@ -71,7 +71,8 @@ public virtual void Inform(IResourceLoader loader) } else { - commonWords = new CharArraySet(m_luceneMatchVersion,StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase); + // LUCENENET-specific: backported ignoreCase fix from Lucene 8.10.0 (lucene#188, LUCENE-10008) + commonWords = new CharArraySet(m_luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase); } } @@ -85,4 +86,4 @@ public override TokenStream Create(TokenStream input) return commonGrams; } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs index 5e75c52c1e..c568ffe115 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs @@ -79,6 +79,7 @@ public virtual void TestDefaults() AssertTokenStreamContents(stream, new string[] { "testing", "testing_the", "the", "the_factory", "factory" }); } + // LUCENENET-specific: backported ignoreCase fix from Lucene 8.10.0 (lucene#188, LUCENE-10008) [Test] public void TestIgnoreCase() { @@ -93,8 +94,9 @@ public void TestIgnoreCase() Tokenizer tokenizer = new MockTokenizer(new StringReader("testing The factory"),MockTokenizer.WHITESPACE, false); TokenStream stream = factory.Create(tokenizer); AssertTokenStreamContents( - stream, new String[] {"testing", "testing_The", "The", "The_factory", "factory"}); + stream, new string[] {"testing", "testing_The", "The", "The_factory", "factory"}); } + /// /// Test that bogus arguments result in exception [Test] @@ -111,4 +113,4 @@ public virtual void TestBogusArguments() } } } -} \ No newline at end of file +} From 7d62449d241411a4ee6acb1f53277d7de03a5d4a Mon Sep 17 00:00:00 2001 From: Paul Irwin Date: Mon, 11 Nov 2024 09:29:10 -0700 Subject: [PATCH 3/3] Use GetType instead of typeof for resource loader --- .../Analysis/Commongrams/TestCommonGramsFilterFactory.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs index c568ffe115..063fc1b22b 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Commongrams/TestCommonGramsFilterFactory.cs @@ -83,7 +83,7 @@ public virtual void TestDefaults() [Test] public void TestIgnoreCase() { - IResourceLoader loader = new ClasspathResourceLoader(typeof(TestAnalyzers)); + IResourceLoader loader = new ClasspathResourceLoader(GetType()); CommonGramsFilterFactory factory = (CommonGramsFilterFactory) TokenFilterFactory("CommonGrams", TEST_VERSION_CURRENT, loader, "ignoreCase", "true");