Avoid unnecessary FileInfo/DirectoryInfo allocations, #832

apache · Dec 5, 2024 · b695677
1 parent d597a5f
commit b695677
Show file tree

Hide file tree

Showing 36 changed files with 506 additions and 268 deletions.
diff --git a/Lucene.Net.sln.DotSettings b/Lucene.Net.sln.DotSettings
@@ -1,4 +1,6 @@
 <wpf:ResourceDictionary xml:space="preserve" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:s="clr-namespace:System;assembly=mscorlib" xmlns:ss="urn:shemas-jetbrains-com:settings-storage-xaml" xmlns:wpf="http://schemas.microsoft.com/winfx/2006/xaml/presentation">
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=Coord/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=LUCENENET/@EntryIndexedValue">True</s:Boolean>
+	<s:Boolean x:Key="/Default/UserDictionary/Words/=stopword/@EntryIndexedValue">True</s:Boolean>
+	<s:Boolean x:Key="/Default/UserDictionary/Words/=stopwords/@EntryIndexedValue">True</s:Boolean>
 	<s:Boolean x:Key="/Default/UserDictionary/Words/=testsettings/@EntryIndexedValue">True</s:Boolean></wpf:ResourceDictionary>
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
@@ -127,9 +127,7 @@ protected virtual string UnpackValues(int k)
         /// <param name="filename"> the filename </param>
         /// <exception cref="IOException"> In case the parsing fails </exception>
         public virtual void LoadPatterns(string filename)
-        {
-            LoadPatterns(filename, Encoding.UTF8);
-        }
+            => LoadPatterns(filename, Encoding.UTF8);
 
         /// <summary>
         /// Read hyphenation patterns from an XML file.
@@ -149,9 +147,7 @@ public virtual void LoadPatterns(string filename, Encoding encoding)
         /// <param name="f"> a <see cref="FileInfo"/> object representing the file </param>
         /// <exception cref="IOException"> In case the parsing fails </exception>
         public virtual void LoadPatterns(FileInfo f)
-        {
-            LoadPatterns(f, Encoding.UTF8);
-        }
+            => LoadPatterns(f.FullName, Encoding.UTF8);
 
         /// <summary>
         /// Read hyphenation patterns from an XML file.
@@ -160,20 +156,15 @@ public virtual void LoadPatterns(FileInfo f)
         /// <param name="encoding">The character encoding to use</param>
         /// <exception cref="IOException"> In case the parsing fails </exception>
         public virtual void LoadPatterns(FileInfo f, Encoding encoding)
-        {
-            var src = new FileStream(f.FullName, FileMode.Open, FileAccess.Read);
-            LoadPatterns(src, encoding);
-        }
+            => LoadPatterns(f.FullName, encoding);
 
         /// <summary>
         /// Read hyphenation patterns from an XML file.
         /// </summary>
         /// <param name="source"> <see cref="Stream"/> input source for the file </param>
         /// <exception cref="IOException"> In case the parsing fails </exception>
         public virtual void LoadPatterns(Stream source)
-        {
-            LoadPatterns(source, Encoding.UTF8);
-        }
+            => LoadPatterns(source, Encoding.UTF8);
 
         /// <summary>
         /// Read hyphenation patterns from an XML file.

diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
@@ -17,9 +17,9 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
-     * 
+     *
      *      http://www.apache.org/licenses/LICENSE-2.0
-     * 
+     *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -62,7 +62,7 @@ public PatternParser()
             hyphenChar = '-'; // default
         }
 
-        public PatternParser(IPatternConsumer consumer) 
+        public PatternParser(IPatternConsumer consumer)
             : this()
         {
             this.consumer = consumer;
@@ -80,9 +80,7 @@ public virtual IPatternConsumer Consumer
         /// <param name="path">The complete file path to be read.</param>
         /// <exception cref="IOException"> In case of an exception while parsing </exception>
         public virtual void Parse(string path)
-        {
-            Parse(path, Encoding.UTF8);
-        }
+            => Parse(path, Encoding.UTF8);
 
         /// <summary>
         /// Parses a hyphenation pattern file.
@@ -103,9 +101,7 @@ public virtual void Parse(string path, Encoding encoding)
         /// <param name="file">  a <see cref="FileInfo"/> object representing the file  </param>
         /// <exception cref="IOException"> In case of an exception while parsing </exception>
         public virtual void Parse(FileInfo file)
-        {
-            Parse(file, Encoding.UTF8);
-        }
+            => Parse(file.FullName, Encoding.UTF8);
 
         /// <summary>
         /// Parses a hyphenation pattern file.
@@ -114,21 +110,16 @@ public virtual void Parse(FileInfo file)
         /// <param name="encoding">The character encoding to use</param>
         /// <exception cref="IOException"> In case of an exception while parsing </exception>
         public virtual void Parse(FileInfo file, Encoding encoding)
-        {
-            var xmlReaderSettings = GetXmlReaderSettings();
-
-            using var src = XmlReader.Create(new StreamReader(file.OpenRead(), encoding), xmlReaderSettings);
-            Parse(src);
-        }
+            => Parse(file.FullName, encoding);
 
         /// <summary>
         /// Parses a hyphenation pattern file.
         /// </summary>
         /// <param name="xmlStream">
         /// The stream containing the XML data.
         /// <para/>
-        /// The <see cref="PatternParser"/> scans the first bytes of the stream looking for a byte order mark 
-        /// or other sign of encoding. When encoding is determined, the encoding is used to continue reading 
+        /// The <see cref="PatternParser"/> scans the first bytes of the stream looking for a byte order mark
+        /// or other sign of encoding. When encoding is determined, the encoding is used to continue reading
         /// the stream, and processing continues parsing the input as a stream of (Unicode) characters.
         /// </param>
         /// <exception cref="IOException"> In case of an exception while parsing </exception>
@@ -396,9 +387,9 @@ public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToRe
         /// <summary>
         /// Receive notification of the beginning of an element.
         /// <para/>
-        /// The Parser will invoke this method at the beginning of every element in the XML document; 
-        /// there will be a corresponding <see cref="EndElement"/> event for every <see cref="StartElement"/> event 
-        /// (even when the element is empty). All of the element's content will be reported, 
+        /// The Parser will invoke this method at the beginning of every element in the XML document;
+        /// there will be a corresponding <see cref="EndElement"/> event for every <see cref="StartElement"/> event
+        /// (even when the element is empty). All of the element's content will be reported,
         /// in order, before the corresponding endElement event.
         /// </summary>
         /// <param name="uri">the Namespace URI, or the empty string if the element has no Namespace URI or if Namespace processing is not being performed</param>
@@ -442,8 +433,8 @@ public virtual void StartElement(string uri, string local, string raw, IDictiona
         /// <summary>
         /// Receive notification of the end of an element.
         /// <para/>
-        /// The parser will invoke this method at the end of every element in the XML document; 
-        /// there will be a corresponding <see cref="StartElement"/> event for every 
+        /// The parser will invoke this method at the end of every element in the XML document;
+        /// there will be a corresponding <see cref="StartElement"/> event for every
         /// <see cref="EndElement"/> event (even when the element is empty).
         /// </summary>
         /// <param name="uri">the Namespace URI, or the empty string if the element has no Namespace URI or if Namespace processing is not being performed</param>
@@ -489,9 +480,9 @@ public virtual void EndElement(string uri, string local, string raw)
         /// <summary>
         /// Receive notification of character data.
         /// <para/>
-        /// The Parser will call this method to report each chunk of character data. Parsers may 
-        /// return all contiguous character data in a single chunk, or they may split it into 
-        /// several chunks; however, all of the characters in any single event must come from 
+        /// The Parser will call this method to report each chunk of character data. Parsers may
+        /// return all contiguous character data in a single chunk, or they may split it into
+        /// several chunks; however, all of the characters in any single event must come from
         /// the same external entity so that the Locator provides useful information.
         /// <para/>
         /// The application must not attempt to read from the array outside of the specified range.
@@ -526,4 +517,4 @@ public virtual void Characters(char[] ch, int start, int length)
             }
         }
     }
-}
+}
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs
@@ -46,7 +46,7 @@ public class HyphenationCompoundWordTokenFilter : CompoundWordTokenFilterBase
         private readonly HyphenationTree hyphenator;
 
         /// <summary>
-        /// Creates a new <see cref="HyphenationCompoundWordTokenFilter"/> instance. 
+        /// Creates a new <see cref="HyphenationCompoundWordTokenFilter"/> instance.
         /// </summary>
         /// <param name="matchVersion">
         ///          Lucene version to enable correct Unicode 4.0 behavior in the
@@ -59,9 +59,9 @@ public class HyphenationCompoundWordTokenFilter : CompoundWordTokenFilterBase
         ///          the hyphenation pattern tree to use for hyphenation </param>
         /// <param name="dictionary">
         ///          the word dictionary to match against. </param>
-        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, 
+        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
             HyphenationTree hyphenator, CharArraySet dictionary)
-            : this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE, 
+            : this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
                   DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
         {
         }
@@ -88,10 +88,10 @@ public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStrea
         ///          only subwords shorter than this get to the output stream </param>
         /// <param name="onlyLongestMatch">
         ///          Add only the longest matching subword to the stream </param>
-        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, 
-            HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, int minSubwordSize, 
+        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
+            HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, int minSubwordSize,
             int maxSubwordSize, bool onlyLongestMatch)
-            : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, 
+            : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
                   onlyLongestMatch)
         {
             this.hyphenator = hyphenator;
@@ -103,10 +103,10 @@ public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStrea
         /// Calls <see cref="HyphenationCompoundWordTokenFilter.HyphenationCompoundWordTokenFilter(LuceneVersion, TokenStream, HyphenationTree, CharArraySet, int, int, int, bool)"/>
         /// </para>
         /// </summary>
-        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, 
-            HyphenationTree hyphenator, int minWordSize, int minSubwordSize, 
+        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
+            HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
             int maxSubwordSize)
-            : this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize, 
+            : this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize,
                   maxSubwordSize, false)
         {
         }
@@ -117,9 +117,9 @@ public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStrea
         /// Calls <see cref="HyphenationCompoundWordTokenFilter.HyphenationCompoundWordTokenFilter(LuceneVersion, TokenStream, HyphenationTree, int, int, int)"/>
         /// </para>
         /// </summary>
-        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, 
+        public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
             HyphenationTree hyphenator)
-            : this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, 
+            : this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE,
                   DEFAULT_MAX_SUBWORD_SIZE)
         {
         }
@@ -131,9 +131,7 @@ public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStrea
         /// <returns> An object representing the hyphenation patterns </returns>
         /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
         public static HyphenationTree GetHyphenationTree(string hyphenationFilename)
-        {
-            return GetHyphenationTree(hyphenationFilename, Encoding.UTF8);
-        }
+            => GetHyphenationTree(hyphenationFilename, Encoding.UTF8);
 
         /// <summary>
         /// Create a hyphenator tree
@@ -143,9 +141,7 @@ public static HyphenationTree GetHyphenationTree(string hyphenationFilename)
         /// <returns> An object representing the hyphenation patterns </returns>
         /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
         public static HyphenationTree GetHyphenationTree(string hyphenationFilename, Encoding encoding)
-        {
-            return GetHyphenationTree(new FileStream(hyphenationFilename, FileMode.Open, FileAccess.Read), encoding);
-        }
+            => GetHyphenationTree(new FileStream(hyphenationFilename, FileMode.Open, FileAccess.Read), encoding);
 
         /// <summary>
         /// Create a hyphenator tree
@@ -154,9 +150,7 @@ public static HyphenationTree GetHyphenationTree(string hyphenationFilename, Enc
         /// <returns> An object representing the hyphenation patterns </returns>
         /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
         public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile)
-        {
-            return GetHyphenationTree(hyphenationFile, Encoding.UTF8);
-        }
+            => GetHyphenationTree(hyphenationFile.FullName, Encoding.UTF8);
 
         /// <summary>
         /// Create a hyphenator tree
@@ -166,9 +160,7 @@ public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile)
         /// <returns> An object representing the hyphenation patterns </returns>
         /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
         public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile, Encoding encoding)
-        {
-            return GetHyphenationTree(new FileStream(hyphenationFile.FullName, FileMode.Open, FileAccess.Read), encoding);
-        }
+            => GetHyphenationTree(hyphenationFile.FullName, encoding);
 
         /// <summary>
         /// Create a hyphenator tree
@@ -177,9 +169,7 @@ public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile, Encod
         /// <returns> An object representing the hyphenation patterns </returns>
         /// <exception cref="IOException"> If there is a low-level I/O error. </exception>
         public static HyphenationTree GetHyphenationTree(Stream hyphenationSource)
-        {
-            return GetHyphenationTree(hyphenationSource, Encoding.UTF8);
-        }
+            => GetHyphenationTree(hyphenationSource, Encoding.UTF8);
 
         /// <summary>
         /// Create a hyphenator tree
@@ -227,7 +217,7 @@ protected override void Decompose()
                     // that are longer than minPartSize
                     if (partLength < this.m_minSubwordSize)
                     {
-                        // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the 
+                        // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
                         // calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
                         continue;
                     }
@@ -287,4 +277,4 @@ protected override void Decompose()
             }
         }
     }
-}
+}
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
@@ -77,6 +77,19 @@ public StopAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords)
         {
         }
 
+        /// <summary>
+        /// Builds an analyzer with the stop words from the given file. </summary>
+        /// <seealso cref="WordlistLoader.GetWordSet(TextReader, LuceneVersion)"/>
+        /// <param name="matchVersion"> See <see cref="LuceneVersion"/> </param>
+        /// <param name="stopwordsFileName"> File name to load stop words from  </param>
+        /// <remarks>
+        /// LUCENENET: This overload takes a string file name to avoid allocating a <see cref="FileInfo"/> object.
+        /// </remarks>
+        public StopAnalyzer(LuceneVersion matchVersion, string stopwordsFileName)
+            : this(matchVersion, LoadStopwordSet(stopwordsFileName, matchVersion))
+        {
+        }
+
         /// <summary>
         /// Builds an analyzer with the stop words from the given file. </summary>
         /// <seealso cref="WordlistLoader.GetWordSet(TextReader, LuceneVersion)"/>
@@ -111,4 +124,4 @@ protected internal override TokenStreamComponents CreateComponents(string fieldN
             return new TokenStreamComponents(source, new StopFilter(m_matchVersion, source, m_stopwords));
         }
     }
-}
+}