Skip to content

Commit

Permalink
Avoid unnecessary FileInfo/DirectoryInfo allocations, #832
Browse files Browse the repository at this point in the history
  • Loading branch information
paulirwin committed Dec 5, 2024
1 parent d597a5f commit b695677
Show file tree
Hide file tree
Showing 36 changed files with 506 additions and 268 deletions.
2 changes: 2 additions & 0 deletions Lucene.Net.sln.DotSettings
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
<wpf:ResourceDictionary xml:space="preserve" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:s="clr-namespace:System;assembly=mscorlib" xmlns:ss="urn:shemas-jetbrains-com:settings-storage-xaml" xmlns:wpf="http://schemas.microsoft.com/winfx/2006/xaml/presentation">
<s:Boolean x:Key="/Default/UserDictionary/Words/=Coord/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=LUCENENET/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=stopword/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=stopwords/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=testsettings/@EntryIndexedValue">True</s:Boolean></wpf:ResourceDictionary>
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,7 @@ protected virtual string UnpackValues(int k)
/// <param name="filename"> the filename </param>
/// <exception cref="IOException"> In case the parsing fails </exception>
public virtual void LoadPatterns(string filename)
{
LoadPatterns(filename, Encoding.UTF8);
}
=> LoadPatterns(filename, Encoding.UTF8);

/// <summary>
/// Read hyphenation patterns from an XML file.
Expand All @@ -149,9 +147,7 @@ public virtual void LoadPatterns(string filename, Encoding encoding)
/// <param name="f"> a <see cref="FileInfo"/> object representing the file </param>
/// <exception cref="IOException"> In case the parsing fails </exception>
public virtual void LoadPatterns(FileInfo f)
{
LoadPatterns(f, Encoding.UTF8);
}
=> LoadPatterns(f.FullName, Encoding.UTF8);

/// <summary>
/// Read hyphenation patterns from an XML file.
Expand All @@ -160,20 +156,15 @@ public virtual void LoadPatterns(FileInfo f)
/// <param name="encoding">The character encoding to use</param>
/// <exception cref="IOException"> In case the parsing fails </exception>
public virtual void LoadPatterns(FileInfo f, Encoding encoding)
{
var src = new FileStream(f.FullName, FileMode.Open, FileAccess.Read);
LoadPatterns(src, encoding);
}
=> LoadPatterns(f.FullName, encoding);

/// <summary>
/// Read hyphenation patterns from an XML file.
/// </summary>
/// <param name="source"> <see cref="Stream"/> input source for the file </param>
/// <exception cref="IOException"> In case the parsing fails </exception>
public virtual void LoadPatterns(Stream source)
{
LoadPatterns(source, Encoding.UTF8);
}
=> LoadPatterns(source, Encoding.UTF8);

/// <summary>
/// Read hyphenation patterns from an XML file.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Expand Down Expand Up @@ -62,7 +62,7 @@ public PatternParser()
hyphenChar = '-'; // default
}

public PatternParser(IPatternConsumer consumer)
public PatternParser(IPatternConsumer consumer)
: this()
{
this.consumer = consumer;
Expand All @@ -80,9 +80,7 @@ public virtual IPatternConsumer Consumer
/// <param name="path">The complete file path to be read.</param>
/// <exception cref="IOException"> In case of an exception while parsing </exception>
public virtual void Parse(string path)
{
Parse(path, Encoding.UTF8);
}
=> Parse(path, Encoding.UTF8);

/// <summary>
/// Parses a hyphenation pattern file.
Expand All @@ -103,9 +101,7 @@ public virtual void Parse(string path, Encoding encoding)
/// <param name="file"> a <see cref="FileInfo"/> object representing the file </param>
/// <exception cref="IOException"> In case of an exception while parsing </exception>
public virtual void Parse(FileInfo file)
{
Parse(file, Encoding.UTF8);
}
=> Parse(file.FullName, Encoding.UTF8);

/// <summary>
/// Parses a hyphenation pattern file.
Expand All @@ -114,21 +110,16 @@ public virtual void Parse(FileInfo file)
/// <param name="encoding">The character encoding to use</param>
/// <exception cref="IOException"> In case of an exception while parsing </exception>
public virtual void Parse(FileInfo file, Encoding encoding)
{
var xmlReaderSettings = GetXmlReaderSettings();

using var src = XmlReader.Create(new StreamReader(file.OpenRead(), encoding), xmlReaderSettings);
Parse(src);
}
=> Parse(file.FullName, encoding);

/// <summary>
/// Parses a hyphenation pattern file.
/// </summary>
/// <param name="xmlStream">
/// The stream containing the XML data.
/// <para/>
/// The <see cref="PatternParser"/> scans the first bytes of the stream looking for a byte order mark
/// or other sign of encoding. When encoding is determined, the encoding is used to continue reading
/// The <see cref="PatternParser"/> scans the first bytes of the stream looking for a byte order mark
/// or other sign of encoding. When encoding is determined, the encoding is used to continue reading
/// the stream, and processing continues parsing the input as a stream of (Unicode) characters.
/// </param>
/// <exception cref="IOException"> In case of an exception while parsing </exception>
Expand Down Expand Up @@ -396,9 +387,9 @@ public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToRe
/// <summary>
/// Receive notification of the beginning of an element.
/// <para/>
/// The Parser will invoke this method at the beginning of every element in the XML document;
/// there will be a corresponding <see cref="EndElement"/> event for every <see cref="StartElement"/> event
/// (even when the element is empty). All of the element's content will be reported,
/// The Parser will invoke this method at the beginning of every element in the XML document;
/// there will be a corresponding <see cref="EndElement"/> event for every <see cref="StartElement"/> event
/// (even when the element is empty). All of the element's content will be reported,
/// in order, before the corresponding endElement event.
/// </summary>
/// <param name="uri">the Namespace URI, or the empty string if the element has no Namespace URI or if Namespace processing is not being performed</param>
Expand Down Expand Up @@ -442,8 +433,8 @@ public virtual void StartElement(string uri, string local, string raw, IDictiona
/// <summary>
/// Receive notification of the end of an element.
/// <para/>
/// The parser will invoke this method at the end of every element in the XML document;
/// there will be a corresponding <see cref="StartElement"/> event for every
/// The parser will invoke this method at the end of every element in the XML document;
/// there will be a corresponding <see cref="StartElement"/> event for every
/// <see cref="EndElement"/> event (even when the element is empty).
/// </summary>
/// <param name="uri">the Namespace URI, or the empty string if the element has no Namespace URI or if Namespace processing is not being performed</param>
Expand Down Expand Up @@ -489,9 +480,9 @@ public virtual void EndElement(string uri, string local, string raw)
/// <summary>
/// Receive notification of character data.
/// <para/>
/// The Parser will call this method to report each chunk of character data. Parsers may
/// return all contiguous character data in a single chunk, or they may split it into
/// several chunks; however, all of the characters in any single event must come from
/// The Parser will call this method to report each chunk of character data. Parsers may
/// return all contiguous character data in a single chunk, or they may split it into
/// several chunks; however, all of the characters in any single event must come from
/// the same external entity so that the Locator provides useful information.
/// <para/>
/// The application must not attempt to read from the array outside of the specified range.
Expand Down Expand Up @@ -526,4 +517,4 @@ public virtual void Characters(char[] ch, int start, int length)
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public class HyphenationCompoundWordTokenFilter : CompoundWordTokenFilterBase
private readonly HyphenationTree hyphenator;

/// <summary>
/// Creates a new <see cref="HyphenationCompoundWordTokenFilter"/> instance.
/// Creates a new <see cref="HyphenationCompoundWordTokenFilter"/> instance.
/// </summary>
/// <param name="matchVersion">
/// Lucene version to enable correct Unicode 4.0 behavior in the
Expand All @@ -59,9 +59,9 @@ public class HyphenationCompoundWordTokenFilter : CompoundWordTokenFilterBase
/// the hyphenation pattern tree to use for hyphenation </param>
/// <param name="dictionary">
/// the word dictionary to match against. </param>
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
HyphenationTree hyphenator, CharArraySet dictionary)
: this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
: this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
{
}
Expand All @@ -88,10 +88,10 @@ public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStrea
/// only subwords shorter than this get to the output stream </param>
/// <param name="onlyLongestMatch">
/// Add only the longest matching subword to the stream </param>
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, int minSubwordSize,
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, int minSubwordSize,
int maxSubwordSize, bool onlyLongestMatch)
: base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
: base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
onlyLongestMatch)
{
this.hyphenator = hyphenator;
Expand All @@ -103,10 +103,10 @@ public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStrea
/// Calls <see cref="HyphenationCompoundWordTokenFilter.HyphenationCompoundWordTokenFilter(LuceneVersion, TokenStream, HyphenationTree, CharArraySet, int, int, int, bool)"/>
/// </para>
/// </summary>
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
int maxSubwordSize)
: this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize,
: this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize,
maxSubwordSize, false)
{
}
Expand All @@ -117,9 +117,9 @@ public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStrea
/// Calls <see cref="HyphenationCompoundWordTokenFilter.HyphenationCompoundWordTokenFilter(LuceneVersion, TokenStream, HyphenationTree, int, int, int)"/>
/// </para>
/// </summary>
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
HyphenationTree hyphenator)
: this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE,
: this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE,
DEFAULT_MAX_SUBWORD_SIZE)
{
}
Expand All @@ -131,9 +131,7 @@ public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStrea
/// <returns> An object representing the hyphenation patterns </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public static HyphenationTree GetHyphenationTree(string hyphenationFilename)
{
return GetHyphenationTree(hyphenationFilename, Encoding.UTF8);
}
=> GetHyphenationTree(hyphenationFilename, Encoding.UTF8);

/// <summary>
/// Create a hyphenator tree
Expand All @@ -143,9 +141,7 @@ public static HyphenationTree GetHyphenationTree(string hyphenationFilename)
/// <returns> An object representing the hyphenation patterns </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public static HyphenationTree GetHyphenationTree(string hyphenationFilename, Encoding encoding)
{
return GetHyphenationTree(new FileStream(hyphenationFilename, FileMode.Open, FileAccess.Read), encoding);
}
=> GetHyphenationTree(new FileStream(hyphenationFilename, FileMode.Open, FileAccess.Read), encoding);

/// <summary>
/// Create a hyphenator tree
Expand All @@ -154,9 +150,7 @@ public static HyphenationTree GetHyphenationTree(string hyphenationFilename, Enc
/// <returns> An object representing the hyphenation patterns </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile)
{
return GetHyphenationTree(hyphenationFile, Encoding.UTF8);
}
=> GetHyphenationTree(hyphenationFile.FullName, Encoding.UTF8);

/// <summary>
/// Create a hyphenator tree
Expand All @@ -166,9 +160,7 @@ public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile)
/// <returns> An object representing the hyphenation patterns </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile, Encoding encoding)
{
return GetHyphenationTree(new FileStream(hyphenationFile.FullName, FileMode.Open, FileAccess.Read), encoding);
}
=> GetHyphenationTree(hyphenationFile.FullName, encoding);

/// <summary>
/// Create a hyphenator tree
Expand All @@ -177,9 +169,7 @@ public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile, Encod
/// <returns> An object representing the hyphenation patterns </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public static HyphenationTree GetHyphenationTree(Stream hyphenationSource)
{
return GetHyphenationTree(hyphenationSource, Encoding.UTF8);
}
=> GetHyphenationTree(hyphenationSource, Encoding.UTF8);

/// <summary>
/// Create a hyphenator tree
Expand Down Expand Up @@ -227,7 +217,7 @@ protected override void Decompose()
// that are longer than minPartSize
if (partLength < this.m_minSubwordSize)
{
// BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
// BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
// calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
continue;
}
Expand Down Expand Up @@ -287,4 +277,4 @@ protected override void Decompose()
}
}
}
}
}
15 changes: 14 additions & 1 deletion src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,19 @@ public StopAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords)
{
}

/// <summary>
/// Builds an analyzer with the stop words loaded from the file with the given name.
/// </summary>
/// <param name="matchVersion"> See <see cref="LuceneVersion"/>. Also passed along to the stop word loader. </param>
/// <param name="stopwordsFileName"> File name to load stop words from </param>
/// <remarks>
/// The stop words are obtained by calling <c>LoadStopwordSet(stopwordsFileName, matchVersion)</c>, and the
/// result is forwarded to another <see cref="StopAnalyzer"/> constructor together with
/// <paramref name="matchVersion"/>. This constructor does no other work of its own.
/// <para/>
/// LUCENENET: This overload takes a string file name to avoid allocating a <see cref="FileInfo"/> object.
/// </remarks>
/// <seealso cref="WordlistLoader.GetWordSet(TextReader, LuceneVersion)"/>
public StopAnalyzer(LuceneVersion matchVersion, string stopwordsFileName)
: this(matchVersion, LoadStopwordSet(stopwordsFileName, matchVersion))
{
}

/// <summary>
/// Builds an analyzer with the stop words from the given file. </summary>
/// <seealso cref="WordlistLoader.GetWordSet(TextReader, LuceneVersion)"/>
Expand Down Expand Up @@ -111,4 +124,4 @@ protected internal override TokenStreamComponents CreateComponents(string fieldN
return new TokenStreamComponents(source, new StopFilter(m_matchVersion, source, m_stopwords));
}
}
}
}
Loading

0 comments on commit b695677

Please sign in to comment.