From b695677ff233885eb181ee3ec412823600873709 Mon Sep 17 00:00:00 2001 From: Paul Irwin Date: Thu, 5 Dec 2024 16:46:04 -0700 Subject: [PATCH] Avoid unnecessary FileInfo/DirectoryInfo allocations, #832 --- Lucene.Net.sln.DotSettings | 2 + .../Compound/Hyphenation/HyphenationTree.cs | 17 +- .../Compound/Hyphenation/PatternParser.cs | 43 ++--- .../HyphenationCompoundWordTokenFilter.cs | 46 ++---- .../Analysis/Core/StopAnalyzer.cs | 15 +- .../Analysis/Util/FilesystemResourceLoader.cs | 51 ++++-- .../Analysis/Util/StopwordAnalyzerBase.cs | 33 +++- .../Dict/BinaryDictionary.cs | 11 +- .../Tools/TokenInfoDictionaryBuilder.cs | 13 +- .../AnalyzerProfile.cs | 11 +- .../Hhmm/BigramDictionary.cs | 4 +- src/Lucene.Net.Benchmark/ByTask/Benchmark.cs | 8 +- .../ByTask/Feeds/EnwikiContentSource.cs | 7 +- .../ByTask/Feeds/FileBasedQueryMaker.cs | 7 +- .../ByTask/Feeds/LineDocSource.cs | 23 ++- .../ByTask/PerfRunData.cs | 20 +-- .../ByTask/Tasks/AnalyzerFactoryTask.cs | 2 +- .../ByTask/Tasks/CreateIndexTask.cs | 4 +- .../ByTask/Tasks/WriteEnwikiLineDocTask.cs | 17 +- .../ByTask/Tasks/WriteLineDocTask.cs | 8 +- .../ByTask/Utils/FileUtils.cs | 20 ++- .../ByTask/Utils/StreamUtils.cs | 43 +++-- .../Quality/Trec/QueryDriver.cs | 7 +- .../Index/CompoundFileExtractor.cs | 11 +- src/Lucene.Net.Misc/Index/IndexSplitter.cs | 12 +- .../Spell/PlainTextDictionary.cs | 19 ++- .../Jaspell/JaspellTernarySearchTrie.cs | 154 ++++++++++++++---- .../ConfigurationBuilderExtensions.cs | 6 +- .../Tasks/WriteEnwikiLineDocTaskTest.cs | 29 ++-- src/Lucene.Net/Store/MMapDirectory.cs | 4 +- src/Lucene.Net/Store/NIOFSDirectory.cs | 16 +- src/Lucene.Net/Store/SimpleFSDirectory.cs | 18 +- src/Lucene.Net/Store/SimpleFSLockFactory.cs | 32 ++-- src/Lucene.Net/Util/CommandLineUtil.cs | 16 +- src/Lucene.Net/Util/Fst/FST.cs | 14 +- src/Lucene.Net/Util/IOUtils.cs | 31 ++++ 36 files changed, 506 insertions(+), 268 deletions(-) diff --git a/Lucene.Net.sln.DotSettings b/Lucene.Net.sln.DotSettings index 
6fd109800d..4e7652b496 100644 --- a/Lucene.Net.sln.DotSettings +++ b/Lucene.Net.sln.DotSettings @@ -1,4 +1,6 @@  True True + True + True True \ No newline at end of file diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs index 4db6c3761c..a5b59b0ec9 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs @@ -127,9 +127,7 @@ protected virtual string UnpackValues(int k) /// the filename /// In case the parsing fails public virtual void LoadPatterns(string filename) - { - LoadPatterns(filename, Encoding.UTF8); - } + => LoadPatterns(filename, Encoding.UTF8); /// /// Read hyphenation patterns from an XML file. @@ -149,9 +147,7 @@ public virtual void LoadPatterns(string filename, Encoding encoding) /// a object representing the file /// In case the parsing fails public virtual void LoadPatterns(FileInfo f) - { - LoadPatterns(f, Encoding.UTF8); - } + => LoadPatterns(f.FullName, Encoding.UTF8); /// /// Read hyphenation patterns from an XML file. @@ -160,10 +156,7 @@ public virtual void LoadPatterns(FileInfo f) /// The character encoding to use /// In case the parsing fails public virtual void LoadPatterns(FileInfo f, Encoding encoding) - { - var src = new FileStream(f.FullName, FileMode.Open, FileAccess.Read); - LoadPatterns(src, encoding); - } + => LoadPatterns(f.FullName, encoding); /// /// Read hyphenation patterns from an XML file. @@ -171,9 +164,7 @@ public virtual void LoadPatterns(FileInfo f, Encoding encoding) /// input source for the file /// In case the parsing fails public virtual void LoadPatterns(Stream source) - { - LoadPatterns(source, Encoding.UTF8); - } + => LoadPatterns(source, Encoding.UTF8); /// /// Read hyphenation patterns from an XML file. 
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs index 7d9c5adaa9..845b221476 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs @@ -17,9 +17,9 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -62,7 +62,7 @@ public PatternParser() hyphenChar = '-'; // default } - public PatternParser(IPatternConsumer consumer) + public PatternParser(IPatternConsumer consumer) : this() { this.consumer = consumer; @@ -80,9 +80,7 @@ public virtual IPatternConsumer Consumer /// The complete file path to be read. /// In case of an exception while parsing public virtual void Parse(string path) - { - Parse(path, Encoding.UTF8); - } + => Parse(path, Encoding.UTF8); /// /// Parses a hyphenation pattern file. @@ -103,9 +101,7 @@ public virtual void Parse(string path, Encoding encoding) /// a object representing the file /// In case of an exception while parsing public virtual void Parse(FileInfo file) - { - Parse(file, Encoding.UTF8); - } + => Parse(file.FullName, Encoding.UTF8); /// /// Parses a hyphenation pattern file. 
@@ -114,12 +110,7 @@ public virtual void Parse(FileInfo file) /// The character encoding to use /// In case of an exception while parsing public virtual void Parse(FileInfo file, Encoding encoding) - { - var xmlReaderSettings = GetXmlReaderSettings(); - - using var src = XmlReader.Create(new StreamReader(file.OpenRead(), encoding), xmlReaderSettings); - Parse(src); - } + => Parse(file.FullName, encoding); /// /// Parses a hyphenation pattern file. @@ -127,8 +118,8 @@ public virtual void Parse(FileInfo file, Encoding encoding) /// /// The stream containing the XML data. /// - /// The scans the first bytes of the stream looking for a byte order mark - /// or other sign of encoding. When encoding is determined, the encoding is used to continue reading + /// The scans the first bytes of the stream looking for a byte order mark + /// or other sign of encoding. When encoding is determined, the encoding is used to continue reading /// the stream, and processing continues parsing the input as a stream of (Unicode) characters. /// /// In case of an exception while parsing @@ -396,9 +387,9 @@ public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToRe /// /// Receive notification of the beginning of an element. /// - /// The Parser will invoke this method at the beginning of every element in the XML document; - /// there will be a corresponding event for every event - /// (even when the element is empty). All of the element's content will be reported, + /// The Parser will invoke this method at the beginning of every element in the XML document; + /// there will be a corresponding event for every event + /// (even when the element is empty). All of the element's content will be reported, /// in order, before the corresponding endElement event. 
/// /// the Namespace URI, or the empty string if the element has no Namespace URI or if Namespace processing is not being performed @@ -442,8 +433,8 @@ public virtual void StartElement(string uri, string local, string raw, IDictiona /// /// Receive notification of the end of an element. /// - /// The parser will invoke this method at the end of every element in the XML document; - /// there will be a corresponding event for every + /// The parser will invoke this method at the end of every element in the XML document; + /// there will be a corresponding event for every /// event (even when the element is empty). /// /// the Namespace URI, or the empty string if the element has no Namespace URI or if Namespace processing is not being performed @@ -489,9 +480,9 @@ public virtual void EndElement(string uri, string local, string raw) /// /// Receive notification of character data. /// - /// The Parser will call this method to report each chunk of character data. Parsers may - /// return all contiguous character data in a single chunk, or they may split it into - /// several chunks; however, all of the characters in any single event must come from + /// The Parser will call this method to report each chunk of character data. Parsers may + /// return all contiguous character data in a single chunk, or they may split it into + /// several chunks; however, all of the characters in any single event must come from /// the same external entity so that the Locator provides useful information. /// /// The application must not attempt to read from the array outside of the specified range. 
@@ -526,4 +517,4 @@ public virtual void Characters(char[] ch, int start, int length) } } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs index ae7fed70be..037025e433 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/HyphenationCompoundWordTokenFilter.cs @@ -46,7 +46,7 @@ public class HyphenationCompoundWordTokenFilter : CompoundWordTokenFilterBase private readonly HyphenationTree hyphenator; /// - /// Creates a new instance. + /// Creates a new instance. /// /// /// Lucene version to enable correct Unicode 4.0 behavior in the @@ -59,9 +59,9 @@ public class HyphenationCompoundWordTokenFilter : CompoundWordTokenFilterBase /// the hyphenation pattern tree to use for hyphenation /// /// the word dictionary to match against. 
- public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, + public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, HyphenationTree hyphenator, CharArraySet dictionary) - : this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE, + : this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false) { } @@ -88,10 +88,10 @@ public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStrea /// only subwords shorter than this get to the output stream /// /// Add only the longest matching subword to the stream - public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, - HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, int minSubwordSize, + public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, + HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, int minSubwordSize, int maxSubwordSize, bool onlyLongestMatch) - : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, + : base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch) { this.hyphenator = hyphenator; @@ -103,10 +103,10 @@ public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStrea /// Calls /// /// - public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, - HyphenationTree hyphenator, int minWordSize, int minSubwordSize, + public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, + HyphenationTree hyphenator, int minWordSize, int minSubwordSize, int maxSubwordSize) - : this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize, + : this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize, maxSubwordSize, false) { } @@ -117,9 +117,9 @@ public 
HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStrea /// Calls /// /// - public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, + public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input, HyphenationTree hyphenator) - : this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, + : this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE) { } @@ -131,9 +131,7 @@ public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStrea /// An object representing the hyphenation patterns /// If there is a low-level I/O error. public static HyphenationTree GetHyphenationTree(string hyphenationFilename) - { - return GetHyphenationTree(hyphenationFilename, Encoding.UTF8); - } + => GetHyphenationTree(hyphenationFilename, Encoding.UTF8); /// /// Create a hyphenator tree @@ -143,9 +141,7 @@ public static HyphenationTree GetHyphenationTree(string hyphenationFilename) /// An object representing the hyphenation patterns /// If there is a low-level I/O error. public static HyphenationTree GetHyphenationTree(string hyphenationFilename, Encoding encoding) - { - return GetHyphenationTree(new FileStream(hyphenationFilename, FileMode.Open, FileAccess.Read), encoding); - } + => GetHyphenationTree(new FileStream(hyphenationFilename, FileMode.Open, FileAccess.Read), encoding); /// /// Create a hyphenator tree @@ -154,9 +150,7 @@ public static HyphenationTree GetHyphenationTree(string hyphenationFilename, Enc /// An object representing the hyphenation patterns /// If there is a low-level I/O error. 
public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile) - { - return GetHyphenationTree(hyphenationFile, Encoding.UTF8); - } + => GetHyphenationTree(hyphenationFile.FullName, Encoding.UTF8); /// /// Create a hyphenator tree @@ -166,9 +160,7 @@ public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile) /// An object representing the hyphenation patterns /// If there is a low-level I/O error. public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile, Encoding encoding) - { - return GetHyphenationTree(new FileStream(hyphenationFile.FullName, FileMode.Open, FileAccess.Read), encoding); - } + => GetHyphenationTree(hyphenationFile.FullName, encoding); /// /// Create a hyphenator tree @@ -177,9 +169,7 @@ public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile, Encod /// An object representing the hyphenation patterns /// If there is a low-level I/O error. public static HyphenationTree GetHyphenationTree(Stream hyphenationSource) - { - return GetHyphenationTree(hyphenationSource, Encoding.UTF8); - } + => GetHyphenationTree(hyphenationSource, Encoding.UTF8); /// /// Create a hyphenator tree @@ -227,7 +217,7 @@ protected override void Decompose() // that are longer than minPartSize if (partLength < this.m_minSubwordSize) { - // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the + // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the // calculation above, and we rely upon minSubwordSize being >=0 to filter them out... 
continue; } @@ -287,4 +277,4 @@ protected override void Decompose() } } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs index b941d97888..18c5cf0f9f 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs @@ -77,6 +77,19 @@ public StopAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords) { } + /// + /// Builds an analyzer with the stop words from the given file. + /// + /// See + /// File name to load stop words from + /// + /// LUCENENET: This overload takes a string file name to avoid allocating a object. + /// + public StopAnalyzer(LuceneVersion matchVersion, string stopwordsFileName) + : this(matchVersion, LoadStopwordSet(stopwordsFileName, matchVersion)) + { + } + /// /// Builds an analyzer with the stop words from the given file. /// @@ -111,4 +124,4 @@ protected internal override TokenStreamComponents CreateComponents(string fieldN return new TokenStreamComponents(source, new StopFilter(m_matchVersion, source, m_stopwords)); } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs index 7b05a8f12f..538cd2b1f5 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/FilesystemResourceLoader.cs @@ -1,6 +1,7 @@ // Lucene version compatibility level 4.8.1 using System; using System.IO; +#nullable enable namespace Lucene.Net.Analysis.Util { @@ -25,12 +26,12 @@ namespace Lucene.Net.Analysis.Util /// Simple that opens resource files /// from the local file system, optionally resolving against /// a base directory. 
- /// + /// /// This loader wraps a delegate /// that is used to resolve all files, the current base directory /// does not contain. is always resolved /// against the delegate, as an is needed. - /// + /// /// /// You can chain several s /// to allow lookup of files in more than one base directory. @@ -38,7 +39,7 @@ namespace Lucene.Net.Analysis.Util /// public sealed class FilesystemResourceLoader : IResourceLoader { - private readonly DirectoryInfo baseDirectory; + private readonly string? baseDirectory; // LUCENENET specific: changed to use string directory name instead of allocating a DirectoryInfo (#832) private readonly IResourceLoader @delegate; /// @@ -47,7 +48,7 @@ public sealed class FilesystemResourceLoader : IResourceLoader /// are delegated to context classloader. /// public FilesystemResourceLoader() - : this((DirectoryInfo)null) + : this((string?)null) { } @@ -57,22 +58,44 @@ public FilesystemResourceLoader() /// Files not found in file system and class lookups are delegated to context /// classloader. /// - public FilesystemResourceLoader(DirectoryInfo baseDirectory) + public FilesystemResourceLoader(string? baseDirectory) : this(baseDirectory, new ClasspathResourceLoader(typeof(FilesystemResourceLoader))) { } + /// + /// Creates a resource loader that resolves resources against the given + /// base directory (may be null to refer to CWD). + /// Files not found in file system and class lookups are delegated to context + /// classloader. + /// + public FilesystemResourceLoader(DirectoryInfo? baseDirectory) + : this(baseDirectory?.FullName, new ClasspathResourceLoader(typeof(FilesystemResourceLoader))) + { + } + + /// + /// Creates a resource loader that resolves resources against the given + /// base directory (may be null to refer to CWD). + /// Files not found in file system and class lookups are delegated + /// to the given delegate . + /// + public FilesystemResourceLoader(DirectoryInfo? 
baseDirectory, IResourceLoader @delegate) + : this(baseDirectory?.FullName, @delegate) + { + } + /// /// Creates a resource loader that resolves resources against the given /// base directory (may be null to refer to CWD). /// Files not found in file system and class lookups are delegated /// to the given delegate . /// - public FilesystemResourceLoader(DirectoryInfo baseDirectory, IResourceLoader @delegate) + public FilesystemResourceLoader(string? baseDirectory, IResourceLoader @delegate) { // LUCENENET NOTE: If you call DirectoryInfo.Create() it doesn't set the DirectoryInfo.Exists // flag to true, so we use the Directory object to check the path explicitly. - if (!(baseDirectory is null) && !Directory.Exists(baseDirectory.FullName)) + if (baseDirectory is not null && !Directory.Exists(baseDirectory)) { throw new ArgumentException("baseDirectory is not a directory or is null"); } @@ -89,12 +112,12 @@ public Stream OpenResource(string resource) { try { - FileInfo file = null; + string? file = null; // LUCENENET specific: changed to use string file name instead of allocating a FileInfo (#832) // First try absolute. 
if (File.Exists(resource)) { - file = new FileInfo(resource); + file = resource; } else { @@ -102,22 +125,22 @@ public Stream OpenResource(string resource) var fullPath = System.IO.Path.GetFullPath(resource); if (File.Exists(fullPath)) { - file = new FileInfo(fullPath); + file = fullPath; } else if (baseDirectory != null) { // Try to combine with the base directory - string based = System.IO.Path.Combine(baseDirectory.FullName, resource); + string based = System.IO.Path.Combine(baseDirectory, resource); if (File.Exists(based)) { - file = new FileInfo(based); + file = based; } } } if (file != null) { - return file.OpenRead(); + return new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.Read); } // Fallback on the inner resource loader (this could fail) @@ -139,4 +162,4 @@ public Type FindType(string cname) return @delegate.FindType(cname); } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs index cded2217ba..59bf9123c5 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/StopwordAnalyzerBase.cs @@ -26,7 +26,7 @@ namespace Lucene.Net.Analysis.Util */ /// - /// Base class for s that need to make use of stopword sets. + /// Base class for s that need to make use of stopword sets. /// public abstract class StopwordAnalyzerBase : Analyzer { @@ -104,6 +104,35 @@ protected static CharArraySet LoadStopwordSet(bool ignoreCase, Type aClass, stri } } + /// + /// Creates a from a file. + /// + /// + /// the stopwords file name to load + /// + /// + /// the Lucene version for cross version compatibility + /// a containing the distinct stopwords from the given + /// file + /// + /// if loading the stopwords throws an + /// + /// LUCENENET: This overload takes a string file name to avoid allocating a object. 
+ /// + protected static CharArraySet LoadStopwordSet(string stopwordsFileName, LuceneVersion matchVersion) + { + TextReader reader = null; + try + { + reader = IOUtils.GetDecodingReader(stopwordsFileName, Encoding.UTF8); + return WordlistLoader.GetWordSet(reader, matchVersion); + } + finally + { + IOUtils.Dispose(reader); + } + } + /// /// Creates a from a file. /// @@ -154,4 +183,4 @@ protected static CharArraySet LoadStopwordSet(TextReader stopwords, LuceneVersio } } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Analysis.Kuromoji/Dict/BinaryDictionary.cs b/src/Lucene.Net.Analysis.Kuromoji/Dict/BinaryDictionary.cs index 13c530afd6..232ef4a65b 100644 --- a/src/Lucene.Net.Analysis.Kuromoji/Dict/BinaryDictionary.cs +++ b/src/Lucene.Net.Analysis.Kuromoji/Dict/BinaryDictionary.cs @@ -7,6 +7,7 @@ using System; using System.IO; using System.Security; +using Directory = System.IO.Directory; namespace Lucene.Net.Analysis.Ja.Dict { @@ -67,21 +68,21 @@ private static string LoadDataDir() // variable. If it is null or empty after this process, we need to // load the embedded files. 
string candidatePath = System.IO.Path.Combine(currentPath, DATA_SUBDIR); - if (System.IO.Directory.Exists(candidatePath)) + if (Directory.Exists(candidatePath)) { return candidatePath; } - while (new DirectoryInfo(currentPath).Parent != null) + while (Directory.GetParent(currentPath) is { } parent) // LUCENENET: Reduce DirectoryInfo allocations by only getting parent once per iteration (#832) { try { - candidatePath = System.IO.Path.Combine(new DirectoryInfo(currentPath).Parent.FullName, DATA_SUBDIR); - if (System.IO.Directory.Exists(candidatePath)) + candidatePath = System.IO.Path.Combine(parent.FullName, DATA_SUBDIR); + if (Directory.Exists(candidatePath)) { return candidatePath; } - currentPath = new DirectoryInfo(currentPath).Parent.FullName; + currentPath = parent.FullName; } catch (SecurityException) { diff --git a/src/Lucene.Net.Analysis.Kuromoji/Tools/TokenInfoDictionaryBuilder.cs b/src/Lucene.Net.Analysis.Kuromoji/Tools/TokenInfoDictionaryBuilder.cs index 3ba3ac3a5b..ba216c888e 100644 --- a/src/Lucene.Net.Analysis.Kuromoji/Tools/TokenInfoDictionaryBuilder.cs +++ b/src/Lucene.Net.Analysis.Kuromoji/Tools/TokenInfoDictionaryBuilder.cs @@ -53,9 +53,10 @@ public TokenInfoDictionaryBuilder(DictionaryBuilder.DictionaryFormat format, str public virtual TokenInfoDictionaryWriter Build(string dirname) { JCG.List csvFiles = new JCG.List(); - foreach (FileInfo file in new DirectoryInfo(dirname).EnumerateFiles("*.csv")) + // LUCENENET specific: changed to use string file names instead of allocating a FileInfo (#832) + foreach (string file in Directory.EnumerateFiles(dirname, "*.csv")) { - csvFiles.Add(file.FullName); + csvFiles.Add(file); } csvFiles.Sort(StringComparer.Ordinal); return BuildDictionary(csvFiles); @@ -159,10 +160,10 @@ public virtual TokenInfoDictionaryWriter BuildDictionary(IList csvFiles) return dictionary; } - + /// /// IPADIC features - /// + /// /// 0 - surface /// 1 - left cost /// 2 - right cost @@ -171,9 +172,9 @@ public virtual 
TokenInfoDictionaryWriter BuildDictionary(IList csvFiles) /// 10 - base form /// 11 - reading /// 12 - pronounciation - /// + /// /// UniDic features - /// + /// /// 0 - surface /// 1 - left cost /// 2 - right cost diff --git a/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs b/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs index 70c85a4400..033c5d0015 100644 --- a/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs +++ b/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs @@ -34,7 +34,7 @@ namespace Lucene.Net.Analysis.Cn.Smart /// To place the files in an alternate location, set an environment variable named "smartcn.data.dir" /// with the name of the directory the "bigramdict.dct" and "coredict.dct" files can be located within. /// - /// The default "bigramdict.dct" and "coredict.dct" files can be found at: + /// The default "bigramdict.dct" and "coredict.dct" files can be found at: /// https://issues.apache.org/jira/browse/LUCENE-1629. /// /// @lucene.experimental @@ -51,7 +51,7 @@ static AnalyzerProfile() Init(); } - // LUCENENET specific - changed the logic here to leave the + // LUCENENET specific - changed the logic here to leave the // ANALYSIS_DATA_DIR an empty string if it is not found. 
This // allows us to skip loading files from disk if there are no files // to load (and fixes LUCENE-1817 that prevents the on-disk files @@ -79,19 +79,18 @@ private static void Init() ANALYSIS_DATA_DIR = candidatePath; return; } - try { - while (new DirectoryInfo(currentPath).Parent != null) + while (Directory.GetParent(currentPath) is { } parent) // LUCENENET: Reduce DirectoryInfo allocations by only getting parent once per iteration (#832) { - candidatePath = System.IO.Path.Combine(new DirectoryInfo(currentPath).Parent.FullName, dirName); + candidatePath = System.IO.Path.Combine(parent.FullName, dirName); if (Directory.Exists(candidatePath)) { ANALYSIS_DATA_DIR = candidatePath; return; } - currentPath = new DirectoryInfo(currentPath).Parent.FullName; + currentPath = parent.FullName; } } catch (SecurityException) diff --git a/src/Lucene.Net.Analysis.SmartCn/Hhmm/BigramDictionary.cs b/src/Lucene.Net.Analysis.SmartCn/Hhmm/BigramDictionary.cs index b9d16273ae..64f90bc8a1 100644 --- a/src/Lucene.Net.Analysis.SmartCn/Hhmm/BigramDictionary.cs +++ b/src/Lucene.Net.Analysis.SmartCn/Hhmm/BigramDictionary.cs @@ -93,8 +93,8 @@ private bool LoadFromObj(FileInfo serialObj) { try { - using (Stream input = new FileStream(serialObj.FullName, FileMode.Open, FileAccess.Read)) - LoadFromInputStream(input); + using Stream input = new FileStream(serialObj.FullName, FileMode.Open, FileAccess.Read); + LoadFromInputStream(input); return true; } catch (Exception e) when (e.IsException()) diff --git a/src/Lucene.Net.Benchmark/ByTask/Benchmark.cs b/src/Lucene.Net.Benchmark/ByTask/Benchmark.cs index 46223a2ade..5c08260b8d 100644 --- a/src/Lucene.Net.Benchmark/ByTask/Benchmark.cs +++ b/src/Lucene.Net.Benchmark/ByTask/Benchmark.cs @@ -129,14 +129,14 @@ public static void Exec(string[] args) } // verify input files - FileInfo algFile = new FileInfo(args[0]); - if (!algFile.Exists /*|| !algFile.isFile() ||!algFile.canRead()*/ ) + string algFile = args[0]; // LUCENENET specific: changed to use 
string fileName instead of allocating a FileInfo (#832) + if (!File.Exists(algFile) /*|| !algFile.isFile() ||!algFile.canRead()*/ ) { - Console.WriteLine("cannot find/read algorithm file: " + algFile.FullName); + Console.WriteLine("cannot find/read algorithm file: " + algFile); Environment.Exit(1); } - Console.WriteLine("Running algorithm from: " + algFile.FullName); + Console.WriteLine("Running algorithm from: " + algFile); Benchmark benchmark = null; try diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/EnwikiContentSource.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/EnwikiContentSource.cs index fb10a7bcd3..fe963aeca3 100644 --- a/src/Lucene.Net.Benchmark/ByTask/Feeds/EnwikiContentSource.cs +++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/EnwikiContentSource.cs @@ -225,7 +225,7 @@ public void Run() { Stream localFileIS = outerInstance.@is; if (localFileIS != null) - { // null means fileIS was closed on us + { // null means fileIS was closed on us try { // To work around a bug in XERCES (XERCESJ-1257), we assume the XML is always UTF8, so we simply provide reader. @@ -374,7 +374,7 @@ private static int GetElementType(string elem) return ELEMENTS.TryGetValue(elem, out int val) ? 
val : -1; } - private FileInfo file; + private string file; // LUCENENET specific: changed to use string fileName instead of allocating a FileInfo (#832) private bool keepImages = true; private Stream @is; private readonly Parser parser; @@ -430,7 +430,8 @@ public override void SetConfig(Config config) string fileName = config.Get("docs.file", null); if (fileName != null) { - file = new FileInfo(fileName); + // LUCENENET specific: changed to use string fileName instead of allocating a FileInfo (#832) + file = fileName; } } } diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/FileBasedQueryMaker.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/FileBasedQueryMaker.cs index bbbdf3a671..247ae36f45 100644 --- a/src/Lucene.Net.Benchmark/ByTask/Feeds/FileBasedQueryMaker.cs +++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/FileBasedQueryMaker.cs @@ -65,12 +65,13 @@ protected override Query[] PrepareQueries() string fileName = m_config.Get("file.query.maker.file", null); if (fileName != null) { - FileInfo file = new FileInfo(fileName); + // LUCENENET: not used, preferring fileName overloads: FileInfo file = new FileInfo(fileName); TextReader reader = null; // note: we use a decoding reader, so if your queries are screwed up you know - if (file.Exists) + // LUCENENET specific: changed to use string fileName instead of allocating a FileInfo (#832) + if (File.Exists(fileName)) { - reader = IOUtils.GetDecodingReader(file, Encoding.UTF8); + reader = IOUtils.GetDecodingReader(fileName, Encoding.UTF8); } else { diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/LineDocSource.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/LineDocSource.cs index 6f15b1f470..973d9aa349 100644 --- a/src/Lucene.Net.Benchmark/ByTask/Feeds/LineDocSource.cs +++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/LineDocSource.cs @@ -43,7 +43,7 @@ public class LineDocSource : ContentSource { // LUCENENET specific - de-nested LineParser, SimpleLineParser, HeaderLineParser - private FileInfo file; + private string file; // LUCENENET 
specific: changed to use string fileName instead of allocating a FileInfo (#832) private TextReader reader; private int readCount; @@ -153,7 +153,7 @@ private LineParser CreateDocDataLineReader(string line) } } - // if this the simple case, + // if this the simple case, if (Arrays.Equals(header, WriteLineDocTask.DEFAULT_FIELDS)) { return new SimpleLineParser(header); @@ -171,11 +171,10 @@ public override void SetConfig(Config config) { base.SetConfig(config); string fileName = config.Get("docs.file", null); - if (fileName is null) - { - throw new ArgumentException("docs.file must be set"); - } - file = new FileInfo(fileName); + + // LUCENENET specific: changed to use string fileName instead of allocating a FileInfo (#832) + file = fileName ?? throw new ArgumentException("docs.file must be set"); + if (m_encoding is null) { m_encoding = Encoding.UTF8; @@ -189,7 +188,7 @@ public abstract class LineParser protected readonly string[] m_header; /// - /// Construct with the header + /// Construct with the header /// /// header line found in the input file, or null if none. protected LineParser(string[] header) // LUCENENET: CA1012: Abstract types should not have constructors (marked protected) @@ -205,8 +204,8 @@ protected LineParser(string[] header) // LUCENENET: CA1012: Abstract types shoul /// /// which ignores the header passed to its constructor - /// and assumes simply that field names and their order are the same - /// as in . + /// and assumes simply that field names and their order are the same + /// as in . /// public class SimpleLineParser : LineParser { @@ -243,11 +242,11 @@ public override void ParseLine(DocData docData, string line) } /// - /// which sets field names and order by + /// which sets field names and order by /// the header - any header - of the lines file. /// It is less efficient than but more powerful. 
/// - public class HeaderLineParser : LineParser + public class HeaderLineParser : LineParser { private enum FieldName { NAME, TITLE, DATE, BODY, PROP } private readonly FieldName[] posToF; diff --git a/src/Lucene.Net.Benchmark/ByTask/PerfRunData.cs b/src/Lucene.Net.Benchmark/ByTask/PerfRunData.cs index a06949ccec..da4ba5a0fc 100644 --- a/src/Lucene.Net.Benchmark/ByTask/PerfRunData.cs +++ b/src/Lucene.Net.Benchmark/ByTask/PerfRunData.cs @@ -103,7 +103,7 @@ public PerfRunData(Config config) : this( config, performReinit: true, logQueries: string.Equals(config?.Get("log.queries", "false") ?? "false", "true", StringComparison.OrdinalIgnoreCase)) - { + { } // LUCENENET specific - added performReinit parameter to allow subclasses to skip reinit @@ -114,7 +114,7 @@ public PerfRunData(Config config) : this( protected PerfRunData(Config config, bool performReinit, bool logQueries) { this.config = config ?? throw new ArgumentNullException(nameof(config)); - + // analyzer (default is standard analyzer) analyzer = NewAnalyzerTask.CreateAnalyzer(config.Get("analyzer", typeof(Lucene.Net.Analysis.Standard.StandardAnalyzer).AssemblyQualifiedName)); @@ -178,7 +178,7 @@ protected virtual void Dispose(bool disposing) } } - // clean old stuff, reopen + // clean old stuff, reopen public virtual void Reinit(bool eraseIndex) { // cleanup index @@ -210,8 +210,8 @@ protected virtual Store.Directory CreateDirectory(bool eraseIndex, string dirNam { if ("FSDirectory".Equals(config.Get(dirParam, "RAMDirectory"), StringComparison.Ordinal)) { - DirectoryInfo workDir = new DirectoryInfo(config.Get("work.dir", "work")); - DirectoryInfo indexDir = new DirectoryInfo(System.IO.Path.Combine(workDir.FullName, dirName)); + string workDir = config.Get("work.dir", "work"); // LUCENENET specific: changed to use string directory name instead of allocating a DirectoryInfo (#832) + DirectoryInfo indexDir = new DirectoryInfo(Path.Combine(workDir, dirName)); if (eraseIndex && indexDir.Exists) { 
FileUtils.FullyDelete(indexDir); @@ -290,9 +290,9 @@ public virtual Store.Directory Directory /// /// Set the taxonomy reader. Takes ownership of that taxonomy reader, that is, - /// internally performs taxoReader.IncRef() (If caller no longer needs that - /// reader it should DecRef()/Dispose() it after calling this method, otherwise, - /// the reader will remain open). + /// internally performs taxoReader.IncRef() (If caller no longer needs that + /// reader it should DecRef()/Dispose() it after calling this method, otherwise, + /// the reader will remain open). /// /// The taxonomy reader to set. public virtual void SetTaxonomyReader(TaxonomyReader taxoReader) @@ -399,9 +399,9 @@ public virtual IndexSearcher GetIndexSearcher() /// /// Set the index reader. Takes ownership of that index reader, that is, - /// internally performs indexReader.incRef() (If caller no longer needs that + /// internally performs indexReader.incRef() (If caller no longer needs that /// reader it should decRef()/close() it after calling this method, otherwise, - /// the reader will remain open). + /// the reader will remain open). /// /// The indexReader to set. 
public virtual void SetIndexReader(DirectoryReader indexReader) diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/AnalyzerFactoryTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/AnalyzerFactoryTask.cs index b950a49030..ec1ebecacd 100644 --- a/src/Lucene.Net.Benchmark/ByTask/Tasks/AnalyzerFactoryTask.cs +++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/AnalyzerFactoryTask.cs @@ -487,7 +487,7 @@ private void CreateAnalysisPipelineComponent(StreamTokenizer stok, Type clazz) } if (instance is IResourceLoaderAware resourceLoaderAware) { - DirectoryInfo baseDir = new DirectoryInfo(RunData.Config.Get("work.dir", "work")); + string baseDir = RunData.Config.Get("work.dir", "work"); // LUCENENET specific: changed to use string directory name instead of allocating a DirectoryInfo (#832) resourceLoaderAware.Inform(new FilesystemResourceLoader(baseDir)); } if (typeof(CharFilterFactory).IsAssignableFrom(clazz)) diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateIndexTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateIndexTask.cs index 6fd4cba208..90bd6194df 100644 --- a/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateIndexTask.cs +++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/CreateIndexTask.cs @@ -214,8 +214,8 @@ public static IndexWriter ConfigureWriter(Config config, PerfRunData runData, Op } else { - FileInfo f = new FileInfo(infoStreamVal); - iwc.SetInfoStream(new StreamWriter(new FileStream(f.FullName, FileMode.Create, FileAccess.Write), Encoding.GetEncoding(0))); + // LUCENENET specific: changed to use string fileName instead of allocating a FileInfo (#832) + iwc.SetInfoStream(new StreamWriter(new FileStream(infoStreamVal, FileMode.Create, FileAccess.Write), Encoding.GetEncoding(0))); } } IndexWriter writer = new IndexWriter(runData.Directory, iwc); diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTask.cs index 7e61359b05..7a44d56b5d 100644 --- 
a/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTask.cs +++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTask.cs @@ -26,9 +26,9 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks */ /// - /// A which for Wikipedia input, will write category pages + /// A which for Wikipedia input, will write category pages /// to another file, while remaining pages will be written to the original file. - /// The categories file is derived from the original file, by adding a prefix "categories-". + /// The categories file is derived from the original file, by adding a prefix "categories-". /// public class WriteEnwikiLineDocTask : WriteLineDocTask { @@ -37,17 +37,26 @@ public class WriteEnwikiLineDocTask : WriteLineDocTask public WriteEnwikiLineDocTask(PerfRunData runData) : base(runData) { - Stream @out = StreamUtils.GetOutputStream(CategoriesLineFile(new FileInfo(m_fname))); + Stream @out = StreamUtils.GetOutputStream(CategoriesLineFile(m_fname)); // LUCENENET specific: changed to use string fileName instead of allocating a FileInfo (#832) categoryLineFileOut = new StreamWriter(@out, Encoding.UTF8); WriteHeader(categoryLineFileOut); } + /// Compose categories line file out of original line file + // LUCENENET specific: changed to use string fileName instead of allocating a FileInfo (#832) + public static string CategoriesLineFile(string fileName) + { + string dir = Path.GetDirectoryName(fileName); + string categoriesName = "categories-" + Path.GetFileName(fileName); + return dir is null ? categoriesName : Path.Combine(dir, categoriesName); + } + /// Compose categories line file out of original line file public static FileInfo CategoriesLineFile(FileInfo f) { DirectoryInfo dir = f.Directory; string categoriesName = "categories-" + f.Name; - return dir is null ? new FileInfo(categoriesName) : new FileInfo(System.IO.Path.Combine(dir.FullName, categoriesName)); + return dir is null ? 
new FileInfo(categoriesName) : new FileInfo(Path.Combine(dir.FullName, categoriesName)); } protected override void Dispose(bool disposing) diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteLineDocTask.cs b/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteLineDocTask.cs index d724cfc516..7584c04692 100644 --- a/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteLineDocTask.cs +++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteLineDocTask.cs @@ -49,8 +49,8 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks /// line.file.outthe name of the file to write the output to. That parameter is mandatory. NOTE: the file is re-created. /// line.fieldswhich fields should be written in each line. (optional, default: ). /// sufficient.fields - /// list of field names, separated by comma, which, - /// if all of them are missing, the document will be skipped. For example, to require + /// list of field names, separated by comma, which, + /// if all of them are missing, the document will be skipped. For example, to require /// that at least one of f1,f2 is not empty, specify: "f1,f2" in this field. To specify /// that no field is required, i.e. that even empty docs should be emitted, specify "," /// (optional, default: ). 
@@ -111,11 +111,11 @@ public WriteLineDocTask(PerfRunData runData, bool performWriteHeader) { throw new ArgumentException("line.file.out must be set"); } - Stream @out = StreamUtils.GetOutputStream(new FileInfo(m_fname)); + Stream @out = StreamUtils.GetOutputStream(m_fname); // LUCENENET specific: changed to use string fileName instead of allocating a FileInfo (#832) m_lineFileOut = new StreamWriter(@out, Encoding.UTF8); docMaker = runData.DocMaker; - // init fields + // init fields string f2r = config.Get("line.fields", null); if (f2r is null) { diff --git a/src/Lucene.Net.Benchmark/ByTask/Utils/FileUtils.cs b/src/Lucene.Net.Benchmark/ByTask/Utils/FileUtils.cs index 96bbf5c235..75e12343ad 100644 --- a/src/Lucene.Net.Benchmark/ByTask/Utils/FileUtils.cs +++ b/src/Lucene.Net.Benchmark/ByTask/Utils/FileUtils.cs @@ -27,19 +27,31 @@ public static class FileUtils // LUCENENET specific: CA1052 Static holder types /// /// Delete files and directories, even if non-empty. /// - /// File or directory. + /// Directory to delete. /// true on success, false if no or part of files have been deleted. /// If there is a low-level I/O error. - public static bool FullyDelete(DirectoryInfo dir) + public static bool FullyDelete(DirectoryInfo dir) + => FullyDelete(dir.FullName); + + /// + /// Delete files and directories, even if non-empty. + /// + /// Directory path to delete. + /// true on success, false if no or part of files have been deleted. + /// If there is a low-level I/O error. + /// + /// LUCENENET: This overload takes a string to avoid allocating a object. 
+ /// + public static bool FullyDelete(string dirName) { try { - Directory.Delete(dir.FullName, true); + Directory.Delete(dirName, true); return true; } catch { - return !Directory.Exists(dir.FullName); + return !Directory.Exists(dirName); } } } diff --git a/src/Lucene.Net.Benchmark/ByTask/Utils/StreamUtils.cs b/src/Lucene.Net.Benchmark/ByTask/Utils/StreamUtils.cs index 9aa90a9ac7..d932a405ad 100644 --- a/src/Lucene.Net.Benchmark/ByTask/Utils/StreamUtils.cs +++ b/src/Lucene.Net.Benchmark/ByTask/Utils/StreamUtils.cs @@ -47,38 +47,59 @@ public static class StreamUtils // LUCENENET specific: CA1052 Static holder type /// based on the file name (e.g., if it ends with .bz2 or .bzip, return a /// 'bzip' ). /// - public static Stream GetInputStream(FileInfo file) + /// + /// LUCENENET: This overload takes a string file name to avoid allocating a object. + /// + public static Stream GetInputStream(string fileName) { // First, create a FileInputStream, as this will be required by all types. // Wrap with BufferedInputStream for better performance - Stream @in = new FileStream(file.FullName, FileMode.Open, FileAccess.Read); - return GetFileType(file).GetInputStream(@in); + Stream @in = new FileStream(fileName, FileMode.Open, FileAccess.Read); + return GetFileType(fileName).GetInputStream(@in); } + /// + /// Returns an over the requested file. This method + /// attempts to identify the appropriate instance to return + /// based on the file name (e.g., if it ends with .bz2 or .bzip, return a + /// 'bzip' ). + /// + public static Stream GetInputStream(FileInfo file) + => GetInputStream(file.FullName); + /// Return the type of the file, or null if unknown. - private static FileType GetFileType(FileInfo file) + private static FileType GetFileType(string fileName) { FileType? type = null; - string fileName = file.Name; int idx = fileName.LastIndexOf('.'); if (idx != -1) { extensionToType.TryGetValue(fileName.Substring(idx).ToLowerInvariant(), out type); } - return type ?? 
FileType.PLAIN ; + return type ?? FileType.PLAIN; } /// /// Returns an over the requested file, identifying - /// the appropriate instance similar to . + /// the appropriate instance similar to . /// - public static Stream GetOutputStream(FileInfo file) + /// + /// LUCENENET: This overload takes a string file name to avoid allocating a object. + /// + public static Stream GetOutputStream(string fileName) { // First, create a FileInputStream, as this will be required by all types. // Wrap with BufferedInputStream for better performance - Stream os = new FileStream(file.FullName, FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite); - return GetFileType(file).GetOutputStream(os); + Stream os = new FileStream(fileName, FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite); + return GetFileType(fileName).GetOutputStream(os); } + + /// + /// Returns an over the requested file, identifying + /// the appropriate instance similar to . + /// + public static Stream GetOutputStream(FileInfo file) + => GetOutputStream(file.FullName); } /// File format type. 
@@ -109,7 +130,7 @@ public static Stream GetInputStream(this FileType fileType, Stream input) case FileType.BZIP2: return new BZip2InputStream(input); case FileType.GZIP: - return new GZipStream(input, CompressionMode.Decompress); + return new GZipStream(input, CompressionMode.Decompress); default: return input; } diff --git a/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs b/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs index b75eb55b50..55330c99fc 100644 --- a/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs +++ b/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs @@ -69,10 +69,11 @@ public static void Main(string[] args) //Environment.Exit(1); } - FileInfo topicsFile = new FileInfo(args[0]); - FileInfo qrelsFile = new FileInfo(args[1]); + // LUCENENET specific: changed to use string file names instead of allocating a FileInfo (#832) + string topicsFile = args[0]; + string qrelsFile = args[1]; SubmissionReport submitLog = new SubmissionReport(new StreamWriter(new FileStream(args[2], FileMode.Create, FileAccess.Write), Encoding.UTF8 /* huh, no nio.Charset ctor? */), "lucene"); - using Store.FSDirectory dir = Store.FSDirectory.Open(new DirectoryInfo(args[3])); + using Store.FSDirectory dir = Store.FSDirectory.Open(args[3]); // LUCENENET specific: changed to use string path instead of allocating a DirectoryInfo (#832) using IndexReader reader = DirectoryReader.Open(dir); string fieldSpec = args.Length == 5 ? args[4] : "T"; // default to Title-only if not specified. 
IndexSearcher searcher = new IndexSearcher(reader); diff --git a/src/Lucene.Net.Misc/Index/CompoundFileExtractor.cs b/src/Lucene.Net.Misc/Index/CompoundFileExtractor.cs index 514bfdcf59..746acbcaee 100644 --- a/src/Lucene.Net.Misc/Index/CompoundFileExtractor.cs +++ b/src/Lucene.Net.Misc/Index/CompoundFileExtractor.cs @@ -98,15 +98,21 @@ public static void Main(string[] args) try { - FileInfo file = new FileInfo(filename); - string dirname = file.DirectoryName; - filename = file.Name; + // LUCENENET specific: changed to use string filename instead of allocating a FileInfo (#832) + // Resolve to a full path first (as FileInfo did) so that a bare or relative file name still yields a usable directory, and keep passing only the file name (not the whole path) on as the compound file name. + string fullPath = Path.GetFullPath(filename); + string dirname = Path.GetDirectoryName(fullPath) + ?? throw new InvalidOperationException($"Could not determine directory name from filename: {filename}"); + filename = Path.GetFileName(fullPath); + if (dirImpl is null) { - dir = FSDirectory.Open(new DirectoryInfo(dirname)); + dir = FSDirectory.Open(dirname); } else { + // LUCENENET NOTE: We need the DirectoryInfo instance here, as there's no benefit to using a string. + // See comments on CommandLineUtil.NewFSDirectory(string, DirectoryInfo) for more information.
(#832) dir = CommandLineUtil.NewFSDirectory(dirImpl, new DirectoryInfo(dirname)); } diff --git a/src/Lucene.Net.Misc/Index/IndexSplitter.cs b/src/Lucene.Net.Misc/Index/IndexSplitter.cs index 26f51ed5f4..37dfca0007 100644 --- a/src/Lucene.Net.Misc/Index/IndexSplitter.cs +++ b/src/Lucene.Net.Misc/Index/IndexSplitter.cs @@ -181,8 +181,9 @@ public virtual void Split(DirectoryInfo destDir, ICollection segs) // LU ICollection files = infoPerCommit.GetFiles(); foreach (string srcName in files) { - FileInfo srcFile = new FileInfo(Path.Combine(dir.FullName, srcName)); - FileInfo destFile = new FileInfo(Path.Combine(destDir.FullName, srcName)); + // LUCENENET specific: changed to use string file names instead of allocating a FileInfo (#832) + string srcFile = Path.Combine(dir.FullName, srcName); + string destFile = Path.Combine(destDir.FullName, srcName); CopyFile(srcFile, destFile); } } @@ -191,10 +192,11 @@ public virtual void Split(DirectoryInfo destDir, ICollection segs) // LU // Console.WriteLine("destDir:"+destDir.getAbsolutePath()); } - private static void CopyFile(FileInfo src, FileInfo dst) + // LUCENENET specific: changed to use string file names instead of allocating a FileInfo (#832) + private static void CopyFile(string src, string dst) { - using Stream @in = new FileStream(src.FullName, FileMode.Open, FileAccess.Read); - using Stream @out = new FileStream(dst.FullName, FileMode.OpenOrCreate, FileAccess.Write); + using Stream @in = new FileStream(src, FileMode.Open, FileAccess.Read); + using Stream @out = new FileStream(dst, FileMode.OpenOrCreate, FileAccess.Write); @in.CopyTo(@out); } } diff --git a/src/Lucene.Net.Suggest/Spell/PlainTextDictionary.cs b/src/Lucene.Net.Suggest/Spell/PlainTextDictionary.cs index b743492ff5..eedb708849 100644 --- a/src/Lucene.Net.Suggest/Spell/PlainTextDictionary.cs +++ b/src/Lucene.Net.Suggest/Spell/PlainTextDictionary.cs @@ -26,7 +26,7 @@ namespace Lucene.Net.Search.Spell /// /// Dictionary represented by a text file. 
- /// + /// /// Format allowed: 1 word per line: /// word1 /// word2 @@ -34,9 +34,22 @@ namespace Lucene.Net.Search.Spell /// public class PlainTextDictionary : IDictionary { - private readonly TextReader @in; + /// + /// Creates a dictionary based on a File. + /// + /// NOTE: content is treated as UTF-8 + /// + /// + /// + /// LUCENENET: This overload takes a string to avoid having to allocate a object. + /// + public PlainTextDictionary(string fileName) + { + @in = IOUtils.GetDecodingReader(fileName, Encoding.UTF8); + } + /// /// Creates a dictionary based on a File. /// @@ -124,4 +137,4 @@ public bool MoveNext() public IComparer Comparer => null; } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellTernarySearchTrie.cs b/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellTernarySearchTrie.cs index 21943db8d6..366aa6772e 100644 --- a/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellTernarySearchTrie.cs +++ b/src/Lucene.Net.Suggest/Suggest/Jaspell/JaspellTernarySearchTrie.cs @@ -1,10 +1,10 @@ // Copyright (c) 2005 Bruno Martins // All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions // are met: -// 1. Redistributions of source code must retain the above copyright +// 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the @@ -12,9 +12,9 @@ // 3. Neither the name of the organization nor the names of its contributors // may be used to endorse or promote products derived from this software // without specific prior written permission. 
-// +// // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR @@ -46,13 +46,13 @@ namespace Lucene.Net.Search.Suggest.Jaspell /// tree with the speed of a digital search trie, and is therefore ideal for /// practical use in sorting and searching data. /// - /// + /// /// This data structure is faster than hashing for many typical search problems, /// and supports a broader range of useful problems and operations. Ternary /// searches are faster than hashing and more powerful, too. /// /// - /// + /// /// The theory of ternary search trees was described at a symposium in 1997 (see /// "Fast Algorithms for Sorting and Searching Strings," by J.L. Bentley and R. /// Sedgewick, Proceedings of the 8th Annual ACM-SIAM Symposium on Discrete @@ -131,14 +131,14 @@ public long GetSizeInBytes() /// The culture used for lowercasing. /// A negative number, 0 or a positive number if the second char is /// less, equal or greater. - /// + /// private static int CompareCharsAlphabetically(char cCompare2, char cRef, CultureInfo culture) { var textInfo = culture.TextInfo; return textInfo.ToLower(cCompare2) - textInfo.ToLower(cRef); } - /* what follows is the original Jaspell code. + /* what follows is the original Jaspell code. private static int compareCharsAlphabetically(int cCompare2, int cRef) { int cCompare = 0; if (cCompare2 >= 65) { @@ -209,12 +209,11 @@ internal virtual TSTNode Root get => rootNode; } - /// /// Constructs a Ternary Search Trie and loads data from a /// into the Trie. The file is a normal text document, where each line is of /// the form word TAB float. 
- /// + /// /// Uses the culture of the current thread to lowercase words before comparing. /// /// @@ -222,7 +221,26 @@ internal virtual TSTNode Root /// /// A problem occured while reading the data. public JaspellTernarySearchTrie(FileInfo file) - : this(file, false, CultureInfo.CurrentCulture) + : this(file.FullName, false, CultureInfo.CurrentCulture) + { + } + + /// + /// Constructs a Ternary Search Trie and loads data from a given + /// into the Trie. The file is a normal text document, where each line is of + /// the form word TAB float. + /// + /// Uses the culture of the current thread to lowercase words before comparing. + /// + /// + /// The file name with the data to load into the Trie. + /// + /// A problem occured while reading the data. + /// + /// LUCENENET: This constructor overload takes a string to avoid having to allocate a object. + /// + public JaspellTernarySearchTrie(string fileName) + : this(fileName, false, CultureInfo.CurrentCulture) { } @@ -230,7 +248,7 @@ public JaspellTernarySearchTrie(FileInfo file) /// Constructs a Ternary Search Trie and loads data from a /// into the Trie. The file is a normal text document, where each line is of /// the form word TAB float. - /// + /// /// Uses the supplied culture to lowercase words before comparing. /// /// @@ -239,15 +257,33 @@ public JaspellTernarySearchTrie(FileInfo file) /// /// A problem occured while reading the data. public JaspellTernarySearchTrie(FileInfo file, CultureInfo culture) - : this(file, false, culture) - { - } + : this(file.FullName, false, culture) + { } + + /// + /// Constructs a Ternary Search Trie and loads data from a given + /// into the Trie. The file is a normal text document, where each line is of + /// the form word TAB float. + /// + /// Uses the supplied culture to lowercase words before comparing. + /// + /// + /// The file name with the data to load into the Trie. + /// The culture used for lowercasing. + /// + /// A problem occured while reading the data. 
+ /// + /// LUCENENET: This constructor overload takes a string to avoid having to allocate a object. + /// + public JaspellTernarySearchTrie(string fileName, CultureInfo culture) + : this(fileName, false, culture) + { } /// /// Constructs a Ternary Search Trie and loads data from a /// into the Trie. The file is a normal text document, where each line is of /// the form "word TAB float". - /// + /// /// Uses the culture of the current thread to lowercase words before comparing. /// /// @@ -258,14 +294,35 @@ public JaspellTernarySearchTrie(FileInfo file, CultureInfo culture) /// /// A problem occured while reading the data. public JaspellTernarySearchTrie(FileInfo file, bool compression) - : this(file, compression, CultureInfo.CurrentCulture) + : this(file.FullName, compression, CultureInfo.CurrentCulture) + { } + + /// + /// Constructs a Ternary Search Trie and loads data from a given + /// into the Trie. The file is a normal text document, where each line is of + /// the form "word TAB float". + /// + /// Uses the culture of the current thread to lowercase words before comparing. + /// + /// + /// The file name with the data to load into the Trie. + /// + /// If true, the file is compressed with the GZIP algorithm, and if + /// false, the file is a normal text document. + /// + /// A problem occured while reading the data. + /// + /// LUCENENET: This constructor overload takes a string to avoid having to allocate a object. + /// + public JaspellTernarySearchTrie(string fileName, bool compression) + : this(fileName, compression, CultureInfo.CurrentCulture) { } /// /// Constructs a Ternary Search Trie and loads data from a /// into the Trie. The file is a normal text document, where each line is of /// the form "word TAB float". - /// + /// /// Uses the supplied culture to lowercase words before comparing. 
/// NOTE for subclasses: this constructor calls a virtual method, which could /// result in your override of it being called before the class is properly initialized. @@ -281,14 +338,45 @@ public JaspellTernarySearchTrie(FileInfo file, bool compression) /// The culture used for lowercasing. /// /// A problem occured while reading the data. + /// + /// LUCENENET: This overload forwards to the string-based constructor; prefer that overload to avoid having to allocate a FileInfo object. + /// + public JaspellTernarySearchTrie(FileInfo file, bool compression, CultureInfo culture) + : this(file.FullName, compression, culture) + { + } + + /// + /// Constructs a Ternary Search Trie and loads data from a given + /// into the Trie. The file is a normal text document, where each line is of + /// the form "word TAB float". + /// + /// Uses the supplied culture to lowercase words before comparing. + /// NOTE for subclasses: this constructor calls a virtual method, which could + /// result in your override of it being called before the class is properly initialized. + /// To overcome the issue, you could override + /// constructor and then call the logic in a way that suits your needs. + /// + /// + /// + /// The file name with the data to load into the Trie. + /// + /// If true, the file is compressed with the GZIP algorithm, and if + /// false, the file is a normal text document. + /// The culture used for lowercasing. + /// + /// A problem occured while reading the data. + /// + /// LUCENENET: This constructor overload takes a string to avoid having to allocate a object.
+ /// [SuppressMessage("CodeQuality", "IDE0079:Remove unnecessary suppression", Justification = "This is a SonarCloud issue")] [SuppressMessage("CodeQuality", "S1699:Constructors should only call non-overridable methods", Justification = "This class gets deprecated and removed in later versions")] - public JaspellTernarySearchTrie(FileInfo file, bool compression, CultureInfo culture) + public JaspellTernarySearchTrie(string fileName, bool compression, CultureInfo culture) : this(culture) { - using TextReader @in = (compression) ? - IOUtils.GetDecodingReader(new GZipStream(new FileStream(file.FullName, FileMode.Open), CompressionMode.Decompress), Encoding.UTF8) : - IOUtils.GetDecodingReader(new FileStream(file.FullName, FileMode.Open), Encoding.UTF8); + using TextReader @in = compression ? + IOUtils.GetDecodingReader(new GZipStream(new FileStream(fileName, FileMode.Open), CompressionMode.Decompress), Encoding.UTF8) : + IOUtils.GetDecodingReader(new FileStream(fileName, FileMode.Open), Encoding.UTF8); string word; int pos; float occur, one = 1f; @@ -379,13 +467,13 @@ private void DeleteNode(TSTNode nodeToDelete) /// /// Recursively visits each node to be deleted. - /// + /// /// To delete a node, first set its data to null, then pass it into this /// method, then pass the node returned by this method into this method (make /// sure you don't delete the data of any of the nodes returned from this /// method!) and continue in this fashion until the node returned by this /// method is null. - /// + /// /// The TSTNode instance returned by this method will be next node to be /// operated on by (This emulates recursive /// method call while avoiding the overhead normally associated with a @@ -686,7 +774,7 @@ protected internal virtual TSTNode GetOrCreateNode(string key) /// If the method is called before the /// property has been called for the first time, /// then diff = 0. 
- /// + /// /// /// /// @@ -706,7 +794,7 @@ public virtual IList MatchAlmost(string key) /// If the method is called before the /// property has been called for the first time, /// then diff = 0. - /// + /// /// /// /// The target key. @@ -732,7 +820,7 @@ public virtual IList MatchAlmost(string key, int numReturnValues) /// /// The results so far. /// - /// If true all keys having up to and including + /// If true all keys having up to and including /// mismatched letters will be included in the result (including a key /// that is exactly the same as the target string) otherwise keys will /// be included in the result only if they have exactly @@ -906,7 +994,7 @@ public virtual void Remove(string key) /// /// Arguments less than 0 will set the char difference to 0, and arguments /// greater than 3 will set the char difference to 3. - /// + /// /// /// public virtual int MatchAlmostDiff @@ -945,7 +1033,7 @@ public virtual int NumReturnValues /// /// The number of keys returned is limited to numReturnValues. To get a list /// that isn't limited in size, set numReturnValues to -1. - /// + /// /// /// /// @@ -965,7 +1053,7 @@ protected virtual IList SortKeys(TSTNode startNode, int numReturnValues) /// Sorted keys will be appended to the end of the resulting . /// The result may be empty when this method is invoked, but may not be /// null. 
- /// + /// /// /// /// @@ -1008,4 +1096,4 @@ public virtual long GetSizeInBytes() return mem; } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.TestFramework/Support/Configuration/ConfigurationBuilderExtensions.cs b/src/Lucene.Net.TestFramework/Support/Configuration/ConfigurationBuilderExtensions.cs index e4289cb812..7a5e603b2e 100644 --- a/src/Lucene.Net.TestFramework/Support/Configuration/ConfigurationBuilderExtensions.cs +++ b/src/Lucene.Net.TestFramework/Support/Configuration/ConfigurationBuilderExtensions.cs @@ -101,14 +101,14 @@ private static Stack ScanConfigurationFiles(string currentPath, string f try { - while (new DirectoryInfo(currentPath).Parent != null) + while (Directory.GetParent(currentPath) is { } parent) // LUCENENET: Reduce DirectoryInfo allocations by only getting parent once per iteration (#832) { - candidatePath = System.IO.Path.Combine(new DirectoryInfo(currentPath).Parent.FullName, fileName); + candidatePath = Path.Combine(parent.FullName, fileName); if (File.Exists(candidatePath)) { locations.Push(candidatePath); } - currentPath = new DirectoryInfo(currentPath).Parent.FullName; + currentPath = parent.FullName; } } catch (SecurityException) diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTaskTest.cs b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTaskTest.cs index 44c4efbf1f..a50b04c0b5 100644 --- a/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTaskTest.cs +++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTaskTest.cs @@ -4,7 +4,6 @@ using Lucene.Net.Benchmarks.ByTask.Utils; using Lucene.Net.Documents; using NUnit.Framework; -using System; using System.Collections.Generic; using System.IO; using System.Text; @@ -52,37 +51,40 @@ public override Document MakeDocument() } - private PerfRunData createPerfRunData(FileInfo file, String docMakerName) + // LUCENENET specific: changed to use string fileName instead of allocating a FileInfo (#832) + private 
PerfRunData createPerfRunData(string fileName, string docMakerName) { Dictionary props = new Dictionary(); props["doc.maker"] = docMakerName; - props["line.file.out"] = file.FullName; + props["line.file.out"] = fileName; props["directory"] = "RAMDirectory"; // no accidental FS dir. Config config = new Config(props); return new PerfRunData(config); } - private void doReadTest(FileInfo file, String expTitle, - String expDate, String expBody) + // LUCENENET specific: changed to use string fileName instead of allocating a FileInfo (#832) + private void doReadTest(string fileName, string expTitle, + string expDate, string expBody) { - doReadTest(2, file, expTitle, expDate, expBody); - FileInfo categoriesFile = WriteEnwikiLineDocTask.CategoriesLineFile(file); + doReadTest(2, fileName, expTitle, expDate, expBody); + string categoriesFile = WriteEnwikiLineDocTask.CategoriesLineFile(fileName); doReadTest(2, categoriesFile, "Category:" + expTitle, expDate, expBody); } - private void doReadTest(int n, FileInfo file, String expTitle, String expDate, String expBody) + // LUCENENET specific: changed to use string fileName instead of allocating a FileInfo (#832) + private void doReadTest(int n, string fileName, string expTitle, string expDate, string expBody) { - Stream @in = new FileStream(file.FullName, FileMode.Open, FileAccess.Read); + Stream @in = new FileStream(fileName, FileMode.Open, FileAccess.Read); TextReader br = new StreamReader(@in, Encoding.UTF8); try { - String line = br.ReadLine(); + string line = br.ReadLine(); WriteLineDocTaskTest.assertHeaderLine(line); for (int i = 0; i < n; i++) { line = br.ReadLine(); assertNotNull(line); - String[] parts = line.Split(WriteLineDocTask.SEP).TrimEnd(); + string[] parts = line.Split(WriteLineDocTask.SEP).TrimEnd(); int numExpParts = expBody is null ? 
2 : 3; assertEquals(numExpParts, parts.Length); assertEquals(expTitle, parts[0]); @@ -106,11 +108,12 @@ public void TestCategoryLines() // WriteLineDocTask replaced only \t characters w/ a space, since that's its // separator char. However, it didn't replace newline characters, which // resulted in errors in LineDocSource. - FileInfo file = new FileInfo(Path.Combine(getWorkDir().FullName, "two-lines-each.txt")); + // LUCENENET specific: changed to use string fileName instead of allocating a FileInfo (#832) + string file = Path.Combine(getWorkDir().FullName, "two-lines-each.txt"); PerfRunData runData = createPerfRunData(file, typeof(WriteLineCategoryDocMaker).AssemblyQualifiedName); WriteLineDocTask wldt = new WriteEnwikiLineDocTask(runData); for (int i = 0; i < 4; i++) - { // four times so that each file should have 2 lines. + { // four times so that each file should have 2 lines. wldt.DoLogic(); } wldt.Dispose(); diff --git a/src/Lucene.Net/Store/MMapDirectory.cs b/src/Lucene.Net/Store/MMapDirectory.cs index 59e1cccf88..22a5cd8d06 100644 --- a/src/Lucene.Net/Store/MMapDirectory.cs +++ b/src/Lucene.Net/Store/MMapDirectory.cs @@ -183,8 +183,8 @@ public MMapDirectory(string path, LockFactory lockFactory, int maxChunkSize) public override IndexInput OpenInput(string name, IOContext context) { EnsureOpen(); - var file = new FileInfo(Path.Combine(Directory.FullName, name)); - var fc = new FileStream(file.FullName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); + var file = Path.Combine(Directory.FullName, name); // LUCENENET specific: changed to use string file name instead of allocating a FileInfo (#832) + var fc = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); return new MMapIndexInput(this, "MMapIndexInput(path=\"" + file + "\")", fc); } diff --git a/src/Lucene.Net/Store/NIOFSDirectory.cs b/src/Lucene.Net/Store/NIOFSDirectory.cs index 684f42b68c..68675162a3 100644 --- a/src/Lucene.Net/Store/NIOFSDirectory.cs +++ 
b/src/Lucene.Net/Store/NIOFSDirectory.cs @@ -102,27 +102,27 @@ public NIOFSDirectory(string path) public override IndexInput OpenInput(string name, IOContext context) { EnsureOpen(); - var path = new FileInfo(Path.Combine(Directory.FullName, name)); - var fc = new FileStream(path.FullName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite | FileShare.Delete); + var path = Path.Combine(Directory.FullName, name); // LUCENENET specific: changed to use string file name instead of allocating a FileInfo (#832) + var fc = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite | FileShare.Delete); return new NIOFSIndexInput("NIOFSIndexInput(path=\"" + path + "\")", fc, context); } public override IndexInputSlicer CreateSlicer(string name, IOContext context) { EnsureOpen(); - var path = new FileInfo(Path.Combine(Directory.FullName, name)); - var fc = new FileStream(path.FullName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite | FileShare.Delete); + var path = Path.Combine(Directory.FullName, name); // LUCENENET specific: changed to use string file name instead of allocating a FileInfo (#832) + var fc = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite | FileShare.Delete); return new IndexInputSlicerAnonymousClass(context, path, fc); } private sealed class IndexInputSlicerAnonymousClass : IndexInputSlicer { private readonly IOContext context; - private readonly FileInfo path; + private readonly string path; // LUCENENET specific: changed to use string file name instead of allocating a FileInfo (#832) private readonly FileStream descriptor; private int disposed = 0; // LUCENENET specific - allow double-dispose - public IndexInputSlicerAnonymousClass(IOContext context, FileInfo path, FileStream descriptor) + public IndexInputSlicerAnonymousClass(IOContext context, string path, FileStream descriptor) { this.context = context; this.path = path; @@ -141,7 +141,7 @@ protected override void Dispose(bool disposing) public override 
IndexInput OpenSlice(string sliceDescription, long offset, long length) { - return new NIOFSIndexInput("NIOFSIndexInput(" + sliceDescription + " in path=\"" + path + "\" slice=" + offset + ":" + (offset + length) + ")", descriptor, offset, length, + return new NIOFSIndexInput("NIOFSIndexInput(" + sliceDescription + " in path=\"" + path + "\" slice=" + offset + ":" + (offset + length) + ")", descriptor, offset, length, BufferedIndexInput.GetBufferSize(context)); } @@ -288,4 +288,4 @@ protected override void SeekInternal(long pos) } } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net/Store/SimpleFSDirectory.cs b/src/Lucene.Net/Store/SimpleFSDirectory.cs index 923ca685bc..ff83605ce1 100644 --- a/src/Lucene.Net/Store/SimpleFSDirectory.cs +++ b/src/Lucene.Net/Store/SimpleFSDirectory.cs @@ -98,27 +98,27 @@ public SimpleFSDirectory(string path) public override IndexInput OpenInput(string name, IOContext context) { EnsureOpen(); - var path = new FileInfo(Path.Combine(Directory.FullName, name)); - var raf = new FileStream(path.FullName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); - return new SimpleFSIndexInput("SimpleFSIndexInput(path=\"" + path.FullName + "\")", raf, context); + var path = Path.Combine(Directory.FullName, name); // LUCENENET specific: changed to use string file name instead of allocating a FileInfo (#832) + var raf = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); + return new SimpleFSIndexInput("SimpleFSIndexInput(path=\"" + path + "\")", raf, context); } public override IndexInputSlicer CreateSlicer(string name, IOContext context) { EnsureOpen(); - var file = new FileInfo(Path.Combine(Directory.FullName, name)); - var descriptor = new FileStream(file.FullName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); + var file = Path.Combine(Directory.FullName, name); // LUCENENET specific: changed to use string file name instead of allocating a FileInfo (#832) + var descriptor = new FileStream(file, 
FileMode.Open, FileAccess.Read, FileShare.ReadWrite); return new IndexInputSlicerAnonymousClass(context, file, descriptor); } private sealed class IndexInputSlicerAnonymousClass : IndexInputSlicer { private readonly IOContext context; - private readonly FileInfo file; + private readonly string file; // LUCENENET specific: changed to use string file name instead of allocating a FileInfo (#832) private readonly FileStream descriptor; private int disposed = 0; // LUCENENET specific - allow double-dispose - public IndexInputSlicerAnonymousClass(IOContext context, FileInfo file, FileStream descriptor) + public IndexInputSlicerAnonymousClass(IOContext context, string file, FileStream descriptor) { this.context = context; this.file = file; @@ -137,7 +137,7 @@ protected override void Dispose(bool disposing) public override IndexInput OpenSlice(string sliceDescription, long offset, long length) { - return new SimpleFSIndexInput("SimpleFSIndexInput(" + sliceDescription + " in path=\"" + file.FullName + "\" slice=" + offset + ":" + (offset + length) + ")", descriptor, offset, length, BufferedIndexInput.GetBufferSize(context)); + return new SimpleFSIndexInput("SimpleFSIndexInput(" + sliceDescription + " in path=\"" + file + "\" slice=" + offset + ":" + (offset + length) + ")", descriptor, offset, length, BufferedIndexInput.GetBufferSize(context)); } [Obsolete("Only for reading CFS files from 3.x indexes.")] @@ -277,4 +277,4 @@ protected override void SeekInternal(long position) public virtual bool IsFDValid => m_file != null; } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net/Store/SimpleFSLockFactory.cs b/src/Lucene.Net/Store/SimpleFSLockFactory.cs index 29733cd43b..3008a3c9d7 100644 --- a/src/Lucene.Net/Store/SimpleFSLockFactory.cs +++ b/src/Lucene.Net/Store/SimpleFSLockFactory.cs @@ -22,8 +22,8 @@ namespace Lucene.Net.Store */ /// - /// Implements using - /// + /// Implements using + /// /// (writes the file with UTF8 encoding and no byte order mark). 
/// /// Special care needs to be taken if you change the locking @@ -36,7 +36,7 @@ namespace Lucene.Net.Store /// /// If you suspect that this or any other is /// not working properly in your environment, you can easily - /// test it by using , + /// test it by using , /// and . /// /// @@ -87,14 +87,14 @@ public override void ClearLock(string lockName) { lockName = m_lockPrefix + "-" + lockName; } - FileInfo lockFile = new FileInfo(Path.Combine(m_lockDir.FullName, lockName)); + string lockFile = Path.Combine(m_lockDir.FullName, lockName); // LUCENENET specific: changed to use string file name instead of allocating a FileInfo (#832) try { - lockFile.Delete(); + File.Delete(lockFile); } catch (Exception e) { - if (lockFile.Exists) // Delete failed and lockFile exists + if (File.Exists(lockFile)) // Delete failed and lockFile exists throw new IOException("Cannot delete " + lockFile, e); // LUCENENET specific: wrapped inner exception } } @@ -103,13 +103,13 @@ public override void ClearLock(string lockName) internal class SimpleFSLock : Lock { - internal FileInfo lockFile; + internal string lockFile; // LUCENENET specific: changed to use string file name instead of allocating a FileInfo (#832) internal DirectoryInfo lockDir; public SimpleFSLock(DirectoryInfo lockDir, string lockFileName) { this.lockDir = lockDir; - lockFile = new FileInfo(Path.Combine(lockDir.FullName, lockFileName)); + lockFile = Path.Combine(lockDir.FullName, lockFileName); } public override bool Obtain() @@ -134,16 +134,16 @@ public override bool Obtain() // LUCENENET: Since WriteAllText doesn't care if the file exists or not, // we need to make that check first. 
We create a new IOException "failure reason" // in this case to simulate what happens in Java - if (File.Exists(lockFile.FullName)) + if (File.Exists(lockFile)) { - FailureReason = new IOException(string.Format("lockFile '{0}' alredy exists.", lockFile.FullName)); + FailureReason = new IOException($"lockFile '{lockFile}' already exists."); return false; } try { // Create the file, and close it immediately - File.WriteAllText(lockFile.FullName, string.Empty, new UTF8Encoding(encoderShouldEmitUTF8Identifier: false) /* No BOM */); + File.WriteAllText(lockFile, string.Empty, new UTF8Encoding(encoderShouldEmitUTF8Identifier: false) /* No BOM */); return true; } catch (Exception e) // LUCENENET: Some of the exceptions that can happen are not IOException, so we catch everything @@ -161,12 +161,12 @@ protected override void Dispose(bool disposing) { if (disposing) { - if (File.Exists(lockFile.FullName)) + if (File.Exists(lockFile)) { - File.Delete(lockFile.FullName); + File.Delete(lockFile); // If lockFile still exists, delete failed - if (File.Exists(lockFile.FullName)) + if (File.Exists(lockFile)) { throw new LockReleaseFailedException("failed to delete " + lockFile); } @@ -176,7 +176,7 @@ protected override void Dispose(bool disposing) public override bool IsLocked() { - return File.Exists(lockFile.FullName); + return File.Exists(lockFile); } public override string ToString() @@ -184,4 +184,4 @@ public override string ToString() return "SimpleFSLock@" + lockFile; } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net/Util/CommandLineUtil.cs b/src/Lucene.Net/Util/CommandLineUtil.cs index 2b844a7fc2..428d309a91 100644 --- a/src/Lucene.Net/Util/CommandLineUtil.cs +++ b/src/Lucene.Net/Util/CommandLineUtil.cs @@ -35,6 +35,8 @@ public static class CommandLineUtil // LUCENENET specific - made static /// The name of the class to load. /// The to be used as parameter constructor. 
/// The new instance + // LUCENENET NOTE: We do not benefit from creating a string overload here to avoid DirectoryInfo allocations, + // because the FSDirectory implementations that take string just convert it to a DirectoryInfo anyway. (#832) public static FSDirectory NewFSDirectory(string clazzName, DirectoryInfo dir) { try @@ -43,21 +45,21 @@ public static FSDirectory NewFSDirectory(string clazzName, DirectoryInfo dir) // LUCENENET: In .NET, we get a null when the class is not found, so we need to throw here for compatibility if (clazz is null) - throw new ArgumentException(typeof(FSDirectory).Name + " implementation not found: " + clazzName); + throw new ArgumentException(nameof(FSDirectory) + " implementation not found: " + clazzName); return NewFSDirectory(clazz, dir); } catch (Exception e) when (e.IsClassNotFoundException()) { - throw new ArgumentException(typeof(FSDirectory).Name + " implementation not found: " + clazzName, e); + throw new ArgumentException(nameof(FSDirectory) + " implementation not found: " + clazzName, e); } catch (Exception e) when (e.IsClassCastException()) { - throw new ArgumentException(clazzName + " is not a " + typeof(FSDirectory).Name + " implementation", e); + throw new ArgumentException(clazzName + " is not a " + nameof(FSDirectory) + " implementation", e); } catch (Exception e) when (e.IsNoSuchMethodException()) { - throw new ArgumentException(clazzName + " constructor with " + typeof(FileInfo).Name + " as parameter not found", e); + throw new ArgumentException(clazzName + " constructor with " + nameof(FileInfo) + " as parameter not found", e); } catch (Exception e) { @@ -91,7 +93,7 @@ private static string AdjustDirectoryClassName(string clazzName) { if (clazzName is null || clazzName.Trim().Length == 0) { - throw new ArgumentException("The " + typeof(FSDirectory).Name + " implementation cannot be null or empty"); + throw new ArgumentException("The " + nameof(FSDirectory) + " implementation cannot be null or empty"); } // 
LUCENENET specific: Changed to use char rather than string so we get StringComparison.Ordinal, @@ -114,6 +116,8 @@ private static string AdjustDirectoryClassName(string clazzName) /// If the class is abstract or an interface. /// If the constructor does not have public visibility. /// If the constructor throws an exception + // LUCENENET NOTE: We do not benefit from creating a string overload here to avoid DirectoryInfo allocations, + // because the FSDirectory implementations that take string just convert it to a DirectoryInfo anyway. (#832) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static FSDirectory NewFSDirectory(Type clazz, DirectoryInfo dir) { @@ -122,4 +126,4 @@ public static FSDirectory NewFSDirectory(Type clazz, DirectoryInfo dir) return (FSDirectory)Activator.CreateInstance(clazz, dir); } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net/Util/Fst/FST.cs b/src/Lucene.Net/Util/Fst/FST.cs index c10f55aefa..bf9fa9299f 100644 --- a/src/Lucene.Net/Util/Fst/FST.cs +++ b/src/Lucene.Net/Util/Fst/FST.cs @@ -567,10 +567,13 @@ public void Save(DataOutput @out) /// /// Writes an automaton to a file. /// - public void Save(FileInfo file) + /// + /// LUCENENET: This overload takes a string file name to avoid allocating a object. + /// + public void Save(string fileName) { bool success = false; - var bs = file.OpenWrite(); + var bs = new FileStream(fileName, FileMode.OpenOrCreate, FileAccess.Write, FileShare.None); try { Save(new OutputStreamDataOutput(bs)); @@ -589,6 +592,13 @@ public void Save(FileInfo file) } } + /// + /// Writes an automaton to a file. 
+ /// + /// + public void Save(FileInfo file) + => Save(file.FullName); + // LUCENENET NOTE: static Read() was moved into the FST class [MethodImpl(MethodImplOptions.AggressiveInlining)] private void WriteLabel(DataOutput @out, int v) diff --git a/src/Lucene.Net/Util/IOUtils.cs b/src/Lucene.Net/Util/IOUtils.cs index dc17cdbdd9..7dc63f65b5 100644 --- a/src/Lucene.Net/Util/IOUtils.cs +++ b/src/Lucene.Net/Util/IOUtils.cs @@ -376,6 +376,37 @@ public static TextReader GetDecodingReader(Stream stream, Encoding charSet) return new StreamReader(stream, charSet); } + /// + /// Opens a for the given using a . + /// Unlike Java's defaults this reader will throw an exception if it detects + /// the read charset doesn't match the expected . + /// + /// Decoding readers are useful to load configuration files, stopword lists or synonym files + /// to detect character set problems. However, it's not recommended to use as a common purpose + /// reader. + /// The file name to open a reader on + /// The expected charset + /// A reader to read the given file + public static TextReader GetDecodingReader(string fileName, Encoding charSet) + { + FileStream stream = null; + bool success = false; + try + { + stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read); + TextReader reader = GetDecodingReader(stream, charSet); + success = true; + return reader; + } + finally + { + if (!success) + { + IOUtils.Dispose(stream); + } + } + } + /// /// Opens a for the given using a . /// Unlike Java's defaults this reader will throw an exception if your it detects