Skip to content

Commit

Permalink
improved performance by having lazy processing of xml metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
«ratal» committed Dec 18, 2023
1 parent 05eadaf commit 813e8c1
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 49 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "mdfr"
version = "0.4.2"
version = "0.4.3"
description = "A package for reading and writing MDF files"
authors = ["ratal <[email protected]>"]
edition = "2021"
Expand Down
12 changes: 7 additions & 5 deletions src/mdfinfo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,11 @@ impl MdfInfo {
cc: HashMap::new(),
si: HashMap::new(),
};

// Read HD block
let (hd, position) = hd4_parser(&mut rdr, &mut sharable)?;
// parse HD metadata
sharable.parse_hd_comments(hd.hd_md_comment);

// FH block
let (fh, position) = parse_fh(&mut rdr, &mut sharable, hd.hd_fh_first, position)?;
Expand All @@ -170,7 +173,6 @@ impl MdfInfo {
// Read DG Block
let (mut dg, _, n_cg, n_cn) =
parse_dg4(&mut rdr, hd.hd_dg_first, position, &mut sharable)?;
sharable.extract_xml()?; // extract TX xml tag from text

// make channel names unique, list channels and create master dictionary
let channel_names_set = build_channel_db(&mut dg, &sharable, n_cg, n_cn);
Expand Down Expand Up @@ -363,7 +365,7 @@ impl MdfInfo {
}
}
/// get comment from position
pub fn get_comments(&self, position: i64) -> Option<HashMap<String, String>> {
pub fn get_comments(&mut self, position: i64) -> Option<HashMap<String, String>> {
match self {
MdfInfo::V3(_mdfinfo3) => None,
MdfInfo::V4(mdfinfo4) => Some(mdfinfo4.sharable.get_comments(position)),
Expand All @@ -377,7 +379,7 @@ impl MdfInfo {
}
}
/// list attachments
pub fn list_attachments(&self) -> String {
pub fn list_attachments(&mut self) -> String {
match self {
MdfInfo::V3(_) => "".to_string(),
MdfInfo::V4(mdfinfo4) => mdfinfo4.list_attachments(),
Expand Down Expand Up @@ -405,7 +407,7 @@ impl MdfInfo {
}
}
/// list events
pub fn list_events(&self) -> String {
pub fn list_events(&mut self) -> String {
match self {
MdfInfo::V3(_) => "".to_string(),
MdfInfo::V4(mdfinfo4) => mdfinfo4.list_events(),
Expand Down Expand Up @@ -462,7 +464,7 @@ impl fmt::Display for MdfInfo {
writeln!(f, "{}\n", mdfinfo4.hd_block)?;
let comments = &mdfinfo4
.sharable
.get_comments(mdfinfo4.hd_block.hd_md_comment);
.get_hd_comments(mdfinfo4.hd_block.hd_md_comment);
for c in comments.iter() {
writeln!(f, "{} {}", c.0, c.1)?;
}
Expand Down
75 changes: 47 additions & 28 deletions src/mdfinfo/mdfinfo4.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Parsing of file metadata into MdfInfo4 struct
use crate::mdfreader::channel_data::Order;
use crate::mdfreader::{DataSignature, MasterSignature};
use anyhow::{anyhow, Context, Error, Result};
use anyhow::{anyhow, Context, Result};
use arrow2::bitmap::MutableBitmap;
use binrw::{binrw, BinReaderExt, BinWriterExt};
use byteorder::{LittleEndian, ReadBytesExt};
Expand All @@ -26,6 +26,7 @@ use crate::mdfreader::channel_data::{data_type_init, ChannelData};

use super::sym_buf_reader::SymBufReader;

/// ChannelId : (Option<master_channelname>, dg_pos, (cg_pos, rec_id), (cn_pos, rec_pos))
pub(crate) type ChannelId = (Option<String>, i64, (i64, u64), (i64, i32));
pub(crate) type ChannelNamesSet = HashMap<String, ChannelId>;

Expand Down Expand Up @@ -487,7 +488,7 @@ impl MdfInfo4 {
}
}
/// list attachments
pub fn list_attachments(&self) -> String {
pub fn list_attachments(&mut self) -> String {
let mut output = String::new();
for (key, (block, _embedded_data)) in self.at.iter() {
output.push_str(&format!(
Expand Down Expand Up @@ -571,7 +572,7 @@ impl MdfInfo4 {
output
}
/// list events
pub fn list_events(&self) -> String {
pub fn list_events(&mut self) -> String {
let mut output = String::new();
for (key, block) in self.ev.iter() {
output.push_str(&format!(
Expand Down Expand Up @@ -614,7 +615,7 @@ impl fmt::Display for MdfInfo4 {
writeln!(f, "MdfInfo4: {}", self.file_name)?;
writeln!(f, "Version : {}\n", self.id_block.id_ver)?;
writeln!(f, "{}\n", self.hd_block)?;
let comments = &self.sharable.get_comments(self.hd_block.hd_md_comment);
let comments = &self.sharable.get_hd_comments(self.hd_block.hd_md_comment);
for c in comments.iter() {
writeln!(f, "{} {}\n", c.0, c.1)?;
}
Expand Down Expand Up @@ -1667,25 +1668,43 @@ impl SharableBlocks {
}
/// Returns metadata from MD Block
/// keys are tag and related value text of tag
pub fn get_comments(&self, position: i64) -> HashMap<String, String> {
pub fn get_comments(&mut self, position: i64) -> HashMap<String, String> {
let mut comments: HashMap<String, String> = HashMap::new();
if let Some(md) = self.md_tx.get_mut(&position) {
match md.block_type {
MetaDataBlockType::MdParsed => {
comments = md.comments.clone();
}
MetaDataBlockType::MdBlock => {
// not yet parsed, so let's parse it
let _ = md.parse_xml();
comments = md.comments.clone();
}
MetaDataBlockType::TX => {
// should not happen
}
}
};
comments
}
/// Returns metadata from MD Block linked by HD Block
/// keys are tag and related value text of tag
pub fn get_hd_comments(&self, position: i64) -> HashMap<String, String> {
// this method assumes the xml was already parsed
let mut comments: HashMap<String, String> = HashMap::new();
if let Some(md) = self.md_tx.get(&position) {
if let MetaDataBlockType::MdParsed = md.block_type {
if md.block_type == MetaDataBlockType::MdParsed {
comments = md.comments.clone();
}
};
comments
}
/// Extract metadata from raw xml strings in parallel
pub fn extract_xml(&mut self) -> Result<()> {
self.md_tx
.par_iter_mut()
.filter(|(_k, v)| v.block_type == MetaDataBlockType::MdBlock)
.try_for_each(|(_k, val)| -> Result<(), Error> {
val.parse_xml()?;
Ok(())
})?;
Ok(())
/// parses the HD Block metadata comments
/// done right after reading HD block
pub fn parse_hd_comments(&mut self, position: i64) {
if let Some(md) = self.md_tx.get_mut(&position) {
let _ = md.parse_hd_xml();
};
}
/// Create new Shared Block
pub fn new(n_channels: usize) -> SharableBlocks {
Expand Down Expand Up @@ -3072,12 +3091,12 @@ pub fn build_channel_db(
let mut channel_list: ChannelNamesSet = HashMap::with_capacity(n_cn);
let mut master_channel_list: HashMap<i64, String> = HashMap::with_capacity(n_cg);
// creating channel list for whole file and making channel names unique
for (dg_position, dg) in dg.iter_mut() {
for (record_id, cg) in dg.cg.iter_mut() {
dg.iter_mut().for_each(|(dg_position, dg)| {
dg.cg.iter_mut().for_each(|(record_id, cg)| {
let gn = cg.get_cg_name(sharable);
let gs = cg.get_cg_source_name(sharable);
let gp = cg.get_cg_source_path(sharable);
for (cn_record_position, cn) in cg.cn.iter_mut() {
cg.cn.iter_mut().for_each(|(cn_record_position, cn)| {
if channel_list.contains_key(&cn.unique_name) {
let mut changed: bool = false;
let space_char = String::from(" ");
Expand Down Expand Up @@ -3127,13 +3146,13 @@ pub fn build_channel_db(
// Master channel
master_channel_list.insert(cg.block_position, cn.unique_name.clone());
}
}
}
}
});
});
});
// identifying master channels
let avg_ncn_per_cg = n_cn / n_cg;
for (_dg_position, dg) in dg.iter_mut() {
for (_record_id, cg) in dg.cg.iter_mut() {
dg.iter_mut().for_each(|(_dg_position, dg)| {
dg.cg.iter_mut().for_each(|(_record_id, cg)| {
let mut cg_channel_list: HashSet<String> = HashSet::with_capacity(avg_ncn_per_cg);
let mut master_channel_name: Option<String> = None;
if let Some(name) = master_channel_list.get(&cg.block_position) {
Expand All @@ -3144,17 +3163,17 @@ pub fn build_channel_db(
master_channel_name = Some(name.to_string());
}
}
for (_cn_record_position, cn) in cg.cn.iter_mut() {
cg.cn.iter_mut().for_each(|(_cn_record_position, cn)| {
cg_channel_list.insert(cn.unique_name.clone());
// assigns master in channel_list
if let Some(id) = channel_list.get_mut(&cn.unique_name) {
id.0 = master_channel_name.clone();
}
}
});
cg.channel_names = cg_channel_list;
cg.master_channel_name = master_channel_name;
}
}
});
});
channel_list
}

Expand Down
28 changes: 14 additions & 14 deletions src/mdfr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -275,12 +275,12 @@ df=polars.DataFrame(series)
mdf.set_channel_desc(channel_name, desc);
}
/// list attachments
pub fn list_attachments(&self) -> PyResult<String> {
pub fn list_attachments(&mut self) -> PyResult<String> {
let Mdfr(mdf) = self;
Ok(mdf.mdf_info.list_attachments())
}
/// get attachment blocks
pub fn get_attachment_blocks(&self) -> Py<PyAny> {
pub fn get_attachment_blocks(&mut self) -> Py<PyAny> {
let Mdfr(mdf) = self;
let atbs = mdf.mdf_info.get_attachement_blocks();
pyo3::Python::with_gil(|py| {
Expand Down Expand Up @@ -319,12 +319,12 @@ df=polars.DataFrame(series)
})
}
/// list events
pub fn list_events(&self) -> PyResult<String> {
pub fn list_events(&mut self) -> PyResult<String> {
let Mdfr(mdf) = self;
Ok(mdf.mdf_info.list_events())
}
/// get event blocks
pub fn get_event_blocks(&self) -> Py<PyAny> {
pub fn get_event_blocks(&mut self) -> Py<PyAny> {
let Mdfr(mdf) = self;
let evbs = mdf.mdf_info.get_event_blocks();
pyo3::Python::with_gil(|py| {
Expand All @@ -349,7 +349,7 @@ df=polars.DataFrame(series)
})
}
/// get file history
pub fn get_file_history_blocks(&self) -> Py<PyAny> {
pub fn get_file_history_blocks(&mut self) -> Py<PyAny> {
let Mdfr(mdf) = self;
let fhbs = mdf.mdf_info.get_file_history_blocks();
pyo3::Python::with_gil(|py| {
Expand Down Expand Up @@ -446,7 +446,7 @@ pyplot.show()
}
fn __repr__(&mut self) -> PyResult<String> {
let mut output: String;
match &self.0.mdf_info {
match &mut self.0.mdf_info {
MdfInfo::V3(mdfinfo3) => {
output = format!("Version : {}\n", mdfinfo3.id_block.id_ver);
writeln!(
Expand Down Expand Up @@ -477,8 +477,8 @@ pyplot.show()
.expect("cannot print thre is no master channel");
}
for channel in list.iter() {
let unit = self.get_channel_unit(channel.to_string());
let desc = self.get_channel_desc(channel.to_string());
let unit = self.get_channel_unit(channel.to_string())?;
let desc = self.get_channel_desc(channel.to_string())?;
write!(output, " {channel} ").expect("cannot print channel name");
if let Some(data) = self.0.get_channel_data(channel) {
if !data.is_empty() {
Expand All @@ -490,10 +490,10 @@ pyplot.show()
}
writeln!(
output,
" {unit:?} {desc:?} "
" {unit} {desc} "
).expect("cannot print channel unit and description with first and last item");
} else {
writeln!(output, " {unit:?} {desc:?} ")
writeln!(output, " {unit} {desc} ")
.expect("cannot print channel unit and description");
}
}
Expand All @@ -518,8 +518,8 @@ pyplot.show()
.expect("cannot print thre is no master channel");
}
for channel in list.iter() {
let unit = self.get_channel_unit(channel.to_string());
let desc = self.get_channel_desc(channel.to_string());
let unit = self.get_channel_unit(channel.to_string())?;
let desc = self.get_channel_desc(channel.to_string())?;
write!(output, " {channel} ").expect("cannot print channel name");
if let Some(data) = self.0.get_channel_data(channel) {
if !data.is_empty() {
Expand All @@ -532,10 +532,10 @@ pyplot.show()
}
writeln!(
output,
" {unit:?} {desc:?} "
" {unit} {desc} "
).expect("cannot print channel unit and description with first and last item");
} else {
writeln!(output, " {unit:?} {desc:?} ")
writeln!(output, " {unit} {desc} ")
.expect("cannot print channel unit and description");
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/mdfreader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ impl fmt::Display for Mdf {
writeln!(f, "{}\n", mdfinfo4.hd_block)?;
let comments = &mdfinfo4
.sharable
.get_comments(mdfinfo4.hd_block.hd_md_comment);
.get_hd_comments(mdfinfo4.hd_block.hd_md_comment);
for c in comments.iter() {
writeln!(f, "{} {}", c.0, c.1)?;
}
Expand Down

0 comments on commit 813e8c1

Please sign in to comment.