Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

High flush latencies with SWMR in HDF5 Version 1.14.5 (and earlier) #5263

Open
gheber opened this issue Jan 23, 2025 · 0 comments
Open

High flush latencies with SWMR in HDF5 Version 1.14.5 (and earlier) #5263

gheber opened this issue Jan 23, 2025 · 0 comments
Labels
Component - C Library Core C library issues (usually in the src directory) Priority - 1. High 🔼 These are important issues that should be resolved in the next release
Milestone

Comments

@gheber
Copy link
Member

gheber commented Jan 23, 2025

Reporter: Dr. Rodrigo Castro, Spanish National Fusion Laboratory

Description:

I am reporting an issue related to flush latencies observed in HDF5 version 1.14.5 (and earlier). See below for the source code of a reproducer.

Steps to Reproduce:

  • Use the attached source code to test flush performance.
  • Run tests in an environment similar to the one described in the attachment.

Expected Behavior:

Flush operations should complete within expected latency thresholds for typical use cases, and their runtime should grow linearly with the number of datasets.

Actual Behavior:

Significant delays are observed during flush operations, and runtime grows superlinearly with the number of datasets.

Reproducer:

#include "hdf5.h"
#include "hdf5_hl.h"
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

// Chunk length along the record dimension, and number of records written per dataset in each iteration
#define CHUNK_SIZE 300
#define NUM_RECORDS 300

/*
 * One record as stored in each dataset.  The in-memory layout of this
 * struct is what the compound HDF5 datatype is built from (field offsets
 * are taken with HOFFSET), so the field order and types must not change.
 */
typedef struct
{
    double    data;       /* stored as compound member "Data" */
    long long timestamp;  /* stored as compound member "Timestamp" */
} data_t;

int main(int argc, char* argv[]) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <number_of_datasets>\n", argv[0]);
        return -1;
    }

    int num_datasets = atoi(argv[1]);
    if (num_datasets <= 0) {
        fprintf(stderr, "Invalid number of datasets: %d\n", num_datasets);
        return -1;
    }

    hid_t fid, sid, dcpl, fapl;
    hid_t* pdsets; // Dynamic array for dataset IDs
    char dname[300];
    hsize_t dims[2] = {1, 0}; // Initial dimensions
    hsize_t max_dims[2] = {1, H5S_UNLIMITED}; // Max dimensions
    hsize_t chunk_dims[2] = {1, CHUNK_SIZE}; // Chunk dimensions
    int i, k;

    // Allocate memory for dataset IDs
    pdsets = (hid_t*)malloc(num_datasets * sizeof(hid_t));
    if (!pdsets) {
        fprintf(stderr, "Memory allocation failed for dataset IDs.\n");
        return -1;
    }

	printf("Creating file\n");
	clock_t tstart,tend;
	tstart=clock();


	if((fapl = H5Pcreate(H5P_FILE_ACCESS)) < 0)
        return -1;
    H5Pset_cache(fapl, 0, 0, 0, 1);

 	// Open file
	fid = H5Fcreate("packet.h5", H5F_ACC_TRUNC | H5F_ACC_SWMR_WRITE, H5P_DEFAULT, fapl);


	//fid = H5Fcreate("packet.h5", H5F_ACC_TRUNC | H5F_ACC_SWMR_WRITE, H5P_DEFAULT, H5P_DEFAULT);
	// Create compound data type
	hid_t datatype = H5Tcreate(H5T_COMPOUND, sizeof(data_t));
	H5Tinsert(datatype, "Data", HOFFSET(data_t, data), H5T_NATIVE_DOUBLE);
	H5Tinsert(datatype, "Timestamp", HOFFSET(data_t, timestamp), H5T_NATIVE_LLONG);

    /* Create dataspace for creating datasets */
    if((sid = H5Screate_simple(2, dims, max_dims)) < 0)
        return 1;

	 /* Create dataset creation property list */
    if((dcpl = H5Pcreate(H5P_DATASET_CREATE)) < 0)
        return -1;
    if(H5Pset_chunk(dcpl, 2, chunk_dims) < 0)
        return -1;
 

	printf("Creating %d datasets\n", num_datasets);
	// Create datasets
	for (i = 0; i < num_datasets; i++) {
		sprintf(dname,"dset_%d",i);
 		if((pdsets[i] = H5Dcreate2(fid, dname, datatype, sid, H5P_DEFAULT, dcpl, H5P_DEFAULT)) < 0)
             return 1;
         //RC if(H5Dclose(pdsets[i]) < 0)
         //    return -1;
	}


	tend=clock();
	printf("File creation time = %lf secs\n", (tend - tstart) / 1e6);

	// --- Writing records ----
	hsize_t start[2] = {0, 0}, count[2] = {1, 1};
	hid_t mem_sid,file_sid;
	data_t records[NUM_RECORDS];


	dims[1]=NUM_RECORDS;
	start[1]=0;
	count[1] = NUM_RECORDS;

	for (k=0;k<1000;k++) {
		printf("Writing datasets...\n", num_datasets);
		tstart=clock();

		for (i = 0; i < num_datasets; i++) {

			sprintf(dname,"dset_%d",i);
			if((pdsets[i] = H5Dopen2(fid, dname, H5P_DEFAULT)) < 0)
	             return 1;
	 
			// --- set memory space -----	
			if((mem_sid = H5Screate_simple(2,count,NULL)) < 0)
				return -1;
			if(H5Sset_extent_simple(mem_sid, 2, count, NULL) < 0)
		             return -1;
		    if(H5Dset_extent(pdsets[i], dims) < 0)
		        return -1;
		    // --- Get the dataset's dataspace ---
		    if((file_sid = H5Dget_space(pdsets[i])) < 0)
		        return -1;
		    // --- Choose the last record in the dataset ---
		    if(H5Sselect_hyperslab(file_sid, H5S_SELECT_SET, start, NULL, count, NULL) < 0)
		        return -1;
		    // --- Write record to the dataset ---
		    if(H5Dwrite(pdsets[i], datatype, mem_sid, file_sid, H5P_DEFAULT, records) < 0)
		         return -1;

		     H5Sclose(file_sid);
		     H5Sclose(mem_sid);

		     //RC if(H5Dclose(pdsets[i]) < 0)
	         //		return -1;

		}
		

		printf("Flusing everything\n");

		H5Fflush(fid,H5F_SCOPE_GLOBAL);
		
		tend=clock();
		printf("Write data time = %lf secs\n", (tend - tstart) / 1e6);

		dims[1] += NUM_RECORDS;
		start[1] += NUM_RECORDS;

	}

	printf("Closing everything\n");

	for (i=0;i<num_datasets;i++) {
     if(H5Dclose(pdsets[i]) < 0)
        return -1;
	}
	if(H5Pclose(dcpl) < 0)
        return -1;
	if(H5Sclose(sid) < 0)
        return -1;
    if(H5Tclose(datatype) < 0)
        return -1;
    if(H5Fclose(fid) < 0)
        return -1;
	printf("After closing...\n");
	
	sleep(50);

   	return 0;
}
@gheber gheber added Component - C Library Core C library issues (usually in the src directory) Priority - 1. High 🔼 These are important issues that should be resolved in the next release labels Jan 23, 2025
@gheber gheber added this to the 2.0.0 milestone Jan 23, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Component - C Library Core C library issues (usually in the src directory) Priority - 1. High 🔼 These are important issues that should be resolved in the next release
Projects
None yet
Development

No branches or pull requests

1 participant