-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathpublications.bib
452 lines (421 loc) · 48.8 KB
/
publications.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
@inproceedings{verborgh_ldow_2014,
  author    = {Verborgh, Ruben and Vander Sande, Miel and Colpaert, Pieter and Coppens, Sam and Mannens, Erik and Van de Walle, Rik},
  title     = {Web-Scale Querying through {Linked Data Fragments}},
  booktitle = {Proceedings of the 7th Workshop on Linked Data on the Web},
  series    = {CEUR Workshop Proceedings},
  volume    = 1184,
  year      = 2014,
  month     = apr,
  issn      = {1613-0073},
  url       = {http://ceur-ws.org/Vol-1184/ldow2014_paper_04.pdf},
  abstract  = {To unlock the full potential of Linked Data sources, we need flexible ways to query them. Public SPARQL endpoints aim to fulfill that need, but their availability is notoriously problematic. We therefore introduce Linked Data Fragments, a publishing method that allows efficient offloading of query execution from servers to clients through a lightweight partitioning strategy. It enables servers to maintain availability rates as high as any regular HTTP server, allowing querying to scale reliably to much larger numbers of clients. This paper explains the core concepts behind Linked Data Fragments and experimentally verifies their Web-level scalability, at the cost of increased query times. We show how trading server-side query execution for inexpensive data resources with relevant affordances enables a new generation of intelligent clients.},
}
@inproceedings{verborgh_iswc_2014,
  author    = {Verborgh, Ruben and Hartig, Olaf and De Meester, Ben and Haesendonck, Gerald and De Vocht, Laurens and Vander Sande, Miel and Cyganiak, Richard and Colpaert, Pieter and Mannens, Erik and Van de Walle, Rik},
  title     = {Querying Datasets on the {Web} with High Availability},
  booktitle = {Proceedings of the 13th International Semantic Web Conference},
  editor    = {Mika, Peter and Tudorache, Tania and Bernstein, Abraham and Welty, Chris and Knoblock, Craig and Vrande{\v{c}}i{\'c}, Denny and Groth, Paul and Noy, Natasha and Janowicz, Krzysztof and Goble, Carole},
  series    = {Lecture Notes in Computer Science},
  volume    = 8796,
  pages     = {180--196},
  publisher = {Springer},
  year      = 2014,
  month     = oct,
  isbn      = {978-3-319-11963-2},
  doi       = {10.1007/978-3-319-11964-9_12},
  url       = {http://linkeddatafragments.org/publications/iswc2014.pdf},
  abstract  = {As the Web of Data is growing at an ever increasing speed, the lack of reliable query solutions for live public data becomes apparent. SPARQL implementations have matured and deliver impressive performance for public SPARQL endpoints, but poor availability—especially under high loads—prevents their use in real-world applications. We propose to tackle this availability problem with basic Linked Data Fragments, a concept and related techniques to publish and consume queryable data by moving intelligence from the server to the client. This paper formalizes the concept, introduces a client-side query processing algorithm using a dynamic iterator pipeline, and verifies its availability under load. The results indicate that, at the cost of lower performance, query techniques with basic Linked Data Fragments lead to high availability, thereby allowing for reliable applications on top of public, queryable Linked Data.},
}
@inproceedings{verborgh_iswc_demo_2014,
  author    = {Verborgh, Ruben and Hartig, Olaf and De Meester, Ben and Haesendonck, Gerald and De Vocht, Laurens and Vander Sande, Miel and Cyganiak, Richard and Colpaert, Pieter and Mannens, Erik and Van de Walle, Rik},
  title     = {Low-Cost Queryable {Linked Data} through {Triple Pattern Fragments}},
  booktitle = {Proceedings of the 13th International Semantic Web Conference: Posters and Demos},
  series    = {CEUR Workshop Proceedings},
  volume    = 1272,
  pages     = {13--16},
  year      = 2014,
  month     = oct,
  issn      = {1613-0073},
  url       = {http://ceur-ws.org/Vol-1272/paper_10.pdf},
  kind      = {demo},
  abstract  = {For publishers of Linked Open Data, providing queryable access to their dataset is costly. Those that offer a public SPARQL end-point often have to sacrifice high availability; others merely provide non-queryable means of access such as data dumps. We have developed a client-side query execution approach for which servers only need to provide a lightweight triple-pattern-based interface, enabling queryable access at low cost. This paper describes the implementation of a client that can evaluate SPARQL queries over such triple pattern fragments of a Linked Data dataset. Graph patterns of SPARQL queries can be solved efficiently by using metadata in server responses. The demonstration consists of SPARQL client for triple pattern fragments that can run as a standalone application, browser application, or library.},
}
@inproceedings{vanherwegen_eswc_2015,
  author    = {Van Herwegen, Joachim and Verborgh, Ruben and Mannens, Erik and Van de Walle, Rik},
  title     = {Query Execution Optimization for Clients of {Triple Pattern Fragments}},
  booktitle = {Proceedings of the 12th Extended Semantic Web Conference},
  editor    = {Gandon, Fabien and Sabou, Marta and Sack, Harald and d'Amato, Claudia and Cudr{\'e}-Mauroux, Philippe and Zimmermann, Antoine},
  series    = {Lecture Notes in Computer Science},
  volume    = 9088,
  pages     = {302--318},
  publisher = {Springer},
  year      = 2015,
  month     = may,
  url       = {http://linkeddatafragments.org/publications/eswc2015.pdf},
  abstract  = {In order to reduce the server-side cost of publishing queryable Linked Data, Triple Pattern Fragments (TPF) were introduced as a simple interface to RDF triples. They allow for SPARQL query execution at low server cost, by partially shifting the load from servers to clients. The previously proposed query execution algorithm uses more http requests than necessary, and only makes partial use of the available metadata. In this paper, we propose a new query execution algorithm for a client communicating with a TPF server. In contrast to a greedy solution, we maintain an overview of the entire query to find the optimal steps for solving a given query. We show multiple cases in which our algorithm reaches solutions with far fewer http requests, without significantly increasing the cost in other cases. This improves the efficiency of common SPARQL queries against TPF interfaces, augmenting their viability compared to the more powerful, but more costly, SPARQL interface.},
}
@inproceedings{vanherwegen_eswc_demo_2015,
  author    = {Van Herwegen, Joachim and Verborgh, Ruben and Mannens, Erik and Van de Walle, Rik},
  title     = {Interactive Comparison of {Triple Pattern Fragments} Query Approaches},
  booktitle = {Proceedings of the 12th Extended Semantic Web Conference: Posters and Demos},
  editor    = {Gandon, Fabien and Gu{\'e}ret, Christophe and Villata, Serena and Breslin, John and Faron-Zucker, Catherine and Zimmermann, Antoine},
  series    = {Lecture Notes in Computer Science},
  volume    = 9341,
  pages     = {165--168},
  publisher = {Springer},
  year      = 2015,
  month     = jun,
  isbn      = {978-3-319-25639-9},
  doi       = {10.1007/978-3-319-25639-9_32},
  url       = {http://dx.doi.org/10.1007/978-3-319-25639-9_32},
  kind      = {demo},
  abstract  = {In order to reduce the server-side cost of publishing queryable Linked Data, Triple Pattern Fragments (TPF) were introduced as a simple interface to RDF triples. They allow for SPARQL query execution at low server cost, by partially shifting the load from servers to clients. The previously proposed query execution algorithm provides a solution that is highly inefficient, often requiring an amount of http calls that is magnitudes larger than the optimal solution. We have proposed a new query execution algorithm with the aim to solve this problem. Our solution significantly improves on the current work by maintaining a complete overview of the query instead of just looking at local optima. In this paper, we describe a demo that allows a user to easily compare the results of both implementations. We show both query results and number of executed http calls, proving a clear picture of the difference between the two algorithms.},
}
@inproceedings{rietveld_eswc_2015,
  author    = {Rietveld, Laurens and Verborgh, Ruben and Beek, Wouter and Vander Sande, Miel and Schlobach, Stefan},
  title     = {{Linked Data-as-a-Service:} The Semantic Web Redeployed},
  booktitle = {The Semantic Web. Latest Advances and New Domains},
  editor    = {Gandon, Fabien and Sabou, Marta and Sack, Harald and d'Amato, Claudia and Cudr{\'e}-Mauroux, Philippe and Zimmermann, Antoine},
  series    = {Lecture Notes in Computer Science},
  volume    = 9088,
  pages     = {471--487},
  publisher = {Springer},
  year      = 2015,
  month     = may,
  url       = {http://linkeddatafragments.org/publications/eswc2015-lodl.pdf},
  abstract  = {Ad-hoc querying is crucial to access information from Linked Data, yet publishing queryable RDF datasets on the Web is not a trivial exercise. The most compelling argument to support this claim is that the Web contains hundreds of thousands of data documents, while only 260 queryable SPARQL endpoints are provided. Even worse, the SPARQL endpoints we do have are often unstable, may not comply with the standards, and may differ in supported features. In other words, hosting data online is easy, but publishing Linked Data via a queryable API such as SPARQL appears to be too difficult. As a consequence, in practice, there is no single uniform way to query the LOD Cloud today. In this paper, we therefore combine a large-scale Linked Data publication project (LOD Laundromat) with a low-cost server-side interface (Triple Pattern Fragments), in order to bridge the gap between the Web of downloadable data documents and the Web of live queryable data. The result is a repeatable, low-cost, open-source data publication process. To demonstrate its applicability, we made over 650,000 data documents available as data APIs, consisting of 30 billion triples.},
}
@inproceedings{verborgh_usewod_2015,
  author    = {Verborgh, Ruben and Mannens, Erik and Van de Walle, Rik},
  title     = {Initial Usage Analysis of {DBpedia's Triple Pattern Fragments}},
  booktitle = {Proceedings of the 5th USEWOD Workshop on Usage Analysis and the Web of Data},
  year      = 2015,
  month     = jun,
  url       = {http://linkeddatafragments.org/publications/usewod2015.pdf},
  topic     = {shows that the Triple Pattern Fragments edition of DBpedia had 99.999\% availability during its first 4 months},
  abstract  = {Queryable Linked Data is available through several interfaces, including SPARQL endpoints and Linked Data documents. Recently, the popular DBpedia dataset was made available through a Triple Pattern Fragments interface, which proposes to improve query availability by dividing query execution between clients and servers. In this paper, we present an initial usage analysis of this interface so far. In 4 months time, the server had an availability of 99.999\%, handling 4,455,813 requests, more than a quarter of which were served from cache. These numbers provide promising evidence that Triple Pattern Fragments are a viable strategy for live applications on top of public queryable datasets.},
}
@inproceedings{verborgh_eswc_2015,
  author    = {Verborgh, Ruben},
  title     = {{DBpedia's Triple Pattern Fragments:} Usage Patterns and Insights},
  booktitle = {The Semantic Web: ESWC 2015 Satellite Events},
  editor    = {Gandon, Fabien and Gu{\'e}ret, Christophe and Villata, Serena and Breslin, John and Faron-Zucker, Catherine and Zimmermann, Antoine},
  series    = {Lecture Notes in Computer Science},
  volume    = 9341,
  pages     = {431--442},
  publisher = {Springer},
  year      = 2015,
  month     = jun,
  isbn      = {978-3-319-25638-2},
  doi       = {10.1007/978-3-319-25639-9_54},
  url       = {http://linkeddatafragments.org/publications/eswc2015-workshops.pdf},
  topic     = {shows that the DBpedia Triple Pattern Fragments was used 16.8 million times with 99.99\% availability during its first 9 months},
  abstract  = {Queryable Linked Data is published through several interfaces, including SPARQL endpoints and Linked Data documents. In October 2014, the DBpedia Association announced an official Triple Pattern Fragments interface to its popular DBpedia dataset. This interface proposes to improve the availability of live queryable data by dividing query execution between clients and servers. In this paper, we present a usage analysis between November 2014 and July 2015. In 9 months time, the interface had an average availability of 99.99\%, handling 16,776,170 requests, 43.0\% of which were served from cache. These numbers provide promising evidence that low-cost Triple Pattern Fragments interfaces provide a viable strategy for live applications on top of public, queryable datasets.},
}
@inproceedings{hochstenbach_elag_2015,
  author    = {Hochstenbach, Patrick and Verborgh, Ruben},
  title     = {Scaling data streams with {Catmandu} and {Linked Data Fragments}},
  booktitle = {Proceedings of the European Libraries Automation Group Conference},
  year      = 2015,
  month     = jun,
  url       = {http://elag2015.org/program/scaling-data-streams-with-catmandu-and-linked-data-fragments/},
  abstract  = {In this talk we’ll explore Catmandu and Linked Data Fragments and how they can cooperate to build an environment for data stream processing at large.},
}
@inproceedings{vandersande_iswc_2015,
  author    = {Vander Sande, Miel and Verborgh, Ruben and Van Herwegen, Joachim and Mannens, Erik and Van de Walle, Rik},
  title     = {Opportunistic {Linked Data} Querying through Approximate Membership Metadata},
  booktitle = {The Semantic Web -- ISWC 2015},
  editor    = {Arenas, Marcelo and Corcho, Oscar and Simperl, Elena and Strohmaier, Markus and d'Aquin, Mathieu and Srinivas, Kavitha and Groth, Paul and Dumontier, Michel and Heflin, Jeff and Thirunarayan, Krishnaprasad and Staab, Steffen},
  series    = {Lecture Notes in Computer Science},
  volume    = 9366,
  pages     = {92--110},
  publisher = {Springer},
  year      = 2015,
  month     = oct,
  url       = {http://linkeddatafragments.org/publications/iswc2015-amf.pdf},
  topic     = {combines Triple Pattern Fragments with a metadata feature to reduce the number of requests and achieve full precision earlier},
  abstract  = {Between URI dereferencing and the SPARQL protocol lies a largely unexplored axis of possible interfaces to Linked Data, each of which comes with its own combination of trade-offs. One of these interfaces is Triple Pattern Fragments, which allows clients to execute SPARQL queries against low-cost servers, at the cost of higher bandwidth. To increase a client's efficiency, we need to lower the number of requests, and one of the means for this is the incorporation of additional metadata in responses. We analyzed typical SPARQL query evaluations against Triple Pattern Fragments, and noted that a significant portion of requests consists of membership subqueries, which check the presence of a specific triple rather than a variable pattern. In this paper, we therefore study the impact of adding approximate membership functions, i.e., Bloom filters and Golomb-coded sets, as extra metadata. In addition to reducing http requests, such functions allow to achieve full result recall earlier when temporarily allowing lower precision. Half of the tested queries a WatDiv benchmark test set could be executed with up to a third fewer http requests with only marginally higher server cost. Query times, however, did not improve, likely due to slower generation time and transfer time. This indicates that approximate membership functions can partly improve the client-side query process with minimal impact on the server and its interface.},
}
@inproceedings{vanherwegen_iswc_2015,
  author    = {Van Herwegen, Joachim and De Vocht, Laurens and Verborgh, Ruben and Mannens, Erik and Van de Walle, Rik},
  title     = {Substring Filtering for Low-Cost {Linked Data} Interfaces},
  booktitle = {The Semantic Web -- ISWC 2015},
  editor    = {Arenas, Marcelo and Corcho, Oscar and Simperl, Elena and Strohmaier, Markus and d'Aquin, Mathieu and Srinivas, Kavitha and Groth, Paul and Dumontier, Michel and Heflin, Jeff and Thirunarayan, Krishnaprasad and Staab, Steffen},
  series    = {Lecture Notes in Computer Science},
  volume    = 9366,
  pages     = {128--143},
  publisher = {Springer},
  year      = 2015,
  month     = oct,
  url       = {http://linkeddatafragments.org/publications/iswc2015-substring.pdf},
  topic     = {defines a substring matching feature to evaluate FILTER queries faster},
  abstract  = {Recently, Triple Pattern Fragments (TPFs) were introduced as an alternative to reduce server load when high numbers of clients need to evaluate SPARQL queries. This is achieved by moving part of the query execution to the client, at the cost of elevated query times. Since the TPF interface purposely does not support complex constructs such as SPARQL filters, queries that use them need to be executed mostly on the client, resulting in long execution times. We therefore investigated the impact of adding literal substring matching to the interface, with the goal of improving query performance while maintaining low server cost. In this paper, we discuss the client/server setup and compare performance of SPARQL queries on multiple implementations with existing solutions, including Elastic Search and case-insensitive FM-index. Our evaluations indicate that these improvements allow for faster query execution without significantly increasing the load on the server. Offering these additions on TPF servers allows users to obtain faster responses for filter-based SPARQL queries. Furthermore, substring matching can be used to support other filters such as complete regular expressions or range queries.},
}
@inproceedings{acosta_iswc_2015,
  author    = {Acosta, Maribel and Vidal, Maria-Esther},
  title     = {Networks of {Linked Data} Eddies: An Adaptive Web Query Processing Engine for {RDF} Data},
  booktitle = {The Semantic Web -- ISWC 2015},
  editor    = {Arenas, Marcelo and Corcho, Oscar and Simperl, Elena and Strohmaier, Markus and d'Aquin, Mathieu and Srinivas, Kavitha and Groth, Paul and Dumontier, Michel and Heflin, Jeff and Thirunarayan, Krishnaprasad and Staab, Steffen},
  series    = {Lecture Notes in Computer Science},
  volume    = 9366,
  pages     = {111--127},
  publisher = {Springer},
  year      = 2015,
  month     = oct,
  url       = {http://iswc2015.semanticweb.org/sites/iswc2015.semanticweb.org/files/93660097.pdf},
  abstract  = {Client-side query processing techniques that rely on the materialization of fragments of the original RDF dataset provide a promising solution for Web query processing. However, because of unexpected data transfers, the traditional optimize-then-execute paradigm, used by existing approaches, is not always applicable in this context, i.e., performance of client-side execution plans can be negatively affected by live conditions where rate at which data arrive from sources changes. We tackle adaptivity for client-side query processing, and present a network of Linked Data Eddies that is able to adjust query execution schedulers to data availability and runtime conditions. Experimental studies suggest that the network of Linked Data Eddies outperforms static Web query schedulers in scenarios with unpredictable transfer delays and data distributions.},
}
@inproceedings{colpaert_iswc_2015,
  author    = {Colpaert, Pieter and Llaves, Alejandro and Verborgh, Ruben and Corcho, Oscar and Mannens, Erik and Van de Walle, Rik},
  title     = {Intermodal public transit routing using {Linked Connections}},
  booktitle = {Proceedings of the 14th International Semantic Web Conference: Posters and Demos},
  series    = {CEUR Workshop Proceedings},
  volume    = 1486,
  year      = 2015,
  month     = oct,
  issn      = {1613-0073},
  url       = {http://ceur-ws.org/Vol-1486/paper_28.pdf},
  kind      = {demo},
  abstract  = {Ever since public transit agencies have found their way to the Web, they inform travelers using route planning software made available on their website. These travelers also need to be informed about other modes of transport, for which they have to consult other websites, or for which they have to ask the transit agency's server maintainer to implement new functionalities. In this demo, we introduce an affordable publishing method for transit data, called Linked Connections, that can be used for intermodal route planning, by allowing user agents to execute the route planning algorithm. We publish paged documents containing a stream of hops between transit stops sorted by departure time. Using these documents, clients are able to perform intermodal route planning in a reasonable time. Furthermore, such clients are fully in charge of the algorithm, and can now also route in different ways by integrating datasets of a user’s choice. When visiting our demo, conference attendees will be able to calculate intermodal routes by querying the Web of data using their phone’s browser, without expensive server infrastructure.},
}
@inproceedings{devocht_iswc_2015,
  author    = {De Vocht, Laurens and Vander Sande, Miel and Van Herwegen, Joachim and Verborgh, Ruben and Mannens, Erik and Van de Walle, Rik},
  title     = {The Highway to Queryable {Linked Data:} Self-Describing {Web APIs} with Varying Features},
  booktitle = {Proceedings of the 14th International Semantic Web Conference: Posters and Demos},
  series    = {CEUR Workshop Proceedings},
  volume    = 1486,
  year      = 2015,
  month     = oct,
  issn      = {1613-0073},
  url       = {http://ceur-ws.org/Vol-1486/paper_57.pdf},
  kind      = {demo},
  abstract  = {Making Linked Data queryable on the Web is not an easy task for publishers, for technical and logistical reasons. Can they afford to offer a SPARQL endpoint, or should they offer an API or data dump instead? And what technical knowledge is needed for that? This demo presents a user-friendly pipeline to compose APIs for Linked Datasets, consisting of a customizable set of reusable features, e.g., Triple Pattern Fragments, substring search, membership metadata, etc. These APIs indicate their supported features in hypermedia responses, so that clients can discover which server-provided functionality they understand, and divide the evaluation of SPARQL queries accordingly between client and server. That way, publishers can determine the complexity of the resulting API, and thus the maximal set of server tasks. This demo shows how publishers can easily set up an API with this pipeline, and demonstrates the client-side execution of federated SPARQL queries against such APIs.},
}
@article{vandersande_ijswis_2016,
  author   = {Vander Sande, Miel and Verborgh, Ruben and Dimou, Anastasia and Colpaert, Pieter and Mannens, Erik},
  title    = {Hypermedia-based Discovery for Source Selection using Low-Cost {Linked Data} Interfaces},
  journal  = {International Journal on Semantic Web and Information Systems},
  volume   = 12,
  number   = 3,
  pages    = {79--110},
  year     = 2016,
  url      = {http://www.igi-global.com/article/hypermedia-based-discovery-for-source-selection-using-low-cost-linked-data-interfaces/160173},
  abstract = {Evaluating federated Linked Data queries requires consulting multiple sources on the Web. Before a client can execute queries, it must discover data sources, and determine which ones are relevant. Federated query execution research focuses on the actual execution, while data source discovery is often marginally discussed—even though it has a strong impact on selecting sources that contribute to the query results. Therefore, we introduce a discovery approach for Linked Data interfaces based on hypermedia links and controls, and apply it to federated query execution with Triple Pattern Fragments. In addition, we identify quantitative metrics to evaluate this discovery approach. This article describes generic evaluation measures and results for our concrete approach. With low-cost data summaries as seed, interfaces to eight large real-world datasets can discover each other within 7 minutes. Hypermedia-based client-side querying shows a promising gain of up to 50\% in execution time, but demands algorithms that visit a higher number of interfaces to improve result completeness. With these findings, we conclude that using hypermedia for interface discovery brings us closer to a queryable global dataspace. However, clients require more intelligent methods to effectively consume such space.},
}
@article{verborgh_jws_2016,
  author   = {Verborgh, Ruben and Vander Sande, Miel and Hartig, Olaf and Van Herwegen, Joachim and De Vocht, Laurens and De Meester, Ben and Haesendonck, Gerald and Colpaert, Pieter},
  title    = {{Triple Pattern Fragments:} a Low-cost Knowledge Graph Interface for the {Web}},
  journal  = {Journal of Web Semantics},
  volume   = {37--38},
  pages    = {184--206},
  year     = 2016,
  month    = mar,
  issn     = {1570-8268},
  doi      = {10.1016/j.websem.2016.03.003},
  url      = {http://linkeddatafragments.org/publications/jws2016.pdf},
  topic    = {explains Linked Data Fragments, Triple Pattern Fragments, and experimentally verifies their performance in single-source and federated scenarios},
  abstract = {Billions of Linked Data triples exist in thousands of RDF knowledge graphs on the Web, but few of those graphs can be queried live from Web applications. Only a limited number of knowledge graphs are available in a queryable interface, and existing interfaces can be expensive to host at high availability. To mitigate this shortage of live queryable Linked Data, we designed a low-cost Triple Pattern Fragments interface for servers, and a client-side algorithm that evaluates SPARQL queries against this interface. This article describes the Linked Data Fragments framework to analyze Web interfaces to Linked Data and uses this framework as a basis to define Triple Pattern Fragments. We describe client-side querying for single knowledge graphs and federations thereof. Our evaluation verifies that this technique reduces server load and increases caching effectiveness, which leads to lower costs to maintain high server availability. These benefits come at the expense of increased bandwidth and slower, but more stable query execution times. These results substantiate the claim that lightweight interfaces can lower the cost for knowledge publishers compared to more expressive endpoints, while enabling applications to query the publishers' data with the necessary reliability.},
}
@inproceedings{folz_eswc_2016,
  author    = {Folz, Pauline and Skaf-Molli, Hala and Molli, Pascal},
  title     = {{CyCLaDEs:} A Decentralized Cache for {Triple Pattern Fragments}},
  booktitle = {Proceedings of the 13th Extended Semantic Web Conference},
  editor    = {Sack, Harald and Blomqvist, Eva and d'Aquin, Mathieu and Ghidini, Chiara and Ponzetto, Simone Paolo and Lange, Christoph},
  pages     = {455--469},
  publisher = {Springer},
  year      = 2016,
  isbn      = {978-3-319-34129-3},
  doi       = {10.1007/978-3-319-34129-3_28},
  url       = {http://dx.doi.org/10.1007/978-3-319-34129-3_28},
}
@inproceedings{taelman_mepdaw_bp_2016,
  author    = {Taelman, Ruben and Verborgh, Ruben and Colpaert, Pieter and Mannens, Erik},
  title     = {Continuous Client-Side Query Evaluation over Dynamic {Linked Data}},
  booktitle = {The Semantic Web: ESWC 2016 Satellite Events, Heraklion, Crete, Greece, May 29 -- June 2, 2016, Revised Selected Papers},
  pages     = {273--289},
  publisher = {Springer International Publishing},
  year      = 2016,
  month     = may,
  url       = {http://rubensworks.net/raw/publications/2016/Continuous_Client-Side_Query_Evaluation_over_Dynamic_Linked_Data.pdf},
  abstract  = {
Existing solutions to query dynamic Linked Data sources extend the SPARQL language, and require continuous server processing for each query.
Traditional SPARQL endpoints already accept highly expressive queries, so extending these endpoints for time-sensitive queries increases the server cost even further.
To make continuous querying over dynamic Linked Data more affordable, we extend the low-cost Triple Pattern Fragments (TPF) interface with support for time-sensitive queries.
In this paper, we introduce the TPF Query Streamer that allows clients to evaluate SPARQL queries with continuously updating results.
Our experiments indicate that this extension significantly lowers the server complexity, at the expense of an increase in the execution time per query.
We prove that by moving the complexity of continuously evaluating queries over dynamic Linked Data to the clients and thus increasing bandwidth usage,
the cost at the server side is significantly reduced.
Our results show that this solution makes real-time querying more scalable for a large amount of concurrent clients when compared to the alternatives.},
}
@inproceedings{taelman_mepdaw_2016,
  author    = {Taelman, Ruben and Verborgh, Ruben and Colpaert, Pieter and Mannens, Erik and Van de Walle, Rik},
  title     = {Continuously Updating Query Results over Real-Time {Linked Data}},
  booktitle = {Proceedings of the 2nd Workshop on Managing the Evolution and Preservation of the Data Web},
  series    = {CEUR Workshop Proceedings},
  volume    = 1585,
  pages     = {1--10},
  year      = 2016,
  month     = may,
  issn      = {1613-0073},
  url       = {http://ceur-ws.org/Vol-1585/mepdaw2016_paper_01.pdf},
  abstract  = {Existing solutions to query dynamic Linked Data sources extend the SPARQL language, and require continuous server processing for each query. Traditional SPARQL endpoints accept highly expressive queries, contributing to high server cost. Extending these endpoints for time-sensitive queries increases the server cost even further. To make continuous querying over real-time Linked Data more affordable, we extend the low-cost Triple Pattern Fragments (TPF) interface with support for time-sensitive queries. In this paper, we discuss a framework on top of TPF that allows clients to execute SPARQL queries with continuously updating results. Our experiments indicate that this extension significantly lowers the server complexity. The trade-off is an increase in the execution time per query. We prove that by moving the complexity of continuously evaluating real-time queries over Linked Data to the clients and thus increasing the bandwidth usage, the cost of server-side interfaces is significantly reduced. Our results show that this solution makes real-time querying more scalable in terms of CPU usage for a large amount of concurrent clients when compared to the alternatives.},
}
@inproceedings{taelman_eswc_2016,
  author    = {Taelman, Ruben and Verborgh, Ruben and Colpaert, Pieter and Mannens, Erik},
  title     = {Continuous Client-side Query Evaluation over Dynamic {Linked Data}},
  booktitle = {Proceedings of the 13th Extended Semantic Web Conference: Satellite events},
  editor    = {Sack, Harald and Rizzo, Giuseppe and Steinmetz, Nadine and Mladeni{\'c}, Dunja and Auer, S{\"o}ren and Lange, Christoph},
  series    = {Lecture Notes in Computer Science},
  volume    = 9989,
  pages     = {273--289},
  publisher = {Springer},
  year      = 2016,
  month     = jun,
  isbn      = {978-3-319-47602-5},
  doi       = {10.1007/978-3-319-47602-5_44},
  url       = {http://rubensworks.net/raw/publications/2016/Continuous_Client-Side_Query_Evaluation_over_Dynamic_Linked_Data.pdf},
  abstract  = {Existing solutions to query dynamic Linked Data sources extend the SPARQL language, and require continuous server processing for each query. Traditional SPARQL endpoints already accept highly expressive queries, so extending these endpoints for time-sensitive queries increases the server cost even further. To make continuous querying over dynamic Linked Data more affordable, we extend the low-cost Triple Pattern Fragments (TPF) interface with support for time-sensitive queries. In this paper, we introduce the TPF Query Streamer that allows clients to evaluate SPARQL queries with continuously updating results. Our experiments indicate that this extension significantly lowers the server complexity, at the expense of an increase in the execution time per query. We prove that by moving the complexity of continuously evaluating queries over dynamic Linked Data to the clients and thus increasing bandwidth usage, the cost at the server side is significantly reduced. Our results show that this solution makes real-time querying more scalable for a large amount of concurrent clients when compared to the alternatives.},
}
@inproceedings{verborgh_wadl_2016,
  title     = {Devising Affordable and Functional {Linked Data} Archives},
  author    = {Verborgh, Ruben and Vander Sande, Miel and Shankar, Harihar and Balakireva, Luda and Van de Sompel, Herbert},
  booktitle = {Proceedings of the Web Archiving and Digital Libraries workshop},
  month     = jun,
  year      = 2016,
  abstract  = {Linked Data has become an integral part of the Web. Like any other web resource, Linked Data changes over time. Typically, only the most recent version of a Linked Data set can be accessed via Subject-URIs and queried by means of SPARQL. Sometimes, select archived versions are made available for bulk download. This archive access approach is cheap for the publisher but, unfortunately, very expensive for consumers. The entire data dump must be downloaded and ingested into infrastructure that supports subject-URI and/or SPARQL access. Comparing data across different archived versions is even harder. To address this publisher/consumer imbalance, we propose a solution for publication of archived Linked Data that is affordable for publishers and functional for consumers. It consists of two components: a static storage approach for archived Linked Data that exposes a lightweight RDF interface, and the subsequent extension of that interface to versioned data.},
}
@inproceedings{taelman_cold_2016,
  author    = {Taelman, Ruben and Colpaert, Pieter and Verborgh, Ruben and Mannens, Erik},
  title     = {Multidimensional Interfaces for Selecting Data within Ordinal Ranges},
  booktitle = {Proceedings of the 7th International Workshop on Consuming Linked Data},
  editor    = {Hartig, Olaf and Sequeda, Juan and Hogan, Aidan},
  series    = {CEUR Workshop Proceedings},
  volume    = 1666,
  issn      = {1613-0073},
  month     = oct,
  year      = 2016,
  url       = {http://ceur-ws.org/Vol-1666/paper-03.pdf},
  abstract  = {Linked Data interfaces exist in many flavours, as evidenced by subject pages, SPARQL endpoints, triple pattern interfaces, and data dumps. These interfaces are mostly used to retrieve parts of a complete dataset, such parts can for example be defined by ranges in one or more dimensions. Filtering Linked Data by dimensions such as time range, geospatial area, or genomic location, requires the lookup of data within ordinal ranges. To make retrieval by such ranges generic and cost-efficient, we propose a REST solution in-between looking up data within ordinal ranges entirely on the server, or entirely on the client. To this end, we introduce a method for extending any Linked Data interface with an n-dimensional interface-level index such that n-dimensional ordinal data can be selected using n-dimensional ranges. We formally define Range Gates and Range Fragments and theoretically evaluate the cost-efficiency of hosting such an interface. By adding a multidimensional index to a Linked Data interface for multidimensional ordinal data, we found that we can get benefits from both worlds: the expressivity of the server raises, yet remains more cost-efficient than an interface providing the full functionality on the server-side. Furthermore, the client now shares in the effort to filter the data. This makes query processing becomes more flexible to the end-user, because the query plan can be altered by the engine. In future work we hope to apply Range Gates and Range Fragments to real-world interfaces to give quicker access to data within ordinal ranges.},
}
@inproceedings{colpaert_cold_2016,
  author    = {Colpaert, Pieter and Ballieu, Sander and Verborgh, Ruben and Mannens, Erik},
  title     = {The Impact of an Extra Feature on the Scalability of {Linked Connections}},
  booktitle = {Proceedings of the 7th International Workshop on Consuming Linked Data},
  editor    = {Hartig, Olaf and Sequeda, Juan and Hogan, Aidan},
  series    = {CEUR Workshop Proceedings},
  volume    = 1666,
  issn      = {1613-0073},
  month     = oct,
  year      = 2016,
  url       = {http://ceur-ws.org/Vol-1666/paper-05.pdf},
  abstract  = {Calculating a public transit route involves taking into account user preferences: e.g., one might prefer trams over buses, one might prefer a slight detour to pass by their favorite coffee bar or one might only be interested in wheelchair accessible journeys. Traditional route planning interfaces do not expose enough features for these kind of questions to be answered. In previous work, we proposed a Linked Data interface, called Linked Connections, which allows user-agents to evaluate the route planning queries on the client-side, and thus allow for extra features to be implemented by data reusers. In this work, we study how and where these new features can be added to the Linked Connections framework. We researched this by adding the feature of wheelchair-accessibility both on server and client, and comparing these two solution on query execution time, cache performance and CPU usage on server and client. We found that for the use case of wheelchair-accessibility, there is no advantage of adding this feature on the server: the query execution time does not improve, while the cache hit rate lowers.},
}
@inproceedings{hartig_odbase_2016,
  author    = {Hartig, Olaf and Buil-Aranda, Carlos},
  title     = {Bindings-Restricted Triple Pattern Fragments},
  booktitle = {Proceedings of the 15th International Conference on Ontologies, DataBases, and Applications of Semantics},
  editor    = {Debruyne, Christophe and Panetto, Herv{\'e} and Meersman, Robert and Dillon, Tharam and K{\"u}hn, eva and O'Sullivan, Declan and Ardagna, Claudio Agostino},
  pages     = {762--779},
  publisher = {Springer},
  year      = 2016,
  isbn      = {978-3-319-48472-3},
  doi       = {10.1007/978-3-319-48472-3_48},
  url       = {https://arxiv.org/pdf/1608.08148.pdf},
  abstract  = {The Triple Pattern Fragment (TPF) interface is a recent proposal for reducing server load in Web-based approaches to execute SPARQL queries over public RDF datasets. The price for less overloaded servers is a higher client-side load and a substantial increase in network load (in terms of both the number of HTTP requests and data transfer). In this paper, we propose a slightly extended interface that allows clients to attach intermediate results to triple pattern requests. The response to such a request is expected to contain triples from the underlying dataset that do not only match the given triple pattern (as in the case of TPF), but that are guaranteed to contribute in a join with the given intermediate result. Our hypothesis is that a distributed query execution using this extended interface can reduce the network load (in comparison to a pure TPF-based query execution) without reducing the overall throughput of the client-server system significantly. Our main contribution in this paper is twofold: we empirically verify the hypothesis and provide an extensive experimental comparison of our proposal and TPF.},
}
@inproceedings{taelman_ekaw_2016,
  title     = {Exposing {RDF} Archives using {Triple Pattern Fragments}},
  author    = {Taelman, Ruben and Verborgh, Ruben and Mannens, Erik},
  booktitle = {Proceedings of the 20th International Conference on Knowledge Engineering and Knowledge Management: Posters and Demos},
  month     = nov,
  year      = 2016,
  kind      = {poster},
  url       = {http://rubensworks.net/raw/publications/2016/ExposingRdfArchivesUsingTpf.pdf},
  abstract  = {Linked Datasets typically change over time, and knowledge of this historical information can be useful. This makes the storage and querying of Dynamic Linked Open Data an important area of research. With the current versioning solutions, publishing Dynamic Linked Open Data at Web-Scale is possible, but too expensive. We investigate the possibility of using the low-cost Triple Pattern Fragments (TPF) interface to publish versioned Linked Open Data. In this paper, we discuss requirements for supporting versioning in the TPF framework, on the level of the interface, storage and client, and investigate which trade-offs exist. These requirements lay the foundations for further research in the area of low-cost, Web-Scale dynamic Linked Open Data publication and querying.},
}
@article{vandersande_jod_2017,
  author    = {Vander Sande, Miel and Verborgh, Ruben and Hochstenbach, Patrick and Van de Sompel, Herbert},
  title     = {Towards sustainable publishing and querying of distributed {Linked Data} archives},
  journal   = {Journal of Documentation},
  volume    = 73,
  number    = 6,
  publisher = {Emerald},
  year      = 2017,
  keywords  = {Linked Data, Linked Data Fragments, publication, archiving},
  note      = {Accepted for publication},
  published = {false},
  url       = {http://linkeddatafragments.org/publications/jod2017.pdf},
  abstract  = {This paper details a low-cost, low-maintenance publishing strategy aimed at unlocking the value of Linked Data collections held by libraries, archives and museums. The shortcomings of commonly used Linked Data publishing approaches are identified, and the current lack of substantial collections of Linked Data exposed by libraries, archives and museums is considered. To improve on the discussed status quo, a novel approach for publishing Linked Data is proposed and demonstrated by means of an archive of DBpedia versions, which is queried in combination with other Linked Data sources. We show that our approach makes publishing Linked Data archives easy and affordable, and supports distributed querying without causing untenable load on the Linked Data sources. The proposed approach significantly lowers the barrier for publishing, maintaining, and making Linked Data collections queryable. As such, it offers the potential to substantially grow the distributed network of queryable Linked Data sources. Because the approach supports querying without causing unacceptable load on the sources, the queryable interfaces are expected to be more reliable, allowing them to become integral building blocks of robust applications that leverage distributed Linked Data sources. The novel publishing strategy significantly lowers the technical and financial barriers that libraries, archives and museums face when attempting to publish Linked Data collections. The proposed approach yields Linked Data sources that can reliably be queried, paving the way for applications that leverage distributed Linked Data sources through federated querying.},
}
@inproceedings{hartig_iswc_2017,
  author    = {Hartig, Olaf and Letter, Ian and P{\'e}rez, Jorge},
  title     = {A Formal Framework for Comparing {Linked Data Fragments}},
  booktitle = {Proceedings of the 16th International Semantic Web Conference},
  year      = 2017,
  month     = oct,
  url       = {http://olafhartig.de/files/HartigEtAl_ISWC2017_Preprint.pdf},
  abstract  = {The Linked Data Fragment (LDF) framework has been proposed as a uniform view to explore the trade-offs of consuming Linked Data when servers provide (possibly many) different interfaces to access their data. Every such interface has its own particular properties regarding performance, bandwidth needs, caching, etc. Several practical challenges arise. For example, before exposing a new type of LDFs in some server, can we formally say something about how this new LDF interface compares to other interfaces previously implemented in the same server? From the client side, given a client with some restricted capabilities in terms of time constraints, network connection, or computational power, which is the best type of LDFs to complete a given task? Today there are only a few formal theoretical tools to help answer these and other practical questions, and researchers have embarked in solving them mainly by experimentation. In this paper we propose the Linked Data Fragment Machine (LDFM) which is the first formalization to model LDF scenarios. LDFMs work as classical Turing Machines with extra features that model the server and client capabilities. By proving formal results based on LDFMs, we draw a fairly complete expressiveness lattice that shows the interplay between several combinations of client and server capabilities. We also show the usefulness of our model to formally analyze the fine-grain interplay between several metrics such as the number of requests sent to the server, and the bandwidth of communication between client and server.},
}
@inproceedings{moreau_iswc_2017,
  author    = {Moreau, Benjamin and Serrano-Alvarado, Patricia and Desmontils, Emmanuel and Thoumas, David},
  title     = {Querying {non-RDF} Datasets using Triple Patterns},
  booktitle = {Proceedings of the 16th International Semantic Web Conference: Posters and Demos},
  year      = 2017,
  month     = oct,
  url       = {https://iswc2017.semanticweb.org/wp-content/uploads/papers/PostersDemos/paper530.pdf},
  abstract  = {Triple Pattern Fragments (TPF) interface allows to query
Linked Data datasets with high data availability. But, data providers do
not integrate large amounts of datasets as Linked Data due to expensive
investments in terms of storage and maintenance. The problem we focus
on is how to integrate non-RDF datasets on-demand as Linked Data
simply and efficiently. In this demo, we present ODMTP, an On-Demand
Mapping using Triple Patterns over non-RDF datasets. ODMTP is implemented
over a TPF server. We showcase it with SPARQL queries over
Twitter.},
}
@inproceedings{taelman_mepdaw_2017,
  author    = {Taelman, Ruben and Vander Sande, Miel and Verborgh, Ruben and Mannens, Erik},
  title     = {Versioned Triple Pattern Fragments: A Low-cost {Linked} {Data} Interface Feature for {Web} Archives},
  booktitle = {Proceedings of the 3rd Workshop on Managing the Evolution and Preservation of the Data Web},
  month     = may,
  year      = {2017},
  url       = {http://rubensworks.net/raw/publications/2017/vtpf.pdf},
}
@inproceedings{taelman_eswc_demo_2017,
  title     = {Live Storage and Querying of Versioned Datasets on the {Web}},
  author    = {Taelman, Ruben and Vander Sande, Miel and Verborgh, Ruben and Mannens, Erik},
  booktitle = {Proceedings of the 14th Extended Semantic Web Conference: Posters and Demos},
  month     = may,
  year      = {2017},
  url       = {http://rubensworks.net/raw/publications/2017/vtpf-demo.pdf},
}
@inproceedings{taelman_iswc_resources_comunica_2018,
  author    = {Taelman, Ruben and Van Herwegen, Joachim and Vander Sande, Miel and Verborgh, Ruben},
  title     = {{Comunica}: a Modular {SPARQL} Query Engine for the {Web}},
  booktitle = {Proceedings of the 17th International Semantic Web Conference},
  year      = {2018},
  month     = oct,
  url       = {https://comunica.github.io/Article-ISWC2018-Resource/},
  abstract  = {
Query evaluation over Linked Data sources has become a complex story,
given the multitude of algorithms and techniques for single- and multi-source querying,
as well as the heterogeneity of Web interfaces through which data is published online.
Today’s query processors are insufficiently adaptable to test multiple query engine aspects in combination,
such as evaluating the performance of a certain join algorithm over a federation of heterogeneous interfaces.
The Semantic Web research community is in need of a flexible query engine that allows plugging in new components such as different algorithms,
new or experimental SPARQL features, and support for new Web interfaces.
We designed and developed a Web-friendly and modular meta query engine called Comunica that meets these specifications.
In this article, we introduce this query engine and explain the architectural choices behind its design.
We show how its modular nature makes it an ideal research platform for investigating new kinds of Linked Data interfaces and querying algorithms.
Comunica facilitates the development, testing, and evaluation of new query processing capabilities, both in isolation and in combination with others.
},
}
@inproceedings{taelman_ssws_amf_2020,
  title     = {Optimizing Approximate Membership Metadata in Triple Pattern Fragments for Clients and Servers},
  author    = {Taelman, Ruben and Van Herwegen, Joachim and Vander Sande, Miel and Verborgh, Ruben},
  booktitle = {Proceedings of the 13th International Workshop on Scalable Semantic Web Knowledge Base Systems},
  year      = {2020},
  month     = nov,
  url       = {https://comunica.github.io/Article-SSWS2020-AMF/},
  abstract  = {Depending on the HTTP interface used for publishing Linked Data, the effort of evaluating a SPARQL query can be redistributed differently between clients and servers. For instance, lower server-side CPU usage can be realized at the expense of higher bandwidth consumption. Previous work has shown that complementing lightweight interfaces such as Triple Pattern Fragments (TPF) with additional metadata can positively impact the performance of clients and servers. Specifically, Approximate Membership Filters (AMFs)—data structures that are small and probabilistic—in the context of TPF were shown to reduce the number of HTTP requests, at the expense of increasing query execution times. In order to mitigate this significant drawback, we have investigated unexplored aspects of AMFs as metadata on TPF interfaces. In this article, we introduce and evaluate alternative approaches for server-side publication and client-side consumption of AMFs within TPF to achieve faster query execution, while maintaining low server-side effort. Our alternative client-side algorithm and the proposed server configurations significantly reduce both the number of HTTP requests and query execution time, with only a small increase in server load, thereby mitigating the major bottleneck of AMFs within TPF. Compared to regular TPF, average query execution is more than 2 times faster and requires only 10\% of the number of HTTP requests, at the cost of at most a 10\% increase in server load. These findings translate into a set of concrete guidelines for data publishers on how to configure AMF metadata on their servers. },
}