-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpipelines.py
42 lines (40 loc) · 1.51 KB
/
pipelines.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# import logging
#
# class PagecrawlerPipeline(object):
# def process_item(self, item, spider):
# item['firmenname'] = item['firmenname']
# # logging.debug("item['strasse'] %s" % item['strasse'])
#
# # item['strasse'] = item['strasse'].replace(" ", "")
# # item['strasse'] = item['strasse'].replace("\n", "")
# item['strasse'] = item['strasse'].strip()
#
# # item['ort'] = item['ort'].replace(" ", "")
# # item['ort'] = item['ort'].replace("\n", "")
# item['ort'] = item['ort'].strip()
#
# # item['plz'] = item['plz'].replace(" ", "")
# # item['plz'] = item['plz'].replace("\n", "")
# item['plz'] = item['plz'].strip()
#
# # item['website'] = item['website'].replace(" ", "")
# # item['website'] = item['website'].replace("\n", "")
# item['website'] = item['website'].strip()
#
# # item['land'] = item['land'].replace(" ", "")
# # item['land'] = item['land'].replace("\n", "")
# item['land'] = item['land'].strip()
#
# if item['telefon']:
# # item['telefon'] = item['telefon'].replace(" ", "")
# # item['telefon'] = item['telefon'].replace("\n", "")
# item['telefon'] = item['telefon'].strip()
#
# # item['email'] = item['email'].strip()
#
# return item