1. 亚马逊评论日期采集是一件非常繁琐的事情:需要涵盖全部站点和所有语言(US、FR、DE、IT、ES、SE、AE 等),其中还包括阿拉伯语。
特别是阿拉伯语,最让人吐血
如下是解析 US、UK、AU、CA、MX、JP 站点日期的代码,大家可以参考,然后去解析其他站点的数据。
日期解析只能一个站点一个站点的解析,没有其他技巧
本文重点介绍如何解析 亚马逊评论日期,具体如何把完整评论数据采集下来,请移步我的其他文章
def reviewDateUK(self, reviewDate):
    """Parse an Amazon UK review date such as "... on 26 February 2016".

    Everything up to and including the last "on " is discarded, commas are
    removed, and the remaining "<day> <month> <year>" tokens are rebuilt as
    "<year>-<month>-<day>", with the month name passed through
    ``LocalDateTimeUtils.transferMonth``.

    Args:
        reviewDate: Raw review-date text scraped from the page.

    Returns:
        Whatever ``LocalDateTimeUtils.parse`` returns for the rebuilt string;
        the ``YYYY_M_D`` format is tried first and ``YYYY_MM_DD`` second.
        If the text cannot be processed, the parsed sentinel date
        "2099-12-31" is returned instead.
    """
    utils = self.LocalDateTimeUtils
    try:
        text = re.sub(r".*on ", "", reviewDate).replace(",", "")
        # split() without an argument tolerates repeated or trailing
        # whitespace, which split(" ") would turn into empty tokens.
        day, month, year = text.split()[:3]
        normalized = "-".join((year, utils.transferMonth(month), day))
        parsed = utils.parse(normalized, utils.YYYY_M_D)
        if parsed is None:
            parsed = utils.parse(normalized, utils.YYYY_MM_DD)
        return parsed
    except Exception:
        # Best-effort parsing: ordinary failures map to a far-future sentinel.
        # Exception (not BaseException) lets KeyboardInterrupt/SystemExit
        # propagate instead of being silently swallowed.
        return utils.parse("2099-12-31", utils.YYYY_MM_DD)
"""
* US->on February 26, 2016
"""
def reviewDateUS(self, reviewDate):
    """Parse an Amazon US review date such as "... on February 26, 2016".

    Everything up to and including the last "on " is discarded, commas are
    removed, and the remaining "<month> <day> <year>" tokens are rebuilt as
    "<year>-<month>-<day>", with the month name passed through
    ``LocalDateTimeUtils.transferMonth``.

    Args:
        reviewDate: Raw review-date text scraped from the page.

    Returns:
        Whatever ``LocalDateTimeUtils.parse`` returns for the rebuilt string;
        the ``YYYY_M_D`` format is tried first and ``YYYY_MM_DD`` second.
        If the text cannot be processed, the parsed sentinel date
        "2099-12-31" is returned instead.
    """
    utils = self.LocalDateTimeUtils
    try:
        text = re.sub(r".*on ", "", reviewDate).replace(",", "")
        # split() without an argument tolerates repeated or trailing
        # whitespace, which split(" ") would turn into empty tokens.
        month, day, year = text.split()[:3]
        normalized = "-".join((year, utils.transferMonth(month), day))
        parsed = utils.parse(normalized, utils.YYYY_M_D)
        if parsed is None:
            parsed = utils.parse(normalized, utils.YYYY_MM_DD)
        return parsed
    except Exception:
        # Best-effort parsing: ordinary failures map to a far-future sentinel.
        # Exception (not BaseException) lets KeyboardInterrupt/SystemExit
        # propagate instead of being silently swallowed.
        return utils.parse("2099-12-31", utils.YYYY_MM_DD)
"""
* AU->on 26 February, 2016
"""
def reviewDateAU(self, reviewDate):
    """Parse an Amazon AU review date such as "... on 26 February, 2016".

    Everything up to and including the last "on " is discarded, commas are
    removed, and the remaining "<day> <month> <year>" tokens are rebuilt as
    "<year>-<month>-<day>", with the month name passed through
    ``LocalDateTimeUtils.transferMonth``.

    Args:
        reviewDate: Raw review-date text scraped from the page.

    Returns:
        Whatever ``LocalDateTimeUtils.parse`` returns for the rebuilt string;
        the ``YYYY_M_D`` format is tried first and ``YYYY_MM_DD`` second.
        If the text cannot be processed, the parsed sentinel date
        "2099-12-31" is returned instead.
    """
    utils = self.LocalDateTimeUtils
    try:
        text = re.sub(r".*on ", "", reviewDate).replace(",", "")
        # split() without an argument tolerates repeated or trailing
        # whitespace, which split(" ") would turn into empty tokens.
        day, month, year = text.split()[:3]
        normalized = "-".join((year, utils.transferMonth(month), day))
        parsed = utils.parse(normalized, utils.YYYY_M_D)
        if parsed is None:
            parsed = utils.parse(normalized, utils.YYYY_MM_DD)
        return parsed
    except Exception:
        # Best-effort parsing: ordinary failures map to a far-future sentinel.
        # Exception (not BaseException) lets KeyboardInterrupt/SystemExit
        # propagate instead of being silently swallowed.
        return utils.parse("2099-12-31", utils.YYYY_MM_DD)
"""
* US->on February 26, 2016
* CA->on March 7, 2016
"""
def reviewDateCA(self, reviewDate):
    """Parse an Amazon CA review date such as "... on March 7, 2016".

    Everything up to and including the last "on " is discarded, commas are
    removed, and the remaining "<month> <day> <year>" tokens are rebuilt as
    "<year>-<month>-<day>", with the month name passed through
    ``LocalDateTimeUtils.transferMonth``.

    Args:
        reviewDate: Raw review-date text scraped from the page.

    Returns:
        Whatever ``LocalDateTimeUtils.parse`` returns for the rebuilt string;
        the ``YYYY_M_D`` format is tried first and ``YYYY_MM_DD`` second.
        If the text cannot be processed, the parsed sentinel date
        "2099-12-31" is returned instead.
    """
    utils = self.LocalDateTimeUtils
    try:
        text = re.sub(r".*on ", "", reviewDate).replace(",", "")
        # split() without an argument tolerates repeated or trailing
        # whitespace, which split(" ") would turn into empty tokens.
        month, day, year = text.split()[:3]
        normalized = "-".join((year, utils.transferMonth(month), day))
        parsed = utils.parse(normalized, utils.YYYY_M_D)
        if parsed is None:
            parsed = utils.parse(normalized, utils.YYYY_MM_DD)
        return parsed
    except Exception:
        # Best-effort parsing: ordinary failures map to a far-future sentinel.
        # Exception (not BaseException) lets KeyboardInterrupt/SystemExit
        # propagate instead of being silently swallowed.
        return utils.parse("2099-12-31", utils.YYYY_MM_DD)
"""
* US->on February 26, 2016
* MX->el 8 de noviembre de 2016
"""
def reviewDateMX(self, reviewDate):
    """Parse an Amazon MX review date such as "... el 8 de noviembre de 2016".

    Everything up to and including the last "el " is discarded, the rest is
    split on " de " into "<day>", "<month>", "<year>", and the pieces are
    rebuilt as "<year>-<month>-<day>", with the month name passed through
    ``LocalDateTimeUtils.transferMonth``.

    Args:
        reviewDate: Raw review-date text scraped from the page.

    Returns:
        Whatever ``LocalDateTimeUtils.parse`` returns for the rebuilt string;
        the ``YYYY_M_D`` format is tried first and ``YYYY_MM_DD`` second.
        If the text cannot be processed, the parsed sentinel date
        "2099-12-31" is returned instead.
    """
    utils = self.LocalDateTimeUtils
    try:
        text = re.sub(r".*el ", "", reviewDate)
        # Strip each piece so stray whitespace/newlines around the date do
        # not leak into the rebuilt string.
        day, month, year = (part.strip() for part in text.split(" de ")[:3])
        normalized = "-".join((year, utils.transferMonth(month), day))
        parsed = utils.parse(normalized, utils.YYYY_M_D)
        if parsed is None:
            parsed = utils.parse(normalized, utils.YYYY_MM_DD)
        return parsed
    except Exception:
        # Best-effort parsing: ordinary failures map to a far-future sentinel.
        # Exception (not BaseException) lets KeyboardInterrupt/SystemExit
        # propagate instead of being silently swallowed.
        return utils.parse("2099-12-31", utils.YYYY_MM_DD)
"""
*
* @param reviewDate
"""
def reviewDateJP(self, reviewDate):
    """Parse an Amazon JP review date written with 年/月/日 markers.

    The known Japanese and Chinese UI phrases around the date are removed,
    "年" and "月" become "-", "日" is dropped, and the trimmed result (for
    example "2021-3-18") is handed to ``LocalDateTimeUtils.parse``.

    Args:
        reviewDate: Raw review-date text scraped from the page.

    Returns:
        Whatever ``LocalDateTimeUtils.parse`` returns for the cleaned string;
        the ``YYYY_M_D`` format is tried first and ``YYYY_MM_DD`` second.
        If the text cannot be processed, the error is printed and the parsed
        sentinel date "2099-12-31" is returned instead.
    """
    utils = self.LocalDateTimeUtils
    try:
        # Japanese UI suffix, e.g. "2021年3月18日に日本でレビュー済み".
        # The variant without "日" is kept from the original code
        # (presumably a defensive/legacy spelling -- TODO confirm).
        text = reviewDate.replace("に本でレビュー済み", "")
        text = text.replace("に日本でレビュー済み", "")
        # Chinese UI prefix and suffix around the same date.
        text = text.replace("评论于", "")
        text = text.replace("在日本 🇯🇵 发布", "")
        # Plain literal substitutions; no regular expression is needed.
        text = text.replace("年", "-").replace("月", "-").replace("日", "")
        text = text.strip()
        parsed = utils.parse(text, utils.YYYY_M_D)
        if parsed is None:
            parsed = utils.parse(text, utils.YYYY_MM_DD)
        return parsed
    except Exception as e:
        # Best-effort parsing: ordinary failures are reported and mapped to a
        # far-future sentinel. Exception (not BaseException) lets
        # KeyboardInterrupt/SystemExit propagate instead of being swallowed.
        print("s",e,"s")
        return utils.parse("2099-12-31", utils.YYYY_MM_DD)