insertProducts

本文介绍了一个Perl脚本,用于预处理原始PSA文件,在加载到DataStage作业之前进行数据清洗和格式转换。该脚本将文件按记录类型拆分为多个小文件,并为后续加载到数据库做准备。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

#!/usr/bin/perl

#usage: perl insertProducts.pl inputPsaFileName cfgFilePath pPaddingRequired pPrefix pLength pPadChar numOfCategories
#for example: perl insertProducts.pl prd.txt pog.cfg 1 TGT 14 0 3
#Input file format: raw .psa file exported from Prospace software.
#    while all the fields are separated by comma: entry type,field1,field2,...
#Output format: rowid,required fields separated by comma. like
#Description: This script is used to pre-process the raw .psa file before loading in DataStage jobs.
# The script splits the file into several files based on the entry type (Planogram, Product, Fixture and Position).
#           All the other types of records will be discarded.  
# The pieces of smaller files will be further processed by the DataStage jobs and loaded into Database.

use strict;
use File::Basename;
use File::Spec;
use File::stat;
use DBD::DB2;
my $db_database = 'dbname';
my $db_user = 'dbuser';
my $db_password = 'passwd';

my $dbhDATABASE;


# Expected number of mapping fields in config file
my $numOfPrdCols = 9;

# Check the input parameters

my $requiredParameterNumber = 7; # Please modify this parameter in case new parameter is added.
my $actualInputParameterNumber = scalar @ARGV;
print "Input parameters: ", (join ' ', @ARGV), "\n" if($requiredParameterNumber == $actualInputParameterNumber) or die "Required number of parameters: $requiredParameterNumber; Actual number of parameters: $actualInputParameterNumber.";

# Check the files and directories
my $inFile = shift;
print "Input file: $inFile\n" if(-r $inFile) or die "The $inFile is not readable. <$!>";
my $fileName = basename $inFile;
print "Basename: $fileName\n";

my $cfgFile = shift;
print "Config File: $cfgFile\n" if (-r $cfgFile) or die "The $cfgFile is not readable. <$!>";

# DPCI Prefix string. This is required to prefixing and padding the DPCI before matching the UPC column in DTOPT.Product table.
my $pPaddingRequired = shift;
my $pPrefix = shift;
my $pLength = shift;
my $pPadChar = shift;
my $numOfCategories = shift;

if ($numOfCategories < 1) {
$numOfCategories = 1
}

# Start - Read fields Mapping info from config file

my %typesMap = ();
my $type;

my (%pogMap, %fixMap, %posMap, %prdMap);
# Hash key: Record Type; Hash value: Reference of field mapping hash.
$typesMap{'Planogram'} = \%pogMap;
$typesMap{'Fixture'} = \%fixMap;
$typesMap{'Position'} = \%posMap;
$typesMap{'Product'} = \%prdMap;

open(FH_CONFIG, "<$cfgFile") || die "Unable to open the field map config file : $cfgFile <$!>";

while(<FH_CONFIG>) {
    chomp;
    next if /^#/;        # skip comments
    next if /^\s*$/;     # skip empty lines
if(/^\s*\[\s*(\w+)\s*\]\s*$/){# This is the record type block, like [Planogram]
$type = $1;
next;
}
    # Store field name as key, index as value.
if (/^\s*(\w+)\s*=\s*(\w+)\s*$/){ # Format: FieldName = Index
$typesMap{$type}->{$1} = $2;
}

close CONFIG;
# End - Read fields Mapping info from config file

# Index of DPCI field in the array.
my $indDPCIProductInArray; # To be init in function initPrdColList();

# Init the array of field index keys
my @prdColList = initPrdColList();

# Get the field indices array of each type of records.
my @prdCol = fieldMap2IndArray(\%prdMap, \@prdColList, $numOfPrdCols, 'Product');

my ($PRODUCTBRANDID,$PRODUCTCATEGORYID,$DTCATEGORYID,$PRODUCTDEMANDGROUPID,$NAME,$UPC,$ISPREPRICED,$ISPRIVATELABEL,$SEASONAL,$CASEPACK,$ALLOWBACKHAUL,$ISAPPROVED,$STATUS,$ISACTIVE,$ISSTUB,$EQCFACTOR)=(20256,2,2,49,"InsertForPOGTest","",0,0,0,1,0,1,1,1,0,1);

&dbConnect();
my $stmt;
my $maxProductID;
my @row;
my $dbhSTATEMENT;
my $productID;
my @categoryNames=();
my @categoryIDs=();

$stmt = "select name from dtopt.productcategory fetch first $numOfCategories rows only";
$dbhSTATEMENT = $dbhDATABASE->prepare($stmt);
$dbhSTATEMENT->execute();
while(@row = $dbhSTATEMENT->fetchrow_array()){
push @categoryNames, $row[0];
}
$numOfCategories = scalar @categoryNames;


foreach my $categoryName (@categoryNames){
$stmt = "MERGE 
INTO         DTCTASM.ASSORTMENTMODELINGCATEGORY AS AMC 
USING        TABLE (
values  (\'$categoryName\',0,0,'Testing Category', current date)
) AMCNEW ( NAME, STATUS, ISMODELED, DESCRIPTION, LASTMODELINGDATE )
ON          AMC.NAME   = AMCNEW.NAME 
WHEN MATCHED THEN 
UPDATE 
SET 
(
NAME, STATUS, ISMODELED, DESCRIPTION, LASTMODELINGDATE
)
= ( AMCNEW.NAME, AMCNEW.STATUS, AMCNEW.ISMODELED, AMCNEW.DESCRIPTION, AMCNEW.LASTMODELINGDATE )
WHEN NOT MATCHED THEN 
INSERT 
  (
NAME, STATUS, ISMODELED, DESCRIPTION, LASTMODELINGDATE
 
  VALUES 
  (
AMCNEW.NAME, AMCNEW.STATUS, AMCNEW.ISMODELED, AMCNEW.DESCRIPTION, AMCNEW.LASTMODELINGDATE 
  )";
$dbhSTATEMENT = $dbhDATABASE->prepare($stmt);
$dbhSTATEMENT->execute();

$stmt = "select ASSORTMENTMODELINGCATEGORYID from dtctasm.ASSORTMENTMODELINGCATEGORY where NAME=\'$categoryName\'";
$dbhSTATEMENT = $dbhDATABASE->prepare($stmt);
$dbhSTATEMENT->execute();
@row = $dbhSTATEMENT->fetchrow_array();
push @categoryIDs, $row[0];
}
$numOfCategories = scalar @categoryIDs;

$stmt = "select DTCATEGORYID from DTOPT.DTCATEGORY fetch first 1 row only";
$dbhSTATEMENT = $dbhDATABASE->prepare($stmt);
$dbhSTATEMENT->execute();
@row = $dbhSTATEMENT->fetchrow_array();
$DTCATEGORYID = $row[0];

$stmt = "select PRODUCTBRANDID from DTOPT.PRODUCTBRAND fetch first 1 row only";
$dbhSTATEMENT = $dbhDATABASE->prepare($stmt);
$dbhSTATEMENT->execute();
@row = $dbhSTATEMENT->fetchrow_array();
$PRODUCTBRANDID = $row[0];

$stmt = "select PRODUCTCATEGORYID from DTOPT.PRODUCTCATEGORY fetch first 1 row only";
$dbhSTATEMENT = $dbhDATABASE->prepare($stmt);
$dbhSTATEMENT->execute();
@row = $dbhSTATEMENT->fetchrow_array();
$PRODUCTCATEGORYID = $row[0];

$stmt = "select PRODUCTDEMANDGROUPID from DTOPT.PRODUCTDEMANDGROUP fetch first 1 row only";
$dbhSTATEMENT = $dbhDATABASE->prepare($stmt);
$dbhSTATEMENT->execute();
@row = $dbhSTATEMENT->fetchrow_array();
$PRODUCTDEMANDGROUPID = $row[0];


# Open the files
open(FH, "<$inFile") || die "Unable to open the file : $inFile <$!>";

print "Start processing the file: $fileName..\n";
my @products=();


while(<FH>){
chomp;
if (/^Product,.*/){
my @productArray = (split /,/)[@prdCol[0..$#prdCol]];
# Prefix the DPCI(ID) field with $pPrefix
if($productArray[$indDPCIProductInArray]){
$UPC = prefix($productArray[$indDPCIProductInArray]);
$stmt = "select max(productid) from dtopt.product";
$dbhSTATEMENT = $dbhDATABASE->prepare($stmt);
$dbhSTATEMENT->execute();
@row = $dbhSTATEMENT->fetchrow_array();
$maxProductID= $row[0];
$productID = $maxProductID +1;

$stmt = "select productid from dtopt.product where upc=\'$UPC\'";
$dbhSTATEMENT = $dbhDATABASE->prepare($stmt);
$dbhSTATEMENT->execute();
@row = $dbhSTATEMENT->fetchrow_array();
my $existingProductID= $row[0];
#print "Found an existing product with UPC: $UPC, ProductID: $existingProductID.\n" if($existingProductID);
$productID = $maxProductID +1;

if(!$existingProductID){
$stmt = "insert into dtopt.product(PRODUCTID,PRODUCTBRANDID,PRODUCTCATEGORYID,DTCATEGORYID,PRODUCTDEMANDGROUPID,NAME,UPC,ISPREPRICED,ISPRIVATELABEL,SEASONAL,CASEPACK,ALLOWBACKHAUL,ISAPPROVED,DATERECEIVED,STATUS,ISACTIVE,ISSTUB,EQCFACTOR) values ($productID,$PRODUCTBRANDID,$PRODUCTCATEGORYID,$DTCATEGORYID,$PRODUCTDEMANDGROUPID,\'$NAME\',\'$UPC\',$ISPREPRICED,$ISPRIVATELABEL,$SEASONAL,$CASEPACK,$ALLOWBACKHAUL,$ISAPPROVED,current date,$STATUS,$ISACTIVE,$ISSTUB,$EQCFACTOR)";
$dbhSTATEMENT = $dbhDATABASE->prepare($stmt);
print "Inserting UPC: $UPC..\n" ;
$dbhSTATEMENT->execute();
$stmt = "select productid from dtopt.product where upc=\'$UPC\'";
$dbhSTATEMENT = $dbhDATABASE->prepare($stmt);
$dbhSTATEMENT->execute();
@row = $dbhSTATEMENT->fetchrow_array();
$existingProductID = $row[0];
push @products, $UPC;
}
$stmt = "select ASSORTMENTMODELINGCATEGORYID from dtctasm.ASSORTMENTMODELINGCATEGORYDETAIL where productid=$existingProductID";
$dbhSTATEMENT = $dbhDATABASE->prepare($stmt);
$dbhSTATEMENT->execute();
@row = $dbhSTATEMENT->fetchrow_array();
my $existingASSORTMENTMODELINGCATEGORYID = $row[0];

if(!$existingASSORTMENTMODELINGCATEGORYID){
#print "Inserting into dtctasm.ASSORTMENTMODELINGCATEGORYDETAIL..\n";
my $categoryID = $categoryIDs[int(rand(@categoryIDs))%(scalar @categoryIDs)];
$stmt = "insert into dtctasm.ASSORTMENTMODELINGCATEGORYDETAIL(ASSORTMENTMODELINGCATEGORYID,productid) values ($categoryID,$existingProductID)";
$dbhSTATEMENT = $dbhDATABASE->prepare($stmt);
$dbhSTATEMENT->execute();
}
}
}
# Ignore all other type of records.
}

print "Complete processing the file: $fileName.\n";


# Config the final Output field list here. The fields order must be the same as the DataStage input file's.
# Any changes in the DataStage input interface should be reflected here.

sub initPrdColList{
$indDPCIProductInArray = 0; # The index of DPCI field. Index starts from 0.
return ('DPCI', 'Width', 'Height', 'Depth', 'TrayWidth', 'TrayHeight', 'TrayDepth', 'TrayPack', 'CasePack');
}


# Convert Field Mapping Index to Array. This function will accept 4 parameters.
# 1. scalar reference to a field mapping hash
# 2. scalar reference to a field keys array
# 3. expected number of field
# 4. record type: 'Planogram', 'Fixture', 'Position' or 'Product'.
sub fieldMap2IndArray {
my ($colMap, $colList, $expectedNum, $recType) = @_;
# To do: check the total count of the fields of each type of records, if the count is not equal to the expected number, then quit with error.
$expectedNum == (keys %$colMap) or die "Number of $recType Mapping fields in config file <$cfgFile> is not as expected! Expected: $expectedNum; Actual: ", scalar(keys %$colMap), ".";
my @indArray;

foreach my $key (@$colList){
push @indArray, $colMap->{$key};
}
@indArray;
}

# Prefix the passed-in string
sub prefix{
my $str = shift;
$str = trim($str);
if ($pPaddingRequired) {
if ($pLength - length($str) > 0) {   
$str = $pPrefix.($pPadChar x ($pLength- length($str) - length($pPrefix))).$str;
}
else { 
$str = $pPrefix.$str;
}
$str;
}

sub dbConnect(){
my $sthDB2;
my $dbhDB2;
my $dbhSTATEMENT;
my @row;
print "\nConnecting to $db_database user $db_user using $db_password\n";
$dbhDATABASE = DBI->connect( "dbi:DB2:$db_database","$db_user","$db_password" ) || die "Cannot connect to DB2.\n";
}

sub trim{
my $string = shift;
$string =~ s/^\s+//;
$string =~ s/\s+$//;
return $string;
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值