#!/usr/bin/perl
#usage: perl preProcessPsa.pl inputPsaFileName baseStagingDir validationErrorDir cfgFilePath pPaddingRequired pPrefix pLength pPadChar
#for example: perl preProcessPsa.pl inputfile outputDir error cfgFile 1 TGT 15 0
#Input file format: raw .psa file exported from Prospace software.
#
while all the fields are separated by comma: entry type,field1,field2,...
#Output format: rowid,required fields separated by comma. like
#Description: This script is used to pre-process the raw .psa file before loading in DataStage jobs.
# The script splits the file into several files based on the entry type (Planogram, Product, Fixture and Position).
#
All the other types of records will be discarded.
# The pieces of smaller files will be further processed by the DataStage jobs and loaded into Database.
use strict;
use File::Basename;
use File::Spec;
# Validation Errors: The error code must be the same as the value in the DTCFG.DATAFEEDVALIDATION table.
# All the validation Error Codes except 3,4,5,16,17 will cause whole Planogram record rejected.
use constant EMPTY_PLANOGRAM_KEY => 1;
# Keep the same behavior now. The whole planogram is going to be rejected
use constant INVALID_PLANOGRAM => 2;
# - If planogram does not have valid fixture, reject it
# - If planogram does have valid fixtures but fixtures does not have valid positions, it is okay since now we allow empty fixture.
# - If no product in product list, reject all.
use constant EMPTY_FIXTURE_NAME => 3;
# Reject this fixture and its positions
use constant INVALID_FIXTURE => 4;
# If fixture is empty, still keep this fixture and load it.
use constant INVALID_FIXTURE_NAME => 5;
use constant DuplicatePlanogramInFile
=> 6;
use constant RejectedDueToInvalidPlan
ogram => 13;
use constant RejectedDueToInvalidFixt
ure => 14;
use constant INVALID_FIXTURE_TYPE => 16;
use constant SkipWithInvalidFixture => 17;
# Expected number of mapping fields in config file
my $numOfPogCols = 5;
my $numOfFixCols = 12;
my $numOfPosCols = 17;
my $numOfPrdCols = 9;
# Check the input parameters
my $requiredParameterNumber = 8; # Please modify this parameter in case new parameter is added.
my $actualInputParameterNumb
er = scalar @ARGV;
print "Input parameters: ", (join ' ', @ARGV), "\n" if($requiredParameterNumber == $actualInputParameterNumb
er) or die "Required number of parameters: $requiredParameterNumber; Actual number of parameters: $actualInputParameterNumb
er.";
# Check the files and directories
my $inFile = shift;
print "Input file: $inFile\n" if(-r $inFile) or die "The $inFile is not readable. <$!>";
my $fileName = basename $inFile;
print "Basename: $fileName\n";
my $outputDir = shift;
print "Output Dir: $outputDir\n" if (-x $outputDir) or die "The $outputDir is not accessable. <$!>";
my $errorDir = shift;
print "Error Dir: $errorDir\n" if (-x $errorDir) or die "The $errorDir is not accessable. <$!>";
my $cfgFile = shift;
print "Config File: $cfgFile\n" if (-r $cfgFile) or die "The $cfgFile is not readable. <$!>";
# DPCI Prefix string. This is required to prefixing and padding the DPCI before matching the UPC column in DTOPT.Product table.
my $pPaddingRequired = shift;
my $pPrefix = shift;
my $pLength = shift;
my $pPadChar = shift;
# Start - Read fields Mapping info from config file
my %typesMap = ();
my $type;
my (%pogMap, %fixMap, %posMap, %prdMap);
# Hash key: Record Type; Hash value: Reference of field mapping hash.
$typesMap{'Planogram'} = \%pogMap;
$typesMap{'Fixture'} = \%fixMap;
$typesMap{'Position'} = \%posMap;
$typesMap{'Product'} = \%prdMap;
open(FH_CONFIG, "<$cfgFile") || die "Unable to open the field map config file : $cfgFile <$!>";
while(<FH_CONFIG>) {
if(/^\s*\[\s*(\w+)\s*\]\s*$/){# This is the record type block, like [Planogram]
$type = $1;
next;
}
if (/^\s*(\w+)\s*=\s*(\w+)\s*$/){ # Format: FieldName = Index
$typesMap{$type}->{$1} = $2;
}
}
close FH_CONFIG;
# End - Read fields Mapping info from config file
# Init field index variables
my $indPlanogramKeyInFile = $pogMap{PlanogramKey};
my $indFixtureNameInFile = $fixMap{FixtureName};
my $indFixtureTypeInFile = $fixMap{FixtureType};
# Index of DPCI field in the array.
my $indDPCIPositionInArray; # To be init in function initPosColList();
my $indDPCIProductInArray; # To be init in function initPrdColList();
# Init the array of field index keys
my @pogColList = initPogColList();
my @fixColList = initFixColList();
my @posColList = initPosColList();
my @prdColList = initPrdColList();
# Get the field indices array of each type of records.
my @pogCol = fieldMap2IndArray(\%pogMap, \@pogColList, $numOfPogCols, 'Planogram');
my @fixCol = fieldMap2IndArray(\%fixMap, \@fixColList, $numOfFixCols, 'Fixture');
my @posCol = fieldMap2IndArray(\%posMap, \@posColList, $numOfPosCols, 'Position');
my @prdCol = fieldMap2IndArray(\%prdMap, \@prdColList, $numOfPrdCols, 'Product');
# Form the output file names
my $planogramFile = File::Spec->catfile($outputDir, ${fileName}.".planogram");
my $fixtureFile = File::Spec->catfile($outputDir, ${fileName}.".fixture");
my $productFile = File::Spec->catfile($outputDir, ${fileName}.".product");
my $positionFile = File::Spec->catfile($outputDir, ${fileName}.".position");
my $valErrFile = File::Spec->catfile($errorDir, ${fileName}.".valerr");
my $unifiedFile = File::Spec->catfile($outputDir, ${fileName}.".unified");
# Open the files
open(FH, "<$inFile") || die "Unable to open the file : $inFile <$!>";
open(FH_PLANOGRAM, ">$planogramFile") || die "Unable to open the file : $planogramFile <$!>";
open(FH_FIXTURE, ">$fixtureFile") || die "Unable to open the file : $fixtureFile <$!>";
open(FH_PRODUCT, ">$productFile") || die "Unable to open the file : $productFile <$!>";
open(FH_POSITION, ">$positionFile") || die "Unable to open the file : $positionFile <$!>";
open(FH_VALERR, ">$valErrFile") || die "Unable to open the file : $valErrFile <$!>";
open(FH_UNIFIED, ">$unifiedFile") || die "Unable to open the file : $unifiedFile <$!>";
my $currPlanogramKey; # Current PlanogramKey
my $currPlanogramRowID; # Current Planogram RowID
my $currFixtureName; # Current FixtureName
my $currFixtureType; # Current FixtureType
my $errCd; # Validation ErrorCode
my $rowid=0; # Line number
my %dupPlanogramKeyMap = (); # Hash which holds the duplicate Planogram Key count
my %dupFixtureNamesMap = (); # Hash which holds the duplicate fixture name count
my $modFixtureName; # Modified name when encounter duplicate. By add '_' and duplicate count
my $hasProduct=0; # Flag to indicate whether there is any product record in the psa file.
my $isValidPlanogram=0; # Flag to indicate whether current planogram is valid or not.
my $skipFixture=0; # Flag to indicate whether to skip position records under the current fixture.
my @currPlanogram=(); # Array to store current Planogram record, including rowid,raw line and processed output line
my @currFixture=(); # Array to store current Fixture record, including rowid,raw line and processed output line
my $hasPlanogramToProcess=0; # Flag to indicate whether there is any planogram record to be processed.
my $hasFixtureToProcess=0; # Flag to indicate whether there is any fixture record to be processed.
print "Start processing the file: $fileName..\n";
while(<FH>){
chomp;
$rowid++;
s/\\"/"/g; # Replace all '"'
if (/^Product,.*/){
# Print out the raw record
print FH_UNIFIED "$rowid|$_\n" ;
s/,/|/g; # Replace all comma's with pipe's
s/\Q\|\E/,/g; # Replace all (backslash+pipe)'s with comma's, because comma in '\,' was replace by '|'
# set flag to hasProduct
$hasProduct=1;
my @productArray = trimStringArray((split /\|/)[@prdCol[0..$#prdCol]]);
# Prefix the DPCI(ID) field with $pPrefix
if($productArray[$indDPCIProductInArray]){
$productArray[$indDPCIProductInArray] = prefix($productArray[$indDPCIProductInArray]);
}
my $product = join '|',@productArray;
# Add line number and print out the Product.
print FH_PRODUCT "$rowid|$product\n" ;
}
elsif (/^Planogram,.*/){
# Print out the raw record
print FH_UNIFIED "$rowid|$_\n" ;
s/,/|/g; # Replace all comma's with pipe's
s/\Q\|\E/,/g; # Replace all (backslash+pipe)'s with comma's, because comma in '\,' was replace by '|'
# New Planogram triggers processing of previous Planogram
if($hasFixtureToProcess){
&processCurrFixture();
}
if($hasPlanogramToProcess){
&processCurrPlanogram();
}
$hasPlanogramToProcess=1;
$currPlanogramRowID=$rowid;
my @planogramArray = trimStringArray((split /\|/)[@pogCol[0..$#pogCol]]);
# We need the index of the PlanogramKey field to retrieve the corresponding value
$currPlanogramKey = trim((split /\|/)[$indPlanogramKeyInFile]); # Do a trim to remove leading and trailing whitespaces
if ($currPlanogramKey){
# Check duplicate Planogram record by PlanogramKey
if (exists $dupPlanogramKeyMap{$currPlanogramKey}){
my $dupCount = $dupPlanogramKeyMap{$currPlanogramKey};
$dupPlanogramKeyMap{$currPlanogramKey} = $dupCount + 1;
$currPlanogramKey = '';
$errCd=DuplicatePlanogramInFile
;
&printReject($rowid,$errCd);
} else {
my $dupCount = 1;
# Put the current fixture name into hash
$dupPlanogramKeyMap{$currPlanogramKey} = $dupCount;
my $planogram = join '|',@planogramArray;
push @currPlanogram, ($rowid, $planogram);
}
}
else {
$currPlanogramKey = '';
$errCd=EMPTY_PLANOGRAM_KEY;
&printReject($rowid,$errCd);
}
}
elsif (/^Fixture,.*/){
# Print out the raw record
print FH_UNIFIED "$rowid|$_\n";
s/,/|/g; # Replace all comma's with pipe's
s/\Q\|\E/,/g; # Replace all (backslash+pipe)'s with comma's, because comma in '\,' was replace by '|'
# New Fixture triggers processing of previous Fixture
if($hasFixtureToProcess){
&processCurrFixture();
}
$hasFixtureToProcess=1;
if($currPlanogramKey){
# We need the index of the FixtureName field to retrieve the corresponding value
($currFixtureName, $currFixtureType) = (split /\|/,$_)[$indFixtureNameInFile, $indFixtureTypeInFile];
$currFixtureName = trim($currFixtureName);
# Reject all the invalid Fixture rows
if ($currFixtureType == 10 || $currFixtureType == 11 ){
$currFixtureName = '';
# ignore Fixtures with type Obstruction(10) and Sign(11).
$skipFixture = 1;
$errCd=INVALID_FIXTURE_TYPE;
&printReject($rowid,$errCd);
}
elsif ($currFixtureName =~ /^\s*$/){
$currFixtureName = '';
# ignore Fixtures with empty fixture name.
$skipFixture = 1;
$errCd=EMPTY_FIXTURE_NAME;
&printReject($rowid,$errCd);
}
elsif($currFixtureName =~ /^-.*/ || $currFixtureName =~ /^AUTONRSC.*/){
$currFixtureName = '';
# ignore Fixtures with invalid fixture name.
$skipFixture = 1;
$errCd=INVALID_FIXTURE_NAME;
&printReject($rowid,$errCd);
}
elsif($currFixtureName){
# This is a valid fixture record.
# Check if the current fixture name has duplicate record
if (exists $dupFixtureNamesMap{$currFixtureName}){
my $dupCount = $dupFixtureNamesMap{$currFixtureName};
$modFixtureName = $currFixtureName.'_DUP'.$dupCount;
$dupFixtureNamesMap{$currFixtureName} = $dupCount + 1;
} else {
my $dupCount = 1;
# First time in, no duplicate, so no need to modify fixture name
$modFixtureName = $currFixtureName;
# Put the current fixture name into hash
$dupFixtureNamesMap{$currFixtureName} = $dupCount;
}
# Replace current fixture name with modified value
$currFixtureName = $modFixtureName;
my @fixtureArray =trimStringArray((split /\|/)[@fixCol[0..$#fixCol]]);
my $fixture = join '|',@fixtureArray;
push @currFixture, ($rowid, $currPlanogramRowID, $currPlanogramKey, $modFixtureName, $fixture);
}
}
else{
$currFixtureName = '';
$errCd=RejectedDueToInvalidPlan
ogram;
&printReject($rowid,$errCd);
}
}
elsif (/^Position,.*/){
# Print out the raw record
print FH_UNIFIED "$rowid|$_\n" ;
s/,/|/g; # Replace all comma's with pipe's
s/\Q\|\E/,/g; # Replace all (backslash+pipe)'s with comma's, because comma in '\,' was replace by '|'
if ($skipFixture){
# ignore Positions whose fixture is skipped.
$errCd=SkipWithInvalidFixture;
&printReject($rowid,$errCd);
}
elsif($currPlanogramKey && $currFixtureName){
# Set current fixture as valid
#$isValidFixture=1;
my @positionArray = trimStringArray((split /\|/)[@posCol[0..$#posCol]]);
# Prefix the DPCI(ID) field with $pPrefix
if($positionArray[$indDPCIPositionInArray]){
$positionArray[$indDPCIPositionInArray] = prefix($positionArray[$indDPCIPositionInArray]);
}
my $position = join '|',@positionArray;
# Prefix with rowid, current planogram name, current fixture name and print out the Position
print FH_POSITION "$rowid|$currPlanogramRowID|$currPlanogramKey|$currFixtureName|$position\n" ;
}
else{
if (!$currPlanogramKey){
$errCd=RejectedDueToInvalidPlan
ogram;
&printReject($rowid,$errCd);
}
if (!$currFixtureName){
$errCd=RejectedDueToInvalidFixt
ure;
&printReject($rowid,$errCd);
}
}
}
# Ignore all other type of records.
}
# At EOF, process the remaining fixture and planogram
if($hasFixtureToProcess){
&processCurrFixture();
}
if($hasPlanogramToProcess){
&processCurrPlanogram();
}
die "There is no product in product list, reject all!" unless $hasProduct;
print "Complete processing the file: $fileName.\n";
# Print out the validation error. Separated by '|'
sub printReject{
my $str = join '|',@_;
print FH_VALERR "$str\n";
}
# Perl trim function to remove whitespace from the start and end of the string
sub trim{
my $string = shift;
$string =~ s/^\s+//;
$string =~ s/\s+$//;
return $string;
}
# Left trim function to remove leading whitespace
sub ltrim{
my $string = shift;
$string =~ s/^\s+//;
return $string;
}
# Right trim function to remove trailing whitespace
sub rtrim{
my $string = shift;
$string =~ s/\s+$//;
return $string;
}
# Trim function to remove whitespace from a string array
sub trimStringArray{
my @resultArray;
foreach my $str (@_){
push @resultArray, trim($str);
}
@resultArray;
}
# Config the final Output field list here. The fields order must be the same as the DataStage input file's.
# Any changes in the DataStage input interface should be reflected here.
sub initPogColList{
return ('Name', 'PlanogramKey', 'Description', 'Width', 'Height');
}
sub initPrdColList{
$indDPCIProductInArray = 0; # The index of DPCI field. Index starts from 0.
return ('DPCI', 'Width', 'Height', 'Depth', 'TrayWidth', 'TrayHeight', 'TrayDepth', 'TrayPack', 'CasePack');
}
sub initFixColList{
return ('FixtureType', 'FixtureName', 'FixtureWidth', 'FixtureHeight', 'FixtureDepth', 'XLocation', 'YLocation', 'ZLocation', 'MerchandisableHeight', 'XFacingsOptions', 'YFacingsOptions', 'ZFacingsOptions');
}
sub initPosColList{
$indDPCIPositionInArray = 0; # The index of DPCI field. Index starts from 0.
return ('DPCI', 'MerchandiseStyle', 'XFacings', 'YFacings', 'ZFacings', 'XCappingNumber', 'XCappingOrientation', 'YCappingNumber', 'YCappingOrientation', 'ZCappingNumber', 'ZCappingOrientation', 'Orientation', 'UnitWidthonShelf', 'UnitHeightonShelf', 'UnitDepthonShelf', 'YFacingsOption', 'ZFacingsOption');
}
# Convert Field Mapping Index to Array. This function will accept 4 parameters.
# 1. scalar reference to a field mapping hash
# 2. scalar reference to a field keys array
# 3. expected number of field
# 4. record type: 'Planogram', 'Fixture', 'Position' or 'Product'.
sub fieldMap2IndArray {
my ($colMap, $colList, $expectedNum, $recType) = @_;
# To do: check the total count of the fields of each type of records, if the count is not equal to the expected number, then quit with error.
$expectedNum == (keys %$colMap) or die "Number of $recType Mapping fields in config file <$cfgFile> is not as expected! Expected: $expectedNum; Actual: ", scalar(keys %$colMap), ".";
my @indArray;
foreach my $key (@$colList){
push @indArray, $colMap->{$key};
}
@indArray;
}
# Prefix the passed-in string
sub prefix{
my $str = shift;
$str = trim($str);
if ($pPaddingRequired) {
if ($pLength - length($str) > 0) {
$str = $pPrefix.($pPadChar x ($pLength- length($str) - length($pPrefix))).$str;
}
}
else {
$str = $pPrefix.$str;
}
$str;
}
# Process Current Planogram
sub processCurrPlanogram{
if($currPlanogramKey){ # only if the current planogram key is not empty, we check whether the planogram have fixture, position and product record.
my $rowid = shift @currPlanogram;
my $planogram = shift @currPlanogram;
if($hasProduct && $isValidPlanogram){
# Prefix with line number and print out the Planogram.
print FH_PLANOGRAM "$rowid|$planogram\n" ;
}
else {
$currPlanogramKey = '';
$errCd=INVALID_PLANOGRAM;
&printReject($rowid,$errCd);
}
}
# Reset the variable after processing the planogram record
$hasPlanogramToProcess=0;
$isValidPlanogram=0;
@currPlanogram=();
%dupFixtureNamesMap=();
}
# Process Current Fixture
sub processCurrFixture{
if($currPlanogramKey && $currFixtureName){ # only if the current fixture name is not empty, we check whether the fixture have position and product record.
my $rowid = shift @currFixture;
my $currPlanogramRowID = shift @currFixture;
my $currPlanogramKey = shift @currFixture;
my $newFixtureName = shift @currFixture;
my $fixture = shift @currFixture;
if($hasProduct){
# Set Current Planogram as valid
$isValidPlanogram=1;
# Prefix with rowid, PlanogramKey and print out the Fixture.
print FH_FIXTURE "$rowid|$currPlanogramRowID|$currPlanogramKey|$newFixtureName|$fixture\n" ;
}
else {
$currFixtureName = '';
$errCd=INVALID_FIXTURE;
&printReject($rowid,$errCd);
}
}
# Reset the variable after processing the fixture record
$hasFixtureToProcess=0;
#$isValidFixture=0;
$skipFixture=0;
@currFixture=();
}