Concatenate Multiple Column Records

Tip of the Week
Tip for Week of October 16, 2006

Concatenate Multiple Column Records

This tip comes from Galina Petrenko, a Sr. Programmer Analyst at Harte-Hanks Inc. in Langhorne, PA.


In Oracle9i Database and later, it's possible to rotate the vertical presentation of data into horizontal presentation. The following demonstrates a simple way (without XML parsers or PL/SQL stored structures) to convert vertical presentation into horizontal presentation; it will concatenate multiple column records into a single string.


-- Temporary Data

create table TMP_TEST ( ID NUMBER, NumVal NUMBER, STRVAL VARCHAR2(32) );

insert into TMP_TEST values(1, 100, 'Hello');
insert into TMP_TEST values(1,-100, 'World');
insert into TMP_TEST values(2, 1, 'Concatenate');
insert into TMP_TEST values(2, 2, 'In String');
insert into TMP_TEST values(2, 3, 'using Connect By');
insert into TMP_TEST values(2, 4, 'Using SYS_CONNECT_BY_PATH');

commit;

-- Check Vertical Data Presentation

select * from TMP_TEST;

ID NUMVAL STRVAL
---------- ---------- --------------------------------
1 100 Hello
1 -100 World
2 1 Concatenate
2 2 In String
2 3 using Connect By
2 4 Using SYS_CONNECT_BY_PATH

6 rows selected.

-- Get Concatenated String with Specific Delimiter

def Delimiter=" "

SELECT
ID,
SUBSTR(MAX(REPLACE(
SYS_CONNECT_BY_PATH(STRVAL, '/')
,'/','&Delimiter')),2) Concatenated_String
FROM (
select A.*,
row_number() OVER (Partition by ID order by ID) ROW#
from TMP_TEST A)
START WITH ROW#=1
CONNECT BY PRIOR ID=ID AND PRIOR row# = row# -1
GROUP BY ID
;

ID CONCATENATED_STRING
---------- -----------------------------------------------------------------
1 Hello World
2 Concatenate In String using Connect By Using SYS_CONNECT_BY_PATH

-- More Generic:

def Column_Sort_Name=ID
def Column_Name=NumVal
def Delimiter="|"

SELECT
ID,
SUBSTR(MAX(REPLACE(
SYS_CONNECT_BY_PATH( &Column_Name , '/')
,'/','&Delimiter')),2) Concatenated_String
FROM (
select
A.*,
row_number() OVER (Partition by ID order by &Column_Sort_Name ) ROW#
from TMP_TEST A)
START WITH ROW#=1
CONNECT BY PRIOR &Column_Sort_Name = &Column_Sort_Name
AND PRIOR row# = row# -1
GROUP BY &Column_Sort_Name
;


ID CONCATENATED_STRING
---------- -----------------------------------------------------------------
1 100|-100
2 1|2|3|4


def Delimiter=","


ID CONCATENATED_STRING
---------- -----------------------------------------------------------------
1 100,-100
2 1,2,3,4
import glob import os import pickle import pandas as pd import numpy as np from sklearn.metrics import ( confusion_matrix, precision_recall_fscore_support ) def load_test_outputs(out_dir: str) -> pd.DataFrame: files = sorted(glob.glob(os.path.join(out_dir, "exp_pred.pkl"))) records = { "label": [], "pred": [], "label_sg": [], "pred_sg": []} for path in files: with open(path, "rb") as f: while True: try: batch = pickle.load(f) except EOFError: break # records["mat_id"].extend(batch["mat_ids"]) records["label"].extend(batch["labels"].tolist()) records["pred"].extend(batch["preds"].tolist()) records["label_sg"].extend(batch["labels_sg"].tolist()) records["pred_sg"].extend(batch["preds_sg"].tolist()) return pd.DataFrame(records) out_dir = "./" df = load_test_outputs(out_dir) c:\Users\AMDL\AppData\Local\Programs\Python\Python312\Lib\site-packages\torch\storage.py:414: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. return torch.load(io.BytesIO(b)) # Count predictions in the 'pred' column for labels 0 through 6 label_range = range(7) pred_counts = df['pred'].value_counts().reindex(label_range, fill_value=0) # Print the counts print("Prediction counts (label: count):") for label, count in pred_counts.items(): print(f"{label}: {count}") Prediction counts (label: count): 0: 0 1: 2 2: 241 3: 5229 4: 55 5: 27196 6: 29645 # print perentage of each label in the 'pred' column total_predictions = len(df['pred']) print("\nPercentage of each label in the 'pred' column:") for label, count in pred_counts.items(): percentage = (count / total_predictions) * 100 print(f"{label}: {percentage:.2f}%") Percentage of each label in the 'pred' column: 0: 0.00% 1: 0.00% 2: 0.39% 3: 8.38% 4: 0.09% 5: 43.61% 6: 47.53% import matplotlib.pyplot as plt def plot_top3_predicted_crystal_systems(df, font_size=12): # Crystal system names corresponding to labels 0–6 crystal_systems = { 0: 'triclinic', 1: 'Monoclinic', 2: 'Orthorhombic', 3: 'Tetragonal', 4: 'trigonal', 5: 'Hexagonal', 6: 'Cubic' } label_range = range(7) pred_counts = df['pred'].value_counts().reindex(label_range, fill_value=0) total_preds = pred_counts.sum() pred_percentages = (pred_counts / total_preds * 100).round(2) top3 = pred_percentages.sort_values(ascending=False).head(3) plt.figure(figsize=(8, 6)) bars = plt.bar( [crystal_systems[i] for i in top3.index], top3.values, color=['tab:red', 'tab:blue', 'tab:green'] ) # Annotate percentage values on top of bars for bar, pct in zip(bars, top3.values): plt.text( bar.get_x() + bar.get_width() / 2, bar.get_height(), f"{pct:.0f}%", ha='center', va='bottom', fontsize=font_size ) # plt.xlabel('crystal system', fontsize=font_size) plt.ylabel('percentage of pixels (%)', fontsize=font_size) #remove x-ticks plt.xticks([]) # Hide x-ticks # plt.xticks(fontsize=font_size) plt.yticks(fontsize=font_size) plt.legend(bars, [crystal_systems[i] for i in top3.index], title='Crystal system', fontsize=font_size) plt.grid(axis='y', linestyle='--', alpha=0.5) plt.tight_layout() plt.show() # Example usage: plot_top3_predicted_crystal_systems(df, font_size=18) import numpy as np import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap import matplotlib.patches as mpatches def plot_label_map_top3_colors(df, label_col='pred', size=256, font_size=14): num_pixels = size * size labels = np.array(df[label_col]) # Crop to size*size if len(labels) < num_pixels: labels = np.concatenate([labels, np.full(num_pixels - len(labels), -1)]) else: labels = labels[:num_pixels] # Reshape into image image = labels.reshape(size, size) # Find top 3 labels unique, counts = np.unique(labels[labels != -1], return_counts=True) top_labels = unique[np.argsort(counts)[-3:]][::-1] # most → least frequent # Map labels to color indices color_image = np.zeros_like(image, dtype=int) # default black = 0 label_to_color = {top_labels[0]: 1, top_labels[1]: 2, top_labels[2]: 3} for label, color_index in label_to_color.items(): color_image[image == label] = color_index # Define color map cmap = ListedColormap(['white', 'tab:red', 'tab:blue', 'tab:green']) # Plot plt.figure(figsize=(6, 6)) plt.imshow(color_image, cmap=cmap, origin='upper', vmin=0, vmax=3) plt.axis('off') plt.title('Top 3 Predicted Crystal Systems', fontsize=font_size) # Add legend legend_patches = [ mpatches.Patch(color='tab:blue', label=f'Top 1: {top_labels[0]}'), mpatches.Patch(color='tab:orange', label=f'Top 2: {top_labels[1]}'), mpatches.Patch(color='tab:green', label=f'Top 3: {top_labels[2]}'), mpatches.Patch(color='white', label='Other / Padding') ] # plt.legend(handles=legend_patches, loc='upper right', fontsize=font_size) plt.tight_layout() plt.show() # Example usage:解释这段代码
10-30
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值