我想按照自己的需求编写一个自定义serde,但卡在了一个点上,出现了类转换异常。
输入数据为:
john,miller
我希望将这些数据作为 fname string,lname string
插入到Hive中,所以我编写了一个定制的SerDe。
我只实现了SerDe接口的反序列化方法,如下所示:
package com.datametica.serde;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
public class CustomSerde implements SerDe {
int numColumns;
StructObjectInspector rowOI;
List<String> columnNames;
List<Object> rows;
List<TypeInfo> columnTypes;
@Override
public void initialize(Configuration conf, Properties tblProps)
throws SerDeException {
String columnNameProperty = tblProps
.getProperty(Constants.LIST_COLUMNS);
columnNames = Arrays.asList(columnNameProperty.split(","));
String columnTypeProperty = tblProps
.getProperty(Constants.LIST_COLUMN_TYPES);
columnTypes = TypeInfoUtils
.getTypeInfosFromTypeString(columnTypeProperty);
numColumns = columnNames.size();
List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(
columnNames.size());
ObjectInspector oi;
for (int c = 0; c < numColumns; c++) {
oi = TypeInfoUtils
.getStandardJavaObjectInspectorFromTypeInfo(columnTypes
.get(c));
columnOIs.add(oi);
}
/*
* for (int c = 0; c < numColumns; c++) { rows.add(); }
*/
rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
columnNames, columnOIs);
}
@Override
public CustomDataFormat deserialize(Writable record) throws SerDeException {
Text text = (Text) record;
String[] valArray = text.toString().split(",");
System.out.println("----------------------------\n");
System.out.println("yo yo yo "+text.toString() + "\n");
System.out.println("----------------------------\n");
CustomDataFormat dataObject = new CustomDataFormat();
dataObject.setFname(valArray[0]);
dataObject.setLname(valArray[1]);
return dataObject;
}
@Override
public ObjectInspector getObjectInspector() throws SerDeException {
return rowOI;
}
@Override
public SerDeStats getSerDeStats() {
return null;
}
@Override
public Class<? extends Writable> getSerializedClass() {
return null;
}
@Override
public Writable serialize(Object arg0, ObjectInspector arg1)
throws SerDeException {
return null;
}
}
将保存数据的类
package com.datametica.serde;
import java.util.ArrayList;
import java.util.List;
public class CustomDataFormat {
String fname;
String lname;
/*List<LevelOneStruct> arrayOfLevelTwoStruct = new ArrayList<LevelOneStruct>();
public List<LevelOneStruct> getArrayOfLevelTwoStruct() {
return arrayOfLevelTwoStruct;
}
public void setArrayOfLevelTwoStruct(
List<LevelOneStruct> arrayOfLevelTwoStruct) {
this.arrayOfLevelTwoStruct = arrayOfLevelTwoStruct;
}*/
public String getFname() {
return fname;
}
public void setFname(String fname) {
this.fname = fname;
}
public String getLname() {
return lname;
}
public void setLname(String lname) {
this.lname = lname;
}
}
CustomDataFormat类的ObjectInspector
package com.datametica.serde;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
public class CustomStructObjectInspector extends StandardStructObjectInspector {
@Override
public Object getStructFieldData(Object data, StructField fieldRef) {
Object dataToReturn = new Object();
CustomDataFormat customSerde = (CustomDataFormat) data;
switch (fieldRef.getFieldName()) {
case "fname":
dataToReturn = customSerde.getFname();
break;
/*
* case "arrayOfLevelTwoStruct": dataToReturn =
* customSerde.getArrayOfLevelTwoStruct(); break;
*/
case "lname":
dataToReturn = customSerde.getLname();
break;
default:
dataToReturn = null;
}
return dataToReturn;
}
@Override
public List<Object> getStructFieldsDataAsList(Object data) {
List<Object> listOfData = new ArrayList<Object>();
CustomDataFormat customSerde = (CustomDataFormat) data;
listOfData.add(customSerde.getFname());
listOfData.add(customSerde.getLname());
return listOfData;
}
}
创建jar文件后,我正在创建Hive表,如下所示:
create table customserde (fname string,lname string) row format serde 'com.datametica.serde.CustomSerde';
将数据加载到表中,如下所示:
load data inpath '/user/dm3/tables_data/customserde' into table customserde;
到目前为止一切都很好,但是当我对表执行选择操作时
select * from customserde;
获取异常
Caused by: java.lang.ClassCastException: com.datametica.serde.CustomDataFormat cannot be cast to [Ljava.lang.Object;
非常感谢你的帮助,我在这一点上完全陷入了困境。
提前致谢。