My initial problem was to read and process a large single-line JSON file. With traditional approaches it took 3 hours to read and process the file, so I researched this area and found a way to read the JSON file chunk by chunk — now I can do the same thing in 5 minutes.
Below, I have broken the JSON, which had 200,000 elements, into 20 × 10,000 chunks.
/**
 * Streams the hard-coded file "MyHugeJSonFile.json" with the Jackson streaming
 * API instead of materializing the whole document, printing the "id" field of
 * every element of the top-level "employees" array in chunks.
 *
 * Chunks are flushed once CHUNK_SIZE ids are buffered, but a run of identical
 * consecutive ids is never split across two chunks (the role the
 * previousValue comparison played in the original code).
 *
 * Fixes over the previous version: the parser is always closed (finally), the
 * array-end token is consumed exactly once (the old nested read loop could
 * read past END_ARRAY and desynchronize the parser), no id is printed twice or
 * replaced by null at a chunk boundary, and the duplicated print loops are
 * collapsed into one helper.
 *
 * @throws JsonParseException if the file is not valid JSON
 * @throws IOException        if the file cannot be read
 */
public static void jsonFileReader() throws JsonParseException, IOException {
    final int CHUNK_SIZE = 100; // flush the buffered ids once this many are collected
    JsonFactory f = new MappingJsonFactory();
    JsonParser jp = f.createJsonParser(new File("MyHugeJSonFile.json"));
    try {
        if (jp.nextToken() != JsonToken.START_OBJECT) {
            System.out.println("Error: root should be object: quiting.");
            return;
        }
        int total = 0; // running count of all ids printed so far
        while (jp.nextToken() != JsonToken.END_OBJECT) {
            String fieldName = jp.getCurrentName();
            JsonToken current = jp.nextToken();
            // Constant-first equals: getCurrentName() can return null.
            if ("employees".equals(fieldName)) {
                if (current == JsonToken.START_ARRAY) {
                    List<String> chunk = new ArrayList<String>();
                    String previousValue = "";
                    while (jp.nextToken() != JsonToken.END_ARRAY) {
                        JsonNode node = jp.readValueAsTree();
                        String id = node.get("id").getTextValue();
                        // Flush a full chunk, but only at a boundary between
                        // different ids so equal consecutive ids stay together.
                        if (chunk.size() >= CHUNK_SIZE && !id.equals(previousValue)) {
                            total = printChunk(chunk, total);
                            chunk.clear();
                        }
                        chunk.add(id);
                        previousValue = id;
                    }
                    // Flush whatever remains after the array ends.
                    total = printChunk(chunk, total);
                } else {
                    System.out.println("Error: records should be an array: skipping.");
                    jp.skipChildren();
                }
            } else {
                System.out.println("Unprocessed property: " + fieldName);
                jp.skipChildren();
            }
        }
        System.out.println("Total Record size " + total);
    } finally {
        jp.close(); // release the underlying file handle even on error paths
    }
}

/**
 * Prints one chunk of ids, numbering each line with the overall record index
 * and the index within the chunk, followed by a separator line.
 *
 * @param chunk      the ids to print
 * @param totalSoFar the overall record count before this chunk
 * @return the updated overall record count
 */
private static int printChunk(List<String> chunk, int totalSoFar) {
    int j = 0;
    for (String id : chunk) {
        totalSoFar = totalSoFar + 1;
        j = j + 1;
        System.out.println(totalSoFar + " -- " + j + " --> " + id);
    }
    System.out.println("-------------------------------------------------------------");
    return totalSoFar;
}
No comments:
Post a Comment